+ support for widestring manager based widechar conversions

(widechar<->char, widechar<>*string), based on patch from
    Rimgaudas Laucius (mantis #7758)
  * no longer perform compile-time widechar/string->char/ansi/
    shortstring conversions if they would destroy information
    (they can't cope with widechars with ord>=128). This means
    that you can now properly constant widechars/widestrings
    in source code with a {$codepage } set without risking that
    the compiler will mangle everything afterwards
  * support ESysEINVAL return code from iconv (happens if last
    multibyte char is incomplete)
  * fixed writing of widechars (were converted to char -> lost
    information)

git-svn-id: trunk@8274 -
This commit is contained in:
Jonas Maebe 2007-08-12 20:01:08 +00:00
parent c408d9bb1d
commit 815cd2b39d
12 changed files with 341 additions and 44 deletions

2
.gitattributes vendored
View File

@ -8253,6 +8253,8 @@ tests/webtbs/tw7643.pp svneol=native#text/plain
tests/webtbs/tw7679.pp svneol=native#text/plain
tests/webtbs/tw7719.pp svneol=native#text/plain
tests/webtbs/tw7756.pp svneol=native#text/plain
tests/webtbs/tw7758.pp svneol=native#text/plain
tests/webtbs/tw7758a.pp svneol=native#text/plain
tests/webtbs/tw7803.pp svneol=native#text/plain
tests/webtbs/tw7806.pp svneol=native#text/plain
tests/webtbs/tw7808.pp svneol=native#text/plain

View File

@ -514,9 +514,7 @@ interface
procedure tcgtypeconvnode.second_char_to_char;
begin
{$warning todo: add RTL routine for widechar-char conversion }
{ Quick hack to at least generate 'working' code (PFV) }
second_int_to_int;
internalerror(2007081202);
end;

View File

@ -875,12 +875,16 @@ implementation
begin
result:=nil;
if left.nodetype=stringconstn then
begin
if (left.nodetype=stringconstn) and
((tstringdef(left.resultdef).stringtype<>st_widestring) or
(tstringdef(resultdef).stringtype=st_widestring) or
{ non-ascii chars would be replaced with '?' -> loses info }
not hasnonasciichars(pcompilerwidestring(tstringconstnode(left).value_str))) then
begin
tstringconstnode(left).changestringtype(resultdef);
result:=left;
left:=nil;
end
end
else
begin
{ get the correct procedure name }
@ -913,7 +917,13 @@ implementation
begin
result:=nil;
if left.nodetype=ordconstn then
{ we can't do widechar to ansichar conversions at compile time, since }
{ this maps all non-ascii chars to '?' -> loses information }
if (left.nodetype=ordconstn) and
((tstringdef(resultdef).stringtype=st_widestring) or
(torddef(left.resultdef).ordtype=uchar) or
{ >=128 is destroyed }
(tordconstnode(left).value.uvalue<128)) then
begin
if tstringdef(resultdef).stringtype=st_widestring then
begin
@ -927,22 +937,30 @@ implementation
end
else
begin
hp:=cstringconstnode.createstr(chr(tordconstnode(left).value.uvalue));
if torddef(left.resultdef).ordtype=uwidechar then
hp:=cstringconstnode.createstr(unicode2asciichar(tcompilerwidechar(tordconstnode(left).value.uvalue)))
else
hp:=cstringconstnode.createstr(chr(tordconstnode(left).value.uvalue));
tstringconstnode(hp).changestringtype(resultdef);
end;
result:=hp;
end
else
{ shortstrings are handled 'inline' }
if tstringdef(resultdef).stringtype <> st_shortstring then
{ shortstrings are handled 'inline' (except for widechars) }
if (tstringdef(resultdef).stringtype <> st_shortstring) or
(torddef(left.resultdef).ordtype = uwidechar) then
begin
{ create the parameter }
{ create the procname }
if torddef(left.resultdef).ordtype<>uwidechar then
procname := 'fpc_char_to_'
else
procname := 'fpc_wchar_to_';
procname:=procname+tstringdef(resultdef).stringtypname;
{ and the parameter }
para := ccallparanode.create(left,nil);
left := nil;
{ and the procname }
procname := 'fpc_char_to_' +tstringdef(resultdef).stringtypname;
{ and finally the call }
result := ccallnode.createinternres(procname,para,resultdef);
end
@ -987,7 +1005,11 @@ implementation
begin
result:=nil;
if left.nodetype=ordconstn then
if (left.nodetype=ordconstn) and
((torddef(resultdef).ordtype<>uchar) or
(torddef(left.resultdef).ordtype<>uwidechar) or
{ >= 128 is replaced by '?' currently -> loses information }
(tordconstnode(left).value.uvalue<128)) then
begin
if (torddef(resultdef).ordtype=uchar) and
(torddef(left.resultdef).ordtype=uwidechar) then
@ -2248,9 +2270,21 @@ implementation
function ttypeconvnode.first_char_to_char : tnode;
var
fname: string[18];
begin
first_char_to_char:=first_int_to_int;
if (torddef(resultdef).ordtype=uchar) and
(torddef(left.resultdef).ordtype=uwidechar) then
fname := 'fpc_wchar_to_char'
else if (torddef(resultdef).ordtype=uwidechar) and
(torddef(left.resultdef).ordtype=uchar) then
fname := 'fpc_char_to_wchar'
else
internalerror(2007081201);
result := ccallnode.createintern(fname,ccallparanode.create(left,nil));
left:=nil;
firstpass(result);
end;

View File

@ -55,6 +55,7 @@ unit widestr;
function unicode2asciichar(c : tcompilerwidechar) : char;
procedure ascii2unicode(p : pchar;l : SizeInt;r : pcompilerwidestring);
procedure unicode2ascii(r : pcompilerwidestring;p : pchar);
function hasnonasciichars(const p: pcompilerwidestring): boolean;
function getcharwidestring(r : pcompilerwidestring;l : SizeInt) : tcompilerwidechar;
function cpavailable(const s : string) : boolean;
@ -166,10 +167,11 @@ unit widestr;
end;
function unicode2asciichar(c : tcompilerwidechar) : char;
begin
{$warning TODO unicode2asciichar}
unicode2asciichar:=#0;
if word(c)<128 then
unicode2asciichar:=char(word(c))
else
unicode2asciichar:='?';
end;
procedure ascii2unicode(p : pchar;l : SizeInt;r : pcompilerwidestring);
@ -242,6 +244,23 @@ unit widestr;
end;
function hasnonasciichars(const p: pcompilerwidestring): boolean;
var
source : tcompilerwidecharptr;
i : longint;
begin
source:=tcompilerwidecharptr(p^.data);
result:=true;
for i:=1 to p^.len do
begin
if word(source^)>=128 then
exit;
inc(source);
end;
result:=false;
end;
function cpavailable(const s : string) : boolean;
begin
cpavailable:=mappingavailable(s);

View File

@ -187,6 +187,7 @@ Procedure fpc_AnsiStr_ShortStr_Concat (Var S1: AnsiString; Var S2 : ShortString)
function fpc_AnsiStr_To_ShortStr (high_of_res: SizeInt;const S2 : Ansistring): shortstring; compilerproc;
Function fpc_ShortStr_To_AnsiStr (Const S2 : ShortString): ansistring; compilerproc;
Function fpc_Char_To_AnsiStr(const c : Char): AnsiString; compilerproc;
Function fpc_PChar_To_AnsiStr(const p : pchar): ansistring; compilerproc;
Function fpc_CharArray_To_AnsiStr(const arr: array of char; zerobased: boolean = true): ansistring; compilerproc;
{$ifndef FPC_STRTOCHARARRAYPROC}
@ -224,7 +225,7 @@ function fpc_WideStr_Concat_multi (const sarr:array of Widestring): widestring;
Procedure fpc_WideStr_Concat (Var DestS : Widestring;const S1,S2 : WideString); compilerproc;
Procedure fpc_WideStr_Concat_multi (Var DestS : Widestring;const sarr:array of Widestring); compilerproc;
{$endif STR_CONCAT_PROCS}
Function fpc_Char_To_WideStr(const c : WideChar): WideString; compilerproc;
Function fpc_Char_To_WideStr(const c : Char): WideString; compilerproc;
Function fpc_PChar_To_WideStr(const p : pchar): WideString; compilerproc;
Function fpc_CharArray_To_WideStr(const arr: array of char; zerobased: boolean = true): WideString; compilerproc;
{$ifndef FPC_STRTOCHARARRAYPROC}
@ -250,6 +251,11 @@ Function fpc_widestr_Copy (Const S : WideString; Index,Size : SizeInt) : WideSt
{$ifndef FPC_WINLIKEWIDESTRING}
function fpc_widestr_Unique(Var S : Pointer): Pointer; compilerproc;
{$endif FPC_WINLIKEWIDESTRING}
Function fpc_Char_To_WChar(const c : Char): WideChar; compilerproc;
Function fpc_WChar_To_Char(const c : WideChar): Char; compilerproc;
Function fpc_WChar_To_ShortStr(const c : WideChar): ShortString; compilerproc;
Function fpc_WChar_To_AnsiStr(const c : WideChar): AnsiString; compilerproc;
Function fpc_WChar_To_WideStr(const c : WideChar): WideString; compilerproc;
{$endif FPC_HAS_FEATURE_WIDESTRINGS}
{$ifdef FPC_HAS_FEATURE_WIDESTRINGS}

View File

@ -832,7 +832,7 @@ End;
Procedure fpc_Write_Text_WideChar(Len : Longint;var t : Text;c : WideChar); iocheck; [Public,Alias:'FPC_WRITE_TEXT_WIDECHAR']; compilerproc;
var
ch : char;
a : ansistring;
Begin
If (InOutRes<>0) then
exit;
@ -848,9 +848,9 @@ Begin
fpc_WriteBlanks(t,Len-1);
If TextRec(t).BufPos>=TextRec(t).BufSize Then
FileFunc(TextRec(t).InOutFunc)(TextRec(t));
ch:=c;
TextRec(t).Bufptr^[TextRec(t).BufPos]:=ch;
Inc(TextRec(t).BufPos);
{ a widechar can be translated into more than a single ansichar }
a:=c;
fpc_WriteBuffer(t,pchar(a)^,length(a));
End;

View File

@ -592,15 +592,21 @@ end;
{$endif STR_CONCAT_PROCS}
Function fpc_Char_To_WChar(const c : Char): WideChar; compilerproc;
var
w: widestring;
begin
widestringmanager.Ansi2WideMoveProc(@c, w, 1);
fpc_Char_To_WChar:= w[1];
end;
Function fpc_Char_To_WideStr(const c : WideChar): WideString; compilerproc;
Function fpc_Char_To_WideStr(const c : Char): WideString; compilerproc;
{
Converts a Char to a WideString;
}
begin
if c = #0 then
{ result is automatically set to '' }
exit;
Setlength(fpc_Char_To_WideStr,1);
fpc_Char_To_WideStr[1]:=c;
{ Terminating Zero }
@ -608,6 +614,52 @@ begin
end;
Function fpc_WChar_To_Char(const c : WideChar): Char; compilerproc;
{
Converts a WideChar to a Char;
}
var
s: ansistring;
begin
widestringmanager.Wide2AnsiMoveProc(@c, s, 1);
if length(s)=1 then
fpc_WChar_To_Char:= s[1]
else
fpc_WChar_To_Char:='?';
end;
Function fpc_WChar_To_WideStr(const c : WideChar): WideString; compilerproc;
{
Converts a WideChar to a WideString;
}
begin
Setlength (fpc_WChar_To_WideStr,1);
fpc_WChar_To_WideStr[1]:= c;
end;
Function fpc_WChar_To_AnsiStr(const c : WideChar): AnsiString; compilerproc;
{
Converts a WideChar to a AnsiString;
}
begin
widestringmanager.Wide2AnsiMoveProc(@c, fpc_WChar_To_AnsiStr, 1);
end;
Function fpc_WChar_To_ShortStr(const c : WideChar): ShortString; compilerproc;
{
Converts a WideChar to a ShortString;
}
var
s: ansistring;
begin
widestringmanager.Wide2AnsiMoveProc(@c, s, 1);
fpc_WChar_To_ShortStr:= s;
end;
Function fpc_PChar_To_WideStr(const p : pchar): WideString; compilerproc;
Var
L : SizeInt;

View File

@ -122,6 +122,7 @@ procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
destpos: pchar;
mynil : pchar;
my0 : size_t;
err: cint;
begin
mynil:=nil;
my0:=0;
@ -134,7 +135,11 @@ procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
outleft:=outlength;
while iconv(iconv_wide2ansi,ppchar(@srcpos),@srclen,@destpos,@outleft)=size_t(-1) do
begin
case fpgetCerrno of
err:=fpgetCerrno;
case err of
{ last character is incomplete sequence }
ESysEINVAL,
{ incomplete sequence in the middle }
ESysEILSEQ:
begin
{ skip and set to '?' }
@ -145,6 +150,8 @@ procedure Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
dec(outleft);
{ reset }
iconv(iconv_wide2ansi,@mynil,@my0,@mynil,@my0);
if err=ESysEINVAL then
break;
end;
ESysE2BIG:
begin
@ -174,19 +181,21 @@ procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
destpos: pchar;
mynil : pchar;
my0 : size_t;
err: cint;
begin
mynil:=nil;
my0:=0;
// extra space
outlength:=len+1;
setlength(dest,outlength);
outlength:=len+1;
srcpos:=source;
destpos:=pchar(dest);
outleft:=outlength*2;
while iconv(iconv_ansi2wide,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
begin
case fpgetCerrno of
err:=fpgetCerrno;
case err of
ESysEINVAL,
ESysEILSEQ:
begin
{ skip and set to '?' }
@ -197,6 +206,8 @@ procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
dec(outleft,2);
{ reset }
iconv(iconv_ansi2wide,@mynil,@my0,@mynil,@my0);
if err=ESysEINVAL then
break;
end;
ESysE2BIG:
begin

View File

@ -926,9 +926,11 @@ procedure Win32Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
destlen: SizeInt;
begin
// retrieve length including trailing #0
destlen:=WideCharToMultiByte(CP_ACP, 0, source, len+1, nil, 0, nil, nil);
setlength(dest, destlen-1);
WideCharToMultiByte(CP_ACP, 0, source, len+1, @dest[1], destlen, nil, nil);
// not anymore, because this must also be usable for single characters
destlen:=WideCharToMultiByte(CP_ACP, 0, source, len, nil, 0, nil, nil);
// this will null-terminate
setlength(dest, destlen);
WideCharToMultiByte(CP_ACP, 0, source, len, @dest[1], destlen, nil, nil);
end;
procedure Win32Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
@ -936,9 +938,11 @@ procedure Win32Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
destlen: SizeInt;
begin
// retrieve length including trailing #0
destlen:=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len+1, nil, 0);
setlength(dest, destlen-1);
MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len+1, @dest[1], destlen);
// not anymore, because this must also be usable for single characters
destlen:=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len, nil, 0);
// this will null-terminate
setlength(dest, destlen);
MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len, @dest[1], destlen);
end;

View File

@ -968,9 +968,11 @@ procedure Win32Wide2AnsiMove(source:pwidechar;var dest:ansistring;len:SizeInt);
destlen: SizeInt;
begin
// retrieve length including trailing #0
destlen:=WideCharToMultiByte(CP_ACP, 0, source, len+1, nil, 0, nil, nil);
setlength(dest, destlen-1);
WideCharToMultiByte(CP_ACP, 0, source, len+1, @dest[1], destlen, nil, nil);
// not anymore, because this must also be usable for single characters
destlen:=WideCharToMultiByte(CP_ACP, 0, source, len, nil, 0, nil, nil);
// this will null-terminate
setlength(dest, destlen);
WideCharToMultiByte(CP_ACP, 0, source, len, @dest[1], destlen, nil, nil);
end;
procedure Win32Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
@ -978,9 +980,11 @@ procedure Win32Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
destlen: SizeInt;
begin
// retrieve length including trailing #0
destlen:=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len+1, nil, 0);
setlength(dest, destlen-1);
MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len+1, @dest[1], destlen);
// not anymore, because this must also be usable for single characters
destlen:=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len, nil, 0);
// this will null-terminate
setlength(dest, destlen);
MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, source, len, @dest[1], destlen);
end;

148
tests/webtbs/tw7758.pp Normal file
View File

@ -0,0 +1,148 @@
{$codepage utf8}
uses
{$ifdef unix}
cwstring,
{$endif}
sysutils;
const
cwc=widechar('a');
c2=widechar('é');
c3=widestring('é');
var
c: char;
wc,wc2: widechar;
s,s2,a: ansistring;
w: widestring;
ss: shortstring;
begin
c:=#0;
w:=c;
if (length(w)<>1) or
(w[1]<>#0) then
halt(1);
s:='é';
w:=s;
wc:=w[1];
s2:=wc;
if (w <> s2) or
(s <> s2) then
halt(2);
c:=#0;
wc:=c;
c:=wc;
if (c<>#0) or
(wc<>#0) then
halt(5);
ss:=wc;
wc:=ss[1];
if (length(ss)<>1) or
(ss[1]<>#0) or
(wc<>#0) then
halt(6);
a:=wc;
wc:=a[1];
if (length(a)<>1) or
(a[1]<>#0) or
(wc<>#0) then
halt(7);
c:='a';
wc:=c;
c:=wc;
if (c<>'a') or
(wc<>'a') then
halt(8);
ss:=wc;
wc:=ss[1];
if (length(ss)<>1) or
(ss[1]<>'a') or
(wc<>'a') then
halt(9);
a:=wc;
wc:=a[1];
if (length(a)<>1) or
(a[1]<>'a') or
(wc<>'a') then
halt(10);
wc2:=cwc;
if (wc2<>'a') or
(wc2<>cwc) then
halt(3);
ss:=cwc;
if (length(ss)<>1) or
(ss[1] <> 'a') then
halt(4);
c:=cwc;
if (c<>'a') or
(c<>cwc) then
halt(13);
w:=cwc;
if (length(w)<>1) or
(w[1] <> 'a') then
halt(11);
s:=cwc;
if (length(s)<>1) or
(s[1] <> 'a') then
halt(12);
wc:=c2;
c:=c2;
wc2:=c;
if ((c<>c2) and
(c<>'?')) or
(wc<>c2) or
((wc2<>c2) and
(wc2<>'?')) then
halt(14);
ss:=c2;
w:=ss;
wc:=w[1];
if (length(w)<>1) or
(w[1]<>c2) or
(wc<>c2) then
halt(15);
a:=c2;
w:=a;
wc:=w[1];
if (length(w)<>1) or
(w[1]<>c2) or
(wc<>c2) then
halt(16);
ss:=c3;
w:=ss;
wc:=w[1];
if (length(w)<>1) or
(wc <> c2) then
halt(17);
c:=c3[1];
if ((c<>c2) and
(c<>'?')) then
halt(18);
w:=c3;
if (length(w)<>1) or
(w[1] <> c2) then
halt(19);
s:=c3;
w:=s;
if (length(w)<>1) or
(w[1] <> c2) then
halt(20);
ss:=c3;
w:=ss;
if (length(w)<>1) or
(w[1] <> c2) then
halt(21);
wc:=c2;
writestr(s,wc);
w:=s;
if (length(w)<>1) or
(w[1]<>c2) then
halt(22);
end.

19
tests/webtbs/tw7758a.pp Normal file
View File

@ -0,0 +1,19 @@
{ %norun }
uses
{$ifdef unix}
cwstring,
{$endif}
sysutils;
{ just to make sure that no all wide->shortstring compile time conversions }
{ fail, but only those resulting in data loss }
const
cw = widestring('abc');
de = 'a'+shortstring(cw);
wc = widechar('a');
df = shortstring(wc)+'abcd';
dg = char(wc)+'abcd';
begin
end.