diff --git a/rtl/inc/compproc.inc b/rtl/inc/compproc.inc index eb93792a1a..5ece40b09d 100644 --- a/rtl/inc/compproc.inc +++ b/rtl/inc/compproc.inc @@ -471,7 +471,7 @@ Procedure fpc_Write_Text_AnsiStr (Len : Longint; Var f : Text; const S : AnsiStr Procedure fpc_Write_Text_WideStr (Len : Longint; Var f : Text; const S : WideString); compilerproc; {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING} {$ifdef FPC_HAS_FEATURE_WIDESTRINGS} -Procedure fpc_Write_Text_UnicodeStr (Len : Longint; Var f : Text; const S : WideString); compilerproc; +Procedure fpc_Write_Text_UnicodeStr (Len : Longint; Var f : Text; const S : UnicodeString); compilerproc; {$endif FPC_HAS_FEATURE_WIDESTRINGS} Procedure fpc_Write_Text_SInt(Len : Longint;var t : Text;l : ValSInt); compilerproc; Procedure fpc_Write_Text_UInt(Len : Longint;var t : Text;l : ValUInt); compilerproc; @@ -504,16 +504,22 @@ function fpc_SetupWriteStr_Shortstr(out s: shortstring): PText; compilerproc; function fpc_SetupWriteStr_Ansistr(out s: ansistring): PText; compilerproc; {$endif FPC_HAS_FEATURE_ANSISTRINGS} {$ifdef FPC_HAS_FEATURE_WIDESTRINGS} -function fpc_SetupWriteStr_Widestr(out s: widestring): PText; compilerproc; +function fpc_SetupWriteStr_Unicodestr(out s: unicodestring): PText; compilerproc; {$endif FPC_HAS_FEATURE_WIDESTRINGS} +{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING} +function fpc_SetupWriteStr_Widestr(out s: widestring): PText; compilerproc; +{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING} function fpc_SetupReadStr_Shortstr(const s: shortstring): PText; compilerproc; {$ifdef FPC_HAS_FEATURE_ANSISTRINGS} function fpc_SetupReadStr_Ansistr(const s: ansistring): PText; compilerproc; {$endif FPC_HAS_FEATURE_ANSISTRINGS} {$ifdef FPC_HAS_FEATURE_WIDESTRINGS} -function fpc_SetupReadStr_Widestr(const s: widestring): PText; compilerproc; +function fpc_SetupReadStr_Unicodestr(const s: unicodestring): PText; compilerproc; {$endif FPC_HAS_FEATURE_WIDESTRINGS} +{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING} +function fpc_SetupReadStr_Widestr(const s: widestring): PText; compilerproc; +{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING} {$endif FPC_HAS_FEATURE_TEXTIO} {$ifdef FPC_HAS_FEATURE_VARIANTS} @@ -541,7 +547,16 @@ Procedure fpc_Read_Text_PChar_As_Array(var f : Text;out s : array of char; zerob {$ifdef FPC_HAS_FEATURE_ANSISTRINGS} Procedure fpc_Read_Text_AnsiStr(var f : Text;out s : AnsiString); compilerproc; {$endif FPC_HAS_FEATURE_ANSISTRINGS} +{$ifdef FPC_HAS_FEATURE_WIDESTRINGS} +Procedure fpc_Read_Text_UnicodeStr(var f : Text;out us : UnicodeString); compilerproc; +{$endif FPC_HAS_FEATURE_WIDESTRINGS} +{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING} +Procedure fpc_Read_Text_WideStr(var f : Text;out ws : WideString); compilerproc; +{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING} Procedure fpc_Read_Text_Char(var f : Text; out c : char); compilerproc; +{$ifdef FPC_HAS_FEATURE_WIDESTRINGS} +procedure fpc_Read_Text_WideChar(var f : Text; out wc: widechar); compilerproc; +{$endif FPC_HAS_FEATURE_WIDESTRINGS} Procedure fpc_Read_Text_Char_Iso(var f : Text; out c : char); compilerproc; Procedure fpc_Read_Text_SInt(var f : Text; out l :ValSInt); compilerproc; Procedure fpc_Read_Text_UInt(var f : Text; out u :ValUInt); compilerproc; diff --git a/rtl/inc/text.inc b/rtl/inc/text.inc index 68c142196e..7f60669959 100644 --- a/rtl/inc/text.inc +++ b/rtl/inc/text.inc @@ -689,8 +689,8 @@ begin end; -{$ifdef FPC_HAS_FEATURE_ANSISTRINGS} -Procedure fpc_Write_Text_UnicodeStr (Len : Longint; Var f : Text; const S : WideString); iocheck; compilerproc; +{$ifdef FPC_HAS_FEATURE_WIDESTRINGS} +Procedure fpc_Write_Text_UnicodeStr (Len : Longint; Var f : Text; const S : UnicodeString); iocheck; compilerproc; { Writes a UnicodeString to the Text file T } @@ -714,7 +714,7 @@ begin else InOutRes:=103; end; end; -{$endif FPC_HAS_FEATURE_ANSISTRINGS} +{$endif FPC_HAS_FEATURE_WIDESTRINGS} {$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING} @@ -1288,7 +1288,7 @@ End; {$ifdef FPC_HAS_FEATURE_ANSISTRINGS} -Procedure fpc_Read_Text_AnsiStr(var f : Text;out s : AnsiString); iocheck; compilerproc; +Procedure fpc_Read_Text_AnsiStr(var f : Text;out s : AnsiString); [public, alias: 'FPC_READ_TEXT_ANSISTR']; iocheck; compilerproc; var slen,len : SizeInt; Begin @@ -1302,10 +1302,36 @@ Begin // Set actual length SetLength(S,Slen); End; + +Procedure fpc_Read_Text_AnsiStr_Intern(var f : Text;out s : AnsiString); [external name 'FPC_READ_TEXT_ANSISTR']; {$endif FPC_HAS_FEATURE_ANSISTRINGS} -procedure fpc_Read_Text_Char(var f : Text; out c: char); iocheck;compilerproc; +{$ifdef FPC_HAS_FEATURE_WIDESTRINGS} +Procedure fpc_Read_Text_UnicodeStr(var f : Text;out us : UnicodeString); iocheck; compilerproc; +var + s: AnsiString; +Begin + // all standard input is assumed to be ansi-encoded + fpc_Read_Text_AnsiStr_Intern(f,s); + // Convert to unicodestring + us:=s; +End; +{$endif FPC_HAS_FEATURE_WIDESTRINGS} + +{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING} +Procedure fpc_Read_Text_WideStr(var f : Text;out ws : WideString); iocheck; compilerproc; +var + s: AnsiString; +Begin + // all standard input is assumed to be ansi-encoded + fpc_Read_Text_AnsiStr_Intern(f,s); + // Convert to widestring + ws:=s; +End; +{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING} + +procedure fpc_Read_Text_Char(var f : Text; out c: char); [public, alias: 'FPC_READ_TEXT_CHAR']; iocheck;compilerproc; Begin c:=#0; If not CheckRead(f) then @@ -1319,6 +1345,49 @@ Begin inc(TextRec(f).BufPos); end; +procedure fpc_Read_Text_Char_intern(var f : Text; out c: char); iocheck; [external name 'FPC_READ_TEXT_CHAR']; + + +{$ifdef FPC_HAS_FEATURE_WIDESTRINGS} +procedure fpc_Read_Text_WideChar(var f : Text; out wc: widechar); iocheck;compilerproc; +var + ws: widestring; + i: longint; + { maximum code point length is 6 characters (with UTF-8) } + str: array[0..5] of char; +Begin + fillchar(str[0],sizeof(str),0); + for i:=low(str) to high(str) do + begin + fpc_Read_Text_Char_intern(f,str[i]); + case widestringmanager.CodePointLengthProc(@str[0],i+1) of + -1: { possibly incomplete code point, try with an extra character } + ; + 0: { null character } + begin + wc:=#0; + exit; + end; + else + begin + { valid code point -> convert to widestring} + widestringmanager.Ansi2WideMoveProc(@str[0],ws,i+1); + { has to be exactly one widechar } + if length(ws)=1 then + begin + wc:=ws[1]; + exit + end + else + break; + end; + end; + end; + { invalid widechar input } + inoutres:=106; +end; +{$endif FPC_HAS_FEATURE_WIDESTRINGS} + procedure fpc_Read_Text_Char_Iso(var f : Text; out c: char); iocheck;compilerproc; Begin @@ -1604,6 +1673,22 @@ end; {$ifdef FPC_HAS_FEATURE_WIDESTRINGS} +procedure WriteStrUnicode(var t: textrec); +var + temp: ansistring; + str: punicodestring; +begin + if (t.bufpos=0) then + exit; + str:=punicodestring(ppointer(@t.userdata[StrPtrIndex])^); + setlength(temp,t.bufpos); + move(t.bufptr^,temp[1],t.bufpos); + str^:=str^+temp; + t.bufpos:=0; +end; +{$endif FPC_HAS_FEATURE_WIDESTRINGS} + +{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING} procedure WriteStrWide(var t: textrec); var temp: ansistring; @@ -1617,8 +1702,7 @@ begin str^:=str^+temp; t.bufpos:=0; end; -{$endif FPC_HAS_FEATURE_WIDESTRINGS} - +{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING} procedure SetupWriteStrCommon(out t: textrec); begin @@ -1657,6 +1741,20 @@ end; {$ifdef FPC_HAS_FEATURE_WIDESTRINGS} +function fpc_SetupWriteStr_Unicodestr(out s: unicodestring): PText; compilerproc; +begin + setupwritestrcommon(ReadWriteStrText); + PPointer(@ReadWriteStrText.userdata[StrPtrIndex])^:=@s; +// automatically done by out-semantics +// setlength(s,0); + ReadWriteStrText.InOutFunc:=@WriteStrUnicode; + ReadWriteStrText.FlushFunc:=@WriteStrUnicode; + result:=@ReadWriteStrText; +end; +{$endif FPC_HAS_FEATURE_WIDESTRINGS} + + +{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING} function fpc_SetupWriteStr_Widestr(out s: widestring): PText; compilerproc; begin setupwritestrcommon(ReadWriteStrText); @@ -1667,7 +1765,7 @@ begin ReadWriteStrText.FlushFunc:=@WriteStrWide; result:=@ReadWriteStrText; end; -{$endif FPC_HAS_FEATURE_WIDESTRINGS} +{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING} procedure ReadAnsiStrFinal(var t: textrec); @@ -1763,7 +1861,7 @@ end; {$ifdef FPC_HAS_FEATURE_WIDESTRINGS} -function fpc_SetupReadStr_Widestr(const s: widestring): PText; compilerproc; +function fpc_SetupReadStr_Unicodestr(const s: unicodestring): PText; compilerproc; begin { we use an ansistring to avoid code duplication, and let the } { assignment convert the widestring to an equivalent ansistring } @@ -1772,6 +1870,16 @@ end; {$endif FPC_HAS_FEATURE_WIDESTRINGS} +{$ifndef FPC_WIDESTRING_EQUAL_UNICODESTRING} +function fpc_SetupReadStr_Widestr(const s: widestring): PText; compilerproc; +begin + { we use an ansistring to avoid code duplication, and let the } + { assignment convert the widestring to an equivalent ansistring } + result:=fpc_SetupReadStr_Ansistr_Intern(s); +end; +{$endif FPC_WIDESTRING_EQUAL_UNICODESTRING} + + {***************************************************************************** Initializing *****************************************************************************} diff --git a/rtl/inc/ustringh.inc b/rtl/inc/ustringh.inc index 8583400c1f..9b684b863c 100644 --- a/rtl/inc/ustringh.inc +++ b/rtl/inc/ustringh.inc @@ -67,7 +67,16 @@ Type } CompareWideStringProc : function(const s1, s2 : WideString) : PtrInt; CompareTextWideStringProc : function(const s1, s2 : WideString): PtrInt; + { return value: number of code points in the string. Whenever an invalid + code point is encountered, all characters part of this invalid code point + are considered to form one "character" and the next character is + considered to be the start of a new (possibly also invalid) code point } CharLengthPCharProc : function(const Str: PChar): PtrInt; + { return value: + -1 if incomplete or invalid code point + 0 if NULL character, + > 0 if that's the length in bytes of the code point } + CodePointLengthProc : function(const Str: PChar; MaxLookAead: PtrInt): Ptrint; UpperAnsiStringProc : function(const s : ansistring) : ansistring; LowerAnsiStringProc : function(const s : ansistring) : ansistring; diff --git a/rtl/inc/ustrings.inc b/rtl/inc/ustrings.inc index 2ac9d174c5..b9a71dfe3a 100644 --- a/rtl/inc/ustrings.inc +++ b/rtl/inc/ustrings.inc @@ -88,6 +88,21 @@ begin end; +function DefaultCharLengthPChar(const Str: PChar): PtrInt; + begin + DefaultCharLengthPChar:=length(Str); + end; + + +function DefaultCodePointLength(const Str: PChar; MaxLookAead: PtrInt): Ptrint; + begin + if str[0]<>#0 then + DefaultCodePointLength:=1 + else + DefaultCodePointLength:=0; + end; + + Procedure GetUnicodeStringManager (Var Manager : TUnicodeStringManager); begin manager:=widestringmanager; @@ -2506,13 +2521,6 @@ function CompareTextUnicodeString(const s1, s2 : UnicodeString): PtrInt; begin unimplementedunicodestring; end; - - -function CharLengthPChar(const Str: PChar): PtrInt; - begin - unimplementedunicodestring; - end; - {$warnings on} procedure initunicodestringmanager; @@ -2535,7 +2543,8 @@ procedure initunicodestringmanager; {$endif HAS_WIDESTRINGMANAGER} widestringmanager.CompareWideStringProc:=@CompareUnicodeString; widestringmanager.CompareTextWideStringProc:=@CompareTextUnicodeString; - widestringmanager.CharLengthPCharProc:=@CharLengthPChar; + widestringmanager.CharLengthPCharProc:=@DefaultCharLengthPChar; + widestringmanager.CodePointLengthProc:=@DefaultCodePointLength; {$endif FPC_WIDESTRING_EQUAL_UNICODESTRING} end; diff --git a/rtl/win/sysutils.pp b/rtl/win/sysutils.pp index 30a6a52438..8a43066191 100644 --- a/rtl/win/sysutils.pp +++ b/rtl/win/sysutils.pp @@ -1357,7 +1357,16 @@ function Win32CompareTextUnicodeString(const s1, s2 : UnicodeString) : PtrInt; are relevant already for the system unit } procedure InitWin32Widestrings; begin + { return value: number of code points in the string. Whenever an invalid + code point is encountered, all characters part of this invalid code point + are considered to form one "character" and the next character is + considered to be the start of a new (possibly also invalid) code point } //!!! CharLengthPCharProc : function(const Str: PChar): PtrInt; + { return value: + -1 if incomplete or invalid code point + 0 if NULL character, + > 0 if that's the length in bytes of the code point } +//!!!! CodePointLengthProc : function(const Str: PChar; MaxLookAead: PtrInt): Ptrint; widestringmanager.CompareWideStringProc:=@Win32CompareWideString; widestringmanager.CompareTextWideStringProc:=@Win32CompareTextWideString; widestringmanager.UpperAnsiStringProc:=@Win32AnsiUpperCase; diff --git a/tests/test/twide1.pp b/tests/test/twide1.pp index 5e4bc968d6..9300505917 100644 --- a/tests/test/twide1.pp +++ b/tests/test/twide1.pp @@ -5,6 +5,7 @@ uses var w : widestring; + u : unicodestring; a : ansistring; begin @@ -14,6 +15,15 @@ begin halt(1); a:=w; if a[1]<>'A' then - halt(1); + halt(2); + writeln('ok'); + + a:='A'; + u:=a; + if u[1]<>#65 then + halt(3); + a:=u; + if a[1]<>'A' then + halt(4); writeln('ok'); end. diff --git a/tests/test/twide2.pp b/tests/test/twide2.pp index 19b1f9dabe..36379e43e0 100644 --- a/tests/test/twide2.pp +++ b/tests/test/twide2.pp @@ -6,6 +6,7 @@ uses var i : longint; w,w2 : widestring; + u,u2 : unicodestring; a : ansistring; begin @@ -17,4 +18,12 @@ begin a:=w; w2:=a; end; + setlength(u,1000); + for i:=1 to 1000 do + u[i]:=widechar(i); + for i:=1 to 10 do + begin + a:=u; + u2:=a; + end; end. diff --git a/tests/test/twide3.pp b/tests/test/twide3.pp index 940669df9c..f839d46c28 100644 --- a/tests/test/twide3.pp +++ b/tests/test/twide3.pp @@ -5,32 +5,76 @@ {$codepage utf-8} {$mode objfpc} + uses {$ifdef unix} cwstring, {$endif} - sysutils; + SysUtils; {$i+} var t: text; w: widestring; + u: unicodestring; a: ansistring; + wc: widechar; begin assign(t,'twide3.txt'); rewrite(t); writeln(t,'łóżka'); close(t); + reset(t); + + try + read(t,wc); + if wc<>'ł' then + raise Exception.create('wrong widechar read: '+inttostr(ord(wc))+'<>'+inttostr(ord('ł'))); + except + close(t); +// erase(t); + raise; + end; + reset(t); try readln(t,a); w:=a; if (w<>'łóżka') then - raise Exception.create('wrong string read'); + raise Exception.create('wrong ansistring read'); + except + close(t); + erase(t); + raise; + end; + + reset(t); + try + readln(t,w); + if (w<>'łóżka') then + raise Exception.create('wrong widestring read'); + except + close(t); + erase(t); + raise; + end; + + reset(t); + try + readln(t,u); + if (u<>'łóżka') then + raise Exception.create('wrong unicodestring read'); finally close(t); erase(t); end; + + readstr(u,a); + if u<>a then + raise Exception.create('wrong readstr(u,a)'); + readstr(w,a); + if w<>u then + raise Exception.create('wrong readstr(w,a)'); end. diff --git a/tests/test/twide5.pp b/tests/test/twide5.pp index d656758601..b1b12d94c7 100644 --- a/tests/test/twide5.pp +++ b/tests/test/twide5.pp @@ -2,6 +2,7 @@ var ws: widestring; + uns: unicodestring; us: UCS4String; begin // the compiler does not yet support characters which require @@ -42,4 +43,15 @@ begin (ws[7]<>#$d87e) or (ws[8]<>#$dc04) then halt(3); + uns:='鳣ćçŹ'#$d87e#$dc04; + if (length(uns)<>8) or + (uns[1]<>'é') or + (uns[2]<>'ł') or + (uns[3]<>'Ł') or + (uns[4]<>'ć') or + (uns[5]<>'ç') or + (uns[6]<>'Ź') or + (uns[7]<>#$d87e) or + (uns[8]<>#$dc04) then + halt(4); end. diff --git a/tests/test/twide6.pp b/tests/test/twide6.pp index 3dee480de0..9712f2efcd 100644 --- a/tests/test/twide6.pp +++ b/tests/test/twide6.pp @@ -13,11 +13,12 @@ procedure doerror(i : integer); end; -{ normal upper case testing } +{ normal upper case testing (widestring) } procedure testupper; var s: ansistring; w1,w2,w3,w4: widestring; + u1,u2,u3,u4: unicodestring; i: longint; begin w1:='aé'#0'èàł'#$d87e#$dc04; @@ -72,11 +73,74 @@ begin doerror(21); if (w4 <> w2) then doerror(22); - end; -{ normal lower case testing } +{ normal upper case testing (unicodestring) } +procedure testupperu; +var + s: ansistring; + w1,w2,w3,w4: widestring; + u1,u2,u3,u4: unicodestring; + i: longint; +begin + w1:='aé'#0'èàł'#$d87e#$dc04; + w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04; +{$ifdef print} +// the utf-8 output can confuse the testsuite parser + writeln('original: ',w1); + writeln('original upper: ',w2); +{$endif print} + s:=w1; +{$ifdef print} + writeln('ansi: ',s); +{$endif print} + w3:=s; + w4:=AnsiUpperCase(s); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=unicodeuppercase(w1); +{$ifdef print} + writeln('wideupper: ',w1); + writeln('original upper: ',w2); + writeln('ansiupper: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(1); + if (w4 <> w2) then + doerror(2); + + w1:='aéèàł'#$d87e#$dc04; + w2:='AÉÈÀŁ'#$d87e#$dc04; + s:=w1; + w3:=s; + w4:=AnsiStrUpper(pchar(s)); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=unicodeuppercase(w1); +{$ifdef print} + writeln('unicodeupper: ',w1); + writeln('ansistrupper: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(21); + if (w4 <> w2) then + doerror(22); +end; + + + +{ normal lower case testing (widestring) } procedure testlower; var s: ansistring; @@ -135,6 +199,63 @@ begin end; +{ normal lower case testing (unicodestring) } +procedure testloweru; +var + s: ansistring; + w1,w2,w3,w4: unicodestring; + i: longint; +begin + w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04; + w2:='aé'#0'èàł'#$d87e#$dc04; +{$ifdef print} +// the utf-8 output can confuse the testsuite parser + writeln('original: ',w1); + writeln('original lower: ',w2); +{$endif print} + s:=w1; + w3:=s; + w4:=AnsiLowerCase(s); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=unicodelowercase(w1); +{$ifdef print} + writeln('unicodelower: ',w1); + writeln('ansilower: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(3); + if (w4 <> w2) then + doerror(4); + + + w1:='AÉÈÀŁ'#$d87e#$dc04; + w2:='aéèàł'#$d87e#$dc04; + s:=w1; + w3:=s; + w4:=AnsiStrLower(pchar(s)); + { filter out unsupported characters } + for i:=1 to length(w3) do + if w3[i]='?' then + begin + w2[i]:='?'; + w1[i]:='?'; + end; + w1:=unicodelowercase(w1); +{$ifdef print} + writeln('unicodelower: ',w1); + writeln('ansistrlower: ',w4); +{$endif print} + if (w1 <> w2) then + doerror(3); + if (w4 <> w2) then + doerror(4); +end; { upper case testing with a missing utf-16 pair at the end } procedure testupperinvalid; @@ -377,8 +498,12 @@ end; begin testupper; writeln; + testupperu; + writeln; testlower; writeln; + testloweru; + writeln; writeln; testupperinvalid; writeln;