fpc/tests/test/twide6.pp
Jonas Maebe f4c31ecf3c + widestringmanager.codepointlengthproc added, which can be used to
determine the length of a multi-byte character. The return values
    are defined to be the same as those of POSIX' mblen: -1 =
    invalid/incomplete sequence, 0 = #0, > 0 = length of sequence in
    bytes.
  + default implementation for widestringmanager.codepointlengthproc
    (assumes all code points have length 1) and Unix implementation
    (based on mb(r)len); Windows implementation is still required
  * replaced default implementation of
    widestringmanager.CharLengthPCharProc with strlen() of the input
    instead of an error (correct if all code points have length 1,
    still needs Windows implementation)
  + implemented fpc_text_read_{wide,unicode}str() and
    fpc_text_read_widechar() (mantis #18163); fpc_text_read_widechar()
    uses the new widestringmanager.codepointlengthproc()
  + unicodestring support for readstr/writestr
  * fixed declaration of fpc_Write_Text_UnicodeStr (unicodestring
    instead of widestring parameter)
  * extended test/twide*.pp tests to test the new/fixed functionality

git-svn-id: trunk@16533 -
2010-12-10 14:10:01 +00:00

522 lines
11 KiB
ObjectPascal

{%skiptarget=wince}
{$codepage utf-8}
uses
{$ifdef unix}
cwstring,
{$endif}
sysutils;
procedure doerror(i : integer);
begin
writeln('Error: ',i);
halt(i);
end;
{ normal upper case testing (widestring) }
procedure testupper;
var
s: ansistring;
w1,w2,w3,w4: widestring;
u1,u2,u3,u4: unicodestring;
i: longint;
begin
w1:='aé'#0'èàł'#$d87e#$dc04;
w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
{$ifdef print}
// the utf-8 output can confuse the testsuite parser
writeln('original: ',w1);
writeln('original upper: ',w2);
{$endif print}
s:=w1;
{$ifdef print}
writeln('ansi: ',s);
{$endif print}
w3:=s;
w4:=AnsiUpperCase(s);
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=wideuppercase(w1);
{$ifdef print}
writeln('wideupper: ',w1);
writeln('original upper: ',w2);
writeln('ansiupper: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(1);
if (w4 <> w2) then
doerror(2);
w1:='aéèàł'#$d87e#$dc04;
w2:='AÉÈÀŁ'#$d87e#$dc04;
s:=w1;
w3:=s;
w4:=AnsiStrUpper(pchar(s));
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=wideuppercase(w1);
{$ifdef print}
writeln('wideupper: ',w1);
writeln('ansistrupper: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(21);
if (w4 <> w2) then
doerror(22);
end;
{ normal upper case testing (unicodestring) }
procedure testupperu;
var
s: ansistring;
w1,w2,w3,w4: widestring;
u1,u2,u3,u4: unicodestring;
i: longint;
begin
w1:='aé'#0'èàł'#$d87e#$dc04;
w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
{$ifdef print}
// the utf-8 output can confuse the testsuite parser
writeln('original: ',w1);
writeln('original upper: ',w2);
{$endif print}
s:=w1;
{$ifdef print}
writeln('ansi: ',s);
{$endif print}
w3:=s;
w4:=AnsiUpperCase(s);
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=unicodeuppercase(w1);
{$ifdef print}
writeln('wideupper: ',w1);
writeln('original upper: ',w2);
writeln('ansiupper: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(1);
if (w4 <> w2) then
doerror(2);
w1:='aéèàł'#$d87e#$dc04;
w2:='AÉÈÀŁ'#$d87e#$dc04;
s:=w1;
w3:=s;
w4:=AnsiStrUpper(pchar(s));
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=unicodeuppercase(w1);
{$ifdef print}
writeln('unicodeupper: ',w1);
writeln('ansistrupper: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(21);
if (w4 <> w2) then
doerror(22);
end;
{ normal lower case testing (widestring) }
procedure testlower;
var
s: ansistring;
w1,w2,w3,w4: widestring;
i: longint;
begin
w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
w2:='aé'#0'èàł'#$d87e#$dc04;
{$ifdef print}
// the utf-8 output can confuse the testsuite parser
writeln('original: ',w1);
writeln('original lower: ',w2);
{$endif print}
s:=w1;
w3:=s;
w4:=AnsiLowerCase(s);
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=widelowercase(w1);
{$ifdef print}
writeln('widelower: ',w1);
writeln('ansilower: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(3);
if (w4 <> w2) then
doerror(4);
w1:='AÉÈÀŁ'#$d87e#$dc04;
w2:='aéèàł'#$d87e#$dc04;
s:=w1;
w3:=s;
w4:=AnsiStrLower(pchar(s));
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=widelowercase(w1);
{$ifdef print}
writeln('widelower: ',w1);
writeln('ansistrlower: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(3);
if (w4 <> w2) then
doerror(4);
end;
{ normal lower case testing (unicodestring) }
procedure testloweru;
var
s: ansistring;
w1,w2,w3,w4: unicodestring;
i: longint;
begin
w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
w2:='aé'#0'èàł'#$d87e#$dc04;
{$ifdef print}
// the utf-8 output can confuse the testsuite parser
writeln('original: ',w1);
writeln('original lower: ',w2);
{$endif print}
s:=w1;
w3:=s;
w4:=AnsiLowerCase(s);
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=unicodelowercase(w1);
{$ifdef print}
writeln('unicodelower: ',w1);
writeln('ansilower: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(3);
if (w4 <> w2) then
doerror(4);
w1:='AÉÈÀŁ'#$d87e#$dc04;
w2:='aéèàł'#$d87e#$dc04;
s:=w1;
w3:=s;
w4:=AnsiStrLower(pchar(s));
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=unicodelowercase(w1);
{$ifdef print}
writeln('unicodelower: ',w1);
writeln('ansistrlower: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(3);
if (w4 <> w2) then
doerror(4);
end;
{ upper case testing with a missing utf-16 pair at the end }
procedure testupperinvalid;
var
s: ansistring;
w1,w2,w3,w4: widestring;
i: longint;
begin
{ missing utf-16 pair at end }
w1:='aé'#0'èàł'#$d87e;
w2:='AÉ'#0'ÈÀŁ'#$d87e;
{$ifdef print}
// the utf-8 output can confuse the testsuite parser
writeln('original: ',w1);
writeln('original upper: ',w2);
{$endif print}
s:=w1;
w3:=s;
w4:=AnsiUpperCase(s);
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=wideuppercase(w1);
{$ifdef print}
writeln('wideupper: ',w1);
writeln('ansiupper: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(5);
if (w4 <> w2) then
doerror(6);
end;
{ lower case testing with a missing utf-16 pair at the end }
procedure testlowerinvalid;
var
s: ansistring;
w1,w2,w3,w4: widestring;
i: longint;
begin
{ missing utf-16 pair at end}
w1:='AÉ'#0'ÈÀŁ'#$d87e;
w2:='aé'#0'èàł'#$d87e;
{$ifdef print}
// the utf-8 output can confuse the testsuite parser
writeln('original: ',w1);
writeln('original lower: ',w2);
{$endif print}
s:=w1;
w3:=s;
w4:=AnsiLowerCase(s);
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=widelowercase(w1);
{$ifdef print}
writeln('widelower: ',w1);
writeln('ansilower: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(7);
if (w4 <> w2) then
doerror(8);
end;
{ upper case testing with a missing utf-16 pair at the end, followed by a normal char }
procedure testupperinvalid1;
var
s: ansistring;
w1,w2,w3,w4: widestring;
i: longint;
begin
{ missing utf-16 pair at end with char after it}
w1:='aé'#0'èàł'#$d87e'j';
w2:='AÉ'#0'ÈÀŁ'#$d87e'J';
{$ifdef print}
// the utf-8 output can confuse the testsuite parser
writeln('original: ',w1);
writeln('original upper: ',w2);
{$endif print}
s:=w1;
w3:=s;
w4:=AnsiUpperCase(s);
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=wideuppercase(w1);
{$ifdef print}
writeln('wideupper: ',w1);
writeln('ansiupper: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(9);
if (w4 <> w2) then
doerror(10);
end;
{ lower case testing with a missing utf-16 pair at the end, followed by a normal char }
procedure testlowerinvalid1;
var
s: ansistring;
w1,w2,w3,w4: widestring;
i: longint;
begin
{ missing utf-16 pair at end with char after it}
w1:='AÉ'#0'ÈÀŁ'#$d87e'J';
w2:='aé'#0'èàł'#$d87e'j';
{$ifdef print}
// the utf-8 output can confuse the testsuite parser
writeln('original: ',w1);
writeln('original lower: ',w2);
{$endif print}
s:=w1;
w3:=s;
w4:=AnsiLowerCase(s);
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=widelowercase(w1);
{$ifdef print}
writeln('widelower: ',w1);
writeln('ansilower: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(11);
if (w4 <> w2) then
doerror(12);
end;
{ upper case testing with corrupting the utf-8 string after conversion }
procedure testupperinvalid2;
var
s: ansistring;
w1,w2,w3,w4: widestring;
i: longint;
begin
w1:='aé'#0'èàł'#$d87e#$dc04'ö';
w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
{$ifdef print}
// the utf-8 output can confuse the testsuite parser
writeln('original: ',w1);
writeln('original upper: ',w2);
{$endif print}
s:=w1;
{ truncate the last utf-8 character }
setlength(s,length(s)-1);
w3:=s;
{ adjust checking values for new length due to corruption }
if length(w3)<>length(w2) then
begin
setlength(w2,length(w3));
setlength(w1,length(w3));
end;
w4:=AnsiUpperCase(s);
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=wideuppercase(w1);
{$ifdef print}
writeln('wideupper: ',w1);
writeln('ansiupper: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(13);
if (w4 <> w2) then
doerror(14);
end;
{ lower case testing with corrupting the utf-8 string after conversion }
procedure testlowerinvalid2;
var
s: ansistring;
w1,w2,w3,w4: widestring;
i: longint;
begin
w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
w2:='aé'#0'èàł'#$d87e#$dc04'ö';
{$ifdef print}
// the utf-8 output can confuse the testsuite parser
writeln('original: ',w1);
writeln('original lower: ',w2);
{$endif print}
s:=w1;
{ truncate the last utf-8 character }
setlength(s,length(s)-1);
w3:=s;
{ adjust checking values for new length due to corruption }
if length(w3)<>length(w2) then
begin
setlength(w2,length(w3));
setlength(w1,length(w3));
end;
w4:=AnsiLowerCase(s);
{ filter out unsupported characters }
for i:=1 to length(w3) do
if w3[i]='?' then
begin
w2[i]:='?';
w1[i]:='?';
end;
w1:=widelowercase(w1);
{$ifdef print}
writeln('widelower: ',w1);
writeln('ansilower: ',w4);
{$endif print}
if (w1 <> w2) then
doerror(15);
if (w4 <> w2) then
doerror(16);
end;
begin
testupper;
writeln;
testupperu;
writeln;
testlower;
writeln;
testloweru;
writeln;
writeln;
testupperinvalid;
writeln;
testlowerinvalid;
writeln;
writeln;
testupperinvalid1;
writeln;
testlowerinvalid1;
writeln;
writeln;
testupperinvalid2;
writeln;
testlowerinvalid2;
end.