mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-22 07:09:29 +02:00
* fixed wchar_t type (was: widechar, now is cint/cint32/long depending on
platform) + mbstate_t type for all unixes except BeOS (doesn't exist for BeOS) + implemented UpperAnsiStringProc/LowerAnsiStringProc for unix * fixed Ansi2UCS4Move in cwstring (although it isn't used anywhere currently) + test for Upper/LowerAnsiString git-svn-id: trunk@9393 -
This commit is contained in:
parent
997d7b33db
commit
68595c8b72
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -7300,6 +7300,7 @@ tests/test/twide2.pp svneol=native#text/plain
|
||||
tests/test/twide3.pp svneol=native#text/plain
|
||||
tests/test/twide4.pp svneol=native#text/plain
|
||||
tests/test/twide5.pp svneol=native#text/plain
|
||||
tests/test/twide6.pp svneol=native#text/plain
|
||||
tests/test/twrstr1.pp svneol=native#text/plain
|
||||
tests/test/twrstr2.pp svneol=native#text/plain
|
||||
tests/test/twrstr3.pp svneol=native#text/plain
|
||||
|
@ -90,7 +90,7 @@ type
|
||||
pTime = ^time_t;
|
||||
ptime_t = ^time_t;
|
||||
|
||||
wchar_t = widechar;
|
||||
wchar_t = cint32;
|
||||
pwchar_t = ^wchar_t;
|
||||
|
||||
socklen_t= cuint32;
|
||||
|
@ -77,9 +77,9 @@ type
|
||||
pTime = ^time_t;
|
||||
ptime_t = ^time_t;
|
||||
|
||||
wchar_t = widechar;
|
||||
wchar_t = cint32;
|
||||
pwchar_t = ^wchar_t;
|
||||
wint_t = cint;
|
||||
wint_t = cint32;
|
||||
|
||||
socklen_t= cuint32;
|
||||
TSocklen = socklen_t;
|
||||
@ -163,6 +163,13 @@ type
|
||||
end;
|
||||
pstatfs = ^tstatfs;
|
||||
|
||||
mbstate_t = record
|
||||
case byte of
|
||||
0: (__mbstate8: array[0..127] of char);
|
||||
1: (_mbstateL: clonglong); { for alignment }
|
||||
end;
|
||||
pmbstate_t = ^mbstate_t;
|
||||
|
||||
pthread_t = pointer;
|
||||
pthread_attr_t = record sig: clong; opaque: array[0..{$ifdef cpu64}56{$else}36{$endif}-1] of byte; end;
|
||||
pthread_mutex_t = {$i pmutext.inc}
|
||||
|
@ -80,7 +80,7 @@ type
|
||||
pUid = ^Uid_t;
|
||||
|
||||
wint_t = cint32;
|
||||
wchar_t = widechar;
|
||||
wchar_t = cint32;
|
||||
pwchar_t = ^wchar_t;
|
||||
|
||||
|
||||
@ -186,6 +186,13 @@ type
|
||||
end;
|
||||
PStatFS=^TStatFS;
|
||||
|
||||
mbstate_t = record
|
||||
case byte of
|
||||
0: (__mbstate8: array[0..127] of char);
|
||||
1: (_mbstateL: cint64); { for alignment }
|
||||
end;
|
||||
pmbstate_t = ^mbstate_t;
|
||||
|
||||
ITimerVal= Record
|
||||
It_Interval,
|
||||
It_Value : TimeVal;
|
||||
|
@ -102,7 +102,7 @@ Type
|
||||
pTime = ^time_t;
|
||||
ptime_t = ^time_t;
|
||||
|
||||
wchar_t = widechar;
|
||||
wchar_t = cint32;
|
||||
pwchar_t = ^wchar_t;
|
||||
|
||||
{$ifdef cpu64}
|
||||
@ -154,6 +154,18 @@ Type
|
||||
end;
|
||||
PStatFS=^TStatFS;
|
||||
|
||||
mbstate_value_t = record
|
||||
case byte of
|
||||
0: (__wch: wint_t);
|
||||
1: (__wchb: array[0..3] of char);
|
||||
end;
|
||||
|
||||
mbstate_t = record
|
||||
__count: cint;
|
||||
__value: mbstate_value_t;
|
||||
end;
|
||||
pmbstate_t = ^mbstate_t;
|
||||
|
||||
pthread_t = culong;
|
||||
|
||||
sched_param = record
|
||||
|
@ -129,7 +129,11 @@ Type
|
||||
uint_t = cuint;
|
||||
|
||||
|
||||
wchar_t = widechar;
|
||||
{$ifdef cpu64}
|
||||
wchar_t = cint;
|
||||
{$else cpu64}
|
||||
wchar_t = clong;
|
||||
{$endif cpu64}
|
||||
pwchar_t = ^wchar_t;
|
||||
|
||||
uid_t = cuint32; { used for user ID type }
|
||||
@ -168,6 +172,15 @@ Type
|
||||
end;
|
||||
PStatFS=^TStatFS;
|
||||
|
||||
mbstate_t = record
|
||||
{$ifdef cpu64}
|
||||
__filler: array[0..3] of clong;
|
||||
{$else cpu64}
|
||||
__filler: array[0..5] of cint;
|
||||
{$endif cpu64}
|
||||
end;
|
||||
pmbstate_t = ^mbstate_t;
|
||||
|
||||
|
||||
clock32_t = int32_t;
|
||||
timeval32 = record
|
||||
|
@ -14,6 +14,7 @@
|
||||
**********************************************************************}
|
||||
|
||||
{$mode objfpc}
|
||||
{$inline on}
|
||||
|
||||
unit cwstring;
|
||||
|
||||
@ -45,11 +46,22 @@ Const
|
||||
{$endif}
|
||||
|
||||
{ helper functions from libc }
|
||||
function tolower(__wc:cint):cint;cdecl;external libiconvname name 'tolower';
|
||||
function toupper(__wc:cint):cint;cdecl;external libiconvname name 'toupper';
|
||||
function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
|
||||
function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
|
||||
|
||||
function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
|
||||
function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
|
||||
function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
|
||||
{$ifndef beos}
|
||||
function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
|
||||
function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
|
||||
{$else beos}
|
||||
function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
|
||||
function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
|
||||
{$endif beos}
|
||||
|
||||
|
||||
const
|
||||
{$ifdef linux}
|
||||
@ -253,6 +265,92 @@ procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
|
||||
end;
|
||||
|
||||
|
||||
(*
|
||||
function LowerWideString(const s : WideString) : WideString;
|
||||
var
|
||||
i, slen : SizeInt;
|
||||
{$if sizeof(wchar_t) = 4}
|
||||
nc : wint_t;
|
||||
resindex : SizeInt;
|
||||
len : longint;
|
||||
valid : boolean;
|
||||
{$elseif sizeof(wchar_t) = 2}
|
||||
p : PWideChar;
|
||||
{$endif sizeof(wchar_t)}
|
||||
begin
|
||||
slen:=length(s);
|
||||
SetLength(result,slen);
|
||||
{$if sizeof(wint_t) = 4}
|
||||
i:=1;
|
||||
resindex:=1;
|
||||
while (i<=slen) do
|
||||
begin
|
||||
nc:=utf16toutf32(s,i,len,valid);
|
||||
inc(i,len);
|
||||
if (valid) then
|
||||
ConcatUTF32ToWideStr(towlower(nc),result,resindex)
|
||||
else
|
||||
{ do nothing }
|
||||
pwidechar(@result[i])^:=s[i];
|
||||
end;
|
||||
{ adjust length }
|
||||
setlength(result,resindex-1);
|
||||
{$elseif sizeof(wchar_t) = 2}
|
||||
{ avoid unique calls for each character }
|
||||
p:=@result[1];
|
||||
{ this will fail for surrogate pairs, but that's inherent }
|
||||
{ to choosing sizeof(wint_t)=2, and nothing we can help }
|
||||
for i:=1 to length(s) do
|
||||
p[i]:=WideChar(towlower(wint_t(s[i])));
|
||||
{$else}
|
||||
{$error Unsupported wchar_t size}
|
||||
{$endif}
|
||||
end;
|
||||
|
||||
|
||||
function UpperWideString(const s : WideString) : WideString;
|
||||
var
|
||||
i, slen : SizeInt;
|
||||
{$if sizeof(wint_t) = 4}
|
||||
nc : wint_t;
|
||||
resindex : SizeInt;
|
||||
len : longint;
|
||||
valid : boolean;
|
||||
{$elseif sizeof(wchar_t) = 2}
|
||||
p : PWideChar;
|
||||
{$endif sizeof(wchar_t)}
|
||||
begin
|
||||
slen:=length(s);
|
||||
SetLength(result,slen);
|
||||
{$if sizeof(wchar_t) = 4}
|
||||
i:=1;
|
||||
resindex:=1;
|
||||
while (i<=slen) do
|
||||
begin
|
||||
nc:=utf16toutf32(s,i,len,valid);
|
||||
inc(i,len);
|
||||
if (valid) then
|
||||
ConcatUTF32ToWideStr (towupper(nc),result,resindex)
|
||||
else
|
||||
{ do nothing }
|
||||
pwidechar(@result[i])^:=s[i];
|
||||
end;
|
||||
{ adjust length }
|
||||
setlength(result,resindex-1);
|
||||
{$elseif sizeof(wchar_t) = 2}
|
||||
{ avoid unique calls for each character }
|
||||
p:=@result[1];
|
||||
{ this will fail for surrogate pairs, but that's inherent }
|
||||
{ to choosing sizeof(wint_t)=2, and nothing we can help }
|
||||
for i:=1 to length(s) do
|
||||
p[i]:=WideChar(towupper(wint_t(s[i])));
|
||||
{$else}
|
||||
{$error Unsupported wchar_t size}
|
||||
{$endif}
|
||||
end;
|
||||
*)
|
||||
|
||||
|
||||
function LowerWideString(const s : WideString) : WideString;
|
||||
var
|
||||
i : SizeInt;
|
||||
@ -273,11 +371,193 @@ function UpperWideString(const s : WideString) : WideString;
|
||||
end;
|
||||
|
||||
|
||||
procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
|
||||
begin
|
||||
if (len>length(s)) then
|
||||
if (length(s) < 10*256) then
|
||||
setlength(s,length(s)+10)
|
||||
else
|
||||
setlength(s,length(s)+length(s) shr 8);
|
||||
end;
|
||||
|
||||
|
||||
procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
|
||||
begin
|
||||
EnsureAnsiLen(s,index);
|
||||
pchar(@s[index])^:=c;
|
||||
inc(index);
|
||||
end;
|
||||
|
||||
|
||||
{ concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
|
||||
{$ifndef beos}
|
||||
procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
|
||||
{$else not beos}
|
||||
procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
|
||||
{$endif beos}
|
||||
var
|
||||
p : pchar;
|
||||
mblen : size_t;
|
||||
begin
|
||||
{ we know that s is unique -> avoid uniquestring calls}
|
||||
p:=@s[index];
|
||||
if (nc<$7f) then
|
||||
ConcatCharToAnsiStr(char(nc),s,index)
|
||||
else
|
||||
begin
|
||||
{ en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
|
||||
{ -> 10 should be enough? Should actually use MB_CUR_MAX, but }
|
||||
{ that's a libc macro mapped to internal functions/variables }
|
||||
{ and thus not a stable external API on systems where libc }
|
||||
{ breaks backwards compatibility every now and then }
|
||||
EnsureAnsiLen(s,index+10);
|
||||
{$ifndef beos}
|
||||
mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
|
||||
{$else not beos}
|
||||
mblen:=wctomb(p,wchar_t(nc));
|
||||
{$endif not beos}
|
||||
if (mblen<>size_t(-1)) then
|
||||
inc(index,mblen)
|
||||
else
|
||||
begin
|
||||
{ invalid wide char }
|
||||
p^:='?';
|
||||
inc(index);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
||||
function LowerAnsiString(const s : AnsiString) : AnsiString;
|
||||
var
|
||||
i, slen,
|
||||
resindex : SizeInt;
|
||||
mblen : size_t;
|
||||
{$ifndef beos}
|
||||
ombstate,
|
||||
nmbstate : mbstate_t;
|
||||
{$endif beos}
|
||||
wc : wchar_t;
|
||||
begin
|
||||
fillchar(ombstate,sizeof(ombstate),0);
|
||||
fillchar(nmbstate,sizeof(nmbstate),0);
|
||||
slen:=length(s);
|
||||
SetLength(result,slen+10);
|
||||
i:=1;
|
||||
resindex:=1;
|
||||
while (i<=slen) do
|
||||
begin
|
||||
if (s[i]<=#127) then
|
||||
begin
|
||||
ConcatCharToAnsiStr(char(tolower(cint(s[i]))),result,resindex);
|
||||
inc(i)
|
||||
end
|
||||
else
|
||||
begin
|
||||
{$ifndef beos}
|
||||
mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
|
||||
{$else not beos}
|
||||
mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
|
||||
{$endif not beos}
|
||||
case mblen of
|
||||
size_t(-2):
|
||||
begin
|
||||
{ partial invalid character, copy literally }
|
||||
while (i<=slen) do
|
||||
begin
|
||||
ConcatCharToAnsiStr(s[i],result,resindex);
|
||||
inc(i);
|
||||
end;
|
||||
end;
|
||||
size_t(-1), 0:
|
||||
begin
|
||||
{ invalid or null character }
|
||||
ConcatCharToAnsiStr(s[i],result,resindex);
|
||||
inc(i);
|
||||
end;
|
||||
else
|
||||
begin
|
||||
{ a valid sequence }
|
||||
{ even if mblen = 1, the lowercase version may have a }
|
||||
{ different length }
|
||||
{ We can't do anything special if wchar_t is 16 bit... }
|
||||
{$ifndef beos}
|
||||
ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
|
||||
{$else not beos}
|
||||
ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
|
||||
{$endif not beos}
|
||||
inc(i,mblen);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
SetLength(result,resindex-1);
|
||||
end;
|
||||
|
||||
|
||||
function UpperAnsiString(const s : AnsiString) : AnsiString;
|
||||
var
|
||||
i, slen,
|
||||
resindex : SizeInt;
|
||||
mblen : size_t;
|
||||
ombstate,
|
||||
nmbstate : mbstate_t;
|
||||
wc : wchar_t;
|
||||
begin
|
||||
fillchar(ombstate,sizeof(ombstate),0);
|
||||
fillchar(nmbstate,sizeof(nmbstate),0);
|
||||
slen:=length(s);
|
||||
SetLength(result,slen+10);
|
||||
i:=1;
|
||||
resindex:=1;
|
||||
while (i<=slen) do
|
||||
begin
|
||||
if (s[i]<=#127) then
|
||||
begin
|
||||
ConcatCharToAnsiStr(char(toupper(cint(s[i]))),result,resindex);
|
||||
inc(i)
|
||||
end
|
||||
else
|
||||
begin
|
||||
mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
|
||||
case mblen of
|
||||
size_t(-2):
|
||||
begin
|
||||
{ partial invalid character, copy literally }
|
||||
while (i<=slen) do
|
||||
begin
|
||||
ConcatCharToAnsiStr(s[i],result,resindex);
|
||||
inc(i);
|
||||
end;
|
||||
end;
|
||||
size_t(-1), 0:
|
||||
begin
|
||||
{ invalid or null character }
|
||||
ConcatCharToAnsiStr(s[i],result,resindex);
|
||||
inc(i);
|
||||
end;
|
||||
else
|
||||
begin
|
||||
{ a valid sequence }
|
||||
{ even if mblen = 1, the uppercase version may have a }
|
||||
{ different length }
|
||||
{ We can't do anything special if wchar_t is 16 bit... }
|
||||
ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
|
||||
inc(i,mblen);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
SetLength(result,resindex-1);
|
||||
end;
|
||||
|
||||
|
||||
procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
|
||||
var
|
||||
outlength,
|
||||
outoffset,
|
||||
outleft : size_t;
|
||||
err: cint;
|
||||
srcpos,
|
||||
destpos: pchar;
|
||||
mynil : pchar;
|
||||
@ -294,7 +574,22 @@ procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
|
||||
outleft:=outlength*4;
|
||||
while iconv(iconv_ansi2ucs4,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
|
||||
begin
|
||||
case fpgetCerrno of
|
||||
err:=fpgetCerrno;
|
||||
case err of
|
||||
ESysEINVAL,
|
||||
ESysEILSEQ:
|
||||
begin
|
||||
{ skip and set to '?' }
|
||||
inc(srcpos);
|
||||
dec(len);
|
||||
plongint(destpos)^:=longint('?');
|
||||
inc(destpos,4);
|
||||
dec(outleft,4);
|
||||
{ reset }
|
||||
iconv(iconv_ansi2ucs4,@mynil,@my0,@mynil,@my0);
|
||||
if err=ESysEINVAL then
|
||||
break;
|
||||
end;
|
||||
ESysE2BIG:
|
||||
begin
|
||||
outoffset:=destpos-pchar(dest);
|
||||
@ -375,9 +670,10 @@ begin
|
||||
CompareTextWideStringProc:=@CompareTextWideString;
|
||||
{
|
||||
CharLengthPCharProc
|
||||
|
||||
UpperAnsiStringProc
|
||||
LowerAnsiStringProc
|
||||
}
|
||||
UpperAnsiStringProc:=@UpperAnsiString;
|
||||
LowerAnsiStringProc:=@LowerAnsiString;
|
||||
{
|
||||
CompareStrAnsiStringProc
|
||||
CompareTextAnsiStringProc
|
||||
}
|
||||
|
339
tests/test/twide6.pp
Normal file
339
tests/test/twide6.pp
Normal file
@ -0,0 +1,339 @@
|
||||
{$codepage utf-8}
|
||||
uses
|
||||
{$ifdef unix}
|
||||
cwstring,
|
||||
{$endif}
|
||||
sysutils;
|
||||
|
||||
|
||||
{ normal upper case testing }
|
||||
procedure testupper;
|
||||
var
|
||||
s: ansistring;
|
||||
w1,w2,w3,w4: widestring;
|
||||
i: longint;
|
||||
begin
|
||||
w1:='aé'#0'èàł'#$d87e#$dc04;
|
||||
w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
|
||||
{$ifdef print}
|
||||
// the utf-8 output can confuse the testsuite parser
|
||||
writeln('original: ',w1);
|
||||
writeln('original upper: ',w2);
|
||||
{$endif print}
|
||||
s:=w1;
|
||||
w3:=s;
|
||||
w4:=AnsiUpperCase(s);
|
||||
{ filter out unsupported characters }
|
||||
for i:=1 to length(w3) do
|
||||
if w3[i]='?' then
|
||||
begin
|
||||
w2[i]:='?';
|
||||
w1[i]:='?';
|
||||
end;
|
||||
w1:=wideuppercase(w1);
|
||||
{$ifdef print}
|
||||
writeln('wideupper: ',w1);
|
||||
writeln('ansiupper: ',w4);
|
||||
{$endif print}
|
||||
if (w1 <> w2) then
|
||||
halt(1);
|
||||
if (w4 <> w2) then
|
||||
halt(2);
|
||||
end;
|
||||
|
||||
|
||||
{ normal lower case testing }
|
||||
procedure testlower;
|
||||
var
|
||||
s: ansistring;
|
||||
w1,w2,w3,w4: widestring;
|
||||
i: longint;
|
||||
begin
|
||||
w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
|
||||
w2:='aé'#0'èàł'#$d87e#$dc04;
|
||||
{$ifdef print}
|
||||
// the utf-8 output can confuse the testsuite parser
|
||||
writeln('original: ',w1);
|
||||
writeln('original lower: ',w2);
|
||||
{$endif print}
|
||||
s:=w1;
|
||||
w3:=s;
|
||||
w4:=AnsiLowerCase(s);
|
||||
{ filter out unsupported characters }
|
||||
for i:=1 to length(w3) do
|
||||
if w3[i]='?' then
|
||||
begin
|
||||
w2[i]:='?';
|
||||
w1[i]:='?';
|
||||
end;
|
||||
w1:=widelowercase(w1);
|
||||
{$ifdef print}
|
||||
writeln('widelower: ',w1);
|
||||
writeln('ansilower: ',w4);
|
||||
{$endif print}
|
||||
if (w1 <> w2) then
|
||||
halt(3);
|
||||
if (w4 <> w2) then
|
||||
halt(4);
|
||||
end;
|
||||
|
||||
|
||||
|
||||
{ upper case testing with a missing utf-16 pair at the end }
|
||||
procedure testupperinvalid;
|
||||
var
|
||||
s: ansistring;
|
||||
w1,w2,w3,w4: widestring;
|
||||
i: longint;
|
||||
begin
|
||||
{ missing utf-16 pair at end }
|
||||
w1:='aé'#0'èàł'#$d87e;
|
||||
w2:='AÉ'#0'ÈÀŁ'#$d87e;
|
||||
{$ifdef print}
|
||||
// the utf-8 output can confuse the testsuite parser
|
||||
writeln('original: ',w1);
|
||||
writeln('original upper: ',w2);
|
||||
{$endif print}
|
||||
s:=w1;
|
||||
w3:=s;
|
||||
w4:=AnsiUpperCase(s);
|
||||
{ filter out unsupported characters }
|
||||
for i:=1 to length(w3) do
|
||||
if w3[i]='?' then
|
||||
begin
|
||||
w2[i]:='?';
|
||||
w1[i]:='?';
|
||||
end;
|
||||
w1:=wideuppercase(w1);
|
||||
{$ifdef print}
|
||||
writeln('wideupper: ',w1);
|
||||
writeln('ansiupper: ',w4);
|
||||
{$endif print}
|
||||
if (w1 <> w2) then
|
||||
halt(5);
|
||||
if (w4 <> w2) then
|
||||
halt(6);
|
||||
end;
|
||||
|
||||
|
||||
{ lower case testing with a missing utf-16 pair at the end }
|
||||
procedure testlowerinvalid;
|
||||
var
|
||||
s: ansistring;
|
||||
w1,w2,w3,w4: widestring;
|
||||
i: longint;
|
||||
begin
|
||||
{ missing utf-16 pair at end}
|
||||
w1:='AÉ'#0'ÈÀŁ'#$d87e;
|
||||
w2:='aé'#0'èàł'#$d87e;
|
||||
{$ifdef print}
|
||||
// the utf-8 output can confuse the testsuite parser
|
||||
writeln('original: ',w1);
|
||||
writeln('original lower: ',w2);
|
||||
{$endif print}
|
||||
s:=w1;
|
||||
w3:=s;
|
||||
w4:=AnsiLowerCase(s);
|
||||
{ filter out unsupported characters }
|
||||
for i:=1 to length(w3) do
|
||||
if w3[i]='?' then
|
||||
begin
|
||||
w2[i]:='?';
|
||||
w1[i]:='?';
|
||||
end;
|
||||
w1:=widelowercase(w1);
|
||||
{$ifdef print}
|
||||
writeln('widelower: ',w1);
|
||||
writeln('ansilower: ',w4);
|
||||
{$endif print}
|
||||
if (w1 <> w2) then
|
||||
halt(7);
|
||||
if (w4 <> w2) then
|
||||
halt(8);
|
||||
end;
|
||||
|
||||
|
||||
|
||||
{ upper case testing with a missing utf-16 pair at the end, followed by a normal char }
|
||||
procedure testupperinvalid1;
|
||||
var
|
||||
s: ansistring;
|
||||
w1,w2,w3,w4: widestring;
|
||||
i: longint;
|
||||
begin
|
||||
{ missing utf-16 pair at end with char after it}
|
||||
w1:='aé'#0'èàł'#$d87e'j';
|
||||
w2:='AÉ'#0'ÈÀŁ'#$d87e'J';
|
||||
{$ifdef print}
|
||||
// the utf-8 output can confuse the testsuite parser
|
||||
writeln('original: ',w1);
|
||||
writeln('original upper: ',w2);
|
||||
{$endif print}
|
||||
s:=w1;
|
||||
w3:=s;
|
||||
w4:=AnsiUpperCase(s);
|
||||
{ filter out unsupported characters }
|
||||
for i:=1 to length(w3) do
|
||||
if w3[i]='?' then
|
||||
begin
|
||||
w2[i]:='?';
|
||||
w1[i]:='?';
|
||||
end;
|
||||
w1:=wideuppercase(w1);
|
||||
{$ifdef print}
|
||||
writeln('wideupper: ',w1);
|
||||
writeln('ansiupper: ',w4);
|
||||
{$endif print}
|
||||
if (w1 <> w2) then
|
||||
halt(9);
|
||||
if (w4 <> w2) then
|
||||
halt(10);
|
||||
end;
|
||||
|
||||
|
||||
{ lower case testing with a missing utf-16 pair at the end, followed by a normal char }
|
||||
procedure testlowerinvalid1;
|
||||
var
|
||||
s: ansistring;
|
||||
w1,w2,w3,w4: widestring;
|
||||
i: longint;
|
||||
begin
|
||||
{ missing utf-16 pair at end with char after it}
|
||||
w1:='AÉ'#0'ÈÀŁ'#$d87e'J';
|
||||
w2:='aé'#0'èàł'#$d87e'j';
|
||||
{$ifdef print}
|
||||
// the utf-8 output can confuse the testsuite parser
|
||||
writeln('original: ',w1);
|
||||
writeln('original lower: ',w2);
|
||||
{$endif print}
|
||||
s:=w1;
|
||||
w3:=s;
|
||||
w4:=AnsiLowerCase(s);
|
||||
{ filter out unsupported characters }
|
||||
for i:=1 to length(w3) do
|
||||
if w3[i]='?' then
|
||||
begin
|
||||
w2[i]:='?';
|
||||
w1[i]:='?';
|
||||
end;
|
||||
w1:=widelowercase(w1);
|
||||
{$ifdef print}
|
||||
writeln('widelower: ',w1);
|
||||
writeln('ansilower: ',w4);
|
||||
{$endif print}
|
||||
if (w1 <> w2) then
|
||||
halt(11);
|
||||
if (w4 <> w2) then
|
||||
halt(12);
|
||||
end;
|
||||
|
||||
|
||||
{ upper case testing with corrupting the utf-8 string after conversion }
|
||||
procedure testupperinvalid2;
|
||||
var
|
||||
s: ansistring;
|
||||
w1,w2,w3,w4: widestring;
|
||||
i: longint;
|
||||
begin
|
||||
w1:='aé'#0'èàł'#$d87e#$dc04'ö';
|
||||
w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
|
||||
{$ifdef print}
|
||||
// the utf-8 output can confuse the testsuite parser
|
||||
writeln('original: ',w1);
|
||||
writeln('original upper: ',w2);
|
||||
{$endif print}
|
||||
s:=w1;
|
||||
{ truncate the last utf-8 character }
|
||||
setlength(s,length(s)-1);
|
||||
w3:=s;
|
||||
{ adjust checking values for new length due to corruption }
|
||||
if length(w3)<>length(w2) then
|
||||
begin
|
||||
setlength(w2,length(w3));
|
||||
setlength(w1,length(w3));
|
||||
end;
|
||||
w4:=AnsiUpperCase(s);
|
||||
{ filter out unsupported characters }
|
||||
for i:=1 to length(w3) do
|
||||
if w3[i]='?' then
|
||||
begin
|
||||
w2[i]:='?';
|
||||
w1[i]:='?';
|
||||
end;
|
||||
w1:=wideuppercase(w1);
|
||||
{$ifdef print}
|
||||
writeln('wideupper: ',w1);
|
||||
writeln('ansiupper: ',w4);
|
||||
{$endif print}
|
||||
if (w1 <> w2) then
|
||||
halt(13);
|
||||
if (w4 <> w2) then
|
||||
halt(14);
|
||||
end;
|
||||
|
||||
|
||||
{ lower case testing with corrupting the utf-8 string after conversion }
|
||||
procedure testlowerinvalid2;
|
||||
var
|
||||
s: ansistring;
|
||||
w1,w2,w3,w4: widestring;
|
||||
i: longint;
|
||||
begin
|
||||
w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
|
||||
w2:='aé'#0'èàł'#$d87e#$dc04'ö';
|
||||
{$ifdef print}
|
||||
// the utf-8 output can confuse the testsuite parser
|
||||
writeln('original: ',w1);
|
||||
writeln('original lower: ',w2);
|
||||
{$endif print}
|
||||
s:=w1;
|
||||
{ truncate the last utf-8 character }
|
||||
setlength(s,length(s)-1);
|
||||
w3:=s;
|
||||
{ adjust checking values for new length due to corruption }
|
||||
if length(w3)<>length(w2) then
|
||||
begin
|
||||
setlength(w2,length(w3));
|
||||
setlength(w1,length(w3));
|
||||
end;
|
||||
w4:=AnsiLowerCase(s);
|
||||
{ filter out unsupported characters }
|
||||
for i:=1 to length(w3) do
|
||||
if w3[i]='?' then
|
||||
begin
|
||||
w2[i]:='?';
|
||||
w1[i]:='?';
|
||||
end;
|
||||
w1:=widelowercase(w1);
|
||||
{$ifdef print}
|
||||
writeln('widelower: ',w1);
|
||||
writeln('ansilower: ',w4);
|
||||
{$endif print}
|
||||
if (w1 <> w2) then
|
||||
halt(15);
|
||||
if (w4 <> w2) then
|
||||
halt(16);
|
||||
end;
|
||||
|
||||
|
||||
|
||||
begin
|
||||
testupper;
|
||||
writeln;
|
||||
testlower;
|
||||
writeln;
|
||||
writeln;
|
||||
testupperinvalid;
|
||||
writeln;
|
||||
testlowerinvalid;
|
||||
writeln;
|
||||
writeln;
|
||||
testupperinvalid1;
|
||||
writeln;
|
||||
testlowerinvalid1;
|
||||
writeln;
|
||||
writeln;
|
||||
testupperinvalid2;
|
||||
writeln;
|
||||
testlowerinvalid2;
|
||||
end.
|
Loading…
Reference in New Issue
Block a user