mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-12 06:46:05 +02:00
* fixed wchar_t type (was: widechar, now is cint/cint32/long depending on
platform) + mbstate_t type for all unixes except BeOS (doesn't exist for BeOS) + implemented UpperAnsiStringProc/LowerAnsiStringProc for unix * fixed Ansi2UCS4Move in cwstring (although it isn't used anywhere currently) + test for Upper/LowerAnsiString git-svn-id: trunk@9393 -
This commit is contained in:
parent
997d7b33db
commit
68595c8b72
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -7300,6 +7300,7 @@ tests/test/twide2.pp svneol=native#text/plain
|
|||||||
tests/test/twide3.pp svneol=native#text/plain
|
tests/test/twide3.pp svneol=native#text/plain
|
||||||
tests/test/twide4.pp svneol=native#text/plain
|
tests/test/twide4.pp svneol=native#text/plain
|
||||||
tests/test/twide5.pp svneol=native#text/plain
|
tests/test/twide5.pp svneol=native#text/plain
|
||||||
|
tests/test/twide6.pp svneol=native#text/plain
|
||||||
tests/test/twrstr1.pp svneol=native#text/plain
|
tests/test/twrstr1.pp svneol=native#text/plain
|
||||||
tests/test/twrstr2.pp svneol=native#text/plain
|
tests/test/twrstr2.pp svneol=native#text/plain
|
||||||
tests/test/twrstr3.pp svneol=native#text/plain
|
tests/test/twrstr3.pp svneol=native#text/plain
|
||||||
|
@ -90,7 +90,7 @@ type
|
|||||||
pTime = ^time_t;
|
pTime = ^time_t;
|
||||||
ptime_t = ^time_t;
|
ptime_t = ^time_t;
|
||||||
|
|
||||||
wchar_t = widechar;
|
wchar_t = cint32;
|
||||||
pwchar_t = ^wchar_t;
|
pwchar_t = ^wchar_t;
|
||||||
|
|
||||||
socklen_t= cuint32;
|
socklen_t= cuint32;
|
||||||
|
@ -77,9 +77,9 @@ type
|
|||||||
pTime = ^time_t;
|
pTime = ^time_t;
|
||||||
ptime_t = ^time_t;
|
ptime_t = ^time_t;
|
||||||
|
|
||||||
wchar_t = widechar;
|
wchar_t = cint32;
|
||||||
pwchar_t = ^wchar_t;
|
pwchar_t = ^wchar_t;
|
||||||
wint_t = cint;
|
wint_t = cint32;
|
||||||
|
|
||||||
socklen_t= cuint32;
|
socklen_t= cuint32;
|
||||||
TSocklen = socklen_t;
|
TSocklen = socklen_t;
|
||||||
@ -163,6 +163,13 @@ type
|
|||||||
end;
|
end;
|
||||||
pstatfs = ^tstatfs;
|
pstatfs = ^tstatfs;
|
||||||
|
|
||||||
|
mbstate_t = record
|
||||||
|
case byte of
|
||||||
|
0: (__mbstate8: array[0..127] of char);
|
||||||
|
1: (_mbstateL: clonglong); { for alignment }
|
||||||
|
end;
|
||||||
|
pmbstate_t = ^mbstate_t;
|
||||||
|
|
||||||
pthread_t = pointer;
|
pthread_t = pointer;
|
||||||
pthread_attr_t = record sig: clong; opaque: array[0..{$ifdef cpu64}56{$else}36{$endif}-1] of byte; end;
|
pthread_attr_t = record sig: clong; opaque: array[0..{$ifdef cpu64}56{$else}36{$endif}-1] of byte; end;
|
||||||
pthread_mutex_t = {$i pmutext.inc}
|
pthread_mutex_t = {$i pmutext.inc}
|
||||||
|
@ -80,7 +80,7 @@ type
|
|||||||
pUid = ^Uid_t;
|
pUid = ^Uid_t;
|
||||||
|
|
||||||
wint_t = cint32;
|
wint_t = cint32;
|
||||||
wchar_t = widechar;
|
wchar_t = cint32;
|
||||||
pwchar_t = ^wchar_t;
|
pwchar_t = ^wchar_t;
|
||||||
|
|
||||||
|
|
||||||
@ -186,6 +186,13 @@ type
|
|||||||
end;
|
end;
|
||||||
PStatFS=^TStatFS;
|
PStatFS=^TStatFS;
|
||||||
|
|
||||||
|
mbstate_t = record
|
||||||
|
case byte of
|
||||||
|
0: (__mbstate8: array[0..127] of char);
|
||||||
|
1: (_mbstateL: cint64); { for alignment }
|
||||||
|
end;
|
||||||
|
pmbstate_t = ^mbstate_t;
|
||||||
|
|
||||||
ITimerVal= Record
|
ITimerVal= Record
|
||||||
It_Interval,
|
It_Interval,
|
||||||
It_Value : TimeVal;
|
It_Value : TimeVal;
|
||||||
|
@ -102,7 +102,7 @@ Type
|
|||||||
pTime = ^time_t;
|
pTime = ^time_t;
|
||||||
ptime_t = ^time_t;
|
ptime_t = ^time_t;
|
||||||
|
|
||||||
wchar_t = widechar;
|
wchar_t = cint32;
|
||||||
pwchar_t = ^wchar_t;
|
pwchar_t = ^wchar_t;
|
||||||
|
|
||||||
{$ifdef cpu64}
|
{$ifdef cpu64}
|
||||||
@ -154,6 +154,18 @@ Type
|
|||||||
end;
|
end;
|
||||||
PStatFS=^TStatFS;
|
PStatFS=^TStatFS;
|
||||||
|
|
||||||
|
mbstate_value_t = record
|
||||||
|
case byte of
|
||||||
|
0: (__wch: wint_t);
|
||||||
|
1: (__wchb: array[0..3] of char);
|
||||||
|
end;
|
||||||
|
|
||||||
|
mbstate_t = record
|
||||||
|
__count: cint;
|
||||||
|
__value: mbstate_value_t;
|
||||||
|
end;
|
||||||
|
pmbstate_t = ^mbstate_t;
|
||||||
|
|
||||||
pthread_t = culong;
|
pthread_t = culong;
|
||||||
|
|
||||||
sched_param = record
|
sched_param = record
|
||||||
|
@ -129,7 +129,11 @@ Type
|
|||||||
uint_t = cuint;
|
uint_t = cuint;
|
||||||
|
|
||||||
|
|
||||||
wchar_t = widechar;
|
{$ifdef cpu64}
|
||||||
|
wchar_t = cint;
|
||||||
|
{$else cpu64}
|
||||||
|
wchar_t = clong;
|
||||||
|
{$endif cpu64}
|
||||||
pwchar_t = ^wchar_t;
|
pwchar_t = ^wchar_t;
|
||||||
|
|
||||||
uid_t = cuint32; { used for user ID type }
|
uid_t = cuint32; { used for user ID type }
|
||||||
@ -168,6 +172,15 @@ Type
|
|||||||
end;
|
end;
|
||||||
PStatFS=^TStatFS;
|
PStatFS=^TStatFS;
|
||||||
|
|
||||||
|
mbstate_t = record
|
||||||
|
{$ifdef cpu64}
|
||||||
|
__filler: array[0..3] of clong;
|
||||||
|
{$else cpu64}
|
||||||
|
__filler: array[0..5] of cint;
|
||||||
|
{$endif cpu64}
|
||||||
|
end;
|
||||||
|
pmbstate_t = ^mbstate_t;
|
||||||
|
|
||||||
|
|
||||||
clock32_t = int32_t;
|
clock32_t = int32_t;
|
||||||
timeval32 = record
|
timeval32 = record
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
**********************************************************************}
|
**********************************************************************}
|
||||||
|
|
||||||
{$mode objfpc}
|
{$mode objfpc}
|
||||||
|
{$inline on}
|
||||||
|
|
||||||
unit cwstring;
|
unit cwstring;
|
||||||
|
|
||||||
@ -45,11 +46,22 @@ Const
|
|||||||
{$endif}
|
{$endif}
|
||||||
|
|
||||||
{ helper functions from libc }
|
{ helper functions from libc }
|
||||||
|
function tolower(__wc:cint):cint;cdecl;external libiconvname name 'tolower';
|
||||||
|
function toupper(__wc:cint):cint;cdecl;external libiconvname name 'toupper';
|
||||||
function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
|
function towlower(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towlower';
|
||||||
function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
|
function towupper(__wc:wint_t):wint_t;cdecl;external libiconvname name 'towupper';
|
||||||
|
|
||||||
function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
|
function wcscoll (__s1:pwchar_t; __s2:pwchar_t):cint;cdecl;external libiconvname name 'wcscoll';
|
||||||
function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
|
function strcoll (__s1:pchar; __s2:pchar):cint;cdecl;external libiconvname name 'strcoll';
|
||||||
function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
|
function setlocale(category: cint; locale: pchar): pchar; cdecl; external clib name 'setlocale';
|
||||||
|
{$ifndef beos}
|
||||||
|
function mbrtowc(pwc: pwchar_t; const s: pchar; n: size_t; ps: pmbstate_t): size_t; cdecl; external clib name 'mbrtowc';
|
||||||
|
function wcrtomb(s: pchar; wc: wchar_t; ps: pmbstate_t): size_t; cdecl; external clib name 'wcrtomb';
|
||||||
|
{$else beos}
|
||||||
|
function mbtowc(pwc: pwchar_t; const s: pchar; n: size_t): size_t; cdecl; external clib name 'mbtowc';
|
||||||
|
function wctomb(s: pchar; wc: wchar_t): size_t; cdecl; external clib name 'wctomb';
|
||||||
|
{$endif beos}
|
||||||
|
|
||||||
|
|
||||||
const
|
const
|
||||||
{$ifdef linux}
|
{$ifdef linux}
|
||||||
@ -253,6 +265,92 @@ procedure Ansi2WideMove(source:pchar;var dest:widestring;len:SizeInt);
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
(*
|
||||||
|
function LowerWideString(const s : WideString) : WideString;
|
||||||
|
var
|
||||||
|
i, slen : SizeInt;
|
||||||
|
{$if sizeof(wchar_t) = 4}
|
||||||
|
nc : wint_t;
|
||||||
|
resindex : SizeInt;
|
||||||
|
len : longint;
|
||||||
|
valid : boolean;
|
||||||
|
{$elseif sizeof(wchar_t) = 2}
|
||||||
|
p : PWideChar;
|
||||||
|
{$endif sizeof(wchar_t)}
|
||||||
|
begin
|
||||||
|
slen:=length(s);
|
||||||
|
SetLength(result,slen);
|
||||||
|
{$if sizeof(wint_t) = 4}
|
||||||
|
i:=1;
|
||||||
|
resindex:=1;
|
||||||
|
while (i<=slen) do
|
||||||
|
begin
|
||||||
|
nc:=utf16toutf32(s,i,len,valid);
|
||||||
|
inc(i,len);
|
||||||
|
if (valid) then
|
||||||
|
ConcatUTF32ToWideStr(towlower(nc),result,resindex)
|
||||||
|
else
|
||||||
|
{ do nothing }
|
||||||
|
pwidechar(@result[i])^:=s[i];
|
||||||
|
end;
|
||||||
|
{ adjust length }
|
||||||
|
setlength(result,resindex-1);
|
||||||
|
{$elseif sizeof(wchar_t) = 2}
|
||||||
|
{ avoid unique calls for each character }
|
||||||
|
p:=@result[1];
|
||||||
|
{ this will fail for surrogate pairs, but that's inherent }
|
||||||
|
{ to choosing sizeof(wint_t)=2, and nothing we can help }
|
||||||
|
for i:=1 to length(s) do
|
||||||
|
p[i]:=WideChar(towlower(wint_t(s[i])));
|
||||||
|
{$else}
|
||||||
|
{$error Unsupported wchar_t size}
|
||||||
|
{$endif}
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
function UpperWideString(const s : WideString) : WideString;
|
||||||
|
var
|
||||||
|
i, slen : SizeInt;
|
||||||
|
{$if sizeof(wint_t) = 4}
|
||||||
|
nc : wint_t;
|
||||||
|
resindex : SizeInt;
|
||||||
|
len : longint;
|
||||||
|
valid : boolean;
|
||||||
|
{$elseif sizeof(wchar_t) = 2}
|
||||||
|
p : PWideChar;
|
||||||
|
{$endif sizeof(wchar_t)}
|
||||||
|
begin
|
||||||
|
slen:=length(s);
|
||||||
|
SetLength(result,slen);
|
||||||
|
{$if sizeof(wchar_t) = 4}
|
||||||
|
i:=1;
|
||||||
|
resindex:=1;
|
||||||
|
while (i<=slen) do
|
||||||
|
begin
|
||||||
|
nc:=utf16toutf32(s,i,len,valid);
|
||||||
|
inc(i,len);
|
||||||
|
if (valid) then
|
||||||
|
ConcatUTF32ToWideStr (towupper(nc),result,resindex)
|
||||||
|
else
|
||||||
|
{ do nothing }
|
||||||
|
pwidechar(@result[i])^:=s[i];
|
||||||
|
end;
|
||||||
|
{ adjust length }
|
||||||
|
setlength(result,resindex-1);
|
||||||
|
{$elseif sizeof(wchar_t) = 2}
|
||||||
|
{ avoid unique calls for each character }
|
||||||
|
p:=@result[1];
|
||||||
|
{ this will fail for surrogate pairs, but that's inherent }
|
||||||
|
{ to choosing sizeof(wint_t)=2, and nothing we can help }
|
||||||
|
for i:=1 to length(s) do
|
||||||
|
p[i]:=WideChar(towupper(wint_t(s[i])));
|
||||||
|
{$else}
|
||||||
|
{$error Unsupported wchar_t size}
|
||||||
|
{$endif}
|
||||||
|
end;
|
||||||
|
*)
|
||||||
|
|
||||||
|
|
||||||
function LowerWideString(const s : WideString) : WideString;
|
function LowerWideString(const s : WideString) : WideString;
|
||||||
var
|
var
|
||||||
i : SizeInt;
|
i : SizeInt;
|
||||||
@ -273,11 +371,193 @@ function UpperWideString(const s : WideString) : WideString;
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
procedure EnsureAnsiLen(var S: AnsiString; const len: SizeInt); inline;
|
||||||
|
begin
|
||||||
|
if (len>length(s)) then
|
||||||
|
if (length(s) < 10*256) then
|
||||||
|
setlength(s,length(s)+10)
|
||||||
|
else
|
||||||
|
setlength(s,length(s)+length(s) shr 8);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
procedure ConcatCharToAnsiStr(const c: char; var S: AnsiString; var index: SizeInt);
|
||||||
|
begin
|
||||||
|
EnsureAnsiLen(s,index);
|
||||||
|
pchar(@s[index])^:=c;
|
||||||
|
inc(index);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
{ concatenates an utf-32 char to a widestring. S *must* be unique when entering. }
|
||||||
|
{$ifndef beos}
|
||||||
|
procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt; var mbstate: mbstate_t);
|
||||||
|
{$else not beos}
|
||||||
|
procedure ConcatUTF32ToAnsiStr(const nc: wint_t; var S: AnsiString; var index: SizeInt);
|
||||||
|
{$endif beos}
|
||||||
|
var
|
||||||
|
p : pchar;
|
||||||
|
mblen : size_t;
|
||||||
|
begin
|
||||||
|
{ we know that s is unique -> avoid uniquestring calls}
|
||||||
|
p:=@s[index];
|
||||||
|
if (nc<$7f) then
|
||||||
|
ConcatCharToAnsiStr(char(nc),s,index)
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
{ en_US.UTF-8 needs maximally 6 chars, UCS-4/UTF-32 needs 4 }
|
||||||
|
{ -> 10 should be enough? Should actually use MB_CUR_MAX, but }
|
||||||
|
{ that's a libc macro mapped to internal functions/variables }
|
||||||
|
{ and thus not a stable external API on systems where libc }
|
||||||
|
{ breaks backwards compatibility every now and then }
|
||||||
|
EnsureAnsiLen(s,index+10);
|
||||||
|
{$ifndef beos}
|
||||||
|
mblen:=wcrtomb(p,wchar_t(nc),@mbstate);
|
||||||
|
{$else not beos}
|
||||||
|
mblen:=wctomb(p,wchar_t(nc));
|
||||||
|
{$endif not beos}
|
||||||
|
if (mblen<>size_t(-1)) then
|
||||||
|
inc(index,mblen)
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
{ invalid wide char }
|
||||||
|
p^:='?';
|
||||||
|
inc(index);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
function LowerAnsiString(const s : AnsiString) : AnsiString;
|
||||||
|
var
|
||||||
|
i, slen,
|
||||||
|
resindex : SizeInt;
|
||||||
|
mblen : size_t;
|
||||||
|
{$ifndef beos}
|
||||||
|
ombstate,
|
||||||
|
nmbstate : mbstate_t;
|
||||||
|
{$endif beos}
|
||||||
|
wc : wchar_t;
|
||||||
|
begin
|
||||||
|
fillchar(ombstate,sizeof(ombstate),0);
|
||||||
|
fillchar(nmbstate,sizeof(nmbstate),0);
|
||||||
|
slen:=length(s);
|
||||||
|
SetLength(result,slen+10);
|
||||||
|
i:=1;
|
||||||
|
resindex:=1;
|
||||||
|
while (i<=slen) do
|
||||||
|
begin
|
||||||
|
if (s[i]<=#127) then
|
||||||
|
begin
|
||||||
|
ConcatCharToAnsiStr(char(tolower(cint(s[i]))),result,resindex);
|
||||||
|
inc(i)
|
||||||
|
end
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
{$ifndef beos}
|
||||||
|
mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
|
||||||
|
{$else not beos}
|
||||||
|
mblen:=mbtowc(@wc, pchar(@s[i]), slen-i+1);
|
||||||
|
{$endif not beos}
|
||||||
|
case mblen of
|
||||||
|
size_t(-2):
|
||||||
|
begin
|
||||||
|
{ partial invalid character, copy literally }
|
||||||
|
while (i<=slen) do
|
||||||
|
begin
|
||||||
|
ConcatCharToAnsiStr(s[i],result,resindex);
|
||||||
|
inc(i);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
size_t(-1), 0:
|
||||||
|
begin
|
||||||
|
{ invalid or null character }
|
||||||
|
ConcatCharToAnsiStr(s[i],result,resindex);
|
||||||
|
inc(i);
|
||||||
|
end;
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
{ a valid sequence }
|
||||||
|
{ even if mblen = 1, the lowercase version may have a }
|
||||||
|
{ different length }
|
||||||
|
{ We can't do anything special if wchar_t is 16 bit... }
|
||||||
|
{$ifndef beos}
|
||||||
|
ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex,nmbstate);
|
||||||
|
{$else not beos}
|
||||||
|
ConcatUTF32ToAnsiStr(towlower(wint_t(wc)),result,resindex);
|
||||||
|
{$endif not beos}
|
||||||
|
inc(i,mblen);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
SetLength(result,resindex-1);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
function UpperAnsiString(const s : AnsiString) : AnsiString;
|
||||||
|
var
|
||||||
|
i, slen,
|
||||||
|
resindex : SizeInt;
|
||||||
|
mblen : size_t;
|
||||||
|
ombstate,
|
||||||
|
nmbstate : mbstate_t;
|
||||||
|
wc : wchar_t;
|
||||||
|
begin
|
||||||
|
fillchar(ombstate,sizeof(ombstate),0);
|
||||||
|
fillchar(nmbstate,sizeof(nmbstate),0);
|
||||||
|
slen:=length(s);
|
||||||
|
SetLength(result,slen+10);
|
||||||
|
i:=1;
|
||||||
|
resindex:=1;
|
||||||
|
while (i<=slen) do
|
||||||
|
begin
|
||||||
|
if (s[i]<=#127) then
|
||||||
|
begin
|
||||||
|
ConcatCharToAnsiStr(char(toupper(cint(s[i]))),result,resindex);
|
||||||
|
inc(i)
|
||||||
|
end
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
mblen:=mbrtowc(@wc, pchar(@s[i]), slen-i+1, @ombstate);
|
||||||
|
case mblen of
|
||||||
|
size_t(-2):
|
||||||
|
begin
|
||||||
|
{ partial invalid character, copy literally }
|
||||||
|
while (i<=slen) do
|
||||||
|
begin
|
||||||
|
ConcatCharToAnsiStr(s[i],result,resindex);
|
||||||
|
inc(i);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
size_t(-1), 0:
|
||||||
|
begin
|
||||||
|
{ invalid or null character }
|
||||||
|
ConcatCharToAnsiStr(s[i],result,resindex);
|
||||||
|
inc(i);
|
||||||
|
end;
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
{ a valid sequence }
|
||||||
|
{ even if mblen = 1, the uppercase version may have a }
|
||||||
|
{ different length }
|
||||||
|
{ We can't do anything special if wchar_t is 16 bit... }
|
||||||
|
ConcatUTF32ToAnsiStr(towupper(wint_t(wc)),result,resindex,nmbstate);
|
||||||
|
inc(i,mblen);
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
SetLength(result,resindex-1);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
|
procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
|
||||||
var
|
var
|
||||||
outlength,
|
outlength,
|
||||||
outoffset,
|
outoffset,
|
||||||
outleft : size_t;
|
outleft : size_t;
|
||||||
|
err: cint;
|
||||||
srcpos,
|
srcpos,
|
||||||
destpos: pchar;
|
destpos: pchar;
|
||||||
mynil : pchar;
|
mynil : pchar;
|
||||||
@ -294,7 +574,22 @@ procedure Ansi2UCS4Move(source:pchar;var dest:UCS4String;len:SizeInt);
|
|||||||
outleft:=outlength*4;
|
outleft:=outlength*4;
|
||||||
while iconv(iconv_ansi2ucs4,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
|
while iconv(iconv_ansi2ucs4,@srcpos,psize(@len),@destpos,@outleft)=size_t(-1) do
|
||||||
begin
|
begin
|
||||||
case fpgetCerrno of
|
err:=fpgetCerrno;
|
||||||
|
case err of
|
||||||
|
ESysEINVAL,
|
||||||
|
ESysEILSEQ:
|
||||||
|
begin
|
||||||
|
{ skip and set to '?' }
|
||||||
|
inc(srcpos);
|
||||||
|
dec(len);
|
||||||
|
plongint(destpos)^:=longint('?');
|
||||||
|
inc(destpos,4);
|
||||||
|
dec(outleft,4);
|
||||||
|
{ reset }
|
||||||
|
iconv(iconv_ansi2ucs4,@mynil,@my0,@mynil,@my0);
|
||||||
|
if err=ESysEINVAL then
|
||||||
|
break;
|
||||||
|
end;
|
||||||
ESysE2BIG:
|
ESysE2BIG:
|
||||||
begin
|
begin
|
||||||
outoffset:=destpos-pchar(dest);
|
outoffset:=destpos-pchar(dest);
|
||||||
@ -375,9 +670,10 @@ begin
|
|||||||
CompareTextWideStringProc:=@CompareTextWideString;
|
CompareTextWideStringProc:=@CompareTextWideString;
|
||||||
{
|
{
|
||||||
CharLengthPCharProc
|
CharLengthPCharProc
|
||||||
|
}
|
||||||
UpperAnsiStringProc
|
UpperAnsiStringProc:=@UpperAnsiString;
|
||||||
LowerAnsiStringProc
|
LowerAnsiStringProc:=@LowerAnsiString;
|
||||||
|
{
|
||||||
CompareStrAnsiStringProc
|
CompareStrAnsiStringProc
|
||||||
CompareTextAnsiStringProc
|
CompareTextAnsiStringProc
|
||||||
}
|
}
|
||||||
|
339
tests/test/twide6.pp
Normal file
339
tests/test/twide6.pp
Normal file
@ -0,0 +1,339 @@
|
|||||||
|
{$codepage utf-8}
|
||||||
|
uses
|
||||||
|
{$ifdef unix}
|
||||||
|
cwstring,
|
||||||
|
{$endif}
|
||||||
|
sysutils;
|
||||||
|
|
||||||
|
|
||||||
|
{ normal upper case testing }
|
||||||
|
procedure testupper;
|
||||||
|
var
|
||||||
|
s: ansistring;
|
||||||
|
w1,w2,w3,w4: widestring;
|
||||||
|
i: longint;
|
||||||
|
begin
|
||||||
|
w1:='aé'#0'èàł'#$d87e#$dc04;
|
||||||
|
w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
|
||||||
|
{$ifdef print}
|
||||||
|
// the utf-8 output can confuse the testsuite parser
|
||||||
|
writeln('original: ',w1);
|
||||||
|
writeln('original upper: ',w2);
|
||||||
|
{$endif print}
|
||||||
|
s:=w1;
|
||||||
|
w3:=s;
|
||||||
|
w4:=AnsiUpperCase(s);
|
||||||
|
{ filter out unsupported characters }
|
||||||
|
for i:=1 to length(w3) do
|
||||||
|
if w3[i]='?' then
|
||||||
|
begin
|
||||||
|
w2[i]:='?';
|
||||||
|
w1[i]:='?';
|
||||||
|
end;
|
||||||
|
w1:=wideuppercase(w1);
|
||||||
|
{$ifdef print}
|
||||||
|
writeln('wideupper: ',w1);
|
||||||
|
writeln('ansiupper: ',w4);
|
||||||
|
{$endif print}
|
||||||
|
if (w1 <> w2) then
|
||||||
|
halt(1);
|
||||||
|
if (w4 <> w2) then
|
||||||
|
halt(2);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
{ normal lower case testing }
|
||||||
|
procedure testlower;
|
||||||
|
var
|
||||||
|
s: ansistring;
|
||||||
|
w1,w2,w3,w4: widestring;
|
||||||
|
i: longint;
|
||||||
|
begin
|
||||||
|
w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04;
|
||||||
|
w2:='aé'#0'èàł'#$d87e#$dc04;
|
||||||
|
{$ifdef print}
|
||||||
|
// the utf-8 output can confuse the testsuite parser
|
||||||
|
writeln('original: ',w1);
|
||||||
|
writeln('original lower: ',w2);
|
||||||
|
{$endif print}
|
||||||
|
s:=w1;
|
||||||
|
w3:=s;
|
||||||
|
w4:=AnsiLowerCase(s);
|
||||||
|
{ filter out unsupported characters }
|
||||||
|
for i:=1 to length(w3) do
|
||||||
|
if w3[i]='?' then
|
||||||
|
begin
|
||||||
|
w2[i]:='?';
|
||||||
|
w1[i]:='?';
|
||||||
|
end;
|
||||||
|
w1:=widelowercase(w1);
|
||||||
|
{$ifdef print}
|
||||||
|
writeln('widelower: ',w1);
|
||||||
|
writeln('ansilower: ',w4);
|
||||||
|
{$endif print}
|
||||||
|
if (w1 <> w2) then
|
||||||
|
halt(3);
|
||||||
|
if (w4 <> w2) then
|
||||||
|
halt(4);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{ upper case testing with a missing utf-16 pair at the end }
|
||||||
|
procedure testupperinvalid;
|
||||||
|
var
|
||||||
|
s: ansistring;
|
||||||
|
w1,w2,w3,w4: widestring;
|
||||||
|
i: longint;
|
||||||
|
begin
|
||||||
|
{ missing utf-16 pair at end }
|
||||||
|
w1:='aé'#0'èàł'#$d87e;
|
||||||
|
w2:='AÉ'#0'ÈÀŁ'#$d87e;
|
||||||
|
{$ifdef print}
|
||||||
|
// the utf-8 output can confuse the testsuite parser
|
||||||
|
writeln('original: ',w1);
|
||||||
|
writeln('original upper: ',w2);
|
||||||
|
{$endif print}
|
||||||
|
s:=w1;
|
||||||
|
w3:=s;
|
||||||
|
w4:=AnsiUpperCase(s);
|
||||||
|
{ filter out unsupported characters }
|
||||||
|
for i:=1 to length(w3) do
|
||||||
|
if w3[i]='?' then
|
||||||
|
begin
|
||||||
|
w2[i]:='?';
|
||||||
|
w1[i]:='?';
|
||||||
|
end;
|
||||||
|
w1:=wideuppercase(w1);
|
||||||
|
{$ifdef print}
|
||||||
|
writeln('wideupper: ',w1);
|
||||||
|
writeln('ansiupper: ',w4);
|
||||||
|
{$endif print}
|
||||||
|
if (w1 <> w2) then
|
||||||
|
halt(5);
|
||||||
|
if (w4 <> w2) then
|
||||||
|
halt(6);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
{ lower case testing with a missing utf-16 pair at the end }
|
||||||
|
procedure testlowerinvalid;
|
||||||
|
var
|
||||||
|
s: ansistring;
|
||||||
|
w1,w2,w3,w4: widestring;
|
||||||
|
i: longint;
|
||||||
|
begin
|
||||||
|
{ missing utf-16 pair at end}
|
||||||
|
w1:='AÉ'#0'ÈÀŁ'#$d87e;
|
||||||
|
w2:='aé'#0'èàł'#$d87e;
|
||||||
|
{$ifdef print}
|
||||||
|
// the utf-8 output can confuse the testsuite parser
|
||||||
|
writeln('original: ',w1);
|
||||||
|
writeln('original lower: ',w2);
|
||||||
|
{$endif print}
|
||||||
|
s:=w1;
|
||||||
|
w3:=s;
|
||||||
|
w4:=AnsiLowerCase(s);
|
||||||
|
{ filter out unsupported characters }
|
||||||
|
for i:=1 to length(w3) do
|
||||||
|
if w3[i]='?' then
|
||||||
|
begin
|
||||||
|
w2[i]:='?';
|
||||||
|
w1[i]:='?';
|
||||||
|
end;
|
||||||
|
w1:=widelowercase(w1);
|
||||||
|
{$ifdef print}
|
||||||
|
writeln('widelower: ',w1);
|
||||||
|
writeln('ansilower: ',w4);
|
||||||
|
{$endif print}
|
||||||
|
if (w1 <> w2) then
|
||||||
|
halt(7);
|
||||||
|
if (w4 <> w2) then
|
||||||
|
halt(8);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
{ upper case testing with a missing utf-16 pair at the end, followed by a normal char }
|
||||||
|
procedure testupperinvalid1;
|
||||||
|
var
|
||||||
|
s: ansistring;
|
||||||
|
w1,w2,w3,w4: widestring;
|
||||||
|
i: longint;
|
||||||
|
begin
|
||||||
|
{ missing utf-16 pair at end with char after it}
|
||||||
|
w1:='aé'#0'èàł'#$d87e'j';
|
||||||
|
w2:='AÉ'#0'ÈÀŁ'#$d87e'J';
|
||||||
|
{$ifdef print}
|
||||||
|
// the utf-8 output can confuse the testsuite parser
|
||||||
|
writeln('original: ',w1);
|
||||||
|
writeln('original upper: ',w2);
|
||||||
|
{$endif print}
|
||||||
|
s:=w1;
|
||||||
|
w3:=s;
|
||||||
|
w4:=AnsiUpperCase(s);
|
||||||
|
{ filter out unsupported characters }
|
||||||
|
for i:=1 to length(w3) do
|
||||||
|
if w3[i]='?' then
|
||||||
|
begin
|
||||||
|
w2[i]:='?';
|
||||||
|
w1[i]:='?';
|
||||||
|
end;
|
||||||
|
w1:=wideuppercase(w1);
|
||||||
|
{$ifdef print}
|
||||||
|
writeln('wideupper: ',w1);
|
||||||
|
writeln('ansiupper: ',w4);
|
||||||
|
{$endif print}
|
||||||
|
if (w1 <> w2) then
|
||||||
|
halt(9);
|
||||||
|
if (w4 <> w2) then
|
||||||
|
halt(10);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
{ lower case testing with a missing utf-16 pair at the end, followed by a normal char }
|
||||||
|
procedure testlowerinvalid1;
|
||||||
|
var
|
||||||
|
s: ansistring;
|
||||||
|
w1,w2,w3,w4: widestring;
|
||||||
|
i: longint;
|
||||||
|
begin
|
||||||
|
{ missing utf-16 pair at end with char after it}
|
||||||
|
w1:='AÉ'#0'ÈÀŁ'#$d87e'J';
|
||||||
|
w2:='aé'#0'èàł'#$d87e'j';
|
||||||
|
{$ifdef print}
|
||||||
|
// the utf-8 output can confuse the testsuite parser
|
||||||
|
writeln('original: ',w1);
|
||||||
|
writeln('original lower: ',w2);
|
||||||
|
{$endif print}
|
||||||
|
s:=w1;
|
||||||
|
w3:=s;
|
||||||
|
w4:=AnsiLowerCase(s);
|
||||||
|
{ filter out unsupported characters }
|
||||||
|
for i:=1 to length(w3) do
|
||||||
|
if w3[i]='?' then
|
||||||
|
begin
|
||||||
|
w2[i]:='?';
|
||||||
|
w1[i]:='?';
|
||||||
|
end;
|
||||||
|
w1:=widelowercase(w1);
|
||||||
|
{$ifdef print}
|
||||||
|
writeln('widelower: ',w1);
|
||||||
|
writeln('ansilower: ',w4);
|
||||||
|
{$endif print}
|
||||||
|
if (w1 <> w2) then
|
||||||
|
halt(11);
|
||||||
|
if (w4 <> w2) then
|
||||||
|
halt(12);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
{ upper case testing with corrupting the utf-8 string after conversion }
|
||||||
|
procedure testupperinvalid2;
|
||||||
|
var
|
||||||
|
s: ansistring;
|
||||||
|
w1,w2,w3,w4: widestring;
|
||||||
|
i: longint;
|
||||||
|
begin
|
||||||
|
w1:='aé'#0'èàł'#$d87e#$dc04'ö';
|
||||||
|
w2:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
|
||||||
|
{$ifdef print}
|
||||||
|
// the utf-8 output can confuse the testsuite parser
|
||||||
|
writeln('original: ',w1);
|
||||||
|
writeln('original upper: ',w2);
|
||||||
|
{$endif print}
|
||||||
|
s:=w1;
|
||||||
|
{ truncate the last utf-8 character }
|
||||||
|
setlength(s,length(s)-1);
|
||||||
|
w3:=s;
|
||||||
|
{ adjust checking values for new length due to corruption }
|
||||||
|
if length(w3)<>length(w2) then
|
||||||
|
begin
|
||||||
|
setlength(w2,length(w3));
|
||||||
|
setlength(w1,length(w3));
|
||||||
|
end;
|
||||||
|
w4:=AnsiUpperCase(s);
|
||||||
|
{ filter out unsupported characters }
|
||||||
|
for i:=1 to length(w3) do
|
||||||
|
if w3[i]='?' then
|
||||||
|
begin
|
||||||
|
w2[i]:='?';
|
||||||
|
w1[i]:='?';
|
||||||
|
end;
|
||||||
|
w1:=wideuppercase(w1);
|
||||||
|
{$ifdef print}
|
||||||
|
writeln('wideupper: ',w1);
|
||||||
|
writeln('ansiupper: ',w4);
|
||||||
|
{$endif print}
|
||||||
|
if (w1 <> w2) then
|
||||||
|
halt(13);
|
||||||
|
if (w4 <> w2) then
|
||||||
|
halt(14);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
{ lower case testing with corrupting the utf-8 string after conversion }
|
||||||
|
procedure testlowerinvalid2;
|
||||||
|
var
|
||||||
|
s: ansistring;
|
||||||
|
w1,w2,w3,w4: widestring;
|
||||||
|
i: longint;
|
||||||
|
begin
|
||||||
|
w1:='AÉ'#0'ÈÀŁ'#$d87e#$dc04'Ö';
|
||||||
|
w2:='aé'#0'èàł'#$d87e#$dc04'ö';
|
||||||
|
{$ifdef print}
|
||||||
|
// the utf-8 output can confuse the testsuite parser
|
||||||
|
writeln('original: ',w1);
|
||||||
|
writeln('original lower: ',w2);
|
||||||
|
{$endif print}
|
||||||
|
s:=w1;
|
||||||
|
{ truncate the last utf-8 character }
|
||||||
|
setlength(s,length(s)-1);
|
||||||
|
w3:=s;
|
||||||
|
{ adjust checking values for new length due to corruption }
|
||||||
|
if length(w3)<>length(w2) then
|
||||||
|
begin
|
||||||
|
setlength(w2,length(w3));
|
||||||
|
setlength(w1,length(w3));
|
||||||
|
end;
|
||||||
|
w4:=AnsiLowerCase(s);
|
||||||
|
{ filter out unsupported characters }
|
||||||
|
for i:=1 to length(w3) do
|
||||||
|
if w3[i]='?' then
|
||||||
|
begin
|
||||||
|
w2[i]:='?';
|
||||||
|
w1[i]:='?';
|
||||||
|
end;
|
||||||
|
w1:=widelowercase(w1);
|
||||||
|
{$ifdef print}
|
||||||
|
writeln('widelower: ',w1);
|
||||||
|
writeln('ansilower: ',w4);
|
||||||
|
{$endif print}
|
||||||
|
if (w1 <> w2) then
|
||||||
|
halt(15);
|
||||||
|
if (w4 <> w2) then
|
||||||
|
halt(16);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
begin
|
||||||
|
testupper;
|
||||||
|
writeln;
|
||||||
|
testlower;
|
||||||
|
writeln;
|
||||||
|
writeln;
|
||||||
|
testupperinvalid;
|
||||||
|
writeln;
|
||||||
|
testlowerinvalid;
|
||||||
|
writeln;
|
||||||
|
writeln;
|
||||||
|
testupperinvalid1;
|
||||||
|
writeln;
|
||||||
|
testlowerinvalid1;
|
||||||
|
writeln;
|
||||||
|
writeln;
|
||||||
|
testupperinvalid2;
|
||||||
|
writeln;
|
||||||
|
testlowerinvalid2;
|
||||||
|
end.
|
Loading…
Reference in New Issue
Block a user