mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-07 22:27:57 +02:00

determine the length of a multi-byte character. The return values are defined to be the same as those of POSIX' mblen: -1 = invalid/incomplete sequence, 0 = #0, > 0 = length of sequence in bytes.
+ default implementation for widestringmanager.codepointlengthproc (assumes all code points have length 1) and Unix implementation (based on mb(r)len); Windows implementation is still required
* replaced default implementation of widestringmanager.CharLengthPCharProc with strlen() of the input instead of an error (correct if all code points have length 1, still needs Windows implementation)
+ implemented fpc_text_read_{wide,unicode}str() and fpc_text_read_widechar() (mantis #18163); fpc_text_read_widechar() uses the new widestringmanager.codepointlengthproc()
+ unicodestring support for readstr/writestr
* fixed declaration of fpc_Write_Text_UnicodeStr (unicodestring instead of widestring parameter)
* extended test/twide*.pp tests to test the new/fixed functionality

git-svn-id: trunk@16533 -
58 lines
1.4 KiB
ObjectPascal
58 lines
1.4 KiB
ObjectPascal
{$codepage utf-8}

// Regression test for wide/unicode string literals that end in a UTF-16
// surrogate pair, and for the widestring <-> UCS4String conversions.
//
// Exit codes (the program ends normally when every check passes):
//   1 - widestring literal has the wrong length or contents
//   2 - WideStringToUCS4String produced the wrong length or code points
//   3 - UCS4StringToWideString did not reproduce the original widestring
//   4 - unicodestring literal has the wrong length or contents

var
  ws: widestring;
  uns: unicodestring;
  us: UCS4String;
begin
  // the compiler does not yet support characters which require
  // a surrogate pair in utf-16
  // ws:='鳣ćçŹ你';
  // so write the last character directly using a utf-16 surrogate pair
  // ($d87e/$dc04 is the pair for code point U+2F804, see the check on us[6])
  ws:='鳣ćçŹ'#$d87e#$dc04;

  // 6 single-unit characters + 2 surrogate code units = 8 UTF-16 code units
  if (length(ws)<>8) or
     (ws[1]<>'é') or
     (ws[2]<>'ł') or
     (ws[3]<>'Ł') or
     (ws[4]<>'ć') or
     (ws[5]<>'ç') or
     (ws[6]<>'Ź') or
     (ws[7]<>#$d87e) or
     (ws[8]<>#$dc04) then
    halt(1);

  // UTF-16 -> UCS4: the surrogate pair must collapse into one code point.
  // The expected length is still 8 because the result is checked to carry a
  // terminating 0 element: 6 + 1 (U+2F804) + 1 (terminator). Note that the
  // UCS4String is indexed from 0, unlike the strings above.
  us:=WideStringToUCS4String(ws);
  if (length(us)<>8) or
     (us[0]<>UCS4Char(widechar('é'))) or
     (us[1]<>UCS4Char(widechar('ł'))) or
     (us[2]<>UCS4Char(widechar('Ł'))) or
     (us[3]<>UCS4Char(widechar('ć'))) or
     (us[4]<>UCS4Char(widechar('ç'))) or
     (us[5]<>UCS4Char(widechar('Ź'))) or
     (us[6]<>UCS4Char($2F804)) or
     (us[7]<>UCS4Char(0)) then
    halt(2);

  // UCS4 -> UTF-16: the round trip must restore the surrogate pair and must
  // not count the terminating 0 element as a character.
  ws:=UCS4StringToWideString(us);
  if (length(ws)<>8) or
     (ws[1]<>'é') or
     (ws[2]<>'ł') or
     (ws[3]<>'Ł') or
     (ws[4]<>'ć') or
     (ws[5]<>'ç') or
     (ws[6]<>'Ź') or
     (ws[7]<>#$d87e) or
     (ws[8]<>#$dc04) then
    halt(3);

  // Same literal checks as for the widestring, now with a unicodestring.
  uns:='鳣ćçŹ'#$d87e#$dc04;
  if (length(uns)<>8) or
     (uns[1]<>'é') or
     (uns[2]<>'ł') or
     (uns[3]<>'Ł') or
     (uns[4]<>'ć') or
     (uns[5]<>'ç') or
     (uns[6]<>'Ź') or
     (uns[7]<>#$d87e) or
     (uns[8]<>#$dc04) then
    halt(4);
end.