program tw29585; {$IFDEF FPC} {$MODE OBJFPC}{$H+} {$ENDIF} {$ifdef CPUJVM} uses {$ifdef java}jdk15{$else}androidr14{$endif}; {$macro on} {$define writeln:=jlsystem.fout.println} {$define write:=jlsystem.fout.print} {$endif} {$IFNDEF FPC} type tsystemcodepage = word; {$ENDIF} Type tstr1251 = type ansistring(1251); const utf8data: array[0..10] of ansichar = #$C3#$A9#$C2#$BA#$C3#$AE#$C5#$93#$E2#$88#$82; utf8data_in_utf16: unicodestring = #$00E9#$00BA#$00EE#$0153#$2202; invalidutf8data: array[0..3] of ansichar = #$80#$81#$82#$83; invalidutf8data_utf_16a: unicodestring = '????'; invalidutf8data_utf_16b: unicodestring = #$fffd#$fffd#$fffd#$fffd; function inttohex(l: longint; len: longint): unicodestring; var i: longint; const hexchars: array[0..15] of ansichar = ('0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'); begin result:=''; for i:=1 to len do begin result:=hexchars[l and $f]+result; l:=l shr 4; end; end; procedure error(l: longint; const u: unicodestring); var i: longint; begin write('error for test '); writeln(l); write('result: '); for i:=low(u) to high(u) do begin write('#$'); write(inttohex(ord(u[i]),4)); end; writeln; halt(l); end; procedure initarray(p: pbyte; const data: array of ansichar); var i: longint; begin for i:=low(data) to high(data) do p[i]:=ord(data[i]); end; procedure initstr(var s: rawbytestring; cp: tsystemcodepage; const data: array of ansichar); overload; var i: longint; begin setlength(s,length(data)); setcodepage(s,cp,false); for i:=low(data) to high(data) do s[i+1]:=data[i]; end; procedure initstr(var s: shortstring; const data: array of ansichar); overload; var i: longint; begin setlength(s,length(data)); for i:=low(data) to high(data) do s[i+1]:=data[i]; end; procedure testvalidutf8; var s1251: tstr1251; rs: rawbytestring; utf8: utf8string; s: ansistring; ss: shortstring; ba: array[low(utf8data)..high(utf8data)] of byte; bc: array[low(utf8data)..high(utf8data)] of ansichar; bcc: array[low(utf8data)..high(utf8data)+1] of ansichar; w: unicodestring; begin initstr(rawbytestring(s1251),1251,utf8data); w:=UTF8ToString(s1251); if w<>utf8data_in_utf16 then error(1,w); initstr(rs,0,utf8data); w:=UTF8ToString(rs); if w<>utf8data_in_utf16 then error(2,w); initstr(rawbytestring(utf8),CP_UTF8,utf8data); w:=UTF8ToString(utf8); if w<>utf8data_in_utf16 then error(3,w); initstr(rawbytestring(s),defaultsystemcodepage,utf8data); w:=UTF8ToString(s); if w<>utf8data_in_utf16 then error(4,w); initstr(ss,utf8data); w:=UTF8ToString(ss); if w<>utf8data_in_utf16 then error(5,w); initarray(@bcc[0],utf8data); bcc[high(bcc)]:=#0; w:=UTF8ToString(@bcc[0]); if w<>utf8data_in_utf16 then error(6,w); {$ifndef cpujvm} initarray(@ba[0],utf8data); w:=UTF8ToString(ba); if w<>utf8data_in_utf16 then error(7,w); initarray(@bc[0],utf8data); w:=UTF8ToString(bc); if w<>utf8data_in_utf16 then error(8,w); {$endif not cpujvm} end; procedure testinvalidutf8; var s1251: tstr1251; rs: rawbytestring; utf8: utf8string; s: ansistring; ss: shortstring; ba: array[low(invalidutf8data)..high(invalidutf8data)] of byte; bc: array[low(invalidutf8data)..high(invalidutf8data)] of ansichar; bcc: array[low(invalidutf8data)..high(invalidutf8data)+1] of ansichar; w: unicodestring; begin initstr(rawbytestring(s1251),1251,invalidutf8data); w:=UTF8ToString(s1251); if (w<>invalidutf8data_utf_16a) and (w<>invalidutf8data_utf_16b) then error(11,w); initstr(rs,0,invalidutf8data); w:=UTF8ToString(rs); if (w<>invalidutf8data_utf_16a) and (w<>invalidutf8data_utf_16b) then error(12,w); initstr(rawbytestring(utf8),CP_UTF8,invalidutf8data); w:=UTF8ToString(utf8); if (w<>invalidutf8data_utf_16a) and (w<>invalidutf8data_utf_16b) then error(13,w); initstr(rawbytestring(s),defaultsystemcodepage,invalidutf8data); w:=UTF8ToString(s); if (w<>invalidutf8data_utf_16a) and (w<>invalidutf8data_utf_16b) then error(14,w); initstr(ss,invalidutf8data); w:=UTF8ToString(ss); if (w<>invalidutf8data_utf_16a) and (w<>invalidutf8data_utf_16b) then error(15,w); initarray(@bcc[0],invalidutf8data); bcc[high(bcc)]:=#0; w:=UTF8ToString(@bcc[0]); if (w<>invalidutf8data_utf_16a) and (w<>invalidutf8data_utf_16b) then error(16,w); {$ifndef cpujvm} initarray(@ba[0],invalidutf8data); w:=UTF8ToString(ba); if (w<>invalidutf8data_utf_16a) and (w<>invalidutf8data_utf_16b) then error(17,w); initarray(@bc[0],invalidutf8data); w:=UTF8ToString(bc); if (w<>invalidutf8data_utf_16a) and (w<>invalidutf8data_utf_16b) then error(18,w); {$endif not cpujvm} end; begin testvalidutf8; testinvalidutf8; end.