fpc/tests/webtbs/tw27665.pp
2015-03-14 21:46:45 +00:00

28 lines
841 B
ObjectPascal

{$mode objfpc}
{ Returns the number of bytes in the UTF-8 sequence introduced by firstbyte.

  firstbyte: the lead byte of an encoded code point.
  Result:    1 for ASCII bytes ($00..$7f) and for bytes that cannot start a
             multi-byte sequence (continuation bytes $80..$bf, $fe and $ff);
             otherwise the number of leading 1 bits in firstbyte (2..6).
}
function UTF8CodePointLength(firstbyte: byte): SizeInt;
var
  firstzerobit: SizeInt;
begin
  result:=1;
  { bsr searches for the leftmost 1 bit. We are interested in the
    leftmost 0 bit, so first invert the value
  }
  firstzerobit:=BsrByte(not(firstbyte));
  { Cases that keep the default length of 1:
     - no zero bit at all ($ff): the inverted value is 0, which this code
       expects BsrByte to report as 255; $ff cannot appear in a
       UTF-8-encoded string
     - the first zero bit is the rightmost bit (bit 0, i.e. $fe): in the
       worst case bit 1 has to be zero, so this is invalid as well
     - the first zero bit is bit 7: a plain ASCII byte, which is a complete
       single-byte sequence (7-7 would otherwise yield a length of 0)
  }
  if (firstzerobit=0) or (firstzerobit=255) or (firstzerobit=7) then
    exit;
  { the number of bytes belonging to this code point is
    7-(pos first 0-bit). A continuation byte (first zero bit at position 6)
    therefore also yields 1.
  }
  result:=7-firstzerobit;
end;
begin
  { Program entry point: print the sequence length that
    UTF8CodePointLength reports for the ASCII space character. }
  WriteLn(
    UTF8CodePointLength(Ord(' '))
  );
end.