mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-08-23 16:29:13 +02:00
lazutils: FindInvalidUTF8Codepoint: check if bigger than U+10FFFF
git-svn-id: trunk@65162 -
This commit is contained in:
parent
e828efa600
commit
c887c889e3
@ -2813,22 +2813,22 @@ function FindInvalidUTF8Codepoint(p: PChar; Count: PtrInt; StopOnNonUTF8: Boolea
|
|||||||
// return -1 if ok
|
// return -1 if ok
|
||||||
var
|
var
|
||||||
CharLen: Integer;
|
CharLen: Integer;
|
||||||
c: Char;
|
c: Byte;
|
||||||
begin
|
begin
|
||||||
if (p<>nil) then begin
|
if (p<>nil) then begin
|
||||||
Result:=0;
|
Result:=0;
|
||||||
while Result<Count do begin
|
while Result<Count do begin
|
||||||
c:=p^;
|
c:=ord(p^);
|
||||||
if ord(c)<%10000000 then begin
|
if c<%10000000 then begin
|
||||||
// regular single byte ASCII character (#0 is a character, this is Pascal ;)
|
// regular single byte ASCII character (#0 is a character, this is Pascal ;)
|
||||||
CharLen:=1;
|
CharLen:=1;
|
||||||
end else if ord(c)<=%11000001 then begin
|
end else if c<=%11000001 then begin
|
||||||
// single byte character, between valid UTF-8 encodings
|
// single byte character, between valid UTF-8 encodings
|
||||||
// %11000000 and %11000001 would encode #0..#127 as 2 bytes (overlong form), which is invalid and used for XSS attacks
|
// %11000000 and %11000001 would encode #0..#127 as 2 bytes (overlong form), which is invalid and used for XSS attacks
|
||||||
if StopOnNonUTF8 or (ord(c)>=192) then
|
if StopOnNonUTF8 or (c>=192) then
|
||||||
exit;
|
exit;
|
||||||
CharLen:=1;
|
CharLen:=1;
|
||||||
end else if ord(c)<=%11011111 then begin
|
end else if c<=%11011111 then begin
|
||||||
// could be 2 byte character (%110xxxxx %10xxxxxx)
|
// could be 2 byte character (%110xxxxx %10xxxxxx)
|
||||||
if (Result<Count-1)
|
if (Result<Count-1)
|
||||||
and ((ord(p[1]) and %11000000) = %10000000) then
|
and ((ord(p[1]) and %11000000) = %10000000) then
|
||||||
@ -2836,25 +2836,29 @@ begin
|
|||||||
else
|
else
|
||||||
exit; // missing following bytes
|
exit; // missing following bytes
|
||||||
end
|
end
|
||||||
else if ord(c)<=%11101111 then begin
|
else if c<=%11101111 then begin
|
||||||
// could be 3 byte character (%1110xxxx %10xxxxxx %10xxxxxx)
|
// could be 3 byte character (%1110xxxx %10xxxxxx %10xxxxxx)
|
||||||
if (Result<Count-2)
|
if (Result<Count-2)
|
||||||
and ((ord(p[1]) and %11000000) = %10000000)
|
and ((ord(p[1]) and %11000000) = %10000000)
|
||||||
and ((ord(p[2]) and %11000000) = %10000000) then begin
|
and ((ord(p[2]) and %11000000) = %10000000) then begin
|
||||||
if (ord(c)=%11100000) and (ord(p[1])<=%10011111) then
|
if (c=%11100000) and (ord(p[1])<=%10011111) then
|
||||||
exit; // XSS attack: 3 bytes are mapped to the 1 or 2 byte codes
|
exit; // XSS attack: 3 bytes are mapped to the 1 or 2 byte codes
|
||||||
CharLen:=3;
|
CharLen:=3;
|
||||||
end else
|
end else
|
||||||
exit; // missing following bytes
|
exit; // missing following bytes
|
||||||
end
|
end
|
||||||
else if ord(c)<=%11110111 then begin
|
else if c<=%11110111 then begin
|
||||||
// could be 4 byte character (%11110xxx %10xxxxxx %10xxxxxx %10xxxxxx)
|
// could be 4 byte character (%11110xxx %10xxxxxx %10xxxxxx %10xxxxxx)
|
||||||
if (Result<Count-3)
|
if (Result<Count-3)
|
||||||
and ((ord(p[1]) and %11000000) = %10000000)
|
and ((ord(p[1]) and %11000000) = %10000000)
|
||||||
and ((ord(p[2]) and %11000000) = %10000000)
|
and ((ord(p[2]) and %11000000) = %10000000)
|
||||||
and ((ord(p[3]) and %11000000) = %10000000) then begin
|
and ((ord(p[3]) and %11000000) = %10000000) then begin
|
||||||
if (ord(c)=%11110000) and (ord(p[1])<=%10001111) then
|
if (c=%11110000) and (ord(p[1])<=%10001111) then
|
||||||
exit; // XSS attack: 4 bytes are mapped to the 1-3 byte codes
|
exit; // XSS attack: 4 bytes are mapped to the 1-3 byte codes
|
||||||
|
if (c>%11110100) then
|
||||||
|
exit; // out of range U+10FFFF
|
||||||
|
if (c=%11110100) and (ord(p[1])>%10001111) then
|
||||||
|
exit; // out of range U+10FFFF
|
||||||
CharLen:=4;
|
CharLen:=4;
|
||||||
end else
|
end else
|
||||||
exit; // missing following bytes
|
exit; // missing following bytes
|
||||||
|
@ -92,6 +92,8 @@ begin
|
|||||||
t(UnicodeToUTF8($10000),-1,'unicode($10000)');
|
t(UnicodeToUTF8($10000),-1,'unicode($10000)');
|
||||||
t(UnicodeToUTF8($10900),-1,'unicode($10900)');
|
t(UnicodeToUTF8($10900),-1,'unicode($10900)');
|
||||||
t(UnicodeToUTF8($10ffff),-1,'unicode($10ffff)');
|
t(UnicodeToUTF8($10ffff),-1,'unicode($10ffff)');
|
||||||
|
t(#$F4#$8F#$BF#$BF,-1,'unicode($10ffff)');
|
||||||
|
t(#$F4#$90#$80#$80,0,'unicode($110000)');
|
||||||
t(#$c0#0,0,'invalid second byte of 2 byte');
|
t(#$c0#0,0,'invalid second byte of 2 byte');
|
||||||
t(#$e0#0,0,'invalid second byte of 3 byte');
|
t(#$e0#0,0,'invalid second byte of 3 byte');
|
||||||
t(#$e0#$80#0,0,'invalid third byte of 3 byte');
|
t(#$e0#$80#0,0,'invalid third byte of 3 byte');
|
||||||
@ -106,6 +108,7 @@ begin
|
|||||||
t(#$e0#$9f#$bf,0,'invalid: $7ff encoded as 3 byte');
|
t(#$e0#$9f#$bf,0,'invalid: $7ff encoded as 3 byte');
|
||||||
t(#$f0#$80#$80#$80,0,'invalid: 0 encoded as 4 byte');
|
t(#$f0#$80#$80#$80,0,'invalid: 0 encoded as 4 byte');
|
||||||
t(#$f0#$8f#$bf#$bf,0,'invalid: $ffff encoded as 4 byte');
|
t(#$f0#$8f#$bf#$bf,0,'invalid: $ffff encoded as 4 byte');
|
||||||
|
t(#$F7#$BF#$BF#$BF,0,'invalid 4 byte out of range');
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TTestLazUTF8.TestFindUnicodeToUTF8;
|
procedure TTestLazUTF8.TestFindUnicodeToUTF8;
|
||||||
|
Loading…
Reference in New Issue
Block a user