mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-11-26 16:27:23 +01:00
lazutils: UTF8CharacterToUnicode: check for intersecting ranges
git-svn-id: trunk@35038 -
This commit is contained in:
parent
26d2cf7352
commit
64e2cd6b98
@ -239,6 +239,11 @@ begin
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal;
|
function UTF8CharacterToUnicode(p: PChar; out CharLen: integer): Cardinal;
|
||||||
|
{ if p=nil then CharLen=0 otherwise CharLen>0
|
||||||
|
If there is an encoding error the Result is undefined.
|
||||||
|
Use UTF8FixBroken to fix UTF-8 encoding.
|
||||||
|
It does not check if the codepoint is defined in the Unicode tables.
|
||||||
|
}
|
||||||
begin
|
begin
|
||||||
if p<>nil then begin
|
if p<>nil then begin
|
||||||
if ord(p^)<%11000000 then begin
|
if ord(p^)<%11000000 then begin
|
||||||
@ -246,56 +251,61 @@ begin
|
|||||||
Result:=ord(p^);
|
Result:=ord(p^);
|
||||||
CharLen:=1;
|
CharLen:=1;
|
||||||
end
|
end
|
||||||
else begin
|
else if ((ord(p^) and %11100000) = %11000000) then begin
|
||||||
// multi byte
|
// starts with %110 => could be double byte character
|
||||||
if ((ord(p^) and %11100000) = %11000000) then begin
|
if (ord(p[1]) and %11000000) = %10000000 then begin
|
||||||
// starts with %110 => could be double byte character
|
CharLen:=2;
|
||||||
if (ord(p[1]) and %11000000) = %10000000 then begin
|
Result:=((ord(p^) and %00011111) shl 6)
|
||||||
Result:=((ord(p^) and %00011111) shl 6)
|
or (ord(p[1]) and %00111111);
|
||||||
or (ord(p[1]) and %00111111);
|
if Result<(1 shl 7) then begin
|
||||||
CharLen:=2;
|
// wrong encoded, could be an XSS attack
|
||||||
end else begin
|
Result:=0;
|
||||||
Result:=ord(p^);
|
|
||||||
CharLen:=1;
|
|
||||||
end;
|
end;
|
||||||
end
|
end else begin
|
||||||
else if ((ord(p^) and %11110000) = %11100000) then begin
|
|
||||||
// starts with %1110 => could be triple byte character
|
|
||||||
if ((ord(p[1]) and %11000000) = %10000000)
|
|
||||||
and ((ord(p[2]) and %11000000) = %10000000) then begin
|
|
||||||
Result:=((ord(p^) and %00011111) shl 12)
|
|
||||||
or ((ord(p[1]) and %00111111) shl 6)
|
|
||||||
or (ord(p[2]) and %00111111);
|
|
||||||
CharLen:=3;
|
|
||||||
end else begin
|
|
||||||
Result:=ord(p^);
|
|
||||||
CharLen:=1;
|
|
||||||
end;
|
|
||||||
end
|
|
||||||
else if ((ord(p^) and %11111000) = %11110000) then begin
|
|
||||||
// starts with %11110 => could be 4 byte character
|
|
||||||
if ((ord(p[1]) and %11000000) = %10000000)
|
|
||||||
and ((ord(p[2]) and %11000000) = %10000000)
|
|
||||||
and ((ord(p[3]) and %11000000) = %10000000) then begin
|
|
||||||
Result:=((ord(p^) and %00001111) shl 18)
|
|
||||||
or ((ord(p[1]) and %00111111) shl 12)
|
|
||||||
or ((ord(p[2]) and %00111111) shl 6)
|
|
||||||
or (ord(p[3]) and %00111111);
|
|
||||||
CharLen:=4;
|
|
||||||
end else begin
|
|
||||||
Result:=ord(p^);
|
|
||||||
CharLen:=1;
|
|
||||||
end;
|
|
||||||
end
|
|
||||||
else begin
|
|
||||||
// invalid character
|
|
||||||
Result:=ord(p^);
|
Result:=ord(p^);
|
||||||
CharLen:=1;
|
CharLen:=1;
|
||||||
end;
|
end;
|
||||||
if (CharLen>1) and (Result<128) then begin
|
end
|
||||||
// invalid character
|
else if ((ord(p^) and %11110000) = %11100000) then begin
|
||||||
|
// starts with %1110 => could be triple byte character
|
||||||
|
if ((ord(p[1]) and %11000000) = %10000000)
|
||||||
|
and ((ord(p[2]) and %11000000) = %10000000) then begin
|
||||||
|
CharLen:=3;
|
||||||
|
Result:=((ord(p^) and %00011111) shl 12)
|
||||||
|
or ((ord(p[1]) and %00111111) shl 6)
|
||||||
|
or (ord(p[2]) and %00111111);
|
||||||
|
if Result<(1 shl 11) then begin
|
||||||
|
// wrong encoded, could be an XSS attack
|
||||||
|
Result:=0;
|
||||||
|
end;
|
||||||
|
end else begin
|
||||||
Result:=ord(p^);
|
Result:=ord(p^);
|
||||||
|
CharLen:=1;
|
||||||
end;
|
end;
|
||||||
|
end
|
||||||
|
else if ((ord(p^) and %11111000) = %11110000) then begin
|
||||||
|
// starts with %11110 => could be 4 byte character
|
||||||
|
if ((ord(p[1]) and %11000000) = %10000000)
|
||||||
|
and ((ord(p[2]) and %11000000) = %10000000)
|
||||||
|
and ((ord(p[3]) and %11000000) = %10000000) then begin
|
||||||
|
CharLen:=4;
|
||||||
|
Result:=((ord(p^) and %00001111) shl 18)
|
||||||
|
or ((ord(p[1]) and %00111111) shl 12)
|
||||||
|
or ((ord(p[2]) and %00111111) shl 6)
|
||||||
|
or (ord(p[3]) and %00111111);
|
||||||
|
if Result<(1 shl 16) then begin
|
||||||
|
// wrong encoded, could be an XSS attack
|
||||||
|
Result:=0;
|
||||||
|
end;
|
||||||
|
end else begin
|
||||||
|
Result:=ord(p^);
|
||||||
|
CharLen:=1;
|
||||||
|
end;
|
||||||
|
end
|
||||||
|
else begin
|
||||||
|
// invalid character
|
||||||
|
Result:=ord(p^);
|
||||||
|
CharLen:=1;
|
||||||
end;
|
end;
|
||||||
end else begin
|
end else begin
|
||||||
Result:=0;
|
Result:=0;
|
||||||
@ -484,7 +494,7 @@ begin
|
|||||||
if ((ord(p[1]) and %11000000) = %10000000) then begin
|
if ((ord(p[1]) and %11000000) = %10000000) then begin
|
||||||
c:=((ord(p^) and %00011111) shl 6);
|
c:=((ord(p^) and %00011111) shl 6);
|
||||||
//or (ord(p[1]) and %00111111);
|
//or (ord(p[1]) and %00111111);
|
||||||
if c<128 then
|
if c<(1 shl 7) then
|
||||||
p^:=' '
|
p^:=' '
|
||||||
else
|
else
|
||||||
inc(p,2)
|
inc(p,2)
|
||||||
@ -499,7 +509,7 @@ begin
|
|||||||
c:=((ord(p^) and %00011111) shl 12)
|
c:=((ord(p^) and %00011111) shl 12)
|
||||||
or ((ord(p[1]) and %00111111) shl 6);
|
or ((ord(p[1]) and %00111111) shl 6);
|
||||||
//or (ord(p[2]) and %00111111);
|
//or (ord(p[2]) and %00111111);
|
||||||
if c<128 then
|
if c<(1 shl 11) then
|
||||||
p^:=' '
|
p^:=' '
|
||||||
else
|
else
|
||||||
inc(p,3);
|
inc(p,3);
|
||||||
@ -515,7 +525,7 @@ begin
|
|||||||
or ((ord(p[1]) and %00111111) shl 12)
|
or ((ord(p[1]) and %00111111) shl 12)
|
||||||
or ((ord(p[2]) and %00111111) shl 6);
|
or ((ord(p[2]) and %00111111) shl 6);
|
||||||
//or (ord(p[3]) and %00111111);
|
//or (ord(p[3]) and %00111111);
|
||||||
if c<128 then
|
if c<(1 shl 16) then
|
||||||
p^:=' '
|
p^:=' '
|
||||||
else
|
else
|
||||||
inc(p,4)
|
inc(p,4)
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user