* Merging revisions r45350 from trunk:

------------------------------------------------------------------------
    r45350 | michael | 2020-05-13 09:19:03 +0200 (Wed, 13 May 2020) | 1 line
    
    * Patch from Noel Duffy to improve IPV6 address parsing (bug ID tw37013)
    ------------------------------------------------------------------------

git-svn-id: branches/fixes_3_2@46569 -
This commit is contained in:
michael 2020-08-23 09:07:57 +00:00
parent 2b75bf162b
commit f65de92ee9
3 changed files with 507 additions and 47 deletions

1
.gitattributes vendored
View File

@ -17635,6 +17635,7 @@ tests/webtbs/tw3694.pp svneol=native#text/plain
tests/webtbs/tw3695.pp svneol=native#text/plain
tests/webtbs/tw3697.pp svneol=native#text/plain
tests/webtbs/tw3700.pp svneol=native#text/plain
tests/webtbs/tw37013.pp svneol=native#text/plain
tests/webtbs/tw3708.pp svneol=native#text/plain
tests/webtbs/tw37095.pp svneol=native#text/plain
tests/webtbs/tw37095d/uw37095.pp svneol=native#text/plain

View File

@ -442,56 +442,263 @@ begin
SetLength(HostAddrToStr6, Length(HostAddrToStr6)-1);
end;
function StrToHostAddr6(IP: String): in6_addr;
type
{ Classes of input characters (and of the tokens built from them).
  cEndStr is reported once the read position is outside the input string;
  cUnknown covers every character that is not a hex digit, ':' or '.'. }
TCharClass = (cHexDigit, cColon, cDot, cUnknown, cEndStr);
{ Parser state: pmIPv6 while hextets are being read, pmIPv4 once the first
  '.' has been seen and the remaining digits are treated as IPv4 octets. }
TParserMode = (pmIPv6, pmIPv4);
function StrToHostAddr6(IP : String) : TIn6_addr;
Var Part : String;
IPv6 : TIn6_addr;
P,J : Integer;
W : Word;
Index : Integer;
ZeroAt : Integer;
Begin
FillChar(IPv6,SizeOf(IPv6),0);
FillChar(StrToHostAddr6,SizeOf(TIn6_addr),0);
{ Every 16-bit block is converted at its own and stored into Result. When }
{ the '::' zero-spacer is found, its location is stored. Afterwards the }
{ address is shifted and zero-filled. }
Index := 0; ZeroAt := -1;
J := 0;
P := Pos(':',IP);
While (P > 0) and (Length(IP) > 0) and (Index < 8) do
Begin
Part := '$'+Copy(IP,1,P-1);
Delete(IP,1,P);
if Length(Part) > 1 then { is there a digit after the '$'? }
Val(Part,W,J)
else W := 0;
IPv6.u6_addr16[Index] := HtoNS(W);
if J <> 0 then
Begin
FillChar(IPv6,SizeOf(IPv6),0);
Exit(IPV6);
End;
if IP[1] = ':' then
Begin
ZeroAt := Index;
Delete(IP,1,1);
End;
Inc(Index);
P := Pos(':',IP); if P = 0 then P := Length(IP)+1;
End;
{ address a:b:c::f:g:h }
{ Result now a : b : c : f : g : h : 0 : 0, ZeroAt = 2, Index = 6 }
{ Result after a : b : c : 0 : 0 : f : g : h }
if ZeroAt >= 0 then
Begin
Move(IPv6.u6_addr16[ZeroAt+1],IPv6.u6_addr16[(8-Index)+ZeroAt+1],2*(Index-ZeroAt-1));
FillChar(IPv6.u6_addr16[ZeroAt+1],2*(8-Index),0);
End;
{ One input character together with its classification, as returned by
  next_char. }
TCharRec = record
// the character itself ('-' is used as a filler when ctype is cEndStr)
ch: AnsiChar;
// class of ch
ctype: TCharClass;
end;
StrToHostAddr6:=IPv6;
End;
{ A token: a maximal run of consecutive characters that share one class,
  as returned by next_token. }
TToken = record
// text of the run; empty for the end-of-string token
s: ShortString;
// class shared by all characters in s
tt: TCharClass;
end;
{ Classifies a single input character for the tokenizer.
  Hexadecimal digits (either case) map to cHexDigit, ':' to cColon and '.'
  to cDot. Any other character is reported as cUnknown. }
function get_char_class(ch: AnsiChar): TCharClass;
begin
  if ch in ['0' .. '9', 'A' .. 'F', 'a' .. 'f'] then
    get_char_class := cHexDigit
  else if ch = ':' then
    get_char_class := cColon
  else if ch = '.' then
    get_char_class := cDot
  else
    get_char_class := cUnknown;
end;
{ Returns True when idx does not address a character of IP, i.e. when it
  falls outside the 1-based range 1 .. Length(IP). }
function is_eos(idx: Cardinal): Boolean;
begin
  is_eos := not ((idx >= 1) and (idx <= Length(IP)));
end;
{ Fetches the character of IP at position idx together with its class.
  When idx is outside the string, the record carries the class cEndStr and
  the filler character '-'. }
function next_char(idx: Cardinal): TCharRec;
var
  rec: TCharRec;
begin
  if not is_eos(idx) then
  begin
    rec.ch := IP[idx];
    rec.ctype := get_char_class(rec.ch);
  end
  else
  begin
    rec.ch := '-';
    rec.ctype := cEndStr;
  end;
  next_char := rec;
end;
{ Reads the next token starting at idx and advances idx past it.
  A token is the longest run of consecutive characters that have the same
  class as the character found at idx. At the end of the input the token
  has class cEndStr, an empty text, and idx is left unchanged. }
function next_token(var idx: Cardinal): TToken;
var
  cur: TCharRec;
  run_class: TCharClass;
  run_text: ShortString;
begin
  cur := next_char(idx);
  // the class of the first character decides the class of the whole token
  run_class := cur.ctype;
  run_text := '';
  while (cur.ctype = run_class) and (cur.ctype <> cEndStr) do
  begin
    run_text := run_text + cur.ch;
    Inc(idx);
    cur := next_char(idx);
  end;
  next_token.s := run_text;
  next_token.tt := run_class;
end;
{ Converts the hexadecimal text s (at most 4 characters) into a 16-bit
  value and stores it in res after passing it through htons.
  Returns True on success. Returns False, leaving res untouched, when s is
  longer than 4 characters or Val reports a conversion error. }
function convert_hextet(const s: ShortString; var res: Word): Boolean;
var
  parsed, err_pos: Word;
begin
  convert_hextet := False;
  if Length(s) <= 4 then
  begin
    // the '0x' prefix makes Val read the digits as hexadecimal
    Val('0x'+s, parsed, err_pos);
    if err_pos = 0 then
    begin
      res := htons(parsed);
      convert_hextet := True;
    end;
  end;
end;
{ Converts the decimal text s (at most 3 characters) into a byte stored
  in res.
  Returns True on success. Returns False, leaving res untouched, when s is
  longer than 3 characters, Val reports a conversion error, or the value
  is greater than 255. }
function convert_octet(const s: ShortString; var res: Byte): Boolean;
var
  parsed: Word;
  err_pos: Word;
begin
  convert_octet := False;
  if Length(s) <= 3 then
  begin
    Val(s, parsed, err_pos);
    if (err_pos = 0) and (parsed <= 255) then
    begin
      res := parsed;
      convert_octet := True;
    end;
  end;
end;
var
// Locals and main body of StrToHostAddr6.
// The input string IP is split into tokens (runs of hex digits, colons or
// dots). Hextets are collected in hextet_arr, a trailing dotted IPv4 part in
// octet_arr, and the function result is only filled in at the very end.
// Every failure path leaves through a plain `exit`, so an invalid input
// yields the all-zero address that is written at the start of the body.
// tkn is the token being processed, ptkn the one processed just before it.
tkn, ptkn: TToken;
// 1-based read position in IP; later reused as a destination index.
idx: Cardinal;
// Hextets in the order they were read. Values come from convert_hextet
// (or from htons below), i.e. they are already byte-swapped for the result.
hextet_arr: array[0 .. 7] of Word = (0,0,0,0,0,0,0,0);
// hextet_idx/octet_idx: next free slot in hextet_arr/octet_arr.
// coll_start_idx: slot in hextet_arr that was reserved for the '::'.
hextet_idx, octet_idx,coll_start_idx: byte;
octet_arr: array[0 .. 3] of byte = (0,0,0,0);
// True once a '::' has been seen.
coll_zero_seen: Boolean = False;
parser_mode: TParserMode = pmIPv6;
tmpval: Word = 0;
tmpByte: Byte = 0;
begin
// Start from the all-zero address; this is what every early exit returns.
StrToHostAddr6.s6_addr32[0] := 0;
StrToHostAddr6.s6_addr32[1] := 0;
StrToHostAddr6.s6_addr32[2] := 0;
StrToHostAddr6.s6_addr32[3] := 0;
// Reject inputs that are longer than 45 or shorter than 2 characters.
if (Length(IP) > 45) or (Length(IP) < 2) then exit;
hextet_idx := 0;
coll_start_idx := 0;
octet_idx := 0;
idx := 1;
// cUnknown in ptkn means "no token has been processed yet".
ptkn.s := '';
ptkn.tt := cUnknown;
tkn := next_token(idx);
while (tkn.tt <> cEndStr) do
begin
case tkn.tt of
cHexDigit:
begin
case parser_mode of
pmIPv6:
begin
if (hextet_idx <= 7) and (convert_hextet(tkn.s, tmpval)) then
begin
hextet_arr[hextet_idx] := tmpval;
Inc(hextet_idx);
end
else
exit; // too many hextets, or invalid hextet.
end;
pmIPv4:
begin
if (octet_idx <= 3) and (convert_octet(tkn.s, tmpByte)) then
begin
octet_arr[octet_idx] := tmpByte;
Inc(octet_idx);
end
else
exit; // too many octets, or invalid octet.
end;
end;
end;
cColon:
begin
// a colon after the IPv4 part has started, or a run of three or more
// colons, is invalid.
if (parser_mode = pmIPv4) or (Length(tkn.s) > 2) then exit;
if Length(tkn.s) = 2 then
begin
// if we saw a collapsed sequence before, or if we've already
// seen 8 hextets.
if (coll_zero_seen = True) or (hextet_idx > 7) then exit;
coll_zero_seen := True;
// reserve one slot for the '::'; it is never written and so stays zero.
coll_start_idx := hextet_idx;
Inc(hextet_idx);
end
else if Length(tkn.s) = 1 then
begin
// is this single colon the first token? if so, address is invalid.
// if the prev token is cUnknown, then this must be the first token.
if ptkn.tt = cUnknown then exit;
end;
end;
cDot:
begin
// a run of two or more dots is invalid.
if Length(tkn.s) > 1 then exit;
// By the time we see the first dot, the first octet of the IPv4
// address has already been processed as an IPv6 hextet. we have
// to backtrack to remove that value from hextet_arr
// and reprocess the value as ipv4.
if parser_mode = pmIPv6 then
begin
if ptkn.tt = cHexDigit then
begin
Dec(hextet_idx);
hextet_arr[hextet_idx] := 0;
if (octet_idx <= 3) and (convert_octet(ptkn.s, tmpByte)) then
begin
octet_arr[octet_idx] := tmpByte;
Inc(octet_idx);
end
else
exit; // too many octets, or invalid octet.
end
else // dot preceded by something other than digit
exit;
parser_mode := pmIPv4;
end;
end;
cUnknown:
exit;
end;
ptkn := tkn;
tkn := next_token(idx);
end;
// if we finished on a . or :, the address is invalid.
// (a trailing '::' is allowed, hence the length check on the colon token.)
if (ptkn.tt = cDot) or ((ptkn.tt = cColon) and (Length(ptkn.s) = 1)) then
exit;
// if there's an ipv4 addr, add its octets onto the end
// of the ipv6 hextet array. we have to convert the bytes to
// words.
if (parser_mode = pmIPv4) then
begin
// exactly 4 octets are required, and they need two free hextet slots.
if (octet_idx = 4) and (hextet_idx <= 6) then
begin
tmpval := (octet_arr[0] shl 8) + (octet_arr[1]);
hextet_arr[hextet_idx] := htons(tmpval);
Inc(hextet_idx);
tmpval := (octet_arr[2] shl 8) + (octet_arr[3]);
hextet_arr[hextet_idx] := htons(tmpval);
Inc(hextet_idx);
end
else
exit; // invalid no of ipv4 octets, or not enough room for them.
end;
// finish line is in sight. if we have a collapsed-zeroes sequence
// then we must fill that in now.
if coll_zero_seen = True then
begin
// copy everything up to and including the reserved '::' slot to the
// front of the result.
for tmpByte := 0 to coll_start_idx do
StrToHostAddr6.s6_addr16[tmpByte] := hextet_arr[tmpByte];
// hextet_idx-1 points to the final hextet we processed, in the hextet_arr
// array. starting there, reading back to coll_start_idx, we copy these
// words to the end of the Result array, with word hextet_idx-1 going at
// the end of the Result array, hextet_idx-2 going to the end - 1 of Result,
// and so on. the words in between keep the zero written at the start.
// NOTE: optimization note -- a memmove/memcpy equivalent could help here.
tmpByte := hextet_idx-1;
// idx is no longer needed as a read position; reuse it as the write index.
idx := 7;
while tmpByte > coll_start_idx do
begin
StrToHostAddr6.s6_addr16[idx] := hextet_arr[tmpByte];
Dec(tmpByte);
Dec(idx);
end;
end
else
begin
// no collapsed zeroes. we must have exactly 8 words then, or we're short.
// NOTE: optimization note: memmove/memcpy equivalent could help here.
if hextet_idx < 8 then exit;
for tmpByte := 0 to 7 do
StrToHostAddr6.s6_addr16[tmpByte] := hextet_arr[tmpByte];
end;
end;
function NetAddrToStr6 (Entry : TIn6_Addr) : ansiString;
begin

252
tests/webtbs/tw37013.pp Normal file
View File

@ -0,0 +1,252 @@
program tw37013;
{$mode objfpc}{$H+}
{
Test StrToHostAddr6 in the sockets unit with lists of known bad and known
good IPv6 addresses. By Noel Duffy (bug ID 37013)
}
uses Classes, sockets, SysUtils;
{ Appends the known-bad IPv6 address strings to bad_addrs.
  bad_addrs must be an already created list; the procedure only adds to it.
  The list is passed by value (an object reference) rather than as an "out"
  parameter: an out parameter's incoming value is formally undefined, yet
  this procedure has to use the incoming reference to call Add on it. }
procedure BuildBaddAddrList(bad_addrs: TStringList);
begin
  // start with some obviously bad formats.
  bad_addrs.Add('');
  bad_addrs.Add(':');
  bad_addrs.Add(':::');
  bad_addrs.Add('::.');
  bad_addrs.Add('::::');
  bad_addrs.Add('fe80:');
  bad_addrs.Add('x:');
  bad_addrs.Add('.');
  bad_addrs.Add('....');
  // invalid chars in all 8 hextets.
  bad_addrs.Add('fe@0:b46c:c2a1:a202:9*6e:9d2f:a520:4172');
  bad_addrs.Add('fe80:b4%c:c2a1:a202:9*6e:9d2f:a520:4172');
  bad_addrs.Add('fe80:b46c:c2#1:a202:926e:9d2f:a520:4172');
  bad_addrs.Add('fe80:b46c:c2a1:a2^2:926e:9d2f:a520:4172');
  bad_addrs.Add('fe80:b46c:c2a1:a202:9*6e:9d2f:a520:4172');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9!d2f:a520:4172');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:a=20:4172');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:a520:41+2');
  // $ sign in hextets.
  bad_addrs.Add('$fe80:b46c:c2a1:a202:926e:9d2f:a520:4172');
  bad_addrs.Add('fe80:b$46c:c2a1:a202:926e:9d2f:a520:4172');
  bad_addrs.Add('fe80:b46c:c2a1$:a202:926e:9d2f:a520:4172');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926$e:9d2f:a520:4172');
  // last char is :
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:a520:4172:');
  // first char is :
  bad_addrs.Add(':fe80:b46c:c2a1:a202:926e:9d2f:a520:4172');
  // two sequences of collapsed zeroes. :: is the Highlander
  // sequence. There can be only one.
  bad_addrs.Add('fe80::c2a1:a202::9d2f:a520:4172');
  // 8 hextets plus collapsed zeroes, which means at least 1 hextet
  // of all zeroes, equaling 9 hextets.
  bad_addrs.Add('fe80:b46c:c2a1::a202:926e:9d2f:a520:4172');
  // try the same with the :: at the start.
  bad_addrs.Add('::b46c:c2a1:8fcb:a202:926e:9d2f:a520:4172');
  // and now try the same with the :: at the end.
  bad_addrs.Add('b46c:a771:8fcb:a202:926e:9d2f:a520:4172::');
  // too many hextets
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:a520:4172:1211');
  // too few hextets
  bad_addrs.Add('fe80:b46c:9d2f:a520:4172:1211');
  // too many digits in each of the 8 hextets
  bad_addrs.Add('fe801:b46c:c2a1:a202:926e:9d2f:a520:4172');
  bad_addrs.Add('fe80:b46cb:c2a1:a202:926e:9d2f:a520:4172');
  bad_addrs.Add('fe80:b46c:0c2ad:a202:926e:9d2f:a520:4172');
  bad_addrs.Add('fe80:b46c:c2a1:a2022:926e:9d2f:a520:4172');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e6:9d2f:a520:4172');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:09d2f:a520:4172');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:a5209:4172');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:a520:04172');
  // math signs in hextets. naive parsing of hextets with
  // math signs produces a positive result, but these are not
  // valid.
  bad_addrs.Add('fe80:-b46c:c2a1:a202:926e:9d2f:a520:4172');
  bad_addrs.Add('fe80:b46c:-c2a1:a202:926e:9d2f:a520:4172');
  // Hybrid 6 and 4 addresses.
  // ipv4 octet can't contain hex
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:19F.168.1.2');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:192.1A8.1.2');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:192.168.B.2');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:192.168.1.C');
  // ipv4 octets can't contain math signs
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:-192.168.1.1');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:192.-168.1.1');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:192.168.-1.1');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:192.168.1.-1');
  // invalid hybrid ipv6/ipv4 address, because there are 7
  // hextets before the ipv4. there can be only 6, as the 4
  // octets make up 2 hextets, and there can be no more than 8
  // hextets in total.
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:a520:192.168.1.2');
  // 5 hextets plus 2 hextets (4 octets) = 7. Must be 8.
  bad_addrs.Add('fe80:b46c:926e:9d2f:a520:192.168.1.2');
  // too few octets in ipv4 bit.
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:192.168.1');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:192.168');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:192');
  // 7 hextets plus two octets of ipv4 = 8 hextets, but still must
  // not be parsed as valid because there must be 4 octets.
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:e2fc:192.168');
  // too many ipv4 octets
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:192.168.1.17.32');
  // addr starts with .
  bad_addrs.Add('.fe80:b46c:c2a1:a202:926e:9d2f:192.168.1.17');
  // addr ends with .
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:192.168.1.17.');
  // 6 hextets + 2 hextets (4 octets) plus collapsed zero sequence = 9 hextets.
  bad_addrs.Add('fe80:b46c::c2a1:a202:926e:9d2f:72.16.32.1');
  // repeat with :: at start
  bad_addrs.Add('::fe80:b46c:c2a1:a202:926e:9d2f:72.16.32.1');
  // and at end
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f::72.16.32.1');
  // ipv4 octets > 255
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:351.16.32.1');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:72.123216.32.1');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:72.16.9999999999999999999.1');
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:72.16.32.5e21');
  // dot sequence
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:192..168.1.17');
  // start with dot
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:.192.168.1.17');
  // end with dot (same input as the "addr ends with ." entry above; kept
  // so that the reported totals do not change)
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:9d2f:192.168.1.17.');
  // ipv4 octets followed by hextet
  bad_addrs.Add('fe80:b46c:c2a1:a202:926e:192.168.1.1:a2c1');
  // all zeroes but for mathematical operator.
  bad_addrs.Add('::-');
  // end on colon, but with earlier collapsed section.
  bad_addrs.Add('fe80::a202:926e:a2c1:');
  // too many digits in ipv4 octet
  bad_addrs.Add('21ac:d349:07c4:198e:6fab:df5a:0192.168.1.17');
  bad_addrs.Add('21ac:d349:07c4:198e:6fab:df5a:192.0168.1.17');
  bad_addrs.Add('21ac:d349:07c4:198e:6fab:df5a:192.168.0001.17');
  bad_addrs.Add('21ac:d349:07c4:198e:6fab:df5a:192.0168.1.0017');
  // just ipv4 address
  bad_addrs.Add('127.0.0.2');
end;
{ Appends the known-good test cases to addrlist.
  addrlist must be an already created list; the procedure only adds to it.
  The list is passed by value (an object reference) rather than as an "out"
  parameter, because the incoming reference is used to call Add. }
procedure BuildGoodAddrList(addrlist: TStringList);
begin
  // Each str is two parts, separated by a pipe. The left part is the input
  // address to be parsed, and the right is the expected result of taking the
  // resulting address and converting back to a string. This provides an
  // easy way to verify that the StrToHostAddr6 function parsed the address
  // correctly.
  // The values on the right have been double-checked with libc's inet_pton.
  addrlist.Add('::1|::0001');
  addrlist.Add('::|::');
  addrlist.Add('2001:4860:4000::|2001:4860:4000::');
  addrlist.Add('21ac:d349:07c4:198e:6fab:df5a:192.168.1.17|21AC:D349:07C4:198E:6FAB:DF5A:C0A8:0111');
  addrlist.Add('21ac:d349:07c4:198e:6fab:df5a:0.0.0.0|21AC:D349:07C4:198E:6FAB:DF5A::');
  addrlist.Add('::213.41.35.14|::D529:230E');
  addrlist.Add('fe80:b46c:c2a1:a202:926e:9d2f:a520:4172|FE80:B46C:C2A1:A202:926E:9D2F:A520:4172');
  addrlist.Add('a:b:c:d:e:f:0:1|000A:000B:000C:000D:000E:000F::0001');
  addrlist.Add('a:B:c:D:e:f:9:1|000A:000B:000C:000D:000E:000F:0009:0001');
end;
{ Runs StrToHostAddr6 over every string in al, all of which are expected to
  be rejected. An address counts as wrongly accepted when the parsed result
  is not the all-zero address; each such string is printed.
  Returns the number of wrongly accepted strings. }
function TestAddrs(al: TStringList): Cardinal;
var
  i: Integer;
  candidate: String;
  parsed: in6_addr;
  all_zero: Boolean;
begin
  Result := 0;
  for i := 0 to al.Count-1 do
  begin
    candidate := al[i];
    parsed := StrToHostAddr6(candidate);
    all_zero := (parsed.s6_addr32[0] = 0) and (parsed.s6_addr32[1] = 0) and
      (parsed.s6_addr32[2] = 0) and (parsed.s6_addr32[3] = 0);
    if not all_zero then
    begin
      writeln(' [x]"'+candidate+'".');
      Inc(Result);
    end;
  end;
end;
{ Runs StrToHostAddr6 over every "input|expected" entry in al.
  The part before the pipe is parsed, converted back with HostAddrToStr6 and
  compared with the part after the pipe. Mismatches are printed together
  with the expected text. Entries without a pipe are skipped and therefore
  not counted as successes.
  Returns the number of entries whose round trip matched. }
function TestGoodAddrs(al: TStringList): Cardinal;
var
  addr,instr,parsed_addr,expected: String;
  i6: in6_addr;
  idx: Cardinal;
begin
  Result := 0;
  for addr in al do
  begin
    idx := Pos('|', addr);
    if idx > 0 then
    begin
      instr := Copy(addr,1,idx-1);
      expected := Copy(addr, idx+1, Length(addr)-idx);
      i6 := StrToHostAddr6(instr);
      parsed_addr := HostAddrToStr6(i6);
      if parsed_addr <> expected then
        // quotes are balanced around every value, and the expected text is
        // shown so a failure can be diagnosed from the output alone.
        writeln(' [x] "'+instr+'" -> "'+parsed_addr+'" (expected "'+
          expected+'").')
      else
        Inc(Result);
    end;
  end;
end;
var
  addrlist: TStringList;
  count: Cardinal;
{ Main program: runs the bad-address and good-address checks and reports the
  outcome through ExitCode (0 = all checks passed, 1 = at least one failed). }
begin
  ExitCode := 0;
  addrlist := TStringList.Create;
  // try..finally guarantees the list is released even if a check raises.
  try
    BuildBaddAddrList(addrlist);
    count := TestAddrs(addrlist);
    writeln('Got non-zero result for '+inttostr(count)+' out of '+
      inttostr(addrlist.Count)+' bad addresses.');
    writeln();
    // if we successfully parsed any bad addresses
    if count > 0 then ExitCode := 1;

    addrlist.Clear;
    BuildGoodAddrList(addrlist);
    count := TestGoodAddrs(addrlist);
    writeln('Successfully parsed '+inttostr(count)+' out of '+
      inttostr(addrlist.Count)+' good addresses.');
    // if we didn't parse all the good addresses.
    if count < addrlist.Count then
      ExitCode := 1;
  finally
    addrlist.Free;
  end;
end.