* Some fixes from Sergei Gorelkin, plus RFC3986 additions

git-svn-id: trunk@4437 -
This commit is contained in:
michael 2006-08-19 09:38:12 +00:00
parent bcf5490dfb
commit 4c744032d9
2 changed files with 407 additions and 131 deletions

View File

@ -12,16 +12,20 @@
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
{$MODE objfpc}
{$H+}
program TestUri;
program Testuri;
{$IFDEF FPC}
{$MODE OBJFPC}{$H+}
{$ENDIF}
uses URIParser;
uses
uriparser;
var
URI: TURI;
s: String;
procedure TestParse;
begin
with URI do
begin
@ -34,12 +38,15 @@ begin
Document := 'some index.html';
Params := 'param1=value1&param2=value2';
Bookmark := 'bookmark';
HasAuthority := True;
end;
s := EncodeURI(URI);
WriteLn(s);
Finalize(URI);
FillChar(URI, SizeOf(URI), #0);
Writeln;
// URI := ParseURI(s, 'defaultprotocol', 1234);
URI:=ParseURI('http://www.lazarus.freepascal.org/main.php');
@ -55,5 +62,101 @@ begin
WriteLn('Params: ', Params);
WriteLn('Bookmark: ', Bookmark);
end;
end;
type
urirec = record
a, b: string
end;
const
Base = 'http://a/b/c/d;p?q';
tests: array[0..22] of urirec = (
(a: 'g:h'; b: 'g:h'),
(a: 'g'; b: 'http://a/b/c/g'),
(a: './g'; b: 'http://a/b/c/g'),
(a: 'g/'; b: 'http://a/b/c/g/'),
(a: '/g'; b: 'http://a/g'),
(a: '//g'; b: 'http://g'),
(a: '?y'; b: 'http://a/b/c/d;p?y'),
(a: 'g?y'; b: 'http://a/b/c/g?y'),
(a: '#s'; b: 'http://a/b/c/d;p?q#s'),
(a: 'g#s'; b: 'http://a/b/c/g#s'),
(a: 'g?y#s'; b: 'http://a/b/c/g?y#s'),
(a: ';x'; b: 'http://a/b/c/;x'),
(a: 'g;x'; b: 'http://a/b/c/g;x'),
(a: 'g;x?y#s'; b: 'http://a/b/c/g;x?y#s'),
(a: ''; b: 'http://a/b/c/d;p?q'),
(a: '.'; b: 'http://a/b/c/'),
(a: './'; b: 'http://a/b/c/'),
(a: '..'; b: 'http://a/b/'),
(a: '../'; b: 'http://a/b/'),
(a: '../g'; b: 'http://a/b/g'),
(a: '../..'; b: 'http://a/'),
(a: '../../'; b: 'http://a/'),
(a: '../../g'; b: 'http://a/g')
);
tests1: array[0..1] of urirec = (
(a: '../../../g'; b: 'http://a/g'),
(a: '../../../../g'; b: 'http://a/g')
);
tests2: array[0..5] of urirec = (
(a: '/./g'; b: 'http://a/g'),
(a: '/../g'; b: 'http://a/g'),
(a: 'g.'; b: 'http://a/b/c/g.'),
(a: '.g'; b: 'http://a/b/c/.g'),
(a: 'g..'; b: 'http://a/b/c/g..'),
(a: '..g'; b: 'http://a/b/c/..g')
);
tests3: array[0..5] of urirec = (
(a: './../g'; b: 'http://a/b/g'),
(a: './g/.'; b: 'http://a/b/c/g/'),
(a: 'g/./h'; b: 'http://a/b/c/g/h'),
(a: 'g/../h'; b: 'http://a/b/c/h'),
(a: 'g;x=1/./y'; b: 'http://a/b/c/g;x=1/y'),
(a: 'g;x=1/../y'; b: 'http://a/b/c/y')
);
tests4: array[0..3] of urirec = (
(a: 'g?y/./x'; b: 'http://a/b/c/g?y/./x'),
(a: 'g?y/../x'; b: 'http://a/b/c/g?y/../x'),
(a: 'g#s/./x'; b: 'http://a/b/c/g#s/./x'),
(a: 'g#s/../x'; b: 'http://a/b/c/g#s/../x')
);
procedure Test(const Caption: string; const t: array of urirec);
var
rslt: UTF8String;
i: Integer;
Failed: Boolean;
begin
write(Caption, '...');
Failed := False;
for i := low(t) to high(t) do
begin
ResolveRelativeUri(Base, t[i].a, rslt);
if rslt <> t[i].b then
begin
if not Failed then writeln;
Failed := True;
writeln('Test ', i, ' mismatch, expected: ''', t[i].b, '''; got: ''', rslt, '''');
end;
end;
if not Failed then
writeln(' OK');
end;
begin
TestParse;
Writeln;
Writeln('Now testing relative URI resolving:');
Test('Normal tests', tests);
Test('URI authority is not changed by using dot segments', tests1);
Test('Dot segments are removed only if they are complete path components', tests2);
Test('Testing some nonsensical forms of URI', tests3);
Test('Testing dot segments present in query or fragments', tests4);
end.

View File

@ -13,8 +13,10 @@
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
{$IFDEF FPC}
{$MODE objfpc}
{$H+}
{$ENDIF}
unit URIParser;
@ -31,42 +33,55 @@ type
Document: String;
Params: String;
Bookmark: String;
HasAuthority: Boolean;
end;
function EncodeURI(const URI: TURI): String;
function ParseURI(const URI: String): TURI;
function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word): TURI;
function ParseURI(const URI: String): TURI; overload;
function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word): TURI; overload;
function ResolveRelativeURI(const BaseUri, RelUri: WideString;
out ResultUri: WideString): Boolean; overload;
function ResolveRelativeURI(const BaseUri, RelUri: UTF8String;
out ResultUri: UTF8String): Boolean; overload;
function URIToFilename(const URI: string; out Filename: string): Boolean;
function FilenameToURI(const Filename: string): string;
function IsAbsoluteURI(const UriReference: string): Boolean;
implementation
uses SysUtils;
const
HexTable: array[0..15] of Char = '0123456789abcdef';
GenDelims = [':', '/', '?', '#', '[', ']', '@'];
SubDelims = ['!', '$', '&', '''', '(', ')', '*', '+', ',', ';', '='];
ALPHA = ['A'..'Z', 'a'..'z'];
DIGIT = ['0'..'9'];
Unreserved = ALPHA + DIGIT + ['-', '.', '_', '~'];
ValidPathChars = Unreserved + SubDelims + ['@', ':', '/'];
function Escape(const s: String; const Allowed: TSysCharSet): String;
var
i: Integer;
begin
SetLength(Result, 0);
for i := 1 to Length(s) do
if not (s[i] in Allowed) then
Result := Result + '%' + IntToHex(ord(s[i]), 2)
else
Result := Result + s[i];
end;
function EncodeURI(const URI: TURI): String;
function Escape(const s: String): String;
var
i: Integer;
begin
SetLength(Result, 0);
for i := 1 to Length(s) do
if not (s[i] in ['0'..'9', 'A'..'Z', 'a'..'z', ',', '-', '.', '_',
'/', '\']) then
Result := Result + '%' + HexTable[Ord(s[i]) shr 4] +
HexTable[Ord(s[i]) and $f]
else
Result := Result + s[i];
end;
// ! if no scheme then first colon in path should be escaped
begin
SetLength(Result, 0);
if Length(URI.Protocol) > 0 then
Result := LowerCase(URI.Protocol) + ':';
if Length(URI.Host) > 0 then
if URI.HasAuthority then
begin
Result := Result + '//';
if Length(URI.Username) > 0 then
@ -80,17 +95,17 @@ begin
end;
if URI.Port <> 0 then
Result := Result + ':' + IntToStr(URI.Port);
Result := Result + Escape(URI.Path);
Result := Result + Escape(URI.Path, ValidPathChars);
if Length(URI.Document) > 0 then
begin
if (Length(Result) = 0) or (Result[Length(Result)] <> '/') then
if (Length(URI.Path) > 0) and ((Length(Result) = 0) or (Result[Length(Result)] <> '/')) then
Result := Result + '/';
Result := Result + Escape(URI.Document);
Result := Result + Escape(URI.Document, ValidPathChars);
end;
if Length(URI.Params) > 0 then
Result := Result + '?' + URI.Params;
Result := Result + '?' + Escape(URI.Params, ValidPathChars);
if Length(URI.Bookmark) > 0 then
Result := Result + '#' + Escape(URI.Bookmark);
Result := Result + '#' + Escape(URI.Bookmark, ValidPathChars);
end;
function ParseURI(const URI: String): TURI;
@ -98,155 +113,313 @@ begin
Result := ParseURI(URI, '', 0);
end;
function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word): TURI;
function Unescape(const s: String): String;
function HexValue(c: Char): Integer;
begin
if (c >= '0') and (c <= '9') then
Result := Ord(c) - Ord('0')
else if (c >= 'A') and (c <= 'F') then
Result := Ord(c) - Ord('A') + 10
else if (c >= 'a') and (c <= 'f') then
Result := Ord(c) - Ord('a') + 10
else
Result := 0;
end;
var
i, RealLength: Integer;
begin
SetLength(Result, Length(s));
i := 1;
RealLength := 0;
while i <= Length(s) do
begin
Inc(RealLength);
if s[i] = '%' then
begin
Result[RealLength] := Chr(HexValue(s[i + 1]) shl 4 or HexValue(s[i + 2]));
Inc(i, 3);
end else
begin
Result[RealLength] := s[i];
Inc(i);
end;
end;
SetLength(Result, RealLength);
function HexValue(c: Char): Integer;
begin
case c of
'0'..'9': Result := ord(c) - ord('0');
'A'..'F': Result := ord(c) - (ord('A') - 10);
'a'..'f': Result := ord(c) - (ord('a') - 10);
else
Result := 0;
end;
end;
function Unescape(const s: String): String;
var
s: String;
i, LastValidPos: Integer;
i, RealLength: Integer;
begin
SetLength(Result, Length(s));
i := 1;
RealLength := 0;
while i <= Length(s) do
begin
Inc(RealLength);
if s[i] = '%' then
begin
Result[RealLength] := Chr(HexValue(s[i + 1]) shl 4 or HexValue(s[i + 2]));
Inc(i, 3);
end else
begin
Result[RealLength] := s[i];
Inc(i);
end;
end;
SetLength(Result, RealLength);
end;
function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word): TURI;
var
s, Authority: String;
i: Integer;
begin
Result.Protocol := LowerCase(DefaultProtocol);
Result.Port := DefaultPort;
s := URI;
// Extract the protocol
// Extract scheme
for i := 1 to Length(s) do
if s[i] = ':' then
begin
Result.Protocol := Copy(s, 1, i - 1);
s := Copy(s, i + 1, Length(s));
break;
end else if not (s[i] in ['0'..'9', 'A'..'Z', 'a'..'z']) then
s := Copy(s, i + 1, MaxInt);
break;
end
else
if not (((i=1) and (s[i] in ALPHA)) or (s[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
break;
// Extract the bookmark name
// Extract the bookmark
for i := Length(s) downto 1 do
if s[i] = '#' then
begin
Result.Bookmark := Unescape(Copy(s, i + 1, Length(s)));
s := Copy(s, 1, i - 1);
break;
end else if s[i] = '/' then
break;
i := LastDelimiter('#', s);
if i > 0 then
begin
Result.Bookmark := Unescape(Copy(s, i + 1, MaxInt));
s := Copy(s, 1, i - 1);
end;
// Extract the params
for i := Length(s) downto 1 do
if s[i] = '?' then
begin
Result.Params := Copy(s, i + 1, Length(s));
s := Copy(s, 1, i - 1);
break;
end else if s[i] = '/' then
break;
i := LastDelimiter('?', s);
if i > 0 then
begin
Result.Params := Unescape(Copy(s, i + 1, MaxInt));
s := Copy(s, 1, i - 1);
end;
// Extract the document name
// extract authority
if (Length(s) > 1) and (s[1] = '/') and (s[2] = '/') then
begin
i := 3;
while (i <= Length(s)) and (s[i] <> '/') do
Inc(i);
Authority := Copy(s, 3, i-3);
s := Copy(s, i, MaxInt);
Result.HasAuthority := True; // even if Authority is empty
end
else
begin
Result.HasAuthority := False;
Authority := '';
end;
// now s is 'hier-part' per RFC3986
// Extract the document name (nasty...)
for i := Length(s) downto 1 do
if s[i] = '/' then
begin
Result.Document := Unescape(Copy(s, i + 1, Length(s)));
s := Copy(s, 1, i - 1);
if (Result.Document <> '.') and (Result.Document <> '..') then
s := Copy(s, 1, i)
else
Result.Document := '';
break;
end else if s[i] = ':' then
break;
break
else if i = 1 then
begin
Result.Document := Unescape(s);
if (Result.Document <> '.') and (Result.Document <> '..') then
s := ''
else
Result.Document := '';
// break - not needed, last iteration
end;
// Extract the path
// Everything left is a path
LastValidPos := 0;
for i := Length(s) downto 1 do
if (s[i] = '/')
and ((I>1) and (S[i-1]<>'/'))
and ((I<Length(S)) and (S[I+1]<>'/')) then
LastValidPos := i
else if s[i] in [':', '@'] then
break;
if (LastValidPos > 0) and
(Length(S)>LastValidPos) and
(S[LastValidPos+1]<>'/') then
begin
Result.Path := Unescape(Copy(s, LastValidPos, Length(s)));
s := Copy(s, 1, LastValidPos - 1);
end;
Result.Path := Unescape(s);
// Extract the port number
for i := Length(s) downto 1 do
if s[i] = ':' then
begin
Result.Port := StrToInt(Copy(s, i + 1, Length(s)));
s := Copy(s, 1, i - 1);
break;
end else if s[i] in ['@', '/'] then
break;
i := LastDelimiter(':@', Authority);
if (i > 0) and (Authority[i] = ':') then
begin
Result.Port := StrToInt(Copy(Authority, i + 1, MaxInt));
Authority := Copy(Authority, 1, i - 1);
end;
// Extract the hostname
if ((Length(s) > 2) and (s[1] = '/') and (s[2] = '/')) or
((Length(s) > 1) and (s[1] <> '/')) then
i := Pos('@', Authority);
if i > 0 then
begin
if s[1] <> '/' then
s := '//' + s;
for i := Length(s) downto 1 do
if s[i] in ['@', '/'] then
begin
Result.Host := Copy(s, i + 1, Length(s));
s := Copy(s, 3, i - 3);
break;
end;
Result.Host := Copy(Authority, i+1, MaxInt);
Delete(Authority, i, MaxInt);
// Extract username and password
if Length(s) > 0 then
if Length(Authority) > 0 then
begin
i := Pos(':', s);
i := Pos(':', Authority);
if i = 0 then
Result.Username := s
Result.Username := Authority
else
begin
Result.Username := Copy(s, 1, i - 1);
Result.Password := Copy(s, i + 1, Length(s));
Result.Username := Copy(Authority, 1, i - 1);
Result.Password := Copy(Authority, i + 1, MaxInt);
end;
end;
end
else
Result.Host := Authority;
end;
procedure RemoveDotSegments(var s: string);
var
Cur, Prev: Integer;
begin
Prev := Pos('/', s);
while (Prev > 0) and (Prev < Length(s)) do
begin
Cur := Prev+1;
while (Cur <= Length(s)) and (s[Cur] <> '/') do
Inc(Cur);
if (Cur - Prev = 2) and (s[Prev+1] = '.') then
Delete(s, Prev+1, 2)
else if (Cur - Prev = 3) and (s[Prev+1] = '.') and (s[Prev+2] = '.') then
begin
while (Prev > 1) and (s[Prev-1] <> '/') do
Dec(Prev);
if Prev > 1 then
Dec(Prev);
Delete(s, Prev+1, Cur-Prev);
end
else
Prev := Cur;
end;
end;
// TODO: this probably must NOT percent-encode the result...
function ResolveRelativeURI(const BaseUri, RelUri: UTF8String;
out ResultUri: UTF8String): Boolean;
var
Base, Rel: TUri;
begin
Base := ParseUri(BaseUri);
Rel := ParseUri(RelUri);
Result := (Base.Protocol <> '') or (Rel.Protocol <> '');
if not Result then
Exit;
with Rel do
begin
if (Path = '') and (Document = '') then
begin
if (Protocol = '') and (Host = '') then
begin
if Params <> '' then
Base.Params := Params;
Base.Bookmark := Bookmark;
ResultUri := EncodeUri(Base);
Exit;
end;
end;
if (Protocol <> '') then // RelURI is absolute - return it...
begin
ResultUri := RelUri;
Exit;
end;
// Inherit protocol
Protocol := Base.Protocol;
if (Host = '') then // TODO: or "not HasAuthority"?
begin
// Inherit Authority (host, port, username, password)
Host := Base.Host;
Port := Base.Port;
Username := Base.Username;
Password := Base.Password;
HasAuthority := Base.HasAuthority;
if (Path = '') or (Path[1] <> '/') then // path is empty or relative
Path := Base.Path + Path;
RemoveDotSegments(Path);
end;
end; // with
ResultUri := EncodeUri(Rel);
end;
function ResolveRelativeURI(const BaseUri, RelUri: WideString;
out ResultUri: WideString): Boolean;
var
rslt: UTF8String;
begin
Result := ResolveRelativeURI(UTF8Encode(BaseUri), UTF8Encode(RelUri), rslt);
if Result then
ResultURI := UTF8Decode(rslt);
end;
function URIToFilename(const URI: string; out Filename: string): Boolean;
var
U: TURI;
I: Integer;
begin
Result := False;
U := ParseURI(URI);
if SameText(U.Protocol, 'file') then
begin
if (Length(U.Path) > 2) and (U.Path[1] = '/') and (U.Path[3] = ':') then
Filename := Copy(U.Path, 2, MaxInt)
else
Filename := U.Path;
Filename := Filename + U.Document;
Result := True;
end
else
if U.Protocol = '' then // fire and pray?
begin
Filename := U.Path + U.Document;
Result := True;
end;
if PathDelim <> '/' then
begin
I := Pos('/', Filename);
while I > 0 do
begin
Filename[I] := PathDelim;
I := Pos('/', Filename);
end;
end;
end;
function FilenameToURI(const Filename: string): string;
var
I: Integer;
begin
// TODO: seems implemented, but not tested well
Result := 'file://';
if (Length(Filename) > 2) and (Filename[1] <> PathDelim) and (Filename[2] = ':') then
Result := Result + '/';
Result := Result + Filename;
if PathDelim <> '/' then
begin
I := Pos(PathDelim, Result);
while I <> 0 do
begin
Result[I] := '/';
I := Pos(PathDelim, Result);
end;
end;
end;
function IsAbsoluteURI(const UriReference: string): Boolean;
var
I: Integer;
begin
Result := True;
for I := 1 to Length(UriReference) do
begin
if UriReference[I] = ':' then
Exit
else
if not (((I=1) and (UriReference[I] in ALPHA)) or
(UriReference[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
Break;
end;
Result := False;
end;
end.