mirror of
				https://gitlab.com/freepascal.org/fpc/source.git
				synced 2025-10-26 16:33:45 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			426 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
			
		
		
	
	
			426 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
| {
 | |
|     This file is part of the Free Pascal run time library.
 | |
|     Copyright (c) 2003 by the Free Pascal development team
 | |
|     Original author: Sebastian Guenther
 | |
| 
 | |
|     Unit to parse complete URI in its parts or to reassemble an URI
 | |
| 
 | |
|     See the file COPYING.FPC, included in this distribution,
 | |
|     for details about the copyright.
 | |
| 
 | |
|     This program is distributed in the hope that it will be useful,
 | |
|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 | |
| 
 | |
|  **********************************************************************}
 | |
| {$IFDEF FPC}
 | |
| {$MODE objfpc}
 | |
| {$H+}
 | |
| {$ENDIF}
 | |
| 
 | |
| unit URIParser;
 | |
| 
 | |
| interface
 | |
| 
 | |
| type
 | |
|   TURI = record
 | |
|     Protocol: String;
 | |
|     Username: String;
 | |
|     Password: String;
 | |
|     Host: String;
 | |
|     Port: Word;
 | |
|     Path: String;
 | |
|     Document: String;
 | |
|     Params: String;
 | |
|     Bookmark: String;
 | |
|     HasAuthority: Boolean;
 | |
|   end;
 | |
| 
 | |
| function EncodeURI(const URI: TURI): String;
 | |
| function ParseURI(const URI: String):  TURI; overload;
 | |
| function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word):  TURI; overload;
 | |
| 
 | |
| function ResolveRelativeURI(const BaseUri, RelUri: WideString;
 | |
|   out ResultUri: WideString): Boolean; overload;
 | |
| 
 | |
| function ResolveRelativeURI(const BaseUri, RelUri: UTF8String;
 | |
|   out ResultUri: UTF8String): Boolean; overload;
 | |
| 
 | |
| function URIToFilename(const URI: string; out Filename: string): Boolean;
 | |
| function FilenameToURI(const Filename: string): string;
 | |
| 
 | |
| function IsAbsoluteURI(const UriReference: string): Boolean;
 | |
| 
 | |
| implementation
 | |
| 
 | |
| uses SysUtils;
 | |
| 
 | |
| const
 | |
|   GenDelims = [':', '/', '?', '#', '[', ']', '@'];
 | |
|   SubDelims = ['!', '$', '&', '''', '(', ')', '*', '+', ',', ';', '='];
 | |
|   ALPHA = ['A'..'Z', 'a'..'z'];
 | |
|   DIGIT = ['0'..'9'];
 | |
|   Unreserved = ALPHA + DIGIT + ['-', '.', '_', '~'];
 | |
|   ValidPathChars = Unreserved + SubDelims + ['@', ':', '/'];
 | |
| 
 | |
| function Escape(const s: String; const Allowed: TSysCharSet): String;
 | |
| var
 | |
|   i: Integer;
 | |
| begin
 | |
|   SetLength(Result, 0);
 | |
|   for i := 1 to Length(s) do
 | |
|     if not (s[i] in Allowed) then
 | |
|       Result := Result + '%' + IntToHex(ord(s[i]), 2)
 | |
|     else
 | |
|       Result := Result + s[i];
 | |
| end;
 | |
| 
 | |
| function EncodeURI(const URI: TURI): String;
 | |
| // ! if no scheme then first colon in path should be escaped
 | |
| begin
 | |
|   SetLength(Result, 0);
 | |
|   if Length(URI.Protocol) > 0 then
 | |
|     Result := LowerCase(URI.Protocol) + ':';
 | |
|   if URI.HasAuthority then
 | |
|   begin
 | |
|     Result := Result + '//';
 | |
|     if Length(URI.Username) > 0 then
 | |
|     begin
 | |
|       Result := Result + URI.Username;
 | |
|       if Length(URI.Password) > 0 then
 | |
|         Result := Result + ':' + URI.Password;
 | |
|       Result := Result + '@';
 | |
|     end;
 | |
|     Result := Result + URI.Host;
 | |
|   end;
 | |
|   if URI.Port <> 0 then
 | |
|     Result := Result + ':' + IntToStr(URI.Port);
 | |
|   Result := Result + Escape(URI.Path, ValidPathChars);
 | |
|   if Length(URI.Document) > 0 then
 | |
|   begin
 | |
|     if (Length(URI.Path) > 0) and ((Length(Result) = 0) or (Result[Length(Result)] <> '/')) then
 | |
|       Result := Result + '/';
 | |
|     Result := Result + Escape(URI.Document, ValidPathChars);
 | |
|   end;
 | |
|   if Length(URI.Params) > 0 then
 | |
|     Result := Result + '?' + Escape(URI.Params, ValidPathChars);
 | |
|   if Length(URI.Bookmark) > 0 then
 | |
|     Result := Result + '#' + Escape(URI.Bookmark, ValidPathChars);
 | |
| end;
 | |
| 
 | |
| function ParseURI(const URI: String):  TURI;
 | |
| begin
 | |
|   Result := ParseURI(URI, '', 0);
 | |
| end;
 | |
| 
 | |
| function HexValue(c: Char): Integer;
 | |
| begin
 | |
|   case c of
 | |
|     '0'..'9': Result := ord(c) - ord('0');
 | |
|     'A'..'F': Result := ord(c) - (ord('A') - 10);
 | |
|     'a'..'f': Result := ord(c) - (ord('a') - 10);
 | |
|   else
 | |
|     Result := 0;
 | |
|   end;
 | |
| end;
 | |
| 
 | |
| function Unescape(const s: String): String;
 | |
| var
 | |
|   i, RealLength: Integer;
 | |
| begin
 | |
|   SetLength(Result, Length(s));
 | |
|   i := 1;
 | |
|   RealLength := 0;
 | |
|   while i <= Length(s) do
 | |
|   begin
 | |
|     Inc(RealLength);
 | |
|     if s[i] = '%' then
 | |
|     begin
 | |
|       Result[RealLength] := Chr(HexValue(s[i + 1]) shl 4 or HexValue(s[i + 2]));
 | |
|       Inc(i, 3);
 | |
|     end else
 | |
|     begin
 | |
|       Result[RealLength] := s[i];
 | |
|       Inc(i);
 | |
|     end;
 | |
|   end;
 | |
|   SetLength(Result, RealLength);
 | |
| end;
 | |
| 
 | |
| function ParseURI(const URI, DefaultProtocol: String; DefaultPort: Word):  TURI;
 | |
| var
 | |
|   s, Authority: String;
 | |
|   i: Integer;
 | |
| begin
 | |
|   Result.Protocol := LowerCase(DefaultProtocol);
 | |
|   Result.Port := DefaultPort;
 | |
| 
 | |
|   s := URI;
 | |
| 
 | |
|   // Extract scheme
 | |
| 
 | |
|   for i := 1 to Length(s) do
 | |
|     if s[i] = ':' then
 | |
|     begin
 | |
|       Result.Protocol := Copy(s, 1, i - 1);
 | |
|       s := Copy(s, i + 1, MaxInt);
 | |
|       break;
 | |
|     end
 | |
|     else
 | |
|       if not (((i=1) and (s[i] in ALPHA)) or (s[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
 | |
|         break;
 | |
| 
 | |
|   // Extract the bookmark
 | |
| 
 | |
|   i := LastDelimiter('#', s);
 | |
|   if i > 0 then
 | |
|   begin
 | |
|     Result.Bookmark := Unescape(Copy(s, i + 1, MaxInt));
 | |
|     s := Copy(s, 1, i - 1);
 | |
|   end;
 | |
| 
 | |
|   // Extract the params
 | |
| 
 | |
|   i := LastDelimiter('?', s);
 | |
|   if i > 0 then
 | |
|   begin
 | |
|     Result.Params := Unescape(Copy(s, i + 1, MaxInt));
 | |
|     s := Copy(s, 1, i - 1);
 | |
|   end;
 | |
| 
 | |
|   // extract authority
 | |
| 
 | |
|   if (Length(s) > 1) and (s[1] = '/') and (s[2] = '/') then
 | |
|   begin
 | |
|     i := 3;
 | |
|     while (i <= Length(s)) and (s[i] <> '/') do
 | |
|       Inc(i);
 | |
|     Authority := Copy(s, 3, i-3);
 | |
|     s := Copy(s, i, MaxInt);
 | |
|     Result.HasAuthority := True;    // even if Authority is empty
 | |
|   end
 | |
|   else
 | |
|   begin
 | |
|     Result.HasAuthority := False;
 | |
|     Authority := '';
 | |
|   end;
 | |
| 
 | |
|   // now s is 'hier-part' per RFC3986
 | |
|   // Extract the document name (nasty...)
 | |
| 
 | |
|   for i := Length(s) downto 1 do
 | |
|     if s[i] = '/' then
 | |
|     begin
 | |
|       Result.Document := Unescape(Copy(s, i + 1, Length(s)));
 | |
|       if (Result.Document <> '.') and (Result.Document <> '..') then
 | |
|         s := Copy(s, 1, i)
 | |
|       else
 | |
|         Result.Document := '';
 | |
|       break;
 | |
|     end else if s[i] = ':' then
 | |
|       break
 | |
|     else if i = 1 then
 | |
|     begin
 | |
|       Result.Document := Unescape(s);
 | |
|       if (Result.Document <> '.') and (Result.Document <> '..') then
 | |
|         s := ''
 | |
|       else
 | |
|         Result.Document := '';
 | |
|       // break - not needed, last iteration
 | |
|     end;
 | |
| 
 | |
|   // Everything left is a path
 | |
| 
 | |
|   Result.Path := Unescape(s);
 | |
| 
 | |
|   // Extract the port number
 | |
| 
 | |
|   i := LastDelimiter(':@', Authority);
 | |
|   if (i > 0) and (Authority[i] = ':') then
 | |
|   begin
 | |
|     Result.Port := StrToInt(Copy(Authority, i + 1, MaxInt));
 | |
|     Authority := Copy(Authority, 1, i - 1);
 | |
|   end;
 | |
| 
 | |
|   // Extract the hostname
 | |
| 
 | |
|   i := Pos('@', Authority);
 | |
|   if i > 0 then
 | |
|   begin
 | |
|     Result.Host := Copy(Authority, i+1, MaxInt);
 | |
|     Delete(Authority, i, MaxInt);
 | |
| 
 | |
|     // Extract username and password
 | |
|     if Length(Authority) > 0 then
 | |
|     begin
 | |
|       i := Pos(':', Authority);
 | |
|       if i = 0 then
 | |
|         Result.Username := Authority
 | |
|       else
 | |
|       begin
 | |
|         Result.Username := Copy(Authority, 1, i - 1);
 | |
|         Result.Password := Copy(Authority, i + 1, MaxInt);
 | |
|       end;
 | |
|     end;
 | |
|   end
 | |
|   else
 | |
|     Result.Host := Authority;
 | |
| end;
 | |
| 
 | |
| procedure RemoveDotSegments(var s: string);
 | |
| var
 | |
|   Cur, Prev: Integer;
 | |
| begin
 | |
|   Prev := Pos('/', s);
 | |
|   while (Prev > 0) and (Prev < Length(s)) do
 | |
|   begin
 | |
|     Cur := Prev+1;
 | |
|     while (Cur <= Length(s)) and (s[Cur] <> '/') do
 | |
|       Inc(Cur);
 | |
|     if (Cur - Prev = 2) and (s[Prev+1] = '.') then
 | |
|       Delete(s, Prev+1, 2)
 | |
|     else if (Cur - Prev = 3) and (s[Prev+1] = '.') and (s[Prev+2] = '.') then
 | |
|     begin
 | |
|       while (Prev > 1) and (s[Prev-1] <> '/') do
 | |
|         Dec(Prev);
 | |
|       if Prev > 1 then
 | |
|         Dec(Prev);
 | |
|       Delete(s, Prev+1, Cur-Prev);
 | |
|     end
 | |
|     else
 | |
|       Prev := Cur;
 | |
|   end;
 | |
| end;
 | |
| 
 | |
| // TODO: this probably must NOT percent-encode the result...
 | |
| function ResolveRelativeURI(const BaseUri, RelUri: UTF8String;
 | |
|   out ResultUri: UTF8String): Boolean;
 | |
| var
 | |
|   Base, Rel: TUri;
 | |
| begin
 | |
|   Base := ParseUri(BaseUri);
 | |
|   Rel := ParseUri(RelUri);
 | |
| 
 | |
|   Result := (Base.Protocol <> '') or (Rel.Protocol <> '');
 | |
|   if not Result then
 | |
|     Exit;
 | |
|   with Rel do
 | |
|   begin
 | |
|     if (Path = '') and (Document = '') then
 | |
|     begin
 | |
|       if (Protocol = '') and (Host = '') then
 | |
|       begin
 | |
|         if Params <> '' then
 | |
|           Base.Params := Params;
 | |
|         Base.Bookmark := Bookmark;
 | |
|         ResultUri := EncodeUri(Base);
 | |
|         Exit;
 | |
|       end;
 | |
|     end;
 | |
|     if (Protocol <> '') then  // RelURI is absolute - return it...
 | |
|     begin
 | |
|       ResultUri := RelUri;
 | |
|       Exit;
 | |
|     end;
 | |
|     // Inherit protocol
 | |
|     Protocol := Base.Protocol;
 | |
|     if (Host = '') then   // TODO: or "not HasAuthority"?
 | |
|     begin
 | |
|       // Inherit Authority (host, port, username, password)
 | |
|       Host := Base.Host;
 | |
|       Port := Base.Port;
 | |
|       Username := Base.Username;
 | |
|       Password := Base.Password;
 | |
|       HasAuthority := Base.HasAuthority;
 | |
|       if (Path = '') or (Path[1] <> '/') then  // path is empty or relative
 | |
|         Path := Base.Path + Path;
 | |
|       RemoveDotSegments(Path);      
 | |
|     end;
 | |
|   end; // with
 | |
|   ResultUri := EncodeUri(Rel);
 | |
| end;
 | |
| 
 | |
| function ResolveRelativeURI(const BaseUri, RelUri: WideString;
 | |
|   out ResultUri: WideString): Boolean;
 | |
| var
 | |
|   rslt: UTF8String;
 | |
| begin
 | |
|   Result := ResolveRelativeURI(UTF8Encode(BaseUri), UTF8Encode(RelUri), rslt);
 | |
|   if Result then
 | |
|     ResultURI := UTF8Decode(rslt);
 | |
| end;
 | |
| 
 | |
| function URIToFilename(const URI: string; out Filename: string): Boolean;
 | |
| var
 | |
|   U: TURI;
 | |
|   I: Integer;
 | |
| begin
 | |
|   Result := False;
 | |
|   U := ParseURI(URI);
 | |
|   if SameText(U.Protocol, 'file') then
 | |
|   begin
 | |
|     if (Length(U.Path) > 2) and (U.Path[1] = '/') and (U.Path[3] = ':') then
 | |
|       Filename := Copy(U.Path, 2, MaxInt)
 | |
|     else
 | |
|       Filename := U.Path;
 | |
|     Filename := Filename + U.Document;
 | |
|     Result := True;
 | |
|   end
 | |
|   else
 | |
|     if U.Protocol = '' then  // fire and pray?
 | |
|     begin
 | |
|       Filename := U.Path + U.Document;
 | |
|       Result := True;
 | |
|     end;
 | |
|   if PathDelim <> '/' then
 | |
|   begin
 | |
|     I := Pos('/', Filename);
 | |
|     while I > 0 do
 | |
|     begin
 | |
|       Filename[I] := PathDelim;
 | |
|       I := Pos('/', Filename);
 | |
|     end;
 | |
|   end;
 | |
| end;
 | |
| 
 | |
| function FilenameToURI(const Filename: string): string;
 | |
| var
 | |
|   I: Integer;
 | |
| begin
 | |
|   // TODO: seems implemented, but not tested well
 | |
|   Result := 'file://';
 | |
|   if (Length(Filename) > 2) and (Filename[1] <> PathDelim) and (Filename[2] = ':') then
 | |
|     Result := Result + '/';
 | |
|   Result := Result + Filename;
 | |
|   if PathDelim <> '/' then
 | |
|   begin
 | |
|     I := Pos(PathDelim, Result);
 | |
|     while I <> 0 do
 | |
|     begin
 | |
|       Result[I] := '/';
 | |
|       I := Pos(PathDelim, Result);
 | |
|     end;
 | |
|   end;
 | |
| end;
 | |
| 
 | |
| 
 | |
| function IsAbsoluteURI(const UriReference: string): Boolean;
 | |
| var
 | |
|   I: Integer;
 | |
| begin
 | |
|   Result := True;
 | |
|   for I := 1 to Length(UriReference) do
 | |
|   begin
 | |
|     if UriReference[I] = ':' then
 | |
|       Exit
 | |
|     else
 | |
|       if not (((I=1) and (UriReference[I] in ALPHA)) or
 | |
|          (UriReference[i] in ALPHA + DIGIT + ['+', '-', '.'])) then
 | |
|       Break;
 | |
|   end;
 | |
|   Result := False;
 | |
| end;
 | |
| 
 | |
| 
 | |
| end.
 | 
