mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-04-27 03:13:42 +02:00
4133 lines
112 KiB
ObjectPascal
4133 lines
112 KiB
ObjectPascal
{
|
|
This file is based on the FCL unit xmlread svn revision 15251.
|
|
Converted to use UTF8 instead of widestrings by Mattias Gaertner.
|
|
}
|
|
{
|
|
This file is part of the Free Component Library
|
|
|
|
XML reading routines.
|
|
Copyright (c) 1999-2000 by Sebastian Guenther, sg@freepascal.org
|
|
Modified in 2006 by Sergei Gorelkin, sergei_gorelkin@mail.ru
|
|
|
|
See the file COPYING.FPC, included in this distribution,
|
|
for details about the copyright.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
**********************************************************************}
|
|
|
|
unit laz2_XMLRead;
|
|
|
|
{$ifdef fpc}
|
|
{$MODE objfpc}{$H+}
|
|
{$endif}
|
|
|
|
{$DEFINE UseUTF8}
|
|
{off $DEFINE UseWideString}
|
|
|
|
interface
|
|
|
|
uses
|
|
SysUtils, Classes, laz2_DOM;
|
|
|
|
type
|
|
TErrorSeverity = (esWarning, esError, esFatal);
|
|
|
|
TXMLReaderFlag = (
|
|
xrfAllowLowerThanInAttributeValue,
|
|
xrfAllowSpecialCharsInAttributeValue
|
|
);
|
|
TXMLReaderFlags = set of TXMLReaderFlag;
|
|
|
|
{ EXMLReadError }
|
|
|
|
EXMLReadError = class(Exception)
|
|
private
|
|
FSeverity: TErrorSeverity;
|
|
FErrorMessage: string;
|
|
FLine: Integer;
|
|
FLinePos: Integer;
|
|
public
|
|
property Severity: TErrorSeverity read FSeverity;
|
|
property ErrorMessage: string read FErrorMessage;
|
|
property Line: Integer read FLine;
|
|
property LinePos: Integer read FLinePos;
|
|
function LineCol: TPoint;
|
|
end;
|
|
|
|
procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String; Flags: TXMLReaderFlags = []); overload;
|
|
procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text; Flags: TXMLReaderFlags = []); overload;
|
|
procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; Flags: TXMLReaderFlags = []); overload;
|
|
procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String; Flags: TXMLReaderFlags = []); overload;
|
|
|
|
procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String; Flags: TXMLReaderFlags = []); overload;
|
|
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text; Flags: TXMLReaderFlags = []); overload;
|
|
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; Flags: TXMLReaderFlags = []); overload;
|
|
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const ABaseURI: String; Flags: TXMLReaderFlags = []); overload;
|
|
|
|
procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String); overload;
|
|
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text); overload;
|
|
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream); overload;
|
|
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String); overload;
|
|
|
|
type
|
|
TDOMParseOptions = class(TObject)
|
|
private
|
|
FValidate: Boolean;
|
|
FPreserveWhitespace: Boolean;
|
|
FExpandEntities: Boolean;
|
|
FIgnoreComments: Boolean;
|
|
FCDSectionsAsText: Boolean;
|
|
FResolveExternals: Boolean;
|
|
FNamespaces: Boolean;
|
|
FDisallowDoctype: Boolean;
|
|
FCanonical: Boolean;
|
|
FMaxChars: Cardinal;
|
|
function GetCanonical: Boolean;
|
|
procedure SetCanonical(aValue: Boolean);
|
|
public
|
|
property Validate: Boolean read FValidate write FValidate;
|
|
property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
|
|
property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
|
|
property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
|
|
property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
|
|
property ResolveExternals: Boolean read FResolveExternals write FResolveExternals;
|
|
property Namespaces: Boolean read FNamespaces write FNamespaces;
|
|
property DisallowDoctype: Boolean read FDisallowDoctype write FDisallowDoctype;
|
|
property MaxChars: Cardinal read FMaxChars write FMaxChars;
|
|
property CanonicalForm: Boolean read GetCanonical write SetCanonical;
|
|
end;
|
|
|
|
// NOTE: DOM 3 LS ACTION_TYPE enumeration starts at 1
|
|
TXMLContextAction = (
|
|
xaAppendAsChildren = 1,
|
|
xaReplaceChildren,
|
|
xaInsertBefore,
|
|
xaInsertAfter,
|
|
xaReplace);
|
|
|
|
TXMLErrorEvent = procedure(Error: EXMLReadError) of object;
|
|
|
|
TXMLInputSource = class(TObject)
|
|
private
|
|
FStream: TStream;
|
|
FStringData: string;
|
|
FBaseURI: DOMString;
|
|
FSystemID: DOMString;
|
|
FPublicID: DOMString;
|
|
// FEncoding: string;
|
|
public
|
|
constructor Create(AStream: TStream); overload;
|
|
constructor Create(const AStringData: string); overload;
|
|
property Stream: TStream read FStream;
|
|
property StringData: string read FStringData;
|
|
property BaseURI: DOMString read FBaseURI write FBaseURI;
|
|
property SystemID: DOMString read FSystemID write FSystemID;
|
|
property PublicID: DOMString read FPublicID write FPublicID;
|
|
// property Encoding: string read FEncoding write FEncoding;
|
|
end;
|
|
|
|
TDOMParser = class(TObject)
|
|
private
|
|
FOptions: TDOMParseOptions;
|
|
FOnError: TXMLErrorEvent;
|
|
public
|
|
constructor Create;
|
|
destructor Destroy; override;
|
|
procedure Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
|
|
procedure ParseUri(const URI: DOMString; out ADoc: TXMLDocument);
|
|
function ParseWithContext(Src: TXMLInputSource; Context: TDOMNode;
|
|
Action: TXMLContextAction): TDOMNode;
|
|
property Options: TDOMParseOptions read FOptions;
|
|
property OnError: TXMLErrorEvent read FOnError write FOnError;
|
|
end;
|
|
|
|
TDecoder = record
|
|
Context: Pointer;
|
|
Decode: function(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: DOMPChar; var OutCnt: Cardinal): Integer; stdcall;
|
|
Cleanup: procedure(Context: Pointer); stdcall;
|
|
end;
|
|
|
|
TGetDecoderProc = function(const AEncoding: string; out Decoder: TDecoder): Boolean; stdcall;
|
|
|
|
procedure RegisterDecoder(Proc: TGetDecoderProc);
|
|
|
|
// =======================================================
|
|
|
|
implementation
|
|
|
|
uses
|
|
UriParser, laz2_xmlutils;
|
|
|
|
const
|
|
PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
|
|
'-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
|
|
'#', '@', '$', '_', '%'];
|
|
|
|
type
|
|
TDOMNotationEx = class(TDOMNotation);
|
|
TDOMDocumentTypeEx = class(TDOMDocumentType);
|
|
TDOMElementDef = class;
|
|
|
|
TDTDSubsetType = (dsNone, dsInternal, dsExternal);
|
|
|
|
// This may be augmented with ByteOffset, UTF8Offset, etc.
|
|
TLocation = record
|
|
Line: Integer;
|
|
LinePos: Integer;
|
|
end;
|
|
|
|
TDOMEntityEx = class(TDOMEntity)
|
|
protected
|
|
FExternallyDeclared: Boolean;
|
|
FPrefetched: Boolean;
|
|
FResolved: Boolean;
|
|
FOnStack: Boolean;
|
|
FBetweenDecls: Boolean;
|
|
FIsPE: Boolean;
|
|
FReplacementText: DOMString;
|
|
FURI: DOMString;
|
|
FStartLocation: TLocation;
|
|
FCharCount: Cardinal;
|
|
end;
|
|
|
|
DOMPCharBuf = ^TDOMCharBuf;
|
|
TDOMCharBuf = record
|
|
Buffer: DOMPChar;
|
|
Length: Integer;
|
|
MaxLength: Integer;
|
|
end;
|
|
|
|
TXMLReader = class;
|
|
|
|
TXMLCharSource = class(TObject)
|
|
private
|
|
FBuf: DOMPChar;
|
|
FBufEnd: DOMPChar;
|
|
FReader: TXMLReader;
|
|
FParent: TXMLCharSource;
|
|
FEntity: TObject; // weak reference
|
|
FLineNo: Integer;
|
|
LFPos: DOMPChar;
|
|
FXML11Rules: Boolean;
|
|
FSystemID: DOMString;
|
|
FCharCount: Cardinal;
|
|
FStartNesting: Integer;
|
|
function GetSystemID: DOMString;
|
|
protected
|
|
function Reload: Boolean; virtual;
|
|
public
|
|
DTDSubsetType: TDTDSubsetType;
|
|
constructor Create(const AData: DOMString);
|
|
procedure NextChar;
|
|
procedure NewLine; virtual;
|
|
function SkipUntil(var ToFill: TDOMCharBuf; const Delim: TSetOfChar;
|
|
wsflag: PBoolean = nil; AllowSpecialChars: boolean = false): DOMChar; virtual;
|
|
procedure Initialize; virtual;
|
|
function SetEncoding(const AEncoding: string): Boolean; virtual;
|
|
function Matches(const arg: DOMString): Boolean;
|
|
property SystemID: DOMString read GetSystemID write FSystemID;
|
|
end;
|
|
|
|
TXMLDecodingSource = class(TXMLCharSource)
|
|
private
|
|
FCharBuf: PChar;
|
|
FCharBufEnd: PChar;
|
|
FBufStart: DOMPChar;
|
|
FDecoder: TDecoder;
|
|
FHasBOM: Boolean;
|
|
FFixedUCS2: string;
|
|
FBufSize: Integer;
|
|
procedure DecodingError(const Msg: string);
|
|
protected
|
|
function Reload: Boolean; override;
|
|
procedure FetchData; virtual;
|
|
public
|
|
procedure AfterConstruction; override;
|
|
destructor Destroy; override;
|
|
function SetEncoding(const AEncoding: string): Boolean; override;
|
|
procedure NewLine; override;
|
|
function SkipUntil(var ToFill: TDOMCharBuf; const Delim: TSetOfChar;
|
|
wsflag: PBoolean = nil; AllowSpecialChars: boolean = false): DOMChar; override;
|
|
procedure Initialize; override;
|
|
end;
|
|
|
|
TXMLStreamInputSource = class(TXMLDecodingSource)
|
|
private
|
|
FAllocated: PChar;
|
|
FStream: TStream;
|
|
FCapacity: Integer;
|
|
FOwnStream: Boolean;
|
|
FEof: Boolean;
|
|
public
|
|
constructor Create(AStream: TStream; AOwnStream: Boolean);
|
|
destructor Destroy; override;
|
|
procedure FetchData; override;
|
|
end;
|
|
|
|
TXMLFileInputSource = class(TXMLDecodingSource)
|
|
private
|
|
FFile: ^Text;
|
|
FString: string;
|
|
FTmp: string;
|
|
public
|
|
constructor Create(var AFile: Text);
|
|
procedure FetchData; override;
|
|
end;
|
|
|
|
PForwardRef = ^TForwardRef;
|
|
TForwardRef = record
|
|
Value: DOMString;
|
|
Loc: TLocation;
|
|
end;
|
|
|
|
TCPType = (ctName, ctChoice, ctSeq);
|
|
TCPQuant = (cqOnce, cqZeroOrOnce, cqZeroOrMore, cqOnceOrMore);
|
|
|
|
TContentParticle = class(TObject)
|
|
private
|
|
FParent: TContentParticle;
|
|
FChildren: TFPList;
|
|
FIndex: Integer;
|
|
function GetChildCount: Integer;
|
|
function GetChild(Index: Integer): TContentParticle;
|
|
public
|
|
CPType: TCPType;
|
|
CPQuant: TCPQuant;
|
|
Def: TDOMElementDef;
|
|
destructor Destroy; override;
|
|
function Add: TContentParticle;
|
|
function IsRequired: Boolean;
|
|
function FindFirst(aDef: TDOMElementDef): TContentParticle;
|
|
function FindNext(aDef: TDOMElementDef; ChildIdx: Integer): TContentParticle;
|
|
function MoreRequired(ChildIdx: Integer): Boolean;
|
|
property ChildCount: Integer read GetChildCount;
|
|
property Children[Index: Integer]: TContentParticle read GetChild;
|
|
end;
|
|
|
|
TElementValidator = object
|
|
FElement: TDOMElement;
|
|
FElementDef: TDOMElementDef;
|
|
FCurCP: TContentParticle;
|
|
FFailed: Boolean;
|
|
function IsElementAllowed(Def: TDOMElementDef): Boolean;
|
|
function Incomplete: Boolean;
|
|
end;
|
|
|
|
TXMLReadState = (rsProlog, rsDTD, rsRoot, rsEpilog);
|
|
|
|
TElementContentType = (
|
|
ctUndeclared,
|
|
ctAny,
|
|
ctEmpty,
|
|
ctMixed,
|
|
ctChildren
|
|
);
|
|
|
|
TCheckNameFlags = set of (cnOptional, cnToken);
|
|
|
|
TPrefixedAttr = record
|
|
Attr: TDOMAttr;
|
|
PrefixLen: Integer; // to avoid recalculation
|
|
end;
|
|
|
|
TLiteralType = (ltPlain, ltAttr, ltTokAttr, ltPubid, ltEntity);
|
|
|
|
TXMLReader = class
|
|
private
|
|
FFlags: TXMLReaderFlags;
|
|
FSource: TXMLCharSource;
|
|
FCtrl: TDOMParser;
|
|
FXML11: Boolean;
|
|
FState: TXMLReadState;
|
|
FRecognizePE: Boolean;
|
|
FHavePERefs: Boolean;
|
|
FInsideDecl: Boolean;
|
|
FDocNotValid: Boolean;
|
|
FValue: TDOMCharBuf;
|
|
FEntityValue: TDOMCharBuf;
|
|
FName: TDOMCharBuf;
|
|
FTokenStart: TLocation;
|
|
FStandalone: Boolean; // property of Doc ?
|
|
FNamePages: PByteArray;
|
|
FDocType: TDOMDocumentTypeEx; // a shortcut
|
|
FPEMap: TDOMNamedNodeMap;
|
|
FIDRefs: TFPList;
|
|
FNotationRefs: TFPList;
|
|
FCurrContentType: TElementContentType;
|
|
FSaViolation: Boolean;
|
|
FDTDStartPos: DOMPChar;
|
|
FIntSubset: TDOMCharBuf;
|
|
FAttrTag: Cardinal;
|
|
FOwnsDoctype: Boolean;
|
|
FDTDProcessed: Boolean;
|
|
|
|
FNSHelper: TNSSupport;
|
|
FWorkAtts: array of TPrefixedAttr;
|
|
FNsAttHash: TDblHashArray;
|
|
FStdPrefix_xml: PHashItem;
|
|
FStdPrefix_xmlns: PHashItem;
|
|
|
|
FColonPos: Integer;
|
|
FValidate: Boolean; // parsing options, copy of FCtrl.Options
|
|
FPreserveWhitespace: Boolean;
|
|
FExpandEntities: Boolean;
|
|
FIgnoreComments: Boolean;
|
|
FCDSectionsAsText: Boolean;
|
|
FResolveExternals: Boolean;
|
|
FNamespaces: Boolean;
|
|
FDisallowDoctype: Boolean;
|
|
FCanonical: Boolean;
|
|
FMaxChars: Cardinal;
|
|
|
|
procedure SkipQuote(out Delim: DOMChar; required: Boolean = True);
|
|
procedure Initialize(ASource: TXMLCharSource);
|
|
function ContextPush(AEntity: TDOMEntityEx): Boolean;
|
|
function ContextPop(Forced: Boolean = False): Boolean;
|
|
procedure XML11_BuildTables;
|
|
procedure ParseQuantity(CP: TContentParticle);
|
|
procedure StoreLocation(out Loc: TLocation);
|
|
function ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: DOMString): Boolean;
|
|
procedure ValidateAttrValue(Attr: TDOMAttr; const aValue: DOMString);
|
|
procedure AddForwardRef(aList: TFPList; Buf: DOMPChar; Length: Integer);
|
|
procedure ClearRefs(aList: TFPList);
|
|
procedure ValidateIdRefs;
|
|
procedure StandaloneError(LineOffs: Integer = 0);
|
|
procedure CallErrorHandler(E: EXMLReadError);
|
|
function FindOrCreateElDef: TDOMElementDef;
|
|
function SkipUntilSeq(const Delim: TSetOfChar; c1: DOMChar; c2: DOMChar = #0): Boolean;
|
|
procedure CheckMaxChars;
|
|
protected
|
|
FCursor: TDOMNode_WithChildren;
|
|
FNesting: Integer;
|
|
FValidator: array of TElementValidator;
|
|
|
|
procedure DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer=0);
|
|
procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
|
|
const ErrPos: TLocation);
|
|
procedure FatalError(const descr: String; LineOffs: Integer=0); overload;
|
|
procedure FatalError(const descr: string; const args: array of const; LineOffs: Integer=0); overload;
|
|
procedure FatalError(Expected: DOMChar); overload;
|
|
function SkipWhitespace(PercentAloneIsOk: Boolean = False): Boolean;
|
|
function SkipS(required: Boolean = False): Boolean;
|
|
procedure ExpectWhitespace;
|
|
procedure ExpectString(const s: String);
|
|
procedure ExpectChar(wc: DOMChar);
|
|
function CheckForChar(c: DOMChar): Boolean;
|
|
|
|
procedure RaiseNameNotFound;
|
|
function CheckName(aFlags: TCheckNameFlags = []): Boolean;
|
|
procedure CheckNCName;
|
|
function ExpectName: DOMString; // [5]
|
|
function ParseLiteral(var ToFill: TDOMCharBuf; aType: TLiteralType;
|
|
Required: Boolean; Normalized: PBoolean = nil): Boolean;
|
|
procedure ExpectAttValue; // [10]
|
|
procedure ParseComment; // [15]
|
|
procedure ParsePI; // [16]
|
|
procedure ParseXmlOrTextDecl(TextDecl: Boolean);
|
|
procedure ExpectEq;
|
|
procedure ParseDoctypeDecl; // [28]
|
|
procedure ParseMarkupDecl; // [29]
|
|
procedure ParseElement; // [39]
|
|
procedure ParseEndTag; // [42]
|
|
procedure DoEndElement(ErrOffset: Integer);
|
|
procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
|
|
procedure ParseContent; // [43]
|
|
function ResolvePredefined: Boolean;
|
|
function EntityCheck(NoExternals: Boolean = False): TDOMEntityEx;
|
|
procedure AppendReference(AEntity: TDOMEntityEx);
|
|
function PrefetchEntity(AEntity: TDOMEntityEx): Boolean;
|
|
procedure StartPE;
|
|
function ParseRef(var ToFill: TDOMCharBuf): Boolean; // [67]
|
|
function ParseExternalID(out SysID, PubID: DOMString; // [75]
|
|
SysIdOptional: Boolean): Boolean;
|
|
|
|
procedure BadPENesting(S: TErrorSeverity = esError);
|
|
procedure ParseEntityDecl;
|
|
procedure ParseAttlistDecl;
|
|
procedure ExpectChoiceOrSeq(CP: TContentParticle);
|
|
procedure ParseElementDecl;
|
|
procedure ParseNotationDecl;
|
|
function ResolveEntity(const SystemID, PublicID, BaseURI: DOMString; out Source: TXMLCharSource): Boolean;
|
|
procedure ProcessDefaultAttributes(Element: TDOMElement; Map: TDOMNamedNodeMap);
|
|
procedure ProcessNamespaceAtts(Element: TDOMElement);
|
|
procedure AddBinding(Attr: TDOMAttr; PrefixPtr: DOMPChar; PrefixLen: Integer);
|
|
|
|
procedure PushVC(aElement: TDOMElement; aElDef: TDOMElementDef);
|
|
procedure PopVC;
|
|
procedure UpdateConstraints;
|
|
procedure ValidateDTD;
|
|
procedure ValidateRoot;
|
|
procedure ValidationError(const Msg: string; const args: array of const; LineOffs: Integer = -1);
|
|
procedure DoAttrText(ch: DOMPChar; Count: Integer);
|
|
procedure DTDReloadHook;
|
|
procedure ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
|
|
// Some SAX-alike stuff (at a very early stage)
|
|
procedure DoText(ch: DOMPChar; Count: Integer; Whitespace: Boolean=False);
|
|
procedure DoComment(ch: DOMPChar; Count: Integer);
|
|
procedure DoCDSect(ch: DOMPChar; Count: Integer);
|
|
procedure DoNotationDecl(const aName, aPubID, aSysID: DOMString);
|
|
public
|
|
doc: TDOMDocument;
|
|
constructor Create; overload;
|
|
constructor Create(AParser: TDOMParser); overload;
|
|
destructor Destroy; override;
|
|
procedure ProcessXML(ASource: TXMLCharSource); // [1]
|
|
procedure ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode);
|
|
procedure ProcessDTD(ASource: TXMLCharSource); // ([29])
|
|
|
|
property Flags: TXMLReaderFlags read FFlags write FFlags;
|
|
end;
|
|
|
|
// Attribute/Element declarations
|
|
|
|
TDOMElementDef = class(TDOMElement)
|
|
public
|
|
FExternallyDeclared: Boolean;
|
|
ContentType: TElementContentType;
|
|
IDAttr: TDOMAttrDef;
|
|
NotationAttr: TDOMAttrDef;
|
|
RootCP: TContentParticle;
|
|
destructor Destroy; override;
|
|
end;
|
|
|
|
const
|
|
NullLocation: TLocation = (Line: 0; LinePos: 0);
|
|
|
|
{ Decoders }
|
|
|
|
var
|
|
Decoders: array of TGetDecoderProc;
|
|
|
|
procedure RegisterDecoder(Proc: TGetDecoderProc);
|
|
var
|
|
L: Integer;
|
|
begin
|
|
L := Length(Decoders);
|
|
SetLength(Decoders, L+1);
|
|
Decoders[L] := Proc;
|
|
end;
|
|
|
|
function FindDecoder(const AEncoding: string; out Decoder: TDecoder): Boolean;
|
|
var
|
|
I: Integer;
|
|
begin
|
|
Result := False;
|
|
for I := 0 to High(Decoders) do
|
|
if Decoders[I](AEncoding, Decoder) then
|
|
begin
|
|
Result := True;
|
|
Exit;
|
|
end;
|
|
end;
|
|
|
|
function WriteUTF8(u: cardinal; var OutBuf: DOMPChar; var OutCnt: Cardinal): boolean; inline;
|
|
begin
|
|
case u of
|
|
0..$7f:
|
|
begin
|
|
if OutCnt<1 then exit(false);
|
|
dec(OutCnt);
|
|
OutBuf[0]:=char(byte(u));
|
|
inc(OutBuf);
|
|
end;
|
|
$80..$7ff:
|
|
begin
|
|
if OutCnt<2 then exit(false);
|
|
dec(OutCnt,2);
|
|
OutBuf[0]:=char(byte($c0 or (u shr 6)));
|
|
OutBuf[1]:=char(byte($80 or (u and $3f)));
|
|
inc(OutBuf,2);
|
|
end;
|
|
$800..$ffff:
|
|
begin
|
|
if OutCnt<3 then exit(false);
|
|
dec(OutCnt,3);
|
|
OutBuf[0]:=char(byte($e0 or (u shr 12)));
|
|
OutBuf[1]:=char(byte((u shr 6) and $3f) or $80);
|
|
OutBuf[2]:=char(byte(u and $3f) or $80);
|
|
inc(OutBuf,3);
|
|
end;
|
|
$10000..$10ffff:
|
|
begin
|
|
if OutCnt<4 then exit(false);
|
|
dec(OutCnt,4);
|
|
OutBuf[0]:=char(byte($f0 or (u shr 18)));
|
|
OutBuf[1]:=char(byte((u shr 12) and $3f) or $80);
|
|
OutBuf[2]:=char(byte((u shr 6) and $3f) or $80);
|
|
OutBuf[3]:=char(byte(u and $3f) or $80);
|
|
inc(OutBuf,3);
|
|
end;
|
|
else
|
|
exit(false);
|
|
end;
|
|
Result:=true;
|
|
end;
|
|
|
|
function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: DOMPChar; var OutCnt: Cardinal): Integer; stdcall;
|
|
{$IFDEF UseUTF8}
|
|
var
|
|
u: cardinal;
|
|
OldOutCnt: cardinal;
|
|
begin
|
|
Result:=0;
|
|
OldOutCnt:=OutCnt;
|
|
while InCnt>1 do begin
|
|
u:=PWord(InBuf)^;
|
|
inc(InBuf,2);
|
|
if not WriteUTF8(u,OutBuf,OutCnt) then break;
|
|
dec(InCnt,2);
|
|
end;
|
|
Result:=OldOutCnt-OutCnt;
|
|
end;
|
|
{$ENDIF UseUTF8}
|
|
{$IFDEF UseWideString}
|
|
var
|
|
cnt: Cardinal;
|
|
begin
|
|
cnt := OutCnt; // num of DOMchars
|
|
if cnt > InCnt div sizeof(DOMChar) then
|
|
cnt := InCnt div sizeof(DOMChar);
|
|
Move(InBuf^, OutBuf^, cnt * sizeof(DOMChar));
|
|
Dec(InCnt, cnt*sizeof(DOMChar));
|
|
Dec(OutCnt, cnt);
|
|
Result := cnt;
|
|
end;
|
|
{$ENDIF UseWideString}
|
|
|
|
function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: DOMPChar; var OutCnt: Cardinal): Integer; stdcall;
|
|
{$IFDEF UseUTF8}
|
|
var
|
|
u: cardinal;
|
|
OldOutCnt: cardinal;
|
|
begin
|
|
Result:=0;
|
|
OldOutCnt:=OutCnt;
|
|
while InCnt>1 do begin
|
|
u:=(ord(InBuf^) shl 8) or ord(InBuf[1]);
|
|
inc(InBuf,2);
|
|
if not WriteUTF8(u,OutBuf,OutCnt) then break;
|
|
dec(InCnt,2);
|
|
end;
|
|
Result:=OldOutCnt-OutCnt;
|
|
end;
|
|
{$ENDIF UseUTF8}
|
|
{$IFDEF UseWideString}
|
|
var
|
|
I: Integer;
|
|
cnt: Cardinal;
|
|
InPtr: PChar;
|
|
begin
|
|
cnt := OutCnt; // num of DOMchars
|
|
if cnt > InCnt div sizeof(DOMChar) then
|
|
cnt := InCnt div sizeof(DOMChar);
|
|
InPtr := InBuf;
|
|
for I := 0 to cnt-1 do
|
|
begin
|
|
OutBuf[I] := DOMChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
|
|
Inc(InPtr, 2);
|
|
end;
|
|
Dec(InCnt, cnt*sizeof(DOMChar));
|
|
Dec(OutCnt, cnt);
|
|
Result := cnt;
|
|
end;
|
|
{$ENDIF UseWideString}
|
|
|
|
function Decode_88591(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: DOMPChar; var OutCnt: Cardinal): Integer; stdcall;
|
|
{$IFDEF UseUTF8}
|
|
var
|
|
u: cardinal;
|
|
OldOutCnt: cardinal;
|
|
begin
|
|
Result:=0;
|
|
OldOutCnt:=OutCnt;
|
|
while InCnt>0 do begin
|
|
u:=ord(InBuf^);
|
|
inc(InBuf);
|
|
if not WriteUTF8(u,OutBuf,OutCnt) then break;
|
|
dec(InCnt);
|
|
end;
|
|
Result:=OldOutCnt-OutCnt;
|
|
end;
|
|
{$ENDIF UseUTF8}
|
|
{$IFDEF UseWideString}
|
|
var
|
|
I: Integer;
|
|
cnt: Cardinal;
|
|
begin
|
|
cnt := OutCnt; // num of DOMchars
|
|
if cnt > InCnt then
|
|
cnt := InCnt;
|
|
for I := 0 to cnt-1 do // ToDo: check for >#127
|
|
OutBuf[I] := DOMChar(ord(InBuf[I]));
|
|
Dec(InCnt, cnt);
|
|
Dec(OutCnt, cnt);
|
|
Result := cnt;
|
|
end;
|
|
{$ENDIF}
|
|
|
|
function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: DOMPChar; var OutCnt: Cardinal): Integer; stdcall;
|
|
{$IFDEF UseUTF8}
|
|
var
|
|
cnt: Cardinal;
|
|
begin
|
|
cnt := OutCnt; // num of DOMchars
|
|
if cnt > InCnt then
|
|
cnt := InCnt;
|
|
if cnt>0 then begin
|
|
System.Move(InBuf^,OutBuf^,cnt);
|
|
Dec(InCnt, cnt);
|
|
Dec(OutCnt, cnt);
|
|
end;
|
|
Result := cnt;
|
|
end;
|
|
{$ENDIF UseUTF8}
|
|
{$IFDEF UseWideString}
|
|
const
|
|
MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
|
|
var
|
|
i, j, bc: Cardinal;
|
|
Value: Cardinal;
|
|
begin
|
|
result := 0;
|
|
i := OutCnt;
|
|
while (i > 0) and (InCnt > 0) do
|
|
begin
|
|
bc := 1;
|
|
Value := ord(InBuf^);
|
|
if Value < $80 then
|
|
OutBuf^ := DOMChar(Value)
|
|
else
|
|
begin
|
|
if Value < $C2 then
|
|
begin
|
|
Result := -1;
|
|
Break;
|
|
end;
|
|
Inc(bc);
|
|
if Value > $DF then
|
|
begin
|
|
Inc(bc);
|
|
if Value > $EF then
|
|
begin
|
|
Inc(bc);
|
|
if Value > $F7 then // never encountered in the tests.
|
|
begin
|
|
Result := -1;
|
|
Break;
|
|
end;
|
|
end;
|
|
end;
|
|
if InCnt < bc then
|
|
Break;
|
|
j := 1;
|
|
while j < bc do
|
|
begin
|
|
if InBuf[j] in [#$80..#$BF] then
|
|
Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
|
|
else
|
|
begin
|
|
Result := -1;
|
|
Break;
|
|
end;
|
|
Inc(j);
|
|
end;
|
|
Value := Value and MaxCode[bc];
|
|
// RFC2279 check
|
|
if Value <= MaxCode[bc-1] then
|
|
begin
|
|
Result := -1;
|
|
Break;
|
|
end;
|
|
case Value of
|
|
0..$D7FF, $E000..$FFFF: OutBuf^ := DOMChar(Value);
|
|
$10000..$10FFFF:
|
|
begin
|
|
if i < 2 then Break;
|
|
OutBuf^ := DOMChar($D7C0 + (Value shr 10));
|
|
OutBuf[1] := DOMChar($DC00 xor (Value and $3FF));
|
|
Inc(OutBuf); // once here
|
|
Dec(i);
|
|
end
|
|
else
|
|
begin
|
|
Result := -1;
|
|
Break;
|
|
end;
|
|
end;
|
|
end;
|
|
Inc(OutBuf);
|
|
Inc(InBuf, bc);
|
|
Dec(InCnt, bc);
|
|
Dec(i);
|
|
end;
|
|
if Result >= 0 then
|
|
Result := OutCnt-i;
|
|
OutCnt := i;
|
|
end;
|
|
{$ENDIF UseWideString}
|
|
|
|
function Is_8859_1(const AEncoding: string): Boolean;
|
|
begin
|
|
Result := SameText(AEncoding, 'ISO-8859-1') or
|
|
SameText(AEncoding, 'ISO_8859-1') or
|
|
SameText(AEncoding, 'latin1') or
|
|
SameText(AEncoding, 'iso-ir-100') or
|
|
SameText(AEncoding, 'l1') or
|
|
SameText(AEncoding, 'IBM819') or
|
|
SameText(AEncoding, 'CP819') or
|
|
SameText(AEncoding, 'csISOLatin1') or
|
|
// This one is not in character-sets.txt, but was used in FPC documentation,
|
|
// and still being used in fcl-registry package
|
|
SameText(AEncoding, 'ISO8859-1');
|
|
end;
|
|
|
|
procedure BufAllocate(var ABuffer: TDOMCharBuf; ALength: Integer);
|
|
begin
|
|
ABuffer.MaxLength := ALength;
|
|
ABuffer.Length := 0;
|
|
ABuffer.Buffer := AllocMem(ABuffer.MaxLength*SizeOf(DOMChar));
|
|
end;
|
|
|
|
procedure BufAppend(var ABuffer: TDOMCharBuf; wc: DOMChar);
|
|
begin
|
|
if ABuffer.Length >= ABuffer.MaxLength then
|
|
begin
|
|
ReallocMem(ABuffer.Buffer, ABuffer.MaxLength * 2 * SizeOf(DOMChar));
|
|
FillChar(ABuffer.Buffer[ABuffer.MaxLength], ABuffer.MaxLength * SizeOf(DOMChar),0);
|
|
ABuffer.MaxLength := ABuffer.MaxLength * 2;
|
|
end;
|
|
ABuffer.Buffer[ABuffer.Length] := wc;
|
|
Inc(ABuffer.Length);
|
|
end;
|
|
|
|
procedure BufAppendChunk(var ABuf: TDOMCharBuf; pstart, pend: DOMPChar);
|
|
var
|
|
Len: Integer;
|
|
begin
|
|
Len := PEnd - PStart;
|
|
if Len <= 0 then
|
|
Exit;
|
|
if Len >= ABuf.MaxLength - ABuf.Length then
|
|
begin
|
|
ABuf.MaxLength := (Len + ABuf.Length)*2;
|
|
// note: memory clean isn't necessary here.
|
|
// To avoid garbage, control Length field.
|
|
ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(DOMChar));
|
|
end;
|
|
Move(pstart^, ABuf.Buffer[ABuf.Length], Len * sizeof(DOMChar));
|
|
Inc(ABuf.Length, Len);
|
|
end;
|
|
|
|
function BufEquals(const ABuf: TDOMCharBuf; const Arg: DOMString): Boolean;
|
|
begin
|
|
Result := (ABuf.Length = Length(Arg)) and
|
|
CompareMem(ABuf.Buffer, Pointer(Arg), ABuf.Length*sizeof(DOMChar));
|
|
end;
|
|
|
|
{ TDOMParseOptions }
|
|
|
|
function TDOMParseOptions.GetCanonical: Boolean;
|
|
begin
|
|
Result := FCanonical and FExpandEntities and FCDSectionsAsText and
|
|
{ (not normalizeCharacters) and } FNamespaces and
|
|
{ namespaceDeclarations and } FPreserveWhitespace;
|
|
end;
|
|
|
|
procedure TDOMParseOptions.SetCanonical(aValue: Boolean);
|
|
begin
|
|
FCanonical := aValue;
|
|
if aValue then
|
|
begin
|
|
FExpandEntities := True;
|
|
FCDSectionsAsText := True;
|
|
FNamespaces := True;
|
|
FPreserveWhitespace := True;
|
|
{ normalizeCharacters := False; }
|
|
{ namespaceDeclarations := True; }
|
|
{ wellFormed := True; }
|
|
end;
|
|
end;
|
|
|
|
{ TXMLInputSource }
|
|
|
|
constructor TXMLInputSource.Create(AStream: TStream);
|
|
begin
|
|
inherited Create;
|
|
FStream := AStream;
|
|
end;
|
|
|
|
constructor TXMLInputSource.Create(const AStringData: string);
|
|
begin
|
|
inherited Create;
|
|
FStringData := AStringData;
|
|
end;
|
|
|
|
{ TDOMParser }
|
|
|
|
constructor TDOMParser.Create;
|
|
begin
|
|
FOptions := TDOMParseOptions.Create;
|
|
end;
|
|
|
|
destructor TDOMParser.Destroy;
|
|
begin
|
|
FOptions.Free;
|
|
inherited Destroy;
|
|
end;
|
|
|
|
procedure TDOMParser.Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
|
|
var
|
|
InputSrc: TXMLCharSource;
|
|
begin
|
|
with TXMLReader.Create(Self) do
|
|
try
|
|
ConvertSource(Src, InputSrc); // handles 'no-input-specified' case
|
|
ProcessXML(InputSrc)
|
|
finally
|
|
ADoc := TXMLDocument(doc);
|
|
Free;
|
|
end;
|
|
end;
|
|
|
|
procedure TDOMParser.ParseUri(const URI: DOMString; out ADoc: TXMLDocument);
|
|
var
|
|
Src: TXMLCharSource;
|
|
begin
|
|
ADoc := nil;
|
|
with TXMLReader.Create(Self) do
|
|
try
|
|
if ResolveEntity(URI, '', '', Src) then
|
|
ProcessXML(Src)
|
|
else
|
|
DoErrorPos(esFatal, 'The specified URI could not be resolved', NullLocation);
|
|
finally
|
|
ADoc := TXMLDocument(doc);
|
|
Free;
|
|
end;
|
|
end;
|
|
|
|
function TDOMParser.ParseWithContext(Src: TXMLInputSource;
|
|
Context: TDOMNode; Action: TXMLContextAction): TDOMNode;
|
|
var
|
|
InputSrc: TXMLCharSource;
|
|
Frag: TDOMDocumentFragment;
|
|
node: TDOMNode;
|
|
begin
|
|
if Action in [xaInsertBefore, xaInsertAfter, xaReplace] then
|
|
node := Context.ParentNode
|
|
else
|
|
node := Context;
|
|
// TODO: replacing document isn't yet supported
|
|
if (Action = xaReplaceChildren) and (node.NodeType = DOCUMENT_NODE) then
|
|
raise EDOMNotSupported.Create('DOMParser.ParseWithContext');
|
|
|
|
if not (node.NodeType in [ELEMENT_NODE, DOCUMENT_FRAGMENT_NODE]) then
|
|
raise EDOMHierarchyRequest.Create('DOMParser.ParseWithContext');
|
|
|
|
with TXMLReader.Create(Self) do
|
|
try
|
|
ConvertSource(Src, InputSrc); // handles 'no-input-specified' case
|
|
Frag := Context.OwnerDocument.CreateDocumentFragment;
|
|
try
|
|
ProcessFragment(InputSrc, Frag);
|
|
Result := Frag.FirstChild;
|
|
case Action of
|
|
xaAppendAsChildren: Context.AppendChild(Frag);
|
|
|
|
xaReplaceChildren: begin
|
|
Context.TextContent := ''; // removes children
|
|
Context.ReplaceChild(Frag, Context.FirstChild);
|
|
end;
|
|
xaInsertBefore: node.InsertBefore(Frag, Context);
|
|
xaInsertAfter: node.InsertBefore(Frag, Context.NextSibling);
|
|
xaReplace: node.ReplaceChild(Frag, Context);
|
|
end;
|
|
finally
|
|
Frag.Free;
|
|
end;
|
|
finally
|
|
Free;
|
|
end;
|
|
end;
|
|
|
|
{ TXMLCharSource }
|
|
|
|
constructor TXMLCharSource.Create(const AData: DOMString);
|
|
begin
|
|
inherited Create;
|
|
FLineNo := 1;
|
|
FBuf := DOMPChar(AData);
|
|
FBufEnd := FBuf + Length(AData);
|
|
LFPos := FBuf-1;
|
|
FCharCount := Length(AData);
|
|
end;
|
|
|
|
procedure TXMLCharSource.Initialize;
|
|
begin
|
|
end;
|
|
|
|
function TXMLCharSource.SetEncoding(const AEncoding: string): Boolean;
|
|
begin
|
|
Result := True; // always succeed
|
|
end;
|
|
|
|
function TXMLCharSource.GetSystemID: DOMString;
|
|
begin
|
|
if FSystemID <> '' then
|
|
Result := FSystemID
|
|
else if Assigned(FParent) then
|
|
Result := FParent.SystemID
|
|
else
|
|
Result := '';
|
|
end;
|
|
|
|
function TXMLCharSource.Reload: Boolean;
|
|
begin
|
|
Result := False;
|
|
end;
|
|
|
|
procedure TXMLCharSource.NewLine;
|
|
begin
|
|
Inc(FLineNo);
|
|
LFPos := FBuf;
|
|
end;
|
|
|
|
function TXMLCharSource.SkipUntil(var ToFill: TDOMCharBuf; const Delim: TSetOfChar;
|
|
wsflag: PBoolean; AllowSpecialChars: boolean): DOMChar;
|
|
var
|
|
old: DOMPChar;
|
|
nonws: Boolean;
|
|
begin
|
|
old := FBuf;
|
|
nonws := False;
|
|
repeat
|
|
if FBuf^ = #10 then
|
|
NewLine;
|
|
if (FBuf^ < #255) and (Char(ord(FBuf^)) in Delim) then
|
|
Break;
|
|
if (FBuf^ > #32) or not (Char(ord(FBuf^)) in [#32, #9, #10, #13]) then
|
|
nonws := True;
|
|
Inc(FBuf);
|
|
until False;
|
|
Result := FBuf^;
|
|
BufAppendChunk(ToFill, old, FBuf);
|
|
if Assigned(wsflag) then
|
|
wsflag^ := wsflag^ or nonws;
|
|
end;
|
|
|
|
function TXMLCharSource.Matches(const arg: DOMString): Boolean;
|
|
begin
|
|
Result := False;
|
|
if (FBufEnd >= FBuf + Length(arg)) or Reload then
|
|
Result := CompareMem(Pointer(arg), FBuf, Length(arg)*sizeof(DOMChar));
|
|
if Result then
|
|
begin
|
|
Inc(FBuf, Length(arg));
|
|
if FBuf >= FBufEnd then
|
|
Reload;
|
|
end;
|
|
end;
|
|
|
|
{ TXMLDecodingSource }
|
|
|
|
procedure TXMLDecodingSource.AfterConstruction;
|
|
begin
|
|
inherited AfterConstruction;
|
|
FBufStart := AllocMem(4096);
|
|
FBuf := FBufStart;
|
|
FBufEnd := FBuf;
|
|
LFPos := FBuf-1;
|
|
end;
|
|
|
|
destructor TXMLDecodingSource.Destroy;
|
|
begin
|
|
FreeMem(FBufStart);
|
|
if Assigned(FDecoder.Cleanup) then
|
|
FDecoder.Cleanup(FDecoder.Context);
|
|
inherited Destroy;
|
|
end;
|
|
|
|
procedure TXMLDecodingSource.FetchData;
|
|
begin
|
|
end;
|
|
|
|
procedure TXMLDecodingSource.DecodingError(const Msg: string);
|
|
begin
|
|
// count line endings to obtain correct error location
|
|
while FBuf < FBufEnd do
|
|
begin
|
|
if (FBuf^ = #10) or (FBuf^ = #13)
|
|
or (FXML11Rules and ((FBuf^ = #$85) or (FBuf^ = #$2028))) // ToDo #$2028
|
|
then begin
|
|
if (FBuf^ = #13) and (FBuf < FBufEnd-1) and
|
|
((FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85))) then
|
|
Inc(FBuf);
|
|
LFPos := FBuf;
|
|
Inc(FLineNo);
|
|
end;
|
|
Inc(FBuf);
|
|
end;
|
|
FReader.FatalError(Msg);
|
|
end;
|
|
|
|
function TXMLDecodingSource.Reload: Boolean;
|
|
var
|
|
Remainder: PtrInt;
|
|
r, inLeft: Cardinal;
|
|
rslt: Integer;
|
|
begin
|
|
if DTDSubsetType = dsInternal then
|
|
FReader.DTDReloadHook;
|
|
Remainder := FBufEnd - FBuf;
|
|
if Remainder > 0 then
|
|
Move(FBuf^, FBufStart^, Remainder * sizeof(DOMChar));
|
|
Dec(LFPos, FBuf-FBufStart);
|
|
FBuf := FBufStart;
|
|
FBufEnd := FBufStart + Remainder;
|
|
|
|
repeat
|
|
inLeft := FCharBufEnd - FCharBuf;
|
|
if inLeft < 4 then // may contain an incomplete char
|
|
begin
|
|
FetchData;
|
|
inLeft := FCharBufEnd - FCharBuf;
|
|
if inLeft <= 0 then
|
|
Break;
|
|
end;
|
|
r := FBufStart + FBufSize - FBufEnd;
|
|
if r = 0 then
|
|
Break;
|
|
rslt := FDecoder.Decode(FDecoder.Context, FCharBuf, inLeft, FBufEnd, r);
|
|
{ Sanity checks: r and inLeft must not increase. }
|
|
if inLeft + FCharBuf <= FCharBufEnd then
|
|
FCharBuf := FCharBufEnd - inLeft
|
|
else
|
|
DecodingError('Decoder error: input byte count out of bounds');
|
|
if r + FBufEnd <= FBufStart + FBufSize then
|
|
FBufEnd := FBufStart + FBufSize - r
|
|
else
|
|
DecodingError('Decoder error: output char count out of bounds');
|
|
|
|
if rslt = 0 then
|
|
Break
|
|
else if rslt < 0 then
|
|
DecodingError('Invalid character in input stream')
|
|
else
|
|
begin
|
|
Inc(FCharCount, rslt);
|
|
FReader.CheckMaxChars;
|
|
end;
|
|
until False;
|
|
|
|
FBufEnd^ := #0;
|
|
Result := FBuf < FBufEnd;
|
|
end;
|
|
|
|
const
|
|
XmlSign: array [0..4] of DOMChar = ('<', '?', 'x', 'm', 'l');
|
|
|
|
procedure TXMLDecodingSource.Initialize;
|
|
begin
|
|
inherited;
|
|
FLineNo := 1;
|
|
FXml11Rules := FReader.FXML11;
|
|
|
|
FDecoder.Decode := @Decode_UTF8;
|
|
|
|
FFixedUCS2 := '';
|
|
if FCharBufEnd-FCharBuf > 1 then
|
|
begin
|
|
if (FCharBuf[0] = #$FE) and (FCharBuf[1] = #$FF) then
|
|
begin
|
|
FFixedUCS2 := 'UTF-16BE';
|
|
FDecoder.Decode := {$IFNDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
|
|
end
|
|
else if (FCharBuf[0] = #$FF) and (FCharBuf[1] = #$FE) then
|
|
begin
|
|
FFixedUCS2 := 'UTF-16LE';
|
|
FDecoder.Decode := {$IFDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
|
|
end;
|
|
end;
|
|
FBufSize := 6; // possible BOM and '<?xml'
|
|
Reload;
|
|
if FBuf^ = #$FEFF then
|
|
begin
|
|
FHasBOM := True;
|
|
Inc(FBuf);
|
|
end;
|
|
LFPos := FBuf-1;
|
|
if CompareMem(FBuf, @XmlSign[0], sizeof(XmlSign)) then
|
|
begin
|
|
FBufSize := 3; // don't decode past XML declaration
|
|
Inc(FBuf, Length(XmlSign));
|
|
FReader.ParseXmlOrTextDecl(FParent <> nil);
|
|
end;
|
|
FBufSize := 2047;
|
|
end;
|
|
|
|
function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean;
|
|
var
|
|
NewDecoder: TDecoder;
|
|
begin
|
|
Result := True;
|
|
if (FFixedUCS2 = '') and SameText(AEncoding, 'UTF-8') then
|
|
Exit;
|
|
if FFixedUCS2 <> '' then
|
|
begin
|
|
Result := SameText(AEncoding, FFixedUCS2) or
|
|
SameText(AEncoding, 'UTF-16') or
|
|
SameText(AEncoding, 'unicode');
|
|
Exit;
|
|
end;
|
|
// TODO: must fail when a byte-based stream is labeled as word-based.
|
|
// see rmt-e2e-61, it now fails but for a completely different reason.
|
|
FillChar(NewDecoder, sizeof(TDecoder), 0);
|
|
if Is_8859_1(AEncoding) then
|
|
FDecoder.Decode := @Decode_88591
|
|
else if FindDecoder(AEncoding, NewDecoder) then
|
|
FDecoder := NewDecoder
|
|
else
|
|
Result := False;
|
|
end;
|
|
|
|
procedure TXMLDecodingSource.NewLine;
|
|
begin
|
|
case FBuf^ of
|
|
#10: begin
|
|
Inc(FLineNo);
|
|
LFPos := FBuf;
|
|
end;
|
|
#13: begin
|
|
Inc(FLineNo);
|
|
LFPos := FBuf;
|
|
// Reload trashes the buffer, it should be consumed beforehand
|
|
if (FBufEnd >= FBuf+2) or Reload then
|
|
begin
|
|
if (FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85)) then
|
|
begin
|
|
Inc(FBuf);
|
|
Inc(LFPos);
|
|
end;
|
|
FBuf^ := #10;
|
|
end;
|
|
end;
|
|
#$85: // ToDo #$2028
|
|
if FXML11Rules then
|
|
begin
|
|
FBuf^ := #10;
|
|
Inc(FLineNo);
|
|
LFPos := FBuf;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
{ TXMLStreamInputSource }
|
|
|
|
const
|
|
Slack = 16;
|
|
|
|
constructor TXMLStreamInputSource.Create(AStream: TStream; AOwnStream: Boolean);
|
|
begin
|
|
FStream := AStream;
|
|
FCapacity := 4096;
|
|
GetMem(FAllocated, FCapacity+Slack);
|
|
FCharBuf := FAllocated+(Slack-4);
|
|
FCharBufEnd := FCharBuf;
|
|
FOwnStream := AOwnStream;
|
|
FetchData;
|
|
end;
|
|
|
|
destructor TXMLStreamInputSource.Destroy;
|
|
begin
|
|
FreeMem(FAllocated);
|
|
if FOwnStream then
|
|
FStream.Free;
|
|
inherited Destroy;
|
|
end;
|
|
|
|
procedure TXMLStreamInputSource.FetchData;
|
|
var
|
|
Remainder, BytesRead: Integer;
|
|
OldBuf: PChar;
|
|
begin
|
|
Assert(FCharBufEnd - FCharBuf < Slack-4);
|
|
if FEof then
|
|
Exit;
|
|
OldBuf := FCharBuf;
|
|
Remainder := FCharBufEnd - FCharBuf;
|
|
if Remainder < 0 then
|
|
Remainder := 0;
|
|
FCharBuf := FAllocated+Slack-4-Remainder;
|
|
if Remainder > 0 then
|
|
Move(OldBuf^, FCharBuf^, Remainder);
|
|
BytesRead := FStream.Read(FAllocated[Slack-4], FCapacity);
|
|
if BytesRead < FCapacity then
|
|
FEof := True;
|
|
FCharBufEnd := FAllocated + (Slack-4) + BytesRead;
|
|
{ Null-termination has been removed:
|
|
1) Built-in decoders don't need it because they respect the buffer length.
|
|
2) It was causing unaligned access errors on ARM CPUs.
|
|
}
|
|
//DOMPChar(FCharBufEnd)^ := #0;
|
|
end;
|
|
|
|
{ TXMLFileInputSource }
|
|
|
|
constructor TXMLFileInputSource.Create(var AFile: Text);
|
|
begin
|
|
FFile := @AFile;
|
|
SystemID := FilenameToURI(TTextRec(AFile).Name);
|
|
FetchData;
|
|
end;
|
|
|
|
procedure TXMLFileInputSource.FetchData;
|
|
var
|
|
Remainder: Integer;
|
|
begin
|
|
if not Eof(FFile^) then
|
|
begin
|
|
Remainder := FCharBufEnd - FCharBuf;
|
|
if Remainder > 0 then
|
|
SetString(FTmp, FCharBuf, Remainder);
|
|
ReadLn(FFile^, FString);
|
|
FString := FString + #10; // bad solution...
|
|
if Remainder > 0 then
|
|
Insert(FTmp, FString, 1);
|
|
FCharBuf := PChar(FString);
|
|
FCharBufEnd := FCharBuf + Length(FString);
|
|
end;
|
|
end;
|
|
|
|
{ helper that closes handle upon destruction }
|
|
type
|
|
THandleOwnerStream = class(THandleStream)
|
|
public
|
|
destructor Destroy; override;
|
|
end;
|
|
|
|
destructor THandleOwnerStream.Destroy;
|
|
begin
|
|
if Handle >= 0 then FileClose(Handle);
|
|
inherited Destroy;
|
|
end;
|
|
|
|
{ TXMLReader }
|
|
|
|
procedure TXMLReader.ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
|
|
begin
|
|
SrcOut := nil;
|
|
if Assigned(SrcIn) then
|
|
begin
|
|
if Assigned(SrcIn.FStream) then
|
|
SrcOut := TXMLStreamInputSource.Create(SrcIn.FStream, False)
|
|
else if SrcIn.FStringData <> '' then
|
|
SrcOut := TXMLStreamInputSource.Create(TStringStream.Create(SrcIn.FStringData), True)
|
|
else if (SrcIn.SystemID <> '') then
|
|
ResolveEntity(SrcIn.SystemID, SrcIn.PublicID, SrcIn.BaseURI, SrcOut);
|
|
end;
|
|
if (SrcOut = nil) and (FSource = nil) then
|
|
DoErrorPos(esFatal, 'No input source specified', NullLocation);
|
|
end;
|
|
|
|
procedure TXMLReader.StoreLocation(out Loc: TLocation);
|
|
begin
|
|
Loc.Line := FSource.FLineNo;
|
|
Loc.LinePos := FSource.FBuf-FSource.LFPos;
|
|
end;
|
|
|
|
function TXMLReader.ResolveEntity(const SystemID, PublicID, BaseURI: DOMString; out Source: TXMLCharSource): Boolean;
|
|
var
|
|
AbsSysID: DOMString;
|
|
Filename: string;
|
|
Stream: TStream;
|
|
fd: THandle;
|
|
begin
|
|
Source := nil;
|
|
Result := False;
|
|
if not ResolveRelativeURI(BaseURI, SystemID, AbsSysID) then
|
|
Exit;
|
|
{ TODO: alternative resolvers
|
|
These may be 'internal' resolvers or a handler set by application.
|
|
Internal resolvers should probably produce a TStream
|
|
( so that internal classes need not be exported ).
|
|
External resolver will produce TXMLInputSource that should be converted.
|
|
External resolver must NOT be called for root entity.
|
|
External resolver can return nil, in which case we do the default }
|
|
if URIToFilename(AbsSysID, Filename) then
|
|
begin
|
|
fd := FileOpen(Filename, fmOpenRead + fmShareDenyWrite);
|
|
if fd <> THandle(-1) then
|
|
begin
|
|
Stream := THandleOwnerStream.Create(fd);
|
|
Source := TXMLStreamInputSource.Create(Stream, True);
|
|
Source.SystemID := AbsSysID; // <- Revisit: Really need absolute sysID?
|
|
end;
|
|
end;
|
|
Result := Assigned(Source);
|
|
end;
|
|
|
|
procedure TXMLReader.Initialize(ASource: TXMLCharSource);
|
|
begin
|
|
ASource.FParent := FSource;
|
|
FSource := ASource;
|
|
FSource.FReader := Self;
|
|
FSource.FStartNesting := FNesting;
|
|
FSource.Initialize;
|
|
end;
|
|
|
|
procedure TXMLReader.FatalError(Expected: DOMChar);
|
|
begin
|
|
// FIX: don't output what is found - anything may be found, including exploits...
|
|
FatalError('Expected "%1s"', [string(Expected)]);
|
|
end;
|
|
|
|
procedure TXMLReader.FatalError(const descr: String; LineOffs: Integer);
|
|
begin
|
|
DoError(esFatal, descr, LineOffs);
|
|
end;
|
|
|
|
procedure TXMLReader.FatalError(const descr: string; const args: array of const; LineOffs: Integer);
|
|
begin
|
|
DoError(esFatal, Format(descr, args), LineOffs);
|
|
end;
|
|
|
|
procedure TXMLReader.ValidationError(const Msg: string; const Args: array of const; LineOffs: Integer);
|
|
begin
|
|
FDocNotValid := True;
|
|
if FValidate then
|
|
DoError(esError, Format(Msg, Args), LineOffs);
|
|
end;
|
|
|
|
procedure TXMLReader.DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer);
|
|
var
|
|
Loc: TLocation;
|
|
begin
|
|
StoreLocation(Loc);
|
|
if LineOffs >= 0 then
|
|
begin
|
|
Dec(Loc.LinePos, LineOffs);
|
|
DoErrorPos(Severity, descr, Loc);
|
|
end
|
|
else
|
|
DoErrorPos(Severity, descr, FTokenStart);
|
|
end;
|
|
|
|
procedure TXMLReader.DoErrorPos(Severity: TErrorSeverity; const descr: string; const ErrPos: TLocation);
|
|
var
|
|
E: EXMLReadError;
|
|
sysid: DOMString;
|
|
begin
|
|
if Assigned(FSource) then
|
|
begin
|
|
sysid := FSource.FSystemID;
|
|
if (sysid = '') and Assigned(FSource.FEntity) then
|
|
sysid := TDOMEntityEx(FSource.FEntity).FURI;
|
|
E := EXMLReadError.CreateFmt('In ''%s'' (line %d pos %d): %s', [sysid, ErrPos.Line, ErrPos.LinePos, descr]);
|
|
end
|
|
else
|
|
E := EXMLReadError.Create(descr);
|
|
E.FSeverity := Severity;
|
|
E.FErrorMessage := descr;
|
|
E.FLine := ErrPos.Line;
|
|
E.FLinePos := ErrPos.LinePos;
|
|
CallErrorHandler(E);
|
|
// No 'finally'! If user handler raises exception, control should not get here
|
|
// and the exception will be freed in CallErrorHandler (below)
|
|
E.Free;
|
|
end;
|
|
|
|
procedure TXMLReader.CheckMaxChars;
|
|
var
|
|
src: TXMLCharSource;
|
|
total: Cardinal;
|
|
begin
|
|
if FMaxChars = 0 then
|
|
Exit;
|
|
src := FSource;
|
|
total := 0;
|
|
repeat
|
|
Inc(total, src.FCharCount);
|
|
if total > FMaxChars then
|
|
FatalError('Exceeded character count limit');
|
|
src := src.FParent;
|
|
until src = nil;
|
|
end;
|
|
|
|
procedure TXMLReader.CallErrorHandler(E: EXMLReadError);
|
|
begin
|
|
try
|
|
if Assigned(FCtrl) and Assigned(FCtrl.FOnError) then
|
|
FCtrl.FOnError(E);
|
|
if E.Severity = esFatal then
|
|
raise E;
|
|
except
|
|
if ExceptObject <> E then
|
|
E.Free;
|
|
raise;
|
|
end;
|
|
end;
|
|
|
|
function TXMLReader.SkipWhitespace(PercentAloneIsOk: Boolean): Boolean;
|
|
begin
|
|
Result := False;
|
|
repeat
|
|
Result := SkipS or Result;
|
|
if FSource.FBuf^ = #0 then
|
|
begin
|
|
Result := True; // report whitespace upon exiting the PE
|
|
if not ContextPop then
|
|
Break;
|
|
end
|
|
else if FSource.FBuf^ = '%' then
|
|
begin
|
|
if not FRecognizePE then
|
|
Break;
|
|
// This is the only case where look-ahead is needed
|
|
if FSource.FBuf > FSource.FBufEnd-2 then
|
|
FSource.Reload;
|
|
|
|
if (not PercentAloneIsOk) or (Byte(FSource.FBuf[1]) in NamingBitmap[FNamePages^[$100+hi(Word(FSource.FBuf[1]))]]) or
|
|
(FXML11 and (FSource.FBuf[1] >= #$D800) and (FSource.FBuf[1] <= #$DB7F)) then
|
|
begin
|
|
Inc(FSource.FBuf); // skip '%'
|
|
CheckName;
|
|
ExpectChar(';');
|
|
StartPE;
|
|
Result := True; // report whitespace upon entering the PE
|
|
end
|
|
else Break;
|
|
end
|
|
else
|
|
Break;
|
|
until False;
|
|
end;
|
|
|
|
procedure TXMLReader.ExpectWhitespace;
|
|
begin
|
|
if not SkipWhitespace then
|
|
FatalError('Expected whitespace');
|
|
end;
|
|
|
|
function TXMLReader.SkipS(Required: Boolean): Boolean;
|
|
var
|
|
p: DOMPChar;
|
|
begin
|
|
Result := False;
|
|
repeat
|
|
p := FSource.FBuf;
|
|
repeat
|
|
if (p^ = #10) or (p^ = #13)
|
|
or (FXML11 and ((p^ = #$85) or (p^ = #$2028))) // ToDo #$2028
|
|
then begin
|
|
FSource.FBuf := p;
|
|
FSource.NewLine;
|
|
p := FSource.FBuf;
|
|
end
|
|
else if (p^ <> #32) and (p^ <> #9) then
|
|
Break;
|
|
Inc(p);
|
|
Result := True;
|
|
until False;
|
|
FSource.FBuf := p;
|
|
until (p^ <> #0) or (not FSource.Reload);
|
|
if (not Result) and Required then
|
|
FatalError('Expected whitespace');
|
|
end;
|
|
|
|
procedure TXMLReader.ExpectString(const s: String);
|
|
var
|
|
I: Integer;
|
|
begin
|
|
for I := 1 to Length(s) do
|
|
begin
|
|
if FSource.FBuf^ <> DOMChar(ord(s[i])) then
|
|
FatalError('Expected "%s"', [s], i-1);
|
|
FSource.NextChar;
|
|
end;
|
|
end;
|
|
|
|
function TXMLReader.CheckForChar(c: DOMChar): Boolean;
|
|
begin
|
|
Result := (FSource.FBuf^ = c);
|
|
if Result then
|
|
begin
|
|
Inc(FSource.FBuf);
|
|
if FSource.FBuf >= FSource.FBufEnd then
|
|
FSource.Reload;
|
|
end;
|
|
end;
|
|
|
|
procedure TXMLReader.SkipQuote(out Delim: DOMChar; required: Boolean);
|
|
begin
|
|
Delim := #0;
|
|
if (FSource.FBuf^ = '''') or (FSource.FBuf^ = '"') then
|
|
begin
|
|
Delim := FSource.FBuf^;
|
|
FSource.NextChar; // skip quote
|
|
StoreLocation(FTokenStart);
|
|
end
|
|
else if required then
|
|
FatalError('Expected single or double quote');
|
|
end;
|
|
|
|
const
|
|
PrefixDefault: array[0..4] of DOMChar = ('x','m','l','n','s');
|
|
|
|
constructor TXMLReader.Create;
|
|
begin
|
|
inherited Create;
|
|
BufAllocate(FName, 128);
|
|
BufAllocate(FValue, 512);
|
|
FIDRefs := TFPList.Create;
|
|
FNotationRefs := TFPList.Create;
|
|
|
|
FNSHelper := TNSSupport.Create;
|
|
|
|
FNsAttHash := TDblHashArray.Create;
|
|
SetLength(FWorkAtts, 16);
|
|
FStdPrefix_xml := FNSHelper.GetPrefix(@PrefixDefault, 3);
|
|
FStdPrefix_xmlns := FNSHelper.GetPrefix(@PrefixDefault, 5);
|
|
// Set char rules to XML 1.0
|
|
FNamePages := @NamePages;
|
|
SetLength(FValidator, 16);
|
|
end;
|
|
|
|
constructor TXMLReader.Create(AParser: TDOMParser);
|
|
begin
|
|
Create;
|
|
FCtrl := AParser;
|
|
FValidate := FCtrl.Options.Validate;
|
|
FPreserveWhitespace := FCtrl.Options.PreserveWhitespace;
|
|
FExpandEntities := FCtrl.Options.ExpandEntities;
|
|
FCDSectionsAsText := FCtrl.Options.CDSectionsAsText;
|
|
FIgnoreComments := FCtrl.Options.IgnoreComments;
|
|
FResolveExternals := FCtrl.Options.ResolveExternals;
|
|
FNamespaces := FCtrl.Options.Namespaces;
|
|
FDisallowDoctype := FCtrl.Options.DisallowDoctype;
|
|
FCanonical := FCtrl.Options.CanonicalForm;
|
|
FMaxChars := FCtrl.Options.MaxChars;
|
|
end;
|
|
|
|
destructor TXMLReader.Destroy;
|
|
begin
|
|
if Assigned(FEntityValue.Buffer) then
|
|
FreeMem(FEntityValue.Buffer);
|
|
FreeMem(FName.Buffer);
|
|
FreeMem(FValue.Buffer);
|
|
if Assigned(FSource) then
|
|
while ContextPop(True) do; // clean input stack
|
|
FSource.Free;
|
|
FPEMap.Free;
|
|
ClearRefs(FNotationRefs);
|
|
ClearRefs(FIDRefs);
|
|
FNsAttHash.Free;
|
|
FNSHelper.Free;
|
|
if FOwnsDoctype then
|
|
FDocType.Free;
|
|
|
|
FNotationRefs.Free;
|
|
FIDRefs.Free;
|
|
inherited Destroy;
|
|
end;
|
|
|
|
procedure TXMLReader.XML11_BuildTables;
|
|
begin
|
|
FNamePages := Xml11NamePages;
|
|
FXML11 := True;
|
|
FSource.FXml11Rules := True;
|
|
end;
|
|
|
|
procedure TXMLReader.ProcessXML(ASource: TXMLCharSource);
|
|
begin
|
|
doc := TXMLDocument.Create;
|
|
doc.documentURI := ASource.SystemID; // TODO: to be changed to URI or BaseURI
|
|
FCursor := doc;
|
|
FState := rsProlog;
|
|
FNesting := 0;
|
|
Initialize(ASource);
|
|
ParseContent;
|
|
|
|
if FState < rsRoot then
|
|
FatalError('Root element is missing');
|
|
|
|
if FValidate and Assigned(FDocType) then
|
|
ValidateIdRefs;
|
|
end;
|
|
|
|
procedure TXMLReader.ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode);
|
|
begin
|
|
doc := AOwner.OwnerDocument;
|
|
FCursor := AOwner as TDOMNode_WithChildren;
|
|
FState := rsRoot;
|
|
Initialize(ASource);
|
|
FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1');
|
|
ParseContent;
|
|
end;
|
|
|
|
function TXMLReader.CheckName(aFlags: TCheckNameFlags): Boolean;
|
|
var
|
|
p: DOMPChar;
|
|
NameStartFlag: Boolean;
|
|
begin
|
|
p := FSource.FBuf;
|
|
FName.Length := 0;
|
|
FColonPos := -1;
|
|
NameStartFlag := not (cnToken in aFlags);
|
|
|
|
repeat
|
|
if NameStartFlag then
|
|
begin
|
|
if (Byte(p^) in NamingBitmap[FNamePages^[hi(Word(p^))]]) or
|
|
((p^ = ':') and (not FNamespaces)) then
|
|
Inc(p)
|
|
else if FXML11 and ((p^ >= #$D800) and (p^ <= #$DB7F) and
|
|
(p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
|
|
Inc(p, 2)
|
|
else
|
|
begin
|
|
// here we come either when first char of name is bad (it may be a colon),
|
|
// or when a colon is not followed by a valid NameStartChar
|
|
FSource.FBuf := p;
|
|
Result := False;
|
|
Break;
|
|
end;
|
|
NameStartFlag := False;
|
|
end;
|
|
|
|
if FXML11 then
|
|
repeat
|
|
if Byte(p^) in NamingBitmap[FNamePages^[$100+hi(Word(p^))]] then
|
|
Inc(p)
|
|
else if ((p^ >= #$D800) and (p^ <= #$DB7F) and
|
|
(p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
|
|
Inc(p,2)
|
|
else
|
|
Break;
|
|
until False
|
|
else
|
|
while Byte(p^) in NamingBitmap[FNamePages^[$100+hi(Word(p^))]] do
|
|
Inc(p);
|
|
|
|
if p^ = ':' then
|
|
begin
|
|
if (cnToken in aFlags) or not FNamespaces then // colon has no specific meaning
|
|
begin
|
|
Inc(p);
|
|
if p^ <> #0 then Continue;
|
|
end
|
|
else if FColonPos = -1 then // this is the first colon, remember it
|
|
begin
|
|
FColonPos := p-FSource.FBuf+FName.Length;
|
|
NameStartFlag := True;
|
|
Inc(p);
|
|
if p^ <> #0 then Continue;
|
|
end;
|
|
end;
|
|
|
|
BufAppendChunk(FName, FSource.FBuf, p);
|
|
Result := (FName.Length > 0);
|
|
|
|
FSource.FBuf := p;
|
|
if (p^ <> #0) or not FSource.Reload then
|
|
Break;
|
|
|
|
p := FSource.FBuf;
|
|
until False;
|
|
if not (Result or (cnOptional in aFlags)) then
|
|
RaiseNameNotFound;
|
|
end;
|
|
|
|
procedure TXMLReader.CheckNCName;
|
|
begin
|
|
if FNamespaces and (FColonPos <> -1) then
|
|
FatalError('Names of entities, notations and processing instructions may not contain colons', FName.Length);
|
|
end;
|
|
|
|
procedure TXMLReader.RaiseNameNotFound;
|
|
begin
|
|
if FColonPos <> -1 then
|
|
FatalError('Bad QName syntax, local part is missing')
|
|
else
|
|
// Coming at no cost, this allows more user-friendly error messages
|
|
with FSource do
|
|
if (FBuf^ = #32) or (FBuf^ = #10) or (FBuf^ = #9) or (FBuf^ = #13) then
|
|
FatalError('Whitespace is not allowed here')
|
|
else
|
|
FatalError('Name starts with invalid character');
|
|
end;
|
|
|
|
function TXMLReader.ExpectName: DOMString;
|
|
begin
|
|
CheckName;
|
|
SetString(Result, FName.Buffer, FName.Length);
|
|
end;
|
|
|
|
function TXMLReader.ResolvePredefined: Boolean;
|
|
var
|
|
wc: DOMChar;
|
|
begin
|
|
Result := False;
|
|
with FName do
|
|
begin
|
|
if (Length = 2) and (Buffer[1] = 't') then
|
|
begin
|
|
if Buffer[0] = 'l' then
|
|
wc := '<'
|
|
else if Buffer[0] = 'g' then
|
|
wc := '>'
|
|
else Exit;
|
|
end
|
|
else if Buffer[0] = 'a' then
|
|
begin
|
|
if (Length = 3) and (Buffer[1] = 'm') and (Buffer[2] = 'p') then
|
|
wc := '&'
|
|
else if (Length = 4) and (Buffer[1] = 'p') and (Buffer[2] = 'o') and
|
|
(Buffer[3] = 's') then
|
|
wc := ''''
|
|
else Exit;
|
|
end
|
|
else if (Length = 4) and (Buffer[0] = 'q') and (Buffer[1] = 'u') and
|
|
(Buffer[2] = 'o') and (Buffer[3] ='t') then
|
|
wc := '"'
|
|
else
|
|
Exit;
|
|
end; // with
|
|
BufAppend(FValue, wc);
|
|
Result := True;
|
|
end;
|
|
|
|
function TXMLReader.ParseRef(var ToFill: TDOMCharBuf): Boolean; // [67]
|
|
var
|
|
Value: Integer;
|
|
begin
|
|
FSource.NextChar; // skip '&'
|
|
Result := CheckForChar('#');
|
|
if Result then
|
|
begin
|
|
Value := 0;
|
|
if CheckForChar('x') then
|
|
repeat
|
|
case FSource.FBuf^ of
|
|
'0'..'9': Value := Value * 16 + Ord(FSource.FBuf^) - Ord('0');
|
|
'a'..'f': Value := Value * 16 + Ord(FSource.FBuf^) - (Ord('a') - 10);
|
|
'A'..'F': Value := Value * 16 + Ord(FSource.FBuf^) - (Ord('A') - 10);
|
|
else
|
|
Break;
|
|
end;
|
|
FSource.NextChar;
|
|
until Value > $10FFFF
|
|
else
|
|
repeat
|
|
case FSource.FBuf^ of
|
|
'0'..'9': Value := Value * 10 + Ord(FSource.FBuf^) - Ord('0');
|
|
else
|
|
Break;
|
|
end;
|
|
FSource.NextChar;
|
|
until Value > $10FFFF;
|
|
|
|
case Value of
|
|
$01..$08, $0B..$0C, $0E..$1F:
|
|
if FXML11 then
|
|
BufAppend(ToFill, DOMChar(Value))
|
|
else
|
|
FatalError('Invalid character reference');
|
|
$09, $0A, $0D, $20..$7F:
|
|
BufAppend(ToFill, DOMChar(Value));
|
|
{$IFDEF UseUTF8}
|
|
$80..$7ff:
|
|
begin
|
|
BufAppend(ToFill, DOMChar(byte($c0 or (Value shr 6))));
|
|
BufAppend(ToFill, DOMChar(byte($80 or (Value and $3f))));
|
|
end;
|
|
$800..$ffff:
|
|
begin
|
|
BufAppend(ToFill, DOMChar(byte($e0 or (Value shr 12))));
|
|
BufAppend(ToFill, DOMChar(byte((Value shr 6) and $3f) or $80));
|
|
BufAppend(ToFill, DOMChar(byte(Value and $3f) or $80));
|
|
end;
|
|
$10000..$10ffff:
|
|
begin
|
|
BufAppend(ToFill, DOMChar(byte($f0 or (Value shr 18))));
|
|
BufAppend(ToFill, DOMChar(byte((Value shr 12) and $3f) or $80));
|
|
BufAppend(ToFill, DOMChar(byte((Value shr 6) and $3f) or $80));
|
|
BufAppend(ToFill, DOMChar(byte(Value and $3f) or $80));
|
|
end;
|
|
{$ENDIF}
|
|
{$IFDEF UseWideString}
|
|
$D7FF, $E000..$FFFD:
|
|
BufAppend(ToFill, DOMChar(Value));
|
|
$10000..$10FFFF:
|
|
begin
|
|
BufAppend(ToFill, DOMChar($D7C0 + (Value shr 10)));
|
|
BufAppend(ToFill, DOMChar($DC00 xor (Value and $3FF)));
|
|
end;
|
|
{$ENDIF}
|
|
else
|
|
FatalError('Invalid character reference');
|
|
end;
|
|
end
|
|
else CheckName;
|
|
ExpectChar(';');
|
|
end;
|
|
|
|
const
|
|
AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
|
|
GT_Delim: TSetOfChar = [#0, '>'];
|
|
|
|
procedure TXMLReader.ExpectAttValue;
|
|
var
|
|
wc: DOMChar;
|
|
Delim: DOMChar;
|
|
ent: TDOMEntityEx;
|
|
start: TObject;
|
|
begin
|
|
SkipQuote(Delim);
|
|
FValue.Length := 0;
|
|
start := FSource.FEntity;
|
|
repeat
|
|
wc := FSource.SkipUntil(FValue, AttrDelims, nil, xrfAllowSpecialCharsInAttributeValue in Flags);
|
|
if (wc = '<') and (not (xrfAllowLowerThanInAttributeValue in Flags)) then
|
|
FatalError('Character ''<'' is not allowed in attribute value')
|
|
else if wc = '&' then
|
|
begin
|
|
if ParseRef(FValue) or ResolvePredefined then
|
|
Continue;
|
|
|
|
ent := EntityCheck(True);
|
|
if (ent = nil) or (not FExpandEntities) then
|
|
begin
|
|
if FValue.Length > 0 then
|
|
begin
|
|
DoAttrText(FValue.Buffer, FValue.Length);
|
|
FValue.Length := 0;
|
|
end;
|
|
AppendReference(ent);
|
|
end
|
|
else
|
|
ContextPush(ent);
|
|
end
|
|
else if wc <> #0 then
|
|
begin
|
|
FSource.NextChar;
|
|
if (wc = Delim) and (FSource.FEntity = start) then
|
|
Break;
|
|
if (wc = #10) or (wc = #9) or (wc = #13) then
|
|
wc := #32;
|
|
BufAppend(FValue, wc);
|
|
end
|
|
else if (FSource.FEntity = start) or not ContextPop then // #0
|
|
FatalError('Literal has no closing quote', -1);
|
|
until False;
|
|
if FValue.Length > 0 then
|
|
DoAttrText(FValue.Buffer, FValue.Length);
|
|
FValue.Length := 0;
|
|
end;
|
|
|
|
const
|
|
PrefixChar: array[Boolean] of string = ('', '%');
|
|
|
|
function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean;
|
|
var
|
|
Src: TXMLCharSource;
|
|
begin
|
|
if AEntity.FOnStack then
|
|
FatalError('Entity ''%s%s'' recursively references itself', [PrefixChar[AEntity.FIsPE], AEntity.FName]);
|
|
|
|
if (AEntity.SystemID <> '') and not AEntity.FPrefetched then
|
|
begin
|
|
Result := ResolveEntity(AEntity.SystemID, AEntity.PublicID, AEntity.FURI, Src);
|
|
if not Result then
|
|
begin
|
|
// TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
|
|
ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]);
|
|
Exit;
|
|
end;
|
|
end
|
|
else
|
|
begin
|
|
Src := TXMLCharSource.Create(AEntity.FReplacementText);
|
|
Src.FLineNo := AEntity.FStartLocation.Line;
|
|
Src.LFPos := Src.FBuf - AEntity.FStartLocation.LinePos;
|
|
// needed in case of prefetched external PE
|
|
if AEntity.SystemID <> '' then
|
|
Src.SystemID := AEntity.FURI;
|
|
end;
|
|
|
|
AEntity.FOnStack := True;
|
|
Src.FEntity := AEntity;
|
|
|
|
Initialize(Src);
|
|
Result := True;
|
|
end;
|
|
|
|
function TXMLReader.ContextPop(Forced: Boolean): Boolean;
|
|
var
|
|
Src: TXMLCharSource;
|
|
Error: Boolean;
|
|
begin
|
|
Result := Assigned(FSource.FParent) and (Forced or (FSource.DTDSubsetType = dsNone));
|
|
if Result then
|
|
begin
|
|
Src := FSource.FParent;
|
|
Error := False;
|
|
if Assigned(FSource.FEntity) then
|
|
begin
|
|
TDOMEntityEx(FSource.FEntity).FOnStack := False;
|
|
TDOMEntityEx(FSource.FEntity).FCharCount := FSource.FCharCount;
|
|
// [28a] PE that was started between MarkupDecls may not end inside MarkupDecl
|
|
Error := TDOMEntityEx(FSource.FEntity).FBetweenDecls and FInsideDecl;
|
|
end;
|
|
FSource.Free;
|
|
FSource := Src;
|
|
// correct position of this error is after PE reference
|
|
if Error then
|
|
BadPENesting(esFatal);
|
|
end;
|
|
end;
|
|
|
|
function TXMLReader.EntityCheck(NoExternals: Boolean): TDOMEntityEx;
|
|
var
|
|
RefName: DOMString;
|
|
cnt: Integer;
|
|
SaveCursor: TDOMNode_WithChildren;
|
|
SaveState: TXMLReadState;
|
|
SaveElDef: TDOMElementDef;
|
|
SaveValue: TDOMCharBuf;
|
|
begin
|
|
Result := nil;
|
|
SetString(RefName, FName.Buffer, FName.Length);
|
|
cnt := FName.Length+2;
|
|
|
|
if Assigned(FDocType) then
|
|
Result := FDocType.Entities.GetNamedItem(RefName) as TDOMEntityEx;
|
|
|
|
if Result = nil then
|
|
begin
|
|
if FStandalone or (FDocType = nil) or not (FHavePERefs or (FDocType.SystemID <> '')) then
|
|
FatalError('Reference to undefined entity ''%s''', [RefName], cnt)
|
|
else
|
|
ValidationError('Undefined entity ''%s'' referenced', [RefName], cnt);
|
|
Exit;
|
|
end;
|
|
|
|
if FStandalone and Result.FExternallyDeclared then
|
|
FatalError('Standalone constraint violation', cnt);
|
|
if Result.NotationName <> '' then
|
|
FatalError('Reference to unparsed entity ''%s''', [RefName], cnt);
|
|
|
|
if NoExternals and (Result.SystemID <> '') then
|
|
FatalError('External entity reference is not allowed in attribute value', cnt);
|
|
|
|
if not Result.FResolved then
|
|
begin
|
|
// To build children of the entity itself, we must parse it "out of context"
|
|
SaveCursor := FCursor;
|
|
SaveElDef := FValidator[FNesting].FElementDef;
|
|
SaveState := FState;
|
|
SaveValue := FValue;
|
|
if ContextPush(Result) then
|
|
try
|
|
FCursor := Result; // build child node tree for the entity
|
|
Result.SetReadOnly(False);
|
|
FState := rsRoot;
|
|
FValidator[FNesting].FElementDef := nil;
|
|
UpdateConstraints;
|
|
FSource.DTDSubsetType := dsExternal; // avoids ContextPop at the end
|
|
BufAllocate(FValue, 256);
|
|
ParseContent;
|
|
Result.FResolved := True;
|
|
finally
|
|
FreeMem(FValue.Buffer);
|
|
FValue := SaveValue;
|
|
Result.SetReadOnly(True);
|
|
ContextPop(True);
|
|
FCursor := SaveCursor;
|
|
FState := SaveState;
|
|
FValidator[FNesting].FElementDef := SaveElDef;
|
|
UpdateConstraints;
|
|
end;
|
|
end;
|
|
// at this point we know the charcount of the entity being included
|
|
Inc(FSource.FCharCount, Result.FCharCount - cnt);
|
|
CheckMaxChars;
|
|
end;
|
|
|
|
procedure TXMLReader.StartPE;
|
|
var
|
|
PEName: DOMString;
|
|
PEnt: TDOMEntityEx;
|
|
begin
|
|
SetString(PEName, FName.Buffer, FName.Length);
|
|
PEnt := nil;
|
|
if Assigned(FPEMap) then
|
|
PEnt := FPEMap.GetNamedItem(PEName) as TDOMEntityEx;
|
|
if PEnt = nil then
|
|
begin
|
|
ValidationError('Undefined parameter entity ''%s'' referenced', [PEName], FName.Length+2);
|
|
// cease processing declarations, unless document is standalone.
|
|
FDTDProcessed := FStandalone;
|
|
Exit;
|
|
end;
|
|
|
|
{ cache an external PE so it's only fetched once }
|
|
if (PEnt.SystemID <> '') and (not PEnt.FPrefetched) and (not PrefetchEntity(PEnt)) then
|
|
begin
|
|
FDTDProcessed := FStandalone;
|
|
Exit;
|
|
end;
|
|
Inc(FSource.FCharCount, PEnt.FCharCount);
|
|
CheckMaxChars;
|
|
|
|
PEnt.FBetweenDecls := not FInsideDecl;
|
|
ContextPush(PEnt);
|
|
FHavePERefs := True;
|
|
end;
|
|
|
|
function TXMLReader.PrefetchEntity(AEntity: TDOMEntityEx): Boolean;
|
|
begin
|
|
Result := ContextPush(AEntity);
|
|
if Result then
|
|
try
|
|
FValue.Length := 0;
|
|
FSource.SkipUntil(FValue, [#0]);
|
|
SetString(AEntity.FReplacementText, FValue.Buffer, FValue.Length);
|
|
AEntity.FCharCount := FValue.Length;
|
|
AEntity.FStartLocation.Line := 1;
|
|
AEntity.FStartLocation.LinePos := 1;
|
|
AEntity.FURI := FSource.SystemID; // replace base URI with absolute one
|
|
finally
|
|
ContextPop;
|
|
AEntity.FPrefetched := True;
|
|
FValue.Length := 0;
|
|
end;
|
|
end;
|
|
|
|
procedure Normalize(var Buf: TDOMCharBuf; Modified: PBoolean);
|
|
var
|
|
Dst, Src: Integer;
|
|
begin
|
|
Dst := 0;
|
|
Src := 0;
|
|
// skip leading space if any
|
|
while (Src < Buf.Length) and (Buf.Buffer[Src] = ' ') do
|
|
Inc(Src);
|
|
|
|
while Src < Buf.Length do
|
|
begin
|
|
if Buf.Buffer[Src] = ' ' then
|
|
begin
|
|
// Dst cannot be 0 here, because leading space is already skipped
|
|
if Buf.Buffer[Dst-1] <> ' ' then
|
|
begin
|
|
Buf.Buffer[Dst] := ' ';
|
|
Inc(Dst);
|
|
end;
|
|
end
|
|
else
|
|
begin
|
|
Buf.Buffer[Dst] := Buf.Buffer[Src];
|
|
Inc(Dst);
|
|
end;
|
|
Inc(Src);
|
|
end;
|
|
// trailing space (only one possible due to compression)
|
|
if (Dst > 0) and (Buf.Buffer[Dst-1] = ' ') then
|
|
Dec(Dst);
|
|
|
|
if Assigned(Modified) then
|
|
Modified^ := Dst <> Buf.Length;
|
|
Buf.Length := Dst;
|
|
end;
|
|
|
|
const
|
|
LiteralDelims: array[TLiteralType] of TSetOfChar = (
|
|
[#0, '''', '"'], // ltPlain
|
|
[#0, '<', '&', '''', '"', #9, #10, #13], // ltAttr
|
|
[#0, '<', '&', '''', '"', #9, #10, #13], // ltTokAttr
|
|
[#0, '''', '"', #13, #10], // ltPubid
|
|
[#0, '%', '&', '''', '"'] // ltEntity
|
|
);
|
|
|
|
function TXMLReader.ParseLiteral(var ToFill: TDOMCharBuf; aType: TLiteralType;
|
|
Required: Boolean; Normalized: PBoolean): Boolean;
|
|
var
|
|
start: TObject;
|
|
wc, Delim: DOMChar;
|
|
ent: TDOMEntityEx;
|
|
begin
|
|
SkipQuote(Delim, Required);
|
|
Result := (Delim <> #0);
|
|
if not Result then
|
|
Exit;
|
|
ToFill.Length := 0;
|
|
start := FSource.FEntity;
|
|
repeat
|
|
wc := FSource.SkipUntil(ToFill, LiteralDelims[aType]);
|
|
if wc = '%' then { ltEntity only }
|
|
begin
|
|
FSource.NextChar;
|
|
CheckName;
|
|
ExpectChar(';');
|
|
if FSource.DTDSubsetType = dsInternal then
|
|
FatalError('PE reference not allowed here in internal subset', FName.Length+2);
|
|
StartPE;
|
|
end
|
|
else if wc = '&' then { ltAttr, ltTokAttr, ltEntity }
|
|
begin
|
|
if ParseRef(ToFill) then // charRefs always expanded
|
|
Continue;
|
|
if aType = ltEntity then // bypass
|
|
begin
|
|
BufAppend(ToFill, '&');
|
|
BufAppendChunk(ToFill, FName.Buffer, FName.Buffer + FName.Length);
|
|
BufAppend(ToFill, ';');
|
|
end
|
|
else // include
|
|
begin
|
|
if ResolvePredefined then
|
|
Continue;
|
|
ent := EntityCheck(True);
|
|
if ent = nil then
|
|
Continue;
|
|
ContextPush(ent);
|
|
end;
|
|
end
|
|
else if wc = '<' then
|
|
FatalError('Character ''<'' is not allowed in attribute value')
|
|
else if wc <> #0 then
|
|
begin
|
|
FSource.NextChar;
|
|
if (wc = #10) or (wc = #13) or (wc = #9) then
|
|
wc := #32
|
|
// terminating delimiter must be in the same context as the starting one
|
|
else if (wc = Delim) and (start = FSource.FEntity) then
|
|
Break;
|
|
BufAppend(ToFill, wc);
|
|
end
|
|
else if (FSource.FEntity = start) or not ContextPop then // #0
|
|
FatalError('Literal has no closing quote', -1);
|
|
until False;
|
|
if aType in [ltTokAttr, ltPubid] then
|
|
Normalize(ToFill, Normalized);
|
|
end;
|
|
|
|
function TXMLReader.SkipUntilSeq(const Delim: TSetOfChar; c1: DOMChar; c2: DOMChar = #0): Boolean;
|
|
var
|
|
wc: DOMChar;
|
|
begin
|
|
Result := False;
|
|
FValue.Length := 0;
|
|
StoreLocation(FTokenStart);
|
|
repeat
|
|
wc := FSource.SkipUntil(FValue, Delim);
|
|
if wc <> #0 then
|
|
begin
|
|
FSource.NextChar;
|
|
if (FValue.Length > ord(c2 <> #0)) then
|
|
begin
|
|
if (FValue.Buffer[FValue.Length-1] = c1) and
|
|
((c2 = #0) or ((c2 <> #0) and (FValue.Buffer[FValue.Length-2] = c2))) then
|
|
begin
|
|
Dec(FValue.Length, ord(c2 <> #0) + 1);
|
|
Result := True;
|
|
Exit;
|
|
end;
|
|
end;
|
|
BufAppend(FValue, wc);
|
|
end;
|
|
until wc = #0;
|
|
end;
|
|
|
|
procedure TXMLReader.ParseComment; // [15]
|
|
begin
|
|
ExpectString('--');
|
|
if SkipUntilSeq([#0, '-'], '-') then
|
|
begin
|
|
ExpectChar('>');
|
|
DoComment(FValue.Buffer, FValue.Length);
|
|
end
|
|
else
|
|
FatalError('Unterminated comment', -1);
|
|
end;
|
|
|
|
procedure TXMLReader.ParsePI; // [16]
|
|
var
|
|
Name, Value: DOMString;
|
|
PINode: TDOMProcessingInstruction;
|
|
begin
|
|
FSource.NextChar; // skip '?'
|
|
Name := ExpectName;
|
|
CheckNCName;
|
|
with FName do
|
|
if (Length = 3) and
|
|
((Buffer[0] = 'X') or (Buffer[0] = 'x')) and
|
|
((Buffer[1] = 'M') or (Buffer[1] = 'm')) and
|
|
((Buffer[2] = 'L') or (Buffer[2] = 'l')) then
|
|
begin
|
|
if Name <> 'xml' then
|
|
FatalError('''xml'' is a reserved word; it must be lowercase', FName.Length)
|
|
else
|
|
FatalError('XML declaration is not allowed here', FName.Length);
|
|
end;
|
|
|
|
if FSource.FBuf^ <> '?' then
|
|
SkipS(True);
|
|
|
|
if SkipUntilSeq(GT_Delim, '?') then
|
|
begin
|
|
SetString(Value, FValue.Buffer, FValue.Length);
|
|
// SAX: ContentHandler.ProcessingInstruction(Name, Value);
|
|
if FCurrContentType = ctEmpty then
|
|
ValidationError('Processing instructions are not allowed within EMPTY elements', []);
|
|
|
|
PINode := Doc.CreateProcessingInstruction(Name, Value);
|
|
if Assigned(FCursor) then
|
|
FCursor.AppendChild(PINode)
|
|
else // to comply with certain tests, insert PI from DTD before DTD
|
|
Doc.InsertBefore(PINode, FDocType);
|
|
end
|
|
else
|
|
FatalError('Unterminated processing instruction', -1);
|
|
end;
|
|
|
|
const
|
|
verStr: array[Boolean] of DOMString = ('1.0', '1.1');
|
|
|
|
procedure TXMLReader.ParseXmlOrTextDecl(TextDecl: Boolean);
|
|
var
|
|
TmpStr: DOMString;
|
|
IsXML11: Boolean;
|
|
Delim: DOMChar;
|
|
buf: array[0..31] of DOMChar;
|
|
I: Integer;
|
|
begin
|
|
SkipS(True);
|
|
// [24] VersionInfo: optional in TextDecl, required in XmlDecl
|
|
if (not TextDecl) or (FSource.FBuf^ = 'v') then
|
|
begin
|
|
ExpectString('version');
|
|
ExpectEq;
|
|
SkipQuote(Delim);
|
|
I := 0;
|
|
while (I < 3) and (FSource.FBuf^ <> Delim) do
|
|
begin
|
|
buf[I] := FSource.FBuf^;
|
|
Inc(I);
|
|
FSource.NextChar;
|
|
end;
|
|
if (I <> 3) or (buf[0] <> '1') or (buf[1] <> '.') or
|
|
((buf[2] <> '0') and (buf[2] <> '1')) then
|
|
FatalError('Illegal version number', -1);
|
|
|
|
ExpectChar(Delim);
|
|
IsXML11 := buf[2] = '1';
|
|
|
|
if not TextDecl then
|
|
begin
|
|
if doc.InheritsFrom(TXMLDocument) then
|
|
TXMLDocument(doc).XMLVersion := verStr[IsXML11]; // buf[0..2] works with FPC only
|
|
end
|
|
else // parsing external entity
|
|
if IsXML11 and not FXML11 then
|
|
FatalError('XML 1.0 document cannot invoke XML 1.1 entities', -1);
|
|
|
|
if TextDecl or (FSource.FBuf^ <> '?') then
|
|
SkipS(True);
|
|
end;
|
|
|
|
// [80] EncodingDecl: required in TextDecl, optional in XmlDecl
|
|
if TextDecl or (FSource.FBuf^ = 'e') then
|
|
begin
|
|
ExpectString('encoding');
|
|
ExpectEq;
|
|
SkipQuote(Delim);
|
|
I := 0;
|
|
while (I < 30) and (FSource.FBuf^ <> Delim) and (FSource.FBuf^ < #127) and
|
|
((Char(ord(FSource.FBuf^)) in ['A'..'Z', 'a'..'z']) or
|
|
((I > 0) and (Char(ord(FSource.FBuf^)) in ['0'..'9', '.', '-', '_']))) do
|
|
begin
|
|
buf[I] := FSource.FBuf^;
|
|
Inc(I);
|
|
FSource.NextChar;
|
|
end;
|
|
if not CheckForChar(Delim) then
|
|
FatalError('Illegal encoding name', i);
|
|
|
|
SetString(TmpStr, buf, i);
|
|
if not FSource.SetEncoding(TmpStr) then // <-- Wide2Ansi conversion here
|
|
FatalError('Encoding ''%s'' is not supported', [TmpStr], i+1);
|
|
// getting here means that specified encoding is supported
|
|
// TODO: maybe assign the 'preferred' encoding name?
|
|
if not TextDecl and doc.InheritsFrom(TXMLDocument) then
|
|
TXMLDocument(doc).Encoding := TmpStr;
|
|
|
|
if FSource.FBuf^ <> '?' then
|
|
SkipS(not TextDecl);
|
|
end;
|
|
|
|
// [32] SDDecl: forbidden in TextDecl, optional in XmlDecl
|
|
if (not TextDecl) and (FSource.FBuf^ = 's') then
|
|
begin
|
|
ExpectString('standalone');
|
|
ExpectEq;
|
|
SkipQuote(Delim);
|
|
if FSource.Matches('yes') then
|
|
FStandalone := True
|
|
else if not FSource.Matches('no') then
|
|
FatalError('Only "yes" or "no" are permitted as values of "standalone"', -1);
|
|
ExpectChar(Delim);
|
|
SkipS;
|
|
end;
|
|
|
|
ExpectString('?>');
|
|
{ Switch to 1.1 rules only after declaration is parsed completely. This is to
|
|
ensure that NEL and LSEP within declaration are rejected (rmt-056, rmt-057) }
|
|
if (not TextDecl) and IsXML11 then
|
|
XML11_BuildTables;
|
|
end;
|
|
|
|
procedure TXMLReader.DTDReloadHook;
|
|
var
|
|
p: DOMPChar;
|
|
begin
|
|
{ FSource converts CR, NEL and LSEP linebreaks to LF, and CR-NEL sequences to CR-LF.
|
|
We must further remove the CR chars and have only LF's left. }
|
|
p := FDTDStartPos;
|
|
while p < FSource.FBuf do
|
|
begin
|
|
while (p < FSource.FBuf) and (p^ <> #13) do
|
|
Inc(p);
|
|
BufAppendChunk(FIntSubset, FDTDStartPos, p);
|
|
if p^ = #13 then
|
|
Inc(p);
|
|
FDTDStartPos := p;
|
|
end;
|
|
FDTDStartPos := TXMLDecodingSource(FSource).FBufStart;
|
|
end;
|
|
|
|
procedure TXMLReader.ParseDoctypeDecl; // [28]
|
|
var
|
|
Src: TXMLCharSource;
|
|
begin
|
|
if FState >= rsDTD then
|
|
FatalError('Markup declaration is not allowed here');
|
|
if FDisallowDoctype then
|
|
FatalError('Document type is prohibited by parser settings');
|
|
|
|
ExpectString('DOCTYPE');
|
|
SkipS(True);
|
|
|
|
FDocType := TDOMDocumentTypeEx(TDOMDocumentType.Create(doc));
|
|
FDTDProcessed := True; // assume success
|
|
FState := rsDTD;
|
|
try
|
|
FDocType.FName := ExpectName;
|
|
SkipS(True);
|
|
ParseExternalID(FDocType.FSystemID, FDocType.FPublicID, False);
|
|
SkipS;
|
|
finally
|
|
// DONE: append node after its name has been set; always append to avoid leak
|
|
if FCanonical then
|
|
FOwnsDoctype := True
|
|
else
|
|
Doc.AppendChild(FDocType);
|
|
FCursor := nil;
|
|
end;
|
|
|
|
if CheckForChar('[') then
|
|
begin
|
|
BufAllocate(FIntSubset, 256);
|
|
FSource.DTDSubsetType := dsInternal;
|
|
try
|
|
FDTDStartPos := FSource.FBuf;
|
|
ParseMarkupDecl;
|
|
DTDReloadHook; // fetch last chunk
|
|
SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length);
|
|
finally
|
|
FreeMem(FIntSubset.Buffer);
|
|
FSource.DTDSubsetType := dsNone;
|
|
end;
|
|
ExpectChar(']');
|
|
SkipS;
|
|
end;
|
|
ExpectChar('>');
|
|
|
|
if (FDocType.SystemID <> '') then
|
|
begin
|
|
if ResolveEntity(FDocType.SystemID, FDocType.PublicID, FSource.SystemID, Src) then
|
|
begin
|
|
Initialize(Src);
|
|
try
|
|
Src.DTDSubsetType := dsExternal;
|
|
ParseMarkupDecl;
|
|
finally
|
|
ContextPop(True);
|
|
end;
|
|
end
|
|
else
|
|
begin
|
|
ValidationError('Unable to resolve external DTD subset', []);
|
|
FDTDProcessed := FStandalone;
|
|
end;
|
|
end;
|
|
FCursor := Doc;
|
|
ValidateDTD;
|
|
FDocType.SetReadOnly(True);
|
|
end;
|
|
|
|
procedure TXMLReader.ExpectEq; // [25]
|
|
begin
|
|
if FSource.FBuf^ <> '=' then
|
|
SkipS;
|
|
if FSource.FBuf^ <> '=' then
|
|
FatalError('Expected "="');
|
|
FSource.NextChar;
|
|
SkipS;
|
|
end;
|
|
|
|
|
|
{ DTD stuff }
|
|
|
|
procedure TXMLReader.BadPENesting(S: TErrorSeverity);
|
|
begin
|
|
if (S = esFatal) or FValidate then
|
|
DoError(S, 'Parameter entities must be properly nested');
|
|
end;
|
|
|
|
procedure TXMLReader.StandaloneError(LineOffs: Integer);
|
|
begin
|
|
ValidationError('Standalone constriant violation', [], LineOffs);
|
|
end;
|
|
|
|
procedure TXMLReader.ParseQuantity(CP: TContentParticle);
|
|
begin
|
|
case FSource.FBuf^ of
|
|
'?': CP.CPQuant := cqZeroOrOnce;
|
|
'*': CP.CPQuant := cqZeroOrMore;
|
|
'+': CP.CPQuant := cqOnceOrMore;
|
|
else
|
|
Exit;
|
|
end;
|
|
FSource.NextChar;
|
|
end;
|
|
|
|
function TXMLReader.FindOrCreateElDef: TDOMElementDef;
|
|
var
|
|
p: PHashItem;
|
|
begin
|
|
CheckName;
|
|
p := doc.Names.FindOrAdd(FName.Buffer, FName.Length);
|
|
Result := TDOMElementDef(p^.Data);
|
|
if Result = nil then
|
|
begin
|
|
Result := TDOMElementDef.Create(doc);
|
|
Result.FNSI.QName := p;
|
|
p^.Data := Result;
|
|
end;
|
|
end;
|
|
|
|
procedure TXMLReader.ExpectChoiceOrSeq(CP: TContentParticle); // [49], [50]
|
|
var
|
|
Delim: DOMChar;
|
|
CurrentEntity: TObject;
|
|
CurrentCP: TContentParticle;
|
|
begin
|
|
Delim := #0;
|
|
repeat
|
|
CurrentCP := CP.Add;
|
|
SkipWhitespace;
|
|
if CheckForChar('(') then
|
|
begin
|
|
CurrentEntity := FSource.FEntity;
|
|
ExpectChoiceOrSeq(CurrentCP);
|
|
if CurrentEntity <> FSource.FEntity then
|
|
BadPENesting;
|
|
FSource.NextChar;
|
|
end
|
|
else
|
|
CurrentCP.Def := FindOrCreateElDef;
|
|
|
|
ParseQuantity(CurrentCP);
|
|
SkipWhitespace;
|
|
if FSource.FBuf^ = ')' then
|
|
Break;
|
|
if Delim = #0 then
|
|
begin
|
|
if (FSource.FBuf^ = '|') or (FSource.FBuf^ = ',') then
|
|
Delim := FSource.FBuf^
|
|
else
|
|
FatalError('Expected pipe or comma delimiter');
|
|
end
|
|
else
|
|
if FSource.FBuf^ <> Delim then
|
|
FatalError(Delim);
|
|
FSource.NextChar; // skip delimiter
|
|
until False;
|
|
if Delim = '|' then
|
|
CP.CPType := ctChoice
|
|
else
|
|
CP.CPType := ctSeq; // '(foo)' is a sequence!
|
|
end;
|
|
|
|
procedure TXMLReader.ParseElementDecl; // [45]
|
|
var
|
|
ElDef: TDOMElementDef;
|
|
CurrentEntity: TObject;
|
|
I: Integer;
|
|
CP: TContentParticle;
|
|
Typ: TElementContentType;
|
|
ExtDecl: Boolean;
|
|
begin
|
|
CP := nil;
|
|
Typ := ctUndeclared; // satisfy compiler
|
|
ExpectWhitespace;
|
|
ElDef := FindOrCreateElDef;
|
|
if ElDef.ContentType <> ctUndeclared then
|
|
ValidationError('Duplicate declaration of element ''%s''', [ElDef.TagName], FName.Length);
|
|
|
|
ExtDecl := FSource.DTDSubsetType <> dsInternal;
|
|
|
|
ExpectWhitespace;
|
|
if FSource.Matches('EMPTY') then
|
|
Typ := ctEmpty
|
|
else if FSource.Matches('ANY') then
|
|
Typ := ctAny
|
|
else if CheckForChar('(') then
|
|
begin
|
|
CP := TContentParticle.Create;
|
|
try
|
|
CurrentEntity := FSource.FEntity;
|
|
SkipWhitespace;
|
|
if FSource.Matches('#PCDATA') then // Mixed section [51]
|
|
begin
|
|
SkipWhitespace;
|
|
Typ := ctMixed;
|
|
while FSource.FBuf^ <> ')' do
|
|
begin
|
|
ExpectChar('|');
|
|
SkipWhitespace;
|
|
|
|
with CP.Add do
|
|
begin
|
|
Def := FindOrCreateElDef;
|
|
for I := CP.ChildCount-2 downto 0 do
|
|
if Def = CP.Children[I].Def then
|
|
ValidationError('Duplicate token in mixed section', [], FName.Length);
|
|
end;
|
|
SkipWhitespace;
|
|
end;
|
|
if CurrentEntity <> FSource.FEntity then
|
|
BadPENesting;
|
|
FSource.NextChar;
|
|
if (not CheckForChar('*')) and (CP.ChildCount > 0) then
|
|
FatalError(DOMChar('*'));
|
|
end
|
|
else // Children section [47]
|
|
begin
|
|
Typ := ctChildren;
|
|
ExpectChoiceOrSeq(CP);
|
|
if CurrentEntity <> FSource.FEntity then
|
|
BadPENesting;
|
|
FSource.NextChar;
|
|
ParseQuantity(CP);
|
|
end;
|
|
except
|
|
CP.Free;
|
|
raise;
|
|
end;
|
|
end
|
|
else
|
|
FatalError('Invalid content specification');
|
|
// SAX: DeclHandler.ElementDecl(name, model);
|
|
if FDTDProcessed and (ElDef.ContentType = ctUndeclared) then
|
|
begin
|
|
ElDef.FExternallyDeclared := ExtDecl;
|
|
ElDef.ContentType := Typ;
|
|
ElDef.RootCP := CP;
|
|
end
|
|
else
|
|
CP.Free;
|
|
end;
|
|
|
|
|
|
procedure TXMLReader.ParseNotationDecl; // [82]
|
|
var
|
|
Name, SysID, PubID: DOMString;
|
|
begin
|
|
ExpectWhitespace;
|
|
Name := ExpectName;
|
|
CheckNCName;
|
|
ExpectWhitespace;
|
|
if not ParseExternalID(SysID, PubID, True) then
|
|
FatalError('Expected external or public ID');
|
|
if FDTDProcessed then
|
|
DoNotationDecl(Name, PubID, SysID);
|
|
end;
|
|
|
|
const
|
|
AttrDataTypeNames: array[TAttrDataType] of DOMString = (
|
|
'CDATA',
|
|
'ID',
|
|
'IDREF',
|
|
'IDREFS',
|
|
'ENTITY',
|
|
'ENTITIES',
|
|
'NMTOKEN',
|
|
'NMTOKENS',
|
|
'NOTATION'
|
|
);
|
|
|
|
procedure TXMLReader.ParseAttlistDecl; // [52]
|
|
var
|
|
ElDef: TDOMElementDef;
|
|
AttDef: TDOMAttrDef;
|
|
dt: TAttrDataType;
|
|
Found, DiscardIt: Boolean;
|
|
Offsets: array [Boolean] of Integer;
|
|
begin
|
|
ExpectWhitespace;
|
|
ElDef := FindOrCreateElDef;
|
|
SkipWhitespace;
|
|
while FSource.FBuf^ <> '>' do
|
|
begin
|
|
CheckName;
|
|
ExpectWhitespace;
|
|
AttDef := doc.CreateAttributeDef(FName.Buffer, FName.Length);
|
|
try
|
|
AttDef.ExternallyDeclared := FSource.DTDSubsetType <> dsInternal;
|
|
// In case of duplicate declaration of the same attribute, we must discard it,
|
|
// not modifying ElDef, and suppressing certain validation errors.
|
|
DiscardIt := (not FDTDProcessed) or Assigned(ElDef.GetAttributeNode(AttDef.Name));
|
|
if not DiscardIt then
|
|
ElDef.SetAttributeNode(AttDef);
|
|
|
|
if CheckForChar('(') then // [59]
|
|
begin
|
|
AttDef.DataType := dtNmToken;
|
|
repeat
|
|
SkipWhitespace;
|
|
CheckName([cnToken]);
|
|
if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
|
|
ValidationError('Duplicate token in enumerated attibute declaration', [], FName.Length);
|
|
SkipWhitespace;
|
|
until not CheckForChar('|');
|
|
ExpectChar(')');
|
|
ExpectWhitespace;
|
|
end
|
|
else
|
|
begin
|
|
StoreLocation(FTokenStart);
|
|
// search topside-up so that e.g. NMTOKENS is matched before NMTOKEN
|
|
for dt := dtNotation downto dtCData do
|
|
begin
|
|
Found := FSource.Matches(AttrDataTypeNames[dt]);
|
|
if Found then
|
|
Break;
|
|
end;
|
|
if Found and SkipWhitespace then
|
|
begin
|
|
AttDef.DataType := dt;
|
|
if (dt = dtId) and not DiscardIt then
|
|
begin
|
|
if Assigned(ElDef.IDAttr) then
|
|
ValidationError('Only one attribute of type ID is allowed per element',[])
|
|
else
|
|
ElDef.IDAttr := AttDef;
|
|
end
|
|
else if dt = dtNotation then // no test cases for these ?!
|
|
begin
|
|
if not DiscardIt then
|
|
begin
|
|
if Assigned(ElDef.NotationAttr) then
|
|
ValidationError('Only one attribute of type NOTATION is allowed per element',[])
|
|
else
|
|
ElDef.NotationAttr := AttDef;
|
|
if ElDef.ContentType = ctEmpty then
|
|
ValidationError('NOTATION attributes are not allowed on EMPTY elements',[]);
|
|
end;
|
|
ExpectChar('(');
|
|
repeat
|
|
SkipWhitespace;
|
|
StoreLocation(FTokenStart);
|
|
CheckName;
|
|
CheckNCName;
|
|
if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
|
|
ValidationError('Duplicate token in NOTATION attribute declaration',[], FName.Length);
|
|
|
|
if not DiscardIt then
|
|
AddForwardRef(FNotationRefs, FName.Buffer, FName.Length);
|
|
SkipWhitespace;
|
|
until not CheckForChar('|');
|
|
ExpectChar(')');
|
|
ExpectWhitespace;
|
|
end;
|
|
end
|
|
else
|
|
begin
|
|
// don't report 'expected whitespace' if token does not match completely
|
|
Offsets[False] := 0;
|
|
Offsets[True] := Length(AttrDataTypeNames[dt]);
|
|
if Found and (FSource.FBuf^ < 'A') then
|
|
ExpectWhitespace
|
|
else
|
|
FatalError('Illegal attribute type for ''%s''', [AttDef.Name], Offsets[Found]);
|
|
end;
|
|
end;
|
|
StoreLocation(FTokenStart);
|
|
if FSource.Matches('#REQUIRED') then
|
|
AttDef.Default := adRequired
|
|
else if FSource.Matches('#IMPLIED') then
|
|
AttDef.Default := adImplied
|
|
else if FSource.Matches('#FIXED') then
|
|
begin
|
|
AttDef.Default := adFixed;
|
|
ExpectWhitespace;
|
|
end
|
|
else
|
|
AttDef.Default := adDefault;
|
|
|
|
if AttDef.Default in [adDefault, adFixed] then
|
|
begin
|
|
if AttDef.DataType = dtId then
|
|
ValidationError('An attribute of type ID cannot have a default value',[]);
|
|
|
|
FCursor := AttDef;
|
|
// See comments to valid-sa-094: PE expansion should be disabled in AttDef.
|
|
// ExpectAttValue() does not recognize PEs anyway, so setting FRecognizePEs isn't needed
|
|
// Saving/restoring FCursor is also redundant because it is always nil here.
|
|
ExpectAttValue;
|
|
FCursor := nil;
|
|
if not ValidateAttrSyntax(AttDef, AttDef.NodeValue) then
|
|
ValidationError('Default value for attribute ''%s'' has wrong syntax', [AttDef.Name]);
|
|
end;
|
|
// SAX: DeclHandler.AttributeDecl(...)
|
|
if DiscardIt then
|
|
AttDef.Free;
|
|
except
|
|
AttDef.Free;
|
|
raise;
|
|
end;
|
|
SkipWhitespace;
|
|
end;
|
|
end;
|
|
|
|
procedure TXMLReader.ParseEntityDecl; // [70]
|
|
var
|
|
IsPE: Boolean;
|
|
Entity: TDOMEntityEx;
|
|
Map: TDOMNamedNodeMap;
|
|
begin
|
|
if not SkipWhitespace(True) then
|
|
FatalError('Expected whitespace');
|
|
IsPE := False;
|
|
Map := FDocType.Entities;
|
|
if CheckForChar('%') then // [72]
|
|
begin
|
|
ExpectWhitespace;
|
|
IsPE := True;
|
|
if FPEMap = nil then
|
|
FPEMap := TDOMNamedNodeMap.Create(FDocType, ENTITY_NODE);
|
|
Map := FPEMap;
|
|
end;
|
|
|
|
Entity := TDOMEntityEx.Create(Doc);
|
|
Entity.SetReadOnly(True);
|
|
try
|
|
Entity.FExternallyDeclared := FSource.DTDSubsetType <> dsInternal;
|
|
Entity.FIsPE := IsPE;
|
|
Entity.FName := ExpectName;
|
|
CheckNCName;
|
|
ExpectWhitespace;
|
|
|
|
// remember where the entity is declared
|
|
Entity.FURI := FSource.SystemID;
|
|
|
|
if FEntityValue.Buffer = nil then
|
|
BufAllocate(FEntityValue, 256);
|
|
|
|
if ParseLiteral(FEntityValue, ltEntity, False) then
|
|
begin
|
|
SetString(Entity.FReplacementText, FEntityValue.Buffer, FEntityValue.Length);
|
|
Entity.FCharCount := FEntityValue.Length;
|
|
Entity.FStartLocation := FTokenStart;
|
|
end
|
|
else
|
|
begin
|
|
if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then
|
|
FatalError('Expected entity value or external ID');
|
|
|
|
if not IsPE then // [76]
|
|
begin
|
|
if FSource.FBuf^ <> '>' then
|
|
ExpectWhitespace;
|
|
if FSource.Matches('NDATA') then
|
|
begin
|
|
ExpectWhitespace;
|
|
StoreLocation(FTokenStart);
|
|
Entity.FNotationName := ExpectName;
|
|
AddForwardRef(FNotationRefs, FName.Buffer, FName.Length);
|
|
// SAX: DTDHandler.UnparsedEntityDecl(...);
|
|
end;
|
|
end;
|
|
end;
|
|
except
|
|
Entity.Free;
|
|
raise;
|
|
end;
|
|
|
|
// Repeated declarations of same entity are legal but must be ignored
|
|
if FDTDProcessed and (Map.GetNamedItem(Entity.FName) = nil) then
|
|
Map.SetNamedItem(Entity)
|
|
else
|
|
Entity.Free;
|
|
end;
|
|
|
|
|
|
procedure TXMLReader.ParseMarkupDecl; // [29]
|
|
var
|
|
IncludeLevel: Integer;
|
|
IgnoreLevel: Integer;
|
|
CurrentEntity: TObject;
|
|
IncludeLoc: TLocation;
|
|
IgnoreLoc: TLocation;
|
|
wc: DOMChar;
|
|
CondType: (ctUnknown, ctInclude, ctIgnore);
|
|
begin
|
|
IncludeLevel := 0;
|
|
IgnoreLevel := 0;
|
|
repeat
|
|
FRecognizePE := True; // PERef between declarations should always be recognized
|
|
SkipWhitespace;
|
|
FRecognizePE := False;
|
|
|
|
if (FSource.FBuf^ = ']') and (IncludeLevel > 0) then
|
|
begin
|
|
ExpectString(']]>');
|
|
Dec(IncludeLevel);
|
|
Continue;
|
|
end;
|
|
|
|
if not CheckForChar('<') then
|
|
Break;
|
|
|
|
CurrentEntity := FSource.FEntity;
|
|
|
|
if FSource.FBuf^ = '?' then
|
|
ParsePI
|
|
else
|
|
begin
|
|
ExpectChar('!');
|
|
if FSource.FBuf^ = '-' then
|
|
ParseComment
|
|
else if CheckForChar('[') then
|
|
begin
|
|
if FSource.DTDSubsetType = dsInternal then
|
|
FatalError('Conditional sections are not allowed in internal subset', 1);
|
|
|
|
FRecognizePE := True;
|
|
SkipWhitespace;
|
|
|
|
CondType := ctUnknown; // satisfy compiler
|
|
if FSource.Matches('INCLUDE') then
|
|
CondType := ctInclude
|
|
else if FSource.Matches('IGNORE') then
|
|
CondType := ctIgnore
|
|
else
|
|
FatalError('Expected "INCLUDE" or "IGNORE"');
|
|
|
|
SkipWhitespace;
|
|
if CurrentEntity <> FSource.FEntity then
|
|
BadPENesting;
|
|
ExpectChar('[');
|
|
if CondType = ctInclude then
|
|
begin
|
|
if IncludeLevel = 0 then
|
|
StoreLocation(IncludeLoc);
|
|
Inc(IncludeLevel);
|
|
end
|
|
else if CondType = ctIgnore then
|
|
begin
|
|
StoreLocation(IgnoreLoc);
|
|
IgnoreLevel := 1;
|
|
repeat
|
|
FValue.Length := 0;
|
|
wc := FSource.SkipUntil(FValue, [#0, '<', ']']);
|
|
if FSource.Matches('<![') then
|
|
Inc(IgnoreLevel)
|
|
else if FSource.Matches(']]>') then
|
|
Dec(IgnoreLevel)
|
|
else if wc <> #0 then
|
|
FSource.NextChar
|
|
else // PE's aren't recognized in ignore section, cannot ContextPop()
|
|
DoErrorPos(esFatal, 'IGNORE section is not closed', IgnoreLoc);
|
|
until IgnoreLevel=0;
|
|
end;
|
|
end
|
|
else
|
|
begin
|
|
FRecognizePE := FSource.DTDSubsetType <> dsInternal;
|
|
FInsideDecl := True;
|
|
if FSource.Matches('ELEMENT') then
|
|
ParseElementDecl
|
|
else if FSource.Matches('ENTITY') then
|
|
ParseEntityDecl
|
|
else if FSource.Matches('ATTLIST') then
|
|
ParseAttlistDecl
|
|
else if FSource.Matches('NOTATION') then
|
|
ParseNotationDecl
|
|
else
|
|
FatalError('Illegal markup declaration');
|
|
|
|
SkipWhitespace;
|
|
FRecognizePE := False;
|
|
|
|
if CurrentEntity <> FSource.FEntity then
|
|
BadPENesting;
|
|
ExpectChar('>');
|
|
FInsideDecl := False;
|
|
end;
|
|
end;
|
|
until False;
|
|
FRecognizePE := False;
|
|
if IncludeLevel > 0 then
|
|
DoErrorPos(esFatal, 'INCLUDE section is not closed', IncludeLoc);
|
|
if (FSource.DTDSubsetType = dsInternal) and (FSource.FBuf^ = ']') then
|
|
Exit;
|
|
if FSource.FBuf^ <> #0 then
|
|
FatalError('Illegal character in DTD');
|
|
end;
|
|
|
|
procedure TXMLReader.ProcessDTD(ASource: TXMLCharSource);
|
|
begin
|
|
doc := TXMLDocument.Create;
|
|
FDocType := TDOMDocumentTypeEx.Create(doc);
|
|
// TODO: DTD labeled version 1.1 will be rejected - must set FXML11 flag
|
|
// DONE: It's ok to have FCursor=nil now
|
|
doc.AppendChild(FDocType);
|
|
Initialize(ASource);
|
|
ParseMarkupDecl;
|
|
end;
|
|
|
|
procedure TXMLReader.AppendReference(AEntity: TDOMEntityEx);
|
|
var
|
|
s: DOMString;
|
|
begin
|
|
if AEntity = nil then
|
|
SetString(s, FName.Buffer, FName.Length)
|
|
else
|
|
s := AEntity.nodeName;
|
|
FCursor.AppendChild(doc.CreateEntityReference(s));
|
|
end;
|
|
|
|
|
|
// The code below does the bulk of the parsing, and must be as fast as possible.
|
|
// To minimize CPU cache effects, methods from different classes are kept together
|
|
|
|
function TXMLDecodingSource.SkipUntil(var ToFill: TDOMCharBuf; const Delim: TSetOfChar;
|
|
wsflag: PBoolean; AllowSpecialChars: boolean): DOMChar;
|
|
var
|
|
old: DOMPChar;
|
|
nonws: Boolean;
|
|
wc: DOMChar;
|
|
begin
|
|
nonws := False;
|
|
repeat
|
|
old := FBuf;
|
|
repeat
|
|
wc := FBuf^;
|
|
if (wc = #10) or (wc = #13)
|
|
or (FXML11Rules and ((wc = #$85) or (wc = #$2028))) // ToDo #$2028
|
|
then begin
|
|
// strictly this is needed only for 2-byte lineendings
|
|
BufAppendChunk(ToFill, old, FBuf);
|
|
NewLine;
|
|
old := FBuf;
|
|
wc := FBuf^
|
|
end
|
|
else if (not AllowSpecialChars)
|
|
and ( ((wc < #32) and (not ((wc = #0) and (FBuf >= FBufEnd))) and (wc <> #9))
|
|
or (wc > #$FFFD) or
|
|
(FXML11Rules and (wc >= #$7F) and (wc <= #$9F)) )
|
|
then
|
|
FReader.FatalError('Invalid character')
|
|
else if wc=#0 then
|
|
FReader.FatalError('Invalid character');
|
|
if (wc < #255) and (Char(ord(wc)) in Delim) then
|
|
Break;
|
|
// the checks above filter away everything below #32 that isn't a whitespace
|
|
if wc > #32 then
|
|
nonws := True;
|
|
Inc(FBuf);
|
|
until False;
|
|
Result := wc;
|
|
BufAppendChunk(ToFill, old, FBuf);
|
|
until (Result <> #0) or (not Reload);
|
|
if Assigned(wsflag) then
|
|
wsflag^ := wsflag^ or nonws;
|
|
end;
|
|
|
|
const
|
|
TextDelims: array[Boolean] of TSetOfChar = (
|
|
[#0, '<', '&', '>'],
|
|
[#0, '>']
|
|
);
|
|
|
|
procedure TXMLReader.ParseContent;
|
|
var
|
|
nonWs: Boolean;
|
|
wc: DOMChar;
|
|
ent: TDOMEntityEx;
|
|
InCDATA: Boolean;
|
|
begin
|
|
InCDATA := False;
|
|
StoreLocation(FTokenStart);
|
|
nonWs := False;
|
|
FValue.Length := 0;
|
|
repeat
|
|
wc := FSource.SkipUntil(FValue, TextDelims[InCDATA], @nonWs);
|
|
if wc = '<' then
|
|
begin
|
|
Inc(FSource.FBuf);
|
|
if FSource.FBufEnd < FSource.FBuf + 2 then
|
|
FSource.Reload;
|
|
if FSource.FBuf^ = '/' then
|
|
begin
|
|
DoText(FValue.Buffer, FValue.Length, not nonWs);
|
|
if FNesting <= FSource.FStartNesting then
|
|
FatalError('End-tag is not allowed here');
|
|
Inc(FSource.FBuf);
|
|
ParseEndTag;
|
|
end
|
|
else if CheckName([cnOptional]) then
|
|
begin
|
|
DoText(FValue.Buffer, FValue.Length, not nonWs);
|
|
ParseElement;
|
|
end
|
|
else if FSource.FBuf^ = '!' then
|
|
begin
|
|
Inc(FSource.FBuf);
|
|
if FSource.FBuf^ = '[' then
|
|
begin
|
|
ExpectString('[CDATA[');
|
|
if FState <> rsRoot then
|
|
FatalError('Illegal at document level');
|
|
StoreLocation(FTokenStart);
|
|
InCDATA := True;
|
|
if not FCDSectionsAsText then
|
|
DoText(FValue.Buffer, FValue.Length, not nonWs)
|
|
else
|
|
Continue;
|
|
end
|
|
else if FSource.FBuf^ = '-' then
|
|
begin
|
|
DoText(FValue.Buffer, FValue.Length, not nonWs);
|
|
ParseComment;
|
|
end
|
|
else
|
|
begin
|
|
DoText(FValue.Buffer, FValue.Length, not nonWs);
|
|
ParseDoctypeDecl;
|
|
end;
|
|
end
|
|
else if FSource.FBuf^ = '?' then
|
|
begin
|
|
DoText(FValue.Buffer, FValue.Length, not nonWs);
|
|
ParsePI;
|
|
end
|
|
else
|
|
RaiseNameNotFound;
|
|
end
|
|
else if wc = #0 then
|
|
begin
|
|
if InCDATA then
|
|
FatalError('Unterminated CDATA section', -1);
|
|
if FNesting > FSource.FStartNesting then
|
|
FatalError('End-tag is missing for ''%s''', [FValidator[FNesting].FElement.NSI.QName^.Key]);
|
|
if ContextPop then Continue;
|
|
Break;
|
|
end
|
|
else if wc = '>' then
|
|
begin
|
|
BufAppend(FValue, wc);
|
|
FSource.NextChar;
|
|
|
|
if (FValue.Length <= 2) or (FValue.Buffer[FValue.Length-2] <> ']') or
|
|
(FValue.Buffer[FValue.Length-3] <> ']') then Continue;
|
|
|
|
if InCData then // got a ']]>' separator
|
|
begin
|
|
Dec(FValue.Length, 3);
|
|
InCDATA := False;
|
|
if FCDSectionsAsText then
|
|
Continue;
|
|
DoCDSect(FValue.Buffer, FValue.Length);
|
|
end
|
|
else
|
|
FatalError('Literal '']]>'' is not allowed in text', 3);
|
|
end
|
|
else if wc = '&' then
|
|
begin
|
|
if FState <> rsRoot then
|
|
FatalError('Illegal at document level');
|
|
|
|
if FCurrContentType = ctEmpty then
|
|
ValidationError('References are illegal in EMPTY elements', []);
|
|
|
|
if ParseRef(FValue) or ResolvePredefined then
|
|
begin
|
|
nonWs := True; // CharRef to whitespace is not considered whitespace
|
|
Continue;
|
|
end
|
|
else
|
|
begin
|
|
ent := EntityCheck;
|
|
if (ent = nil) or (not FExpandEntities) then
|
|
begin
|
|
DoText(FValue.Buffer, FValue.Length, not nonWs);
|
|
AppendReference(ent);
|
|
end
|
|
else
|
|
begin
|
|
ContextPush(ent);
|
|
Continue;
|
|
end;
|
|
end;
|
|
end;
|
|
StoreLocation(FTokenStart);
|
|
FValue.Length := 0;
|
|
nonWs := False;
|
|
until False;
|
|
DoText(FValue.Buffer, FValue.Length, not nonWs);
|
|
end;
|
|
|
|
procedure TXMLCharSource.NextChar;
|
|
begin
|
|
Inc(FBuf);
|
|
if FBuf >= FBufEnd then
|
|
Reload;
|
|
end;
|
|
|
|
procedure TXMLReader.ExpectChar(wc: DOMChar);
|
|
begin
|
|
if FSource.FBuf^ = wc then
|
|
FSource.NextChar
|
|
else
|
|
FatalError(wc);
|
|
end;
|
|
|
|
// Element name already in FNameBuffer
|
|
procedure TXMLReader.ParseElement; // [39] [40] [44]
|
|
var
|
|
NewElem: TDOMElement;
|
|
ElDef: TDOMElementDef;
|
|
IsEmpty: Boolean;
|
|
ElName: PHashItem;
|
|
begin
|
|
if FState > rsRoot then
|
|
FatalError('Only one top-level element allowed', FName.Length)
|
|
else if FState < rsRoot then
|
|
begin
|
|
if FValidate then
|
|
ValidateRoot;
|
|
FState := rsRoot;
|
|
end;
|
|
|
|
NewElem := doc.CreateElementBuf(FName.Buffer, FName.Length);
|
|
FCursor.AppendChild(NewElem);
|
|
// we're about to process a new set of attributes
|
|
Inc(FAttrTag);
|
|
|
|
// Remember the hash entry, we'll need it often
|
|
ElName := NewElem.NSI.QName;
|
|
|
|
// Find declaration for this element
|
|
ElDef := TDOMElementDef(ElName^.Data);
|
|
if (ElDef = nil) or (ElDef.ContentType = ctUndeclared) then
|
|
ValidationError('Using undeclared element ''%s''',[ElName^.Key], FName.Length);
|
|
|
|
// Check if new element is allowed in current context
|
|
if FValidate and not FValidator[FNesting].IsElementAllowed(ElDef) then
|
|
ValidationError('Element ''%s'' is not allowed in this context',[ElName^.Key], FName.Length);
|
|
|
|
IsEmpty := False;
|
|
while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do
|
|
begin
|
|
SkipS(True);
|
|
if (FSource.FBuf^ = '>') or (FSource.FBuf^ = '/') then
|
|
Break;
|
|
ParseAttribute(NewElem, ElDef);
|
|
end;
|
|
|
|
if FSource.FBuf^ = '/' then
|
|
begin
|
|
IsEmpty := True;
|
|
FSource.NextChar;
|
|
end;
|
|
ExpectChar('>');
|
|
|
|
if Assigned(ElDef) and Assigned(ElDef.FAttributes) then
|
|
ProcessDefaultAttributes(NewElem, ElDef.FAttributes);
|
|
PushVC(NewElem, ElDef); // this increases FNesting
|
|
if FNamespaces then
|
|
ProcessNamespaceAtts(NewElem);
|
|
|
|
if not IsEmpty then
|
|
begin
|
|
FCursor := NewElem;
|
|
if not FPreserveWhitespace then // critical for testsuite compliance
|
|
SkipS;
|
|
end
|
|
else
|
|
DoEndElement(0);
|
|
end;
|
|
|
|
procedure TXMLReader.DoEndElement(ErrOffset: Integer);
|
|
var
|
|
NewElem: TDOMElement;
|
|
begin
|
|
NewElem := FValidator[FNesting].FElement;
|
|
TDOMNode(FCursor) := NewElem.ParentNode;
|
|
if FCursor = doc then
|
|
FState := rsEpilog;
|
|
|
|
if FValidate and FValidator[FNesting].Incomplete then
|
|
ValidationError('Element ''%s'' is missing required sub-elements', [NewElem.NSI.QName^.Key], ErrOffset);
|
|
|
|
if FNamespaces then
|
|
FNSHelper.EndElement;
|
|
PopVC;
|
|
end;
|
|
|
|
procedure TXMLReader.ParseEndTag; // [42]
|
|
var
|
|
ErrOffset: Integer;
|
|
ElName: PHashItem;
|
|
begin
|
|
ElName := FValidator[FNesting].FElement.NSI.QName;
|
|
|
|
CheckName;
|
|
if not BufEquals(FName, ElName^.Key) then
|
|
FatalError('Unmatching element end tag (expected "</%s>")', [ElName^.Key], FName.Length);
|
|
if FSource.FBuf^ = '>' then // this handles majority of cases
|
|
begin
|
|
ErrOffset := FName.Length+1;
|
|
FSource.NextChar;
|
|
end
|
|
else // but if closing '>' is preceded by whitespace,
|
|
begin // skipping it is likely to lose position info.
|
|
StoreLocation(FTokenStart);
|
|
Dec(FTokenStart.LinePos, FName.Length);
|
|
ErrOffset := -1;
|
|
SkipS;
|
|
ExpectChar('>');
|
|
end;
|
|
DoEndElement(ErrOffset);
|
|
end;
|
|
|
|
procedure TXMLReader.ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
|
|
var
|
|
attr: TDOMAttr;
|
|
AttDef: TDOMAttrDef;
|
|
OldAttr: TDOMNode;
|
|
|
|
procedure CheckValue;
|
|
var
|
|
AttValue, OldValue: DOMString;
|
|
begin
|
|
if FStandalone and AttDef.ExternallyDeclared then
|
|
begin
|
|
OldValue := Attr.Value;
|
|
Attr.DataType := AttDef.DataType;
|
|
AttValue := Attr.Value;
|
|
if AttValue <> OldValue then
|
|
StandaloneError(-1);
|
|
end
|
|
else
|
|
begin
|
|
Attr.DataType := AttDef.DataType;
|
|
AttValue := Attr.Value;
|
|
end;
|
|
// TODO: what about normalization of AttDef.Value? (Currently it IS normalized)
|
|
if (AttDef.Default = adFixed) and (AttDef.Value <> AttValue) then
|
|
ValidationError('Value of attribute ''%s'' does not match its #FIXED default',[AttDef.Name], -1);
|
|
if not ValidateAttrSyntax(AttDef, AttValue) then
|
|
ValidationError('Attribute ''%s'' type mismatch', [AttDef.Name], -1);
|
|
ValidateAttrValue(Attr, AttValue);
|
|
end;
|
|
|
|
begin
|
|
CheckName;
|
|
attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length);
|
|
|
|
if Assigned(ElDef) then
|
|
begin
|
|
AttDef := TDOMAttrDef(ElDef.GetAttributeNode(attr.NSI.QName^.Key));
|
|
if AttDef = nil then
|
|
ValidationError('Using undeclared attribute ''%s'' on element ''%s''',[attr.NSI.QName^.Key, Elem.NSI.QName^.Key], FName.Length)
|
|
else
|
|
AttDef.Tag := FAttrTag; // indicates that this one is specified
|
|
end
|
|
else
|
|
AttDef := nil;
|
|
|
|
// !!cannot use TDOMElement.SetAttributeNode because it will free old attribute
|
|
OldAttr := Elem.Attributes.SetNamedItem(Attr);
|
|
if Assigned(OldAttr) then
|
|
begin
|
|
OldAttr.Free;
|
|
FatalError('Duplicate attribute', FName.Length);
|
|
end;
|
|
ExpectEq;
|
|
FCursor := attr;
|
|
ExpectAttValue;
|
|
|
|
if Assigned(AttDef) and ((AttDef.DataType <> dtCdata) or (AttDef.Default = adFixed)) then
|
|
CheckValue;
|
|
end;
|
|
|
|
procedure TXMLReader.AddForwardRef(aList: TFPList; Buf: DOMPChar; Length: Integer);
|
|
var
|
|
w: PForwardRef;
|
|
begin
|
|
New(w);
|
|
SetString(w^.Value, Buf, Length);
|
|
w^.Loc := FTokenStart;
|
|
aList.Add(w);
|
|
end;
|
|
|
|
procedure TXMLReader.ClearRefs(aList: TFPList);
|
|
var
|
|
I: Integer;
|
|
begin
|
|
for I := 0 to aList.Count-1 do
|
|
Dispose(PForwardRef(aList.List^[I]));
|
|
aList.Clear;
|
|
end;
|
|
|
|
procedure TXMLReader.ValidateIdRefs;
|
|
var
|
|
I: Integer;
|
|
begin
|
|
for I := 0 to FIDRefs.Count-1 do
|
|
with PForwardRef(FIDRefs.List^[I])^ do
|
|
if Doc.GetElementById(Value) = nil then
|
|
DoErrorPos(esError, Format('The ID ''%s'' does not match any element', [Value]), Loc);
|
|
ClearRefs(FIDRefs);
|
|
end;
|
|
|
|
procedure TXMLReader.ProcessDefaultAttributes(Element: TDOMElement; Map: TDOMNamedNodeMap);
|
|
var
|
|
I: Integer;
|
|
AttDef: TDOMAttrDef;
|
|
Attr: TDOMAttr;
|
|
begin
|
|
for I := 0 to Map.Length-1 do
|
|
begin
|
|
AttDef := Map[I] as TDOMAttrDef;
|
|
|
|
if AttDef.Tag <> FAttrTag then // this one wasn't specified
|
|
begin
|
|
case AttDef.Default of
|
|
adDefault, adFixed: begin
|
|
if FStandalone and AttDef.ExternallyDeclared then
|
|
StandaloneError;
|
|
Attr := TDOMAttr(AttDef.CloneNode(True));
|
|
Element.SetAttributeNode(Attr);
|
|
ValidateAttrValue(Attr, Attr.Value);
|
|
end;
|
|
adRequired: ValidationError('Required attribute ''%s'' of element ''%s'' is missing',[AttDef.Name, Element.TagName], 0)
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
|
|
procedure TXMLReader.AddBinding(Attr: TDOMAttr; PrefixPtr: DOMPChar; PrefixLen: Integer);
|
|
var
|
|
nsUri: DOMString;
|
|
Prefix: PHashItem;
|
|
begin
|
|
nsUri := Attr.NodeValue;
|
|
Prefix := FNSHelper.GetPrefix(PrefixPtr, PrefixLen);
|
|
{ 'xml' is allowed to be bound to the correct namespace }
|
|
if ((nsUri = stduri_xml) <> (Prefix = FStdPrefix_xml)) or
|
|
(Prefix = FStdPrefix_xmlns) or
|
|
(nsUri = stduri_xmlns) then
|
|
begin
|
|
if (Prefix = FStdPrefix_xml) or (Prefix = FStdPrefix_xmlns) then
|
|
FatalError('Illegal usage of reserved prefix ''%s''', [Prefix^.Key])
|
|
else
|
|
FatalError('Illegal usage of reserved namespace URI ''%s''', [nsUri]);
|
|
end;
|
|
|
|
if (nsUri = '') and not (FXML11 or (Prefix^.Key = '')) then
|
|
FatalError('Illegal undefining of namespace'); { position - ? }
|
|
|
|
FNSHelper.BindPrefix(nsURI, Prefix);
|
|
end;
|
|
|
|
procedure TXMLReader.ProcessNamespaceAtts(Element: TDOMElement);
|
|
var
|
|
I, J: Integer;
|
|
Map: TDOMNamedNodeMap;
|
|
Prefix, AttrName: PHashItem;
|
|
Attr: TDOMAttr;
|
|
PrefixCount: Integer;
|
|
b: TBinding;
|
|
begin
|
|
FNSHelper.StartElement;
|
|
|
|
PrefixCount := 0;
|
|
if Element.HasAttributes then
|
|
begin
|
|
Map := Element.Attributes;
|
|
if Map.Length > LongWord(Length(FWorkAtts)) then
|
|
SetLength(FWorkAtts, Map.Length+10);
|
|
{ Pass 1, identify prefixed attrs and assign prefixes }
|
|
for I := 0 to Map.Length-1 do
|
|
begin
|
|
Attr := TDOMAttr(Map[I]);
|
|
AttrName := Attr.NSI.QName;
|
|
if Pos(DOMString('xmlns'), AttrName^.Key) = 1 then
|
|
begin
|
|
{ this is a namespace declaration }
|
|
if Length(AttrName^.Key) = 5 then
|
|
begin
|
|
// TODO: check all consequences of having zero PrefixLength
|
|
Attr.SetNSI(stduri_xmlns, 0);
|
|
AddBinding(Attr, nil, 0);
|
|
end
|
|
else if AttrName^.Key[6] = ':' then
|
|
begin
|
|
Attr.SetNSI(stduri_xmlns, 6);
|
|
AddBinding(Attr, @AttrName^.Key[7], Length(AttrName^.Key)-6);
|
|
end;
|
|
end
|
|
else
|
|
begin
|
|
J := Pos(DOMChar(':'), AttrName^.Key);
|
|
if J > 1 then
|
|
begin
|
|
FWorkAtts[PrefixCount].Attr := Attr;
|
|
FWorkAtts[PrefixCount].PrefixLen := J;
|
|
Inc(PrefixCount);
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
{ Pass 2, now all bindings are known, handle remaining prefixed attributes }
|
|
if PrefixCount > 0 then
|
|
begin
|
|
FNsAttHash.Init(PrefixCount);
|
|
for I := 0 to PrefixCount-1 do
|
|
begin
|
|
AttrName := FWorkAtts[I].Attr.NSI.QName;
|
|
if not FNSHelper.IsPrefixBound(DOMPChar(AttrName^.Key), FWorkAtts[I].PrefixLen-1, Prefix) then
|
|
FatalError('Unbound prefix "%s"', [Prefix^.Key]);
|
|
|
|
b := TBinding(Prefix^.Data);
|
|
{ detect duplicates }
|
|
J := FWorkAtts[I].PrefixLen+1;
|
|
|
|
if FNsAttHash.Locate(@b.uri, @AttrName^.Key[J], Length(AttrName^.Key) - J+1) then
|
|
FatalError('Duplicate prefixed attribute');
|
|
|
|
// convert Attr into namespaced one (by hack for the time being)
|
|
FWorkAtts[I].Attr.SetNSI(b.uri, J-1);
|
|
end;
|
|
end;
|
|
{ Finally, expand the element name }
|
|
J := Pos(DOMChar(':'), Element.NSI.QName^.Key);
|
|
if J > 1 then
|
|
begin
|
|
if not FNSHelper.IsPrefixBound(DOMPChar(Element.NSI.QName^.Key), J-1, Prefix) then
|
|
FatalError('Unbound prefix "%s"', [Prefix^.Key]);
|
|
b := TBinding(Prefix^.Data);
|
|
Element.SetNSI(b.uri, J);
|
|
end
|
|
else
|
|
begin
|
|
b := FNSHelper.DefaultNSBinding;
|
|
if Assigned(b) then
|
|
Element.SetNSI(b.uri, 0);
|
|
end;
|
|
end;
|
|
|
|
function TXMLReader.ParseExternalID(out SysID, PubID: DOMString; // [75]
|
|
SysIdOptional: Boolean): Boolean;
|
|
var
|
|
I: Integer;
|
|
wc: DOMChar;
|
|
begin
|
|
Result := False;
|
|
if FSource.Matches('SYSTEM') then
|
|
SysIdOptional := False
|
|
else if FSource.Matches('PUBLIC') then
|
|
begin
|
|
ExpectWhitespace;
|
|
ParseLiteral(FValue, ltPubid, True);
|
|
SetString(PubID, FValue.Buffer, FValue.Length);
|
|
for I := 1 to Length(PubID) do
|
|
begin
|
|
wc := PubID[I];
|
|
if (wc > #255) or not (Char(ord(wc)) in PubidChars) then
|
|
FatalError('Illegal Public ID literal', -1);
|
|
end;
|
|
end
|
|
else
|
|
Exit;
|
|
|
|
if SysIdOptional then
|
|
SkipWhitespace
|
|
else
|
|
ExpectWhitespace;
|
|
if ParseLiteral(FValue, ltPlain, not SysIdOptional) then
|
|
SetString(SysID, FValue.Buffer, FValue.Length);
|
|
Result := True;
|
|
end;
|
|
|
|
function TXMLReader.ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: DOMString): Boolean;
|
|
begin
|
|
case AttrDef.DataType of
|
|
dtId, dtIdRef, dtEntity: Result := IsXmlName(aValue, FXML11) and
|
|
((not FNamespaces) or (Pos(DOMChar(':'), aValue) = 0));
|
|
dtIdRefs, dtEntities: Result := IsXmlNames(aValue, FXML11) and
|
|
((not FNamespaces) or (Pos(DOMChar(':'), aValue) = 0));
|
|
dtNmToken: Result := IsXmlNmToken(aValue, FXML11) and AttrDef.HasEnumToken(aValue);
|
|
dtNmTokens: Result := IsXmlNmTokens(aValue, FXML11);
|
|
// IsXmlName() not necessary - enum is never empty and contains valid names
|
|
dtNotation: Result := AttrDef.HasEnumToken(aValue);
|
|
else
|
|
Result := True;
|
|
end;
|
|
end;
|
|
|
|
procedure TXMLReader.ValidateAttrValue(Attr: TDOMAttr; const aValue: DOMString);
|
|
var
|
|
L, StartPos, EndPos: Integer;
|
|
Entity: TDOMEntity;
|
|
begin
|
|
L := Length(aValue);
|
|
case Attr.DataType of
|
|
dtId: if not Doc.AddID(Attr) then
|
|
ValidationError('The ID ''%s'' is not unique', [aValue], -1);
|
|
|
|
dtIdRef, dtIdRefs: begin
|
|
StartPos := 1;
|
|
while StartPos <= L do
|
|
begin
|
|
EndPos := StartPos;
|
|
while (EndPos <= L) and (aValue[EndPos] <> #32) do
|
|
Inc(EndPos);
|
|
AddForwardRef(FIDRefs, @aValue[StartPos], EndPos-StartPos);
|
|
StartPos := EndPos + 1;
|
|
end;
|
|
end;
|
|
|
|
dtEntity, dtEntities: begin
|
|
StartPos := 1;
|
|
while StartPos <= L do
|
|
begin
|
|
EndPos := StartPos;
|
|
while (EndPos <= L) and (aValue[EndPos] <> #32) do
|
|
Inc(EndPos);
|
|
Entity := TDOMEntity(FDocType.Entities.GetNamedItem(Copy(aValue, StartPos, EndPos-StartPos)));
|
|
if (Entity = nil) or (Entity.NotationName = '') then
|
|
ValidationError('Attribute ''%s'' type mismatch', [Attr.Name], -1);
|
|
StartPos := EndPos + 1;
|
|
end;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
procedure TXMLReader.ValidateRoot;
|
|
begin
|
|
if Assigned(FDocType) then
|
|
begin
|
|
if not BufEquals(FName, FDocType.Name) then
|
|
ValidationError('Root element name does not match DTD', [], FName.Length);
|
|
end
|
|
else
|
|
ValidationError('Missing DTD', [], FName.Length);
|
|
end;
|
|
|
|
procedure TXMLReader.ValidateDTD;
|
|
var
|
|
I: Integer;
|
|
begin
|
|
if FValidate then
|
|
for I := 0 to FNotationRefs.Count-1 do
|
|
with PForwardRef(FNotationRefs[I])^ do
|
|
if FDocType.Notations.GetNamedItem(Value) = nil then
|
|
DoErrorPos(esError, Format('Notation ''%s'' is not declared', [Value]), Loc);
|
|
ClearRefs(FNotationRefs);
|
|
end;
|
|
|
|
procedure TXMLReader.DoText(ch: DOMPChar; Count: Integer; Whitespace: Boolean);
|
|
var
|
|
TextNode: TDOMText;
|
|
begin
|
|
if FState <> rsRoot then
|
|
if not Whitespace then
|
|
FatalError('Illegal at document level', -1)
|
|
else
|
|
Exit;
|
|
|
|
if (Whitespace and (not FPreserveWhitespace)) or (Count = 0) then
|
|
Exit;
|
|
|
|
// Validating filter part
|
|
case FCurrContentType of
|
|
ctChildren:
|
|
if not Whitespace then
|
|
ValidationError('Character data is not allowed in element-only content',[])
|
|
else
|
|
if FSaViolation then
|
|
StandaloneError(-1);
|
|
ctEmpty:
|
|
ValidationError('Character data is not allowed in EMPTY elements', []);
|
|
end;
|
|
|
|
// Document builder part
|
|
TextNode := Doc.CreateTextNodeBuf(ch, Count, Whitespace and (FCurrContentType = ctChildren));
|
|
FCursor.AppendChild(TextNode);
|
|
end;
|
|
|
|
procedure TXMLReader.DoAttrText(ch: DOMPChar; Count: Integer);
|
|
begin
|
|
FCursor.AppendChild(Doc.CreateTextNodeBuf(ch, Count, False));
|
|
end;
|
|
|
|
procedure TXMLReader.DoComment(ch: DOMPChar; Count: Integer);
|
|
var
|
|
Node: TDOMComment;
|
|
begin
|
|
// validation filter part
|
|
if FCurrContentType = ctEmpty then
|
|
ValidationError('Comments are not allowed within EMPTY elements', []);
|
|
|
|
// DOM builder part
|
|
if (not FIgnoreComments) and Assigned(FCursor) then
|
|
begin
|
|
Node := Doc.CreateCommentBuf(ch, Count);
|
|
FCursor.AppendChild(Node);
|
|
end;
|
|
end;
|
|
|
|
procedure TXMLReader.DoCDSect(ch: DOMPChar; Count: Integer);
|
|
var
|
|
s: DOMString;
|
|
begin
|
|
if FCurrContentType = ctChildren then
|
|
ValidationError('CDATA sections are not allowed in element-only content',[]);
|
|
|
|
if not FCDSectionsAsText then
|
|
begin
|
|
SetString(s, ch, Count);
|
|
// SAX: LexicalHandler.StartCDATA;
|
|
// SAX: ContentHandler.Characters(...);
|
|
FCursor.AppendChild(doc.CreateCDATASection(s));
|
|
// SAX: LexicalHandler.EndCDATA;
|
|
end
|
|
else
|
|
FCursor.AppendChild(doc.CreateTextNodeBuf(ch, Count, False));
|
|
end;
|
|
|
|
procedure TXMLReader.DoNotationDecl(const aName, aPubID, aSysID: DOMString);
|
|
var
|
|
Notation: TDOMNotationEx;
|
|
begin
|
|
if FDocType.Notations.GetNamedItem(aName) = nil then
|
|
begin
|
|
Notation := TDOMNotationEx(TDOMNotation.Create(doc));
|
|
Notation.FName := aName;
|
|
Notation.FPublicID := aPubID;
|
|
Notation.FSystemID := aSysID;
|
|
FDocType.Notations.SetNamedItem(Notation);
|
|
end
|
|
else
|
|
ValidationError('Duplicate notation declaration: ''%s''', [aName]);
|
|
end;
|
|
|
|
procedure TXMLReader.PushVC(aElement: TDOMElement; aElDef: TDOMElementDef);
|
|
begin
|
|
Inc(FNesting);
|
|
if FNesting >= Length(FValidator) then
|
|
SetLength(FValidator, FNesting * 2);
|
|
FValidator[FNesting].FElement := aElement;
|
|
FValidator[FNesting].FElementDef := aElDef;
|
|
FValidator[FNesting].FCurCP := nil;
|
|
FValidator[FNesting].FFailed := False;
|
|
UpdateConstraints;
|
|
end;
|
|
|
|
procedure TXMLReader.PopVC;
|
|
begin
|
|
if FNesting > 0 then Dec(FNesting);
|
|
UpdateConstraints;
|
|
end;
|
|
|
|
procedure TXMLReader.UpdateConstraints;
|
|
begin
|
|
if FValidate and Assigned(FValidator[FNesting].FElementDef) then
|
|
begin
|
|
FCurrContentType := FValidator[FNesting].FElementDef.ContentType;
|
|
FSaViolation := FStandalone and (FValidator[FNesting].FElementDef.FExternallyDeclared);
|
|
end
|
|
else
|
|
begin
|
|
FCurrContentType := ctAny;
|
|
FSaViolation := False;
|
|
end;
|
|
end;
|
|
|
|
{ TElementValidator }
|
|
|
|
function TElementValidator.IsElementAllowed(Def: TDOMElementDef): Boolean;
|
|
var
|
|
I: Integer;
|
|
Next: TContentParticle;
|
|
begin
|
|
Result := True;
|
|
// if element is not declared, non-validity has been already reported, no need to report again...
|
|
if Assigned(Def) and Assigned(FElementDef) then
|
|
begin
|
|
case FElementDef.ContentType of
|
|
ctMixed: begin
|
|
for I := 0 to FElementDef.RootCP.ChildCount-1 do
|
|
begin
|
|
if Def = FElementDef.RootCP.Children[I].Def then
|
|
Exit;
|
|
end;
|
|
Result := False;
|
|
end;
|
|
|
|
ctEmpty: Result := False;
|
|
|
|
ctChildren: begin
|
|
if FCurCP = nil then
|
|
Next := FElementDef.RootCP.FindFirst(Def)
|
|
else
|
|
Next := FCurCP.FindNext(Def, 0); { second arg ignored here }
|
|
Result := Assigned(Next);
|
|
if Result then
|
|
FCurCP := Next
|
|
else
|
|
FFailed := True; // used to prevent extra error at the end of element
|
|
end;
|
|
// ctAny, ctUndeclared: returns True by default
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function TElementValidator.Incomplete: Boolean;
|
|
begin
|
|
if Assigned(FElementDef) and (FElementDef.ContentType = ctChildren) and (not FFailed) then
|
|
begin
|
|
if FCurCP <> nil then
|
|
Result := FCurCP.MoreRequired(0) { arg ignored here }
|
|
else
|
|
Result := FElementDef.RootCP.IsRequired;
|
|
end
|
|
else
|
|
Result := False;
|
|
end;
|
|
|
|
{ TContentParticle }
|
|
|
|
function TContentParticle.Add: TContentParticle;
|
|
begin
|
|
if FChildren = nil then
|
|
FChildren := TFPList.Create;
|
|
Result := TContentParticle.Create;
|
|
Result.FParent := Self;
|
|
Result.FIndex := FChildren.Add(Result);
|
|
end;
|
|
|
|
destructor TContentParticle.Destroy;
|
|
var
|
|
I: Integer;
|
|
begin
|
|
if Assigned(FChildren) then
|
|
for I := FChildren.Count-1 downto 0 do
|
|
TObject(FChildren[I]).Free;
|
|
FChildren.Free;
|
|
inherited Destroy;
|
|
end;
|
|
|
|
function TContentParticle.GetChild(Index: Integer): TContentParticle;
|
|
begin
|
|
Result := TContentParticle(FChildren[Index]);
|
|
end;
|
|
|
|
function TContentParticle.GetChildCount: Integer;
|
|
begin
|
|
if Assigned(FChildren) then
|
|
Result := FChildren.Count
|
|
else
|
|
Result := 0;
|
|
end;
|
|
|
|
function TContentParticle.IsRequired: Boolean;
|
|
var
|
|
I: Integer;
|
|
begin
|
|
Result := (CPQuant = cqOnce) or (CPQuant = cqOnceOrMore);
|
|
// do not return True if all children are optional
|
|
if (CPType <> ctName) and Result then
|
|
begin
|
|
for I := 0 to ChildCount-1 do
|
|
begin
|
|
Result := Children[I].IsRequired;
|
|
if Result then Exit;
|
|
end;
|
|
end;
|
|
end;
|
|
|
|
function TContentParticle.MoreRequired(ChildIdx: Integer): Boolean;
|
|
var
|
|
I: Integer;
|
|
begin
|
|
Result := False;
|
|
if CPType = ctSeq then
|
|
begin
|
|
for I := ChildIdx + 1 to ChildCount-1 do
|
|
begin
|
|
Result := Children[I].IsRequired;
|
|
if Result then Exit;
|
|
end;
|
|
end;
|
|
if Assigned(FParent) then
|
|
Result := FParent.MoreRequired(FIndex);
|
|
end;
|
|
|
|
function TContentParticle.FindFirst(aDef: TDOMElementDef): TContentParticle;
|
|
var
|
|
I: Integer;
|
|
begin
|
|
Result := nil;
|
|
case CPType of
|
|
ctSeq:
|
|
for I := 0 to ChildCount-1 do with Children[I] do
|
|
begin
|
|
Result := FindFirst(aDef);
|
|
if Assigned(Result) or IsRequired then
|
|
Exit;
|
|
end;
|
|
ctChoice:
|
|
for I := 0 to ChildCount-1 do with Children[I] do
|
|
begin
|
|
Result := FindFirst(aDef);
|
|
if Assigned(Result) then
|
|
Exit;
|
|
end;
|
|
else // ctName
|
|
if aDef = Self.Def then
|
|
Result := Self
|
|
end;
|
|
end;
|
|
|
|
function TContentParticle.FindNext(aDef: TDOMElementDef;
|
|
ChildIdx: Integer): TContentParticle;
|
|
var
|
|
I: Integer;
|
|
begin
|
|
Result := nil;
|
|
if CPType = ctSeq then // search sequence to its end
|
|
begin
|
|
for I := ChildIdx + 1 to ChildCount-1 do with Children[I] do
|
|
begin
|
|
Result := FindFirst(aDef);
|
|
if (Result <> nil) or IsRequired then
|
|
Exit;
|
|
end;
|
|
end;
|
|
if (CPQuant = cqZeroOrMore) or (CPQuant = cqOnceOrMore) then
|
|
Result := FindFirst(aDef);
|
|
if (Result = nil) and Assigned(FParent) then
|
|
Result := FParent.FindNext(aDef, FIndex);
|
|
end;
|
|
|
|
{ TDOMElementDef }
|
|
|
|
destructor TDOMElementDef.Destroy;
|
|
begin
|
|
RootCP.Free;
|
|
inherited Destroy;
|
|
end;
|
|
|
|
{ plain calls }
|
|
|
|
procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text; Flags: TXMLReaderFlags);
|
|
var
|
|
Reader: TXMLReader;
|
|
Src: TXMLCharSource;
|
|
begin
|
|
ADoc := nil;
|
|
Src := TXMLFileInputSource.Create(f);
|
|
Reader := TXMLReader.Create;
|
|
try
|
|
Reader.Flags:=Flags;
|
|
Reader.ProcessXML(Src);
|
|
finally
|
|
ADoc := TXMLDocument(Reader.Doc);
|
|
Reader.Free;
|
|
end;
|
|
end;
|
|
|
|
procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String;
|
|
Flags: TXMLReaderFlags);
|
|
var
|
|
Reader: TXMLReader;
|
|
Src: TXMLCharSource;
|
|
begin
|
|
ADoc := nil;
|
|
Reader := TXMLReader.Create;
|
|
try
|
|
Src := TXMLStreamInputSource.Create(f, False);
|
|
Src.SystemID := ABaseURI;
|
|
Reader.Flags:=Flags;
|
|
Reader.ProcessXML(Src);
|
|
finally
|
|
ADoc := TXMLDocument(Reader.doc);
|
|
Reader.Free;
|
|
end;
|
|
end;
|
|
|
|
procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; Flags: TXMLReaderFlags);
|
|
begin
|
|
ReadXMLFile(ADoc, f, 'stream:', Flags);
|
|
end;
|
|
|
|
procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String;
|
|
Flags: TXMLReaderFlags);
|
|
var
|
|
FileStream: TStream;
|
|
begin
|
|
ADoc := nil;
|
|
FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
|
|
try
|
|
ReadXMLFile(ADoc, FileStream, FilenameToURI(AFilename), Flags);
|
|
finally
|
|
FileStream.Free;
|
|
end;
|
|
end;
|
|
|
|
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text;
|
|
Flags: TXMLReaderFlags);
|
|
var
|
|
Reader: TXMLReader;
|
|
Src: TXMLCharSource;
|
|
begin
|
|
Reader := TXMLReader.Create;
|
|
try
|
|
Reader.Flags:=Flags;
|
|
Src := TXMLFileInputSource.Create(f);
|
|
Reader.ProcessFragment(Src, AParentNode);
|
|
finally
|
|
Reader.Free;
|
|
end;
|
|
end;
|
|
|
|
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream;
|
|
const ABaseURI: String; Flags: TXMLReaderFlags);
|
|
var
|
|
Reader: TXMLReader;
|
|
Src: TXMLCharSource;
|
|
begin
|
|
Reader := TXMLReader.Create;
|
|
try
|
|
Src := TXMLStreamInputSource.Create(f, False);
|
|
Src.SystemID := ABaseURI;
|
|
Reader.Flags:=Flags;
|
|
Reader.ProcessFragment(Src, AParentNode);
|
|
finally
|
|
Reader.Free;
|
|
end;
|
|
end;
|
|
|
|
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream;
|
|
Flags: TXMLReaderFlags);
|
|
begin
|
|
ReadXMLFragment(AParentNode, f, 'stream:', Flags);
|
|
end;
|
|
|
|
procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String;
|
|
Flags: TXMLReaderFlags);
|
|
var
|
|
Stream: TStream;
|
|
begin
|
|
Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
|
|
try
|
|
ReadXMLFragment(AParentNode, Stream, FilenameToURI(AFilename), Flags);
|
|
finally
|
|
Stream.Free;
|
|
end;
|
|
end;
|
|
|
|
|
|
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text);
|
|
var
|
|
Reader: TXMLReader;
|
|
Src: TXMLCharSource;
|
|
begin
|
|
ADoc := nil;
|
|
Reader := TXMLReader.Create;
|
|
try
|
|
Src := TXMLFileInputSource.Create(f);
|
|
Reader.ProcessDTD(Src);
|
|
finally
|
|
ADoc := TXMLDocument(Reader.doc);
|
|
Reader.Free;
|
|
end;
|
|
end;
|
|
|
|
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String);
|
|
var
|
|
Reader: TXMLReader;
|
|
Src: TXMLCharSource;
|
|
begin
|
|
ADoc := nil;
|
|
Reader := TXMLReader.Create;
|
|
try
|
|
Src := TXMLStreamInputSource.Create(f, False);
|
|
Src.SystemID := ABaseURI;
|
|
Reader.ProcessDTD(Src);
|
|
finally
|
|
ADoc := TXMLDocument(Reader.doc);
|
|
Reader.Free;
|
|
end;
|
|
end;
|
|
|
|
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream);
|
|
begin
|
|
ReadDTDFile(ADoc, f, 'stream:');
|
|
end;
|
|
|
|
procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String);
|
|
var
|
|
Stream: TStream;
|
|
begin
|
|
ADoc := nil;
|
|
Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
|
|
try
|
|
ReadDTDFile(ADoc, Stream, FilenameToURI(AFilename));
|
|
finally
|
|
Stream.Free;
|
|
end;
|
|
end;
|
|
|
|
{ EXMLReadError }
|
|
|
|
function EXMLReadError.LineCol: TPoint;
|
|
begin
|
|
Result.Y:=Line;
|
|
Result.X:=LinePos;
|
|
end;
|
|
|
|
end.
|