lazarus/components/codetools/laz2_xmlread.pas
mattias bb55ccecbd IDE: started upgrade of xml units
git-svn-id: trunk@25647 -
2010-05-25 20:37:53 +00:00

4133 lines
112 KiB
ObjectPascal

{
This file is based on the FCL unit xmlread svn revision 15251.
Converted to use UTF8 instead of widestrings by Mattias Gaertner.
}
{
This file is part of the Free Component Library
XML reading routines.
Copyright (c) 1999-2000 by Sebastian Guenther, sg@freepascal.org
Modified in 2006 by Sergei Gorelkin, sergei_gorelkin@mail.ru
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
unit laz2_XMLRead;
{$ifdef fpc}
{$MODE objfpc}{$H+}
{$endif}
{$DEFINE UseUTF8}
{off $DEFINE UseWideString}
interface
uses
SysUtils, Classes, laz2_DOM;
type
TErrorSeverity = (esWarning, esError, esFatal);
TXMLReaderFlag = (
xrfAllowLowerThanInAttributeValue,
xrfAllowSpecialCharsInAttributeValue
);
TXMLReaderFlags = set of TXMLReaderFlag;
{ EXMLReadError }
EXMLReadError = class(Exception)
private
FSeverity: TErrorSeverity;
FErrorMessage: string;
FLine: Integer;
FLinePos: Integer;
public
property Severity: TErrorSeverity read FSeverity;
property ErrorMessage: string read FErrorMessage;
property Line: Integer read FLine;
property LinePos: Integer read FLinePos;
function LineCol: TPoint;
end;
procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String; Flags: TXMLReaderFlags = []); overload;
procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text; Flags: TXMLReaderFlags = []); overload;
procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; Flags: TXMLReaderFlags = []); overload;
procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String; Flags: TXMLReaderFlags = []); overload;
procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String; Flags: TXMLReaderFlags = []); overload;
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text; Flags: TXMLReaderFlags = []); overload;
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; Flags: TXMLReaderFlags = []); overload;
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream; const ABaseURI: String; Flags: TXMLReaderFlags = []); overload;
procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String); overload;
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text); overload;
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream); overload;
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String); overload;
type
TDOMParseOptions = class(TObject)
private
FValidate: Boolean;
FPreserveWhitespace: Boolean;
FExpandEntities: Boolean;
FIgnoreComments: Boolean;
FCDSectionsAsText: Boolean;
FResolveExternals: Boolean;
FNamespaces: Boolean;
FDisallowDoctype: Boolean;
FCanonical: Boolean;
FMaxChars: Cardinal;
function GetCanonical: Boolean;
procedure SetCanonical(aValue: Boolean);
public
property Validate: Boolean read FValidate write FValidate;
property PreserveWhitespace: Boolean read FPreserveWhitespace write FPreserveWhitespace;
property ExpandEntities: Boolean read FExpandEntities write FExpandEntities;
property IgnoreComments: Boolean read FIgnoreComments write FIgnoreComments;
property CDSectionsAsText: Boolean read FCDSectionsAsText write FCDSectionsAsText;
property ResolveExternals: Boolean read FResolveExternals write FResolveExternals;
property Namespaces: Boolean read FNamespaces write FNamespaces;
property DisallowDoctype: Boolean read FDisallowDoctype write FDisallowDoctype;
property MaxChars: Cardinal read FMaxChars write FMaxChars;
property CanonicalForm: Boolean read GetCanonical write SetCanonical;
end;
// NOTE: DOM 3 LS ACTION_TYPE enumeration starts at 1
TXMLContextAction = (
xaAppendAsChildren = 1,
xaReplaceChildren,
xaInsertBefore,
xaInsertAfter,
xaReplace);
TXMLErrorEvent = procedure(Error: EXMLReadError) of object;
TXMLInputSource = class(TObject)
private
FStream: TStream;
FStringData: string;
FBaseURI: DOMString;
FSystemID: DOMString;
FPublicID: DOMString;
// FEncoding: string;
public
constructor Create(AStream: TStream); overload;
constructor Create(const AStringData: string); overload;
property Stream: TStream read FStream;
property StringData: string read FStringData;
property BaseURI: DOMString read FBaseURI write FBaseURI;
property SystemID: DOMString read FSystemID write FSystemID;
property PublicID: DOMString read FPublicID write FPublicID;
// property Encoding: string read FEncoding write FEncoding;
end;
TDOMParser = class(TObject)
private
FOptions: TDOMParseOptions;
FOnError: TXMLErrorEvent;
public
constructor Create;
destructor Destroy; override;
procedure Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
procedure ParseUri(const URI: DOMString; out ADoc: TXMLDocument);
function ParseWithContext(Src: TXMLInputSource; Context: TDOMNode;
Action: TXMLContextAction): TDOMNode;
property Options: TDOMParseOptions read FOptions;
property OnError: TXMLErrorEvent read FOnError write FOnError;
end;
TDecoder = record
Context: Pointer;
Decode: function(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: DOMPChar; var OutCnt: Cardinal): Integer; stdcall;
Cleanup: procedure(Context: Pointer); stdcall;
end;
TGetDecoderProc = function(const AEncoding: string; out Decoder: TDecoder): Boolean; stdcall;
procedure RegisterDecoder(Proc: TGetDecoderProc);
// =======================================================
implementation
uses
UriParser, laz2_xmlutils;
const
PubidChars: TSetOfChar = [' ', #13, #10, 'a'..'z', 'A'..'Z', '0'..'9',
'-', '''', '(', ')', '+', ',', '.', '/', ':', '=', '?', ';', '!', '*',
'#', '@', '$', '_', '%'];
type
TDOMNotationEx = class(TDOMNotation);
TDOMDocumentTypeEx = class(TDOMDocumentType);
TDOMElementDef = class;
TDTDSubsetType = (dsNone, dsInternal, dsExternal);
// This may be augmented with ByteOffset, UTF8Offset, etc.
TLocation = record
Line: Integer;
LinePos: Integer;
end;
TDOMEntityEx = class(TDOMEntity)
protected
FExternallyDeclared: Boolean;
FPrefetched: Boolean;
FResolved: Boolean;
FOnStack: Boolean;
FBetweenDecls: Boolean;
FIsPE: Boolean;
FReplacementText: DOMString;
FURI: DOMString;
FStartLocation: TLocation;
FCharCount: Cardinal;
end;
DOMPCharBuf = ^TDOMCharBuf;
TDOMCharBuf = record
Buffer: DOMPChar;
Length: Integer;
MaxLength: Integer;
end;
TXMLReader = class;
TXMLCharSource = class(TObject)
private
FBuf: DOMPChar;
FBufEnd: DOMPChar;
FReader: TXMLReader;
FParent: TXMLCharSource;
FEntity: TObject; // weak reference
FLineNo: Integer;
LFPos: DOMPChar;
FXML11Rules: Boolean;
FSystemID: DOMString;
FCharCount: Cardinal;
FStartNesting: Integer;
function GetSystemID: DOMString;
protected
function Reload: Boolean; virtual;
public
DTDSubsetType: TDTDSubsetType;
constructor Create(const AData: DOMString);
procedure NextChar;
procedure NewLine; virtual;
function SkipUntil(var ToFill: TDOMCharBuf; const Delim: TSetOfChar;
wsflag: PBoolean = nil; AllowSpecialChars: boolean = false): DOMChar; virtual;
procedure Initialize; virtual;
function SetEncoding(const AEncoding: string): Boolean; virtual;
function Matches(const arg: DOMString): Boolean;
property SystemID: DOMString read GetSystemID write FSystemID;
end;
TXMLDecodingSource = class(TXMLCharSource)
private
FCharBuf: PChar;
FCharBufEnd: PChar;
FBufStart: DOMPChar;
FDecoder: TDecoder;
FHasBOM: Boolean;
FFixedUCS2: string;
FBufSize: Integer;
procedure DecodingError(const Msg: string);
protected
function Reload: Boolean; override;
procedure FetchData; virtual;
public
procedure AfterConstruction; override;
destructor Destroy; override;
function SetEncoding(const AEncoding: string): Boolean; override;
procedure NewLine; override;
function SkipUntil(var ToFill: TDOMCharBuf; const Delim: TSetOfChar;
wsflag: PBoolean = nil; AllowSpecialChars: boolean = false): DOMChar; override;
procedure Initialize; override;
end;
TXMLStreamInputSource = class(TXMLDecodingSource)
private
FAllocated: PChar;
FStream: TStream;
FCapacity: Integer;
FOwnStream: Boolean;
FEof: Boolean;
public
constructor Create(AStream: TStream; AOwnStream: Boolean);
destructor Destroy; override;
procedure FetchData; override;
end;
TXMLFileInputSource = class(TXMLDecodingSource)
private
FFile: ^Text;
FString: string;
FTmp: string;
public
constructor Create(var AFile: Text);
procedure FetchData; override;
end;
PForwardRef = ^TForwardRef;
TForwardRef = record
Value: DOMString;
Loc: TLocation;
end;
TCPType = (ctName, ctChoice, ctSeq);
TCPQuant = (cqOnce, cqZeroOrOnce, cqZeroOrMore, cqOnceOrMore);
TContentParticle = class(TObject)
private
FParent: TContentParticle;
FChildren: TFPList;
FIndex: Integer;
function GetChildCount: Integer;
function GetChild(Index: Integer): TContentParticle;
public
CPType: TCPType;
CPQuant: TCPQuant;
Def: TDOMElementDef;
destructor Destroy; override;
function Add: TContentParticle;
function IsRequired: Boolean;
function FindFirst(aDef: TDOMElementDef): TContentParticle;
function FindNext(aDef: TDOMElementDef; ChildIdx: Integer): TContentParticle;
function MoreRequired(ChildIdx: Integer): Boolean;
property ChildCount: Integer read GetChildCount;
property Children[Index: Integer]: TContentParticle read GetChild;
end;
TElementValidator = object
FElement: TDOMElement;
FElementDef: TDOMElementDef;
FCurCP: TContentParticle;
FFailed: Boolean;
function IsElementAllowed(Def: TDOMElementDef): Boolean;
function Incomplete: Boolean;
end;
TXMLReadState = (rsProlog, rsDTD, rsRoot, rsEpilog);
TElementContentType = (
ctUndeclared,
ctAny,
ctEmpty,
ctMixed,
ctChildren
);
TCheckNameFlags = set of (cnOptional, cnToken);
TPrefixedAttr = record
Attr: TDOMAttr;
PrefixLen: Integer; // to avoid recalculation
end;
TLiteralType = (ltPlain, ltAttr, ltTokAttr, ltPubid, ltEntity);
TXMLReader = class
private
FFlags: TXMLReaderFlags;
FSource: TXMLCharSource;
FCtrl: TDOMParser;
FXML11: Boolean;
FState: TXMLReadState;
FRecognizePE: Boolean;
FHavePERefs: Boolean;
FInsideDecl: Boolean;
FDocNotValid: Boolean;
FValue: TDOMCharBuf;
FEntityValue: TDOMCharBuf;
FName: TDOMCharBuf;
FTokenStart: TLocation;
FStandalone: Boolean; // property of Doc ?
FNamePages: PByteArray;
FDocType: TDOMDocumentTypeEx; // a shortcut
FPEMap: TDOMNamedNodeMap;
FIDRefs: TFPList;
FNotationRefs: TFPList;
FCurrContentType: TElementContentType;
FSaViolation: Boolean;
FDTDStartPos: DOMPChar;
FIntSubset: TDOMCharBuf;
FAttrTag: Cardinal;
FOwnsDoctype: Boolean;
FDTDProcessed: Boolean;
FNSHelper: TNSSupport;
FWorkAtts: array of TPrefixedAttr;
FNsAttHash: TDblHashArray;
FStdPrefix_xml: PHashItem;
FStdPrefix_xmlns: PHashItem;
FColonPos: Integer;
FValidate: Boolean; // parsing options, copy of FCtrl.Options
FPreserveWhitespace: Boolean;
FExpandEntities: Boolean;
FIgnoreComments: Boolean;
FCDSectionsAsText: Boolean;
FResolveExternals: Boolean;
FNamespaces: Boolean;
FDisallowDoctype: Boolean;
FCanonical: Boolean;
FMaxChars: Cardinal;
procedure SkipQuote(out Delim: DOMChar; required: Boolean = True);
procedure Initialize(ASource: TXMLCharSource);
function ContextPush(AEntity: TDOMEntityEx): Boolean;
function ContextPop(Forced: Boolean = False): Boolean;
procedure XML11_BuildTables;
procedure ParseQuantity(CP: TContentParticle);
procedure StoreLocation(out Loc: TLocation);
function ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: DOMString): Boolean;
procedure ValidateAttrValue(Attr: TDOMAttr; const aValue: DOMString);
procedure AddForwardRef(aList: TFPList; Buf: DOMPChar; Length: Integer);
procedure ClearRefs(aList: TFPList);
procedure ValidateIdRefs;
procedure StandaloneError(LineOffs: Integer = 0);
procedure CallErrorHandler(E: EXMLReadError);
function FindOrCreateElDef: TDOMElementDef;
function SkipUntilSeq(const Delim: TSetOfChar; c1: DOMChar; c2: DOMChar = #0): Boolean;
procedure CheckMaxChars;
protected
FCursor: TDOMNode_WithChildren;
FNesting: Integer;
FValidator: array of TElementValidator;
procedure DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer=0);
procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
const ErrPos: TLocation);
procedure FatalError(const descr: String; LineOffs: Integer=0); overload;
procedure FatalError(const descr: string; const args: array of const; LineOffs: Integer=0); overload;
procedure FatalError(Expected: DOMChar); overload;
function SkipWhitespace(PercentAloneIsOk: Boolean = False): Boolean;
function SkipS(required: Boolean = False): Boolean;
procedure ExpectWhitespace;
procedure ExpectString(const s: String);
procedure ExpectChar(wc: DOMChar);
function CheckForChar(c: DOMChar): Boolean;
procedure RaiseNameNotFound;
function CheckName(aFlags: TCheckNameFlags = []): Boolean;
procedure CheckNCName;
function ExpectName: DOMString; // [5]
function ParseLiteral(var ToFill: TDOMCharBuf; aType: TLiteralType;
Required: Boolean; Normalized: PBoolean = nil): Boolean;
procedure ExpectAttValue; // [10]
procedure ParseComment; // [15]
procedure ParsePI; // [16]
procedure ParseXmlOrTextDecl(TextDecl: Boolean);
procedure ExpectEq;
procedure ParseDoctypeDecl; // [28]
procedure ParseMarkupDecl; // [29]
procedure ParseElement; // [39]
procedure ParseEndTag; // [42]
procedure DoEndElement(ErrOffset: Integer);
procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
procedure ParseContent; // [43]
function ResolvePredefined: Boolean;
function EntityCheck(NoExternals: Boolean = False): TDOMEntityEx;
procedure AppendReference(AEntity: TDOMEntityEx);
function PrefetchEntity(AEntity: TDOMEntityEx): Boolean;
procedure StartPE;
function ParseRef(var ToFill: TDOMCharBuf): Boolean; // [67]
function ParseExternalID(out SysID, PubID: DOMString; // [75]
SysIdOptional: Boolean): Boolean;
procedure BadPENesting(S: TErrorSeverity = esError);
procedure ParseEntityDecl;
procedure ParseAttlistDecl;
procedure ExpectChoiceOrSeq(CP: TContentParticle);
procedure ParseElementDecl;
procedure ParseNotationDecl;
function ResolveEntity(const SystemID, PublicID, BaseURI: DOMString; out Source: TXMLCharSource): Boolean;
procedure ProcessDefaultAttributes(Element: TDOMElement; Map: TDOMNamedNodeMap);
procedure ProcessNamespaceAtts(Element: TDOMElement);
procedure AddBinding(Attr: TDOMAttr; PrefixPtr: DOMPChar; PrefixLen: Integer);
procedure PushVC(aElement: TDOMElement; aElDef: TDOMElementDef);
procedure PopVC;
procedure UpdateConstraints;
procedure ValidateDTD;
procedure ValidateRoot;
procedure ValidationError(const Msg: string; const args: array of const; LineOffs: Integer = -1);
procedure DoAttrText(ch: DOMPChar; Count: Integer);
procedure DTDReloadHook;
procedure ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
// Some SAX-alike stuff (at a very early stage)
procedure DoText(ch: DOMPChar; Count: Integer; Whitespace: Boolean=False);
procedure DoComment(ch: DOMPChar; Count: Integer);
procedure DoCDSect(ch: DOMPChar; Count: Integer);
procedure DoNotationDecl(const aName, aPubID, aSysID: DOMString);
public
doc: TDOMDocument;
constructor Create; overload;
constructor Create(AParser: TDOMParser); overload;
destructor Destroy; override;
procedure ProcessXML(ASource: TXMLCharSource); // [1]
procedure ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode);
procedure ProcessDTD(ASource: TXMLCharSource); // ([29])
property Flags: TXMLReaderFlags read FFlags write FFlags;
end;
// Attribute/Element declarations
TDOMElementDef = class(TDOMElement)
public
FExternallyDeclared: Boolean;
ContentType: TElementContentType;
IDAttr: TDOMAttrDef;
NotationAttr: TDOMAttrDef;
RootCP: TContentParticle;
destructor Destroy; override;
end;
const
NullLocation: TLocation = (Line: 0; LinePos: 0);
{ Decoders }
var
Decoders: array of TGetDecoderProc;
procedure RegisterDecoder(Proc: TGetDecoderProc);
var
L: Integer;
begin
L := Length(Decoders);
SetLength(Decoders, L+1);
Decoders[L] := Proc;
end;
function FindDecoder(const AEncoding: string; out Decoder: TDecoder): Boolean;
var
I: Integer;
begin
Result := False;
for I := 0 to High(Decoders) do
if Decoders[I](AEncoding, Decoder) then
begin
Result := True;
Exit;
end;
end;
function WriteUTF8(u: cardinal; var OutBuf: DOMPChar; var OutCnt: Cardinal): boolean; inline;
begin
case u of
0..$7f:
begin
if OutCnt<1 then exit(false);
dec(OutCnt);
OutBuf[0]:=char(byte(u));
inc(OutBuf);
end;
$80..$7ff:
begin
if OutCnt<2 then exit(false);
dec(OutCnt,2);
OutBuf[0]:=char(byte($c0 or (u shr 6)));
OutBuf[1]:=char(byte($80 or (u and $3f)));
inc(OutBuf,2);
end;
$800..$ffff:
begin
if OutCnt<3 then exit(false);
dec(OutCnt,3);
OutBuf[0]:=char(byte($e0 or (u shr 12)));
OutBuf[1]:=char(byte((u shr 6) and $3f) or $80);
OutBuf[2]:=char(byte(u and $3f) or $80);
inc(OutBuf,3);
end;
$10000..$10ffff:
begin
if OutCnt<4 then exit(false);
dec(OutCnt,4);
OutBuf[0]:=char(byte($f0 or (u shr 18)));
OutBuf[1]:=char(byte((u shr 12) and $3f) or $80);
OutBuf[2]:=char(byte((u shr 6) and $3f) or $80);
OutBuf[3]:=char(byte(u and $3f) or $80);
inc(OutBuf,3);
end;
else
exit(false);
end;
Result:=true;
end;
function Decode_UCS2(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: DOMPChar; var OutCnt: Cardinal): Integer; stdcall;
{$IFDEF UseUTF8}
var
u: cardinal;
OldOutCnt: cardinal;
begin
Result:=0;
OldOutCnt:=OutCnt;
while InCnt>1 do begin
u:=PWord(InBuf)^;
inc(InBuf,2);
if not WriteUTF8(u,OutBuf,OutCnt) then break;
dec(InCnt,2);
end;
Result:=OldOutCnt-OutCnt;
end;
{$ENDIF UseUTF8}
{$IFDEF UseWideString}
var
cnt: Cardinal;
begin
cnt := OutCnt; // num of DOMchars
if cnt > InCnt div sizeof(DOMChar) then
cnt := InCnt div sizeof(DOMChar);
Move(InBuf^, OutBuf^, cnt * sizeof(DOMChar));
Dec(InCnt, cnt*sizeof(DOMChar));
Dec(OutCnt, cnt);
Result := cnt;
end;
{$ENDIF UseWideString}
function Decode_UCS2_Swapped(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: DOMPChar; var OutCnt: Cardinal): Integer; stdcall;
{$IFDEF UseUTF8}
var
u: cardinal;
OldOutCnt: cardinal;
begin
Result:=0;
OldOutCnt:=OutCnt;
while InCnt>1 do begin
u:=(ord(InBuf^) shl 8) or ord(InBuf[1]);
inc(InBuf,2);
if not WriteUTF8(u,OutBuf,OutCnt) then break;
dec(InCnt,2);
end;
Result:=OldOutCnt-OutCnt;
end;
{$ENDIF UseUTF8}
{$IFDEF UseWideString}
var
I: Integer;
cnt: Cardinal;
InPtr: PChar;
begin
cnt := OutCnt; // num of DOMchars
if cnt > InCnt div sizeof(DOMChar) then
cnt := InCnt div sizeof(DOMChar);
InPtr := InBuf;
for I := 0 to cnt-1 do
begin
OutBuf[I] := DOMChar((ord(InPtr^) shl 8) or ord(InPtr[1]));
Inc(InPtr, 2);
end;
Dec(InCnt, cnt*sizeof(DOMChar));
Dec(OutCnt, cnt);
Result := cnt;
end;
{$ENDIF UseWideString}
function Decode_88591(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: DOMPChar; var OutCnt: Cardinal): Integer; stdcall;
{$IFDEF UseUTF8}
var
u: cardinal;
OldOutCnt: cardinal;
begin
Result:=0;
OldOutCnt:=OutCnt;
while InCnt>0 do begin
u:=ord(InBuf^);
inc(InBuf);
if not WriteUTF8(u,OutBuf,OutCnt) then break;
dec(InCnt);
end;
Result:=OldOutCnt-OutCnt;
end;
{$ENDIF UseUTF8}
{$IFDEF UseWideString}
var
I: Integer;
cnt: Cardinal;
begin
cnt := OutCnt; // num of DOMchars
if cnt > InCnt then
cnt := InCnt;
for I := 0 to cnt-1 do // ToDo: check for >#127
OutBuf[I] := DOMChar(ord(InBuf[I]));
Dec(InCnt, cnt);
Dec(OutCnt, cnt);
Result := cnt;
end;
{$ENDIF}
function Decode_UTF8(Context: Pointer; InBuf: PChar; var InCnt: Cardinal; OutBuf: DOMPChar; var OutCnt: Cardinal): Integer; stdcall;
{$IFDEF UseUTF8}
var
cnt: Cardinal;
begin
cnt := OutCnt; // num of DOMchars
if cnt > InCnt then
cnt := InCnt;
if cnt>0 then begin
System.Move(InBuf^,OutBuf^,cnt);
Dec(InCnt, cnt);
Dec(OutCnt, cnt);
end;
Result := cnt;
end;
{$ENDIF UseUTF8}
{$IFDEF UseWideString}
const
MaxCode: array[1..4] of Cardinal = ($7F, $7FF, $FFFF, $1FFFFF);
var
i, j, bc: Cardinal;
Value: Cardinal;
begin
result := 0;
i := OutCnt;
while (i > 0) and (InCnt > 0) do
begin
bc := 1;
Value := ord(InBuf^);
if Value < $80 then
OutBuf^ := DOMChar(Value)
else
begin
if Value < $C2 then
begin
Result := -1;
Break;
end;
Inc(bc);
if Value > $DF then
begin
Inc(bc);
if Value > $EF then
begin
Inc(bc);
if Value > $F7 then // never encountered in the tests.
begin
Result := -1;
Break;
end;
end;
end;
if InCnt < bc then
Break;
j := 1;
while j < bc do
begin
if InBuf[j] in [#$80..#$BF] then
Value := (Value shl 6) or (Cardinal(InBuf[j]) and $3F)
else
begin
Result := -1;
Break;
end;
Inc(j);
end;
Value := Value and MaxCode[bc];
// RFC2279 check
if Value <= MaxCode[bc-1] then
begin
Result := -1;
Break;
end;
case Value of
0..$D7FF, $E000..$FFFF: OutBuf^ := DOMChar(Value);
$10000..$10FFFF:
begin
if i < 2 then Break;
OutBuf^ := DOMChar($D7C0 + (Value shr 10));
OutBuf[1] := DOMChar($DC00 xor (Value and $3FF));
Inc(OutBuf); // once here
Dec(i);
end
else
begin
Result := -1;
Break;
end;
end;
end;
Inc(OutBuf);
Inc(InBuf, bc);
Dec(InCnt, bc);
Dec(i);
end;
if Result >= 0 then
Result := OutCnt-i;
OutCnt := i;
end;
{$ENDIF UseWideString}
function Is_8859_1(const AEncoding: string): Boolean;
begin
Result := SameText(AEncoding, 'ISO-8859-1') or
SameText(AEncoding, 'ISO_8859-1') or
SameText(AEncoding, 'latin1') or
SameText(AEncoding, 'iso-ir-100') or
SameText(AEncoding, 'l1') or
SameText(AEncoding, 'IBM819') or
SameText(AEncoding, 'CP819') or
SameText(AEncoding, 'csISOLatin1') or
// This one is not in character-sets.txt, but was used in FPC documentation,
// and still being used in fcl-registry package
SameText(AEncoding, 'ISO8859-1');
end;
procedure BufAllocate(var ABuffer: TDOMCharBuf; ALength: Integer);
begin
ABuffer.MaxLength := ALength;
ABuffer.Length := 0;
ABuffer.Buffer := AllocMem(ABuffer.MaxLength*SizeOf(DOMChar));
end;
procedure BufAppend(var ABuffer: TDOMCharBuf; wc: DOMChar);
begin
if ABuffer.Length >= ABuffer.MaxLength then
begin
ReallocMem(ABuffer.Buffer, ABuffer.MaxLength * 2 * SizeOf(DOMChar));
FillChar(ABuffer.Buffer[ABuffer.MaxLength], ABuffer.MaxLength * SizeOf(DOMChar),0);
ABuffer.MaxLength := ABuffer.MaxLength * 2;
end;
ABuffer.Buffer[ABuffer.Length] := wc;
Inc(ABuffer.Length);
end;
procedure BufAppendChunk(var ABuf: TDOMCharBuf; pstart, pend: DOMPChar);
var
Len: Integer;
begin
Len := PEnd - PStart;
if Len <= 0 then
Exit;
if Len >= ABuf.MaxLength - ABuf.Length then
begin
ABuf.MaxLength := (Len + ABuf.Length)*2;
// note: memory clean isn't necessary here.
// To avoid garbage, control Length field.
ReallocMem(ABuf.Buffer, ABuf.MaxLength * sizeof(DOMChar));
end;
Move(pstart^, ABuf.Buffer[ABuf.Length], Len * sizeof(DOMChar));
Inc(ABuf.Length, Len);
end;
function BufEquals(const ABuf: TDOMCharBuf; const Arg: DOMString): Boolean;
begin
Result := (ABuf.Length = Length(Arg)) and
CompareMem(ABuf.Buffer, Pointer(Arg), ABuf.Length*sizeof(DOMChar));
end;
{ TDOMParseOptions }
function TDOMParseOptions.GetCanonical: Boolean;
begin
Result := FCanonical and FExpandEntities and FCDSectionsAsText and
{ (not normalizeCharacters) and } FNamespaces and
{ namespaceDeclarations and } FPreserveWhitespace;
end;
procedure TDOMParseOptions.SetCanonical(aValue: Boolean);
begin
FCanonical := aValue;
if aValue then
begin
FExpandEntities := True;
FCDSectionsAsText := True;
FNamespaces := True;
FPreserveWhitespace := True;
{ normalizeCharacters := False; }
{ namespaceDeclarations := True; }
{ wellFormed := True; }
end;
end;
{ TXMLInputSource }
constructor TXMLInputSource.Create(AStream: TStream);
begin
inherited Create;
FStream := AStream;
end;
constructor TXMLInputSource.Create(const AStringData: string);
begin
inherited Create;
FStringData := AStringData;
end;
{ TDOMParser }
constructor TDOMParser.Create;
begin
FOptions := TDOMParseOptions.Create;
end;
destructor TDOMParser.Destroy;
begin
FOptions.Free;
inherited Destroy;
end;
procedure TDOMParser.Parse(Src: TXMLInputSource; out ADoc: TXMLDocument);
var
InputSrc: TXMLCharSource;
begin
with TXMLReader.Create(Self) do
try
ConvertSource(Src, InputSrc); // handles 'no-input-specified' case
ProcessXML(InputSrc)
finally
ADoc := TXMLDocument(doc);
Free;
end;
end;
procedure TDOMParser.ParseUri(const URI: DOMString; out ADoc: TXMLDocument);
var
Src: TXMLCharSource;
begin
ADoc := nil;
with TXMLReader.Create(Self) do
try
if ResolveEntity(URI, '', '', Src) then
ProcessXML(Src)
else
DoErrorPos(esFatal, 'The specified URI could not be resolved', NullLocation);
finally
ADoc := TXMLDocument(doc);
Free;
end;
end;
function TDOMParser.ParseWithContext(Src: TXMLInputSource;
Context: TDOMNode; Action: TXMLContextAction): TDOMNode;
var
InputSrc: TXMLCharSource;
Frag: TDOMDocumentFragment;
node: TDOMNode;
begin
if Action in [xaInsertBefore, xaInsertAfter, xaReplace] then
node := Context.ParentNode
else
node := Context;
// TODO: replacing document isn't yet supported
if (Action = xaReplaceChildren) and (node.NodeType = DOCUMENT_NODE) then
raise EDOMNotSupported.Create('DOMParser.ParseWithContext');
if not (node.NodeType in [ELEMENT_NODE, DOCUMENT_FRAGMENT_NODE]) then
raise EDOMHierarchyRequest.Create('DOMParser.ParseWithContext');
with TXMLReader.Create(Self) do
try
ConvertSource(Src, InputSrc); // handles 'no-input-specified' case
Frag := Context.OwnerDocument.CreateDocumentFragment;
try
ProcessFragment(InputSrc, Frag);
Result := Frag.FirstChild;
case Action of
xaAppendAsChildren: Context.AppendChild(Frag);
xaReplaceChildren: begin
Context.TextContent := ''; // removes children
Context.ReplaceChild(Frag, Context.FirstChild);
end;
xaInsertBefore: node.InsertBefore(Frag, Context);
xaInsertAfter: node.InsertBefore(Frag, Context.NextSibling);
xaReplace: node.ReplaceChild(Frag, Context);
end;
finally
Frag.Free;
end;
finally
Free;
end;
end;
{ TXMLCharSource }
constructor TXMLCharSource.Create(const AData: DOMString);
begin
inherited Create;
FLineNo := 1;
FBuf := DOMPChar(AData);
FBufEnd := FBuf + Length(AData);
LFPos := FBuf-1;
FCharCount := Length(AData);
end;
procedure TXMLCharSource.Initialize;
begin
end;
function TXMLCharSource.SetEncoding(const AEncoding: string): Boolean;
begin
Result := True; // always succeed
end;
function TXMLCharSource.GetSystemID: DOMString;
begin
if FSystemID <> '' then
Result := FSystemID
else if Assigned(FParent) then
Result := FParent.SystemID
else
Result := '';
end;
function TXMLCharSource.Reload: Boolean;
begin
Result := False;
end;
procedure TXMLCharSource.NewLine;
begin
Inc(FLineNo);
LFPos := FBuf;
end;
function TXMLCharSource.SkipUntil(var ToFill: TDOMCharBuf; const Delim: TSetOfChar;
wsflag: PBoolean; AllowSpecialChars: boolean): DOMChar;
var
old: DOMPChar;
nonws: Boolean;
begin
old := FBuf;
nonws := False;
repeat
if FBuf^ = #10 then
NewLine;
if (FBuf^ < #255) and (Char(ord(FBuf^)) in Delim) then
Break;
if (FBuf^ > #32) or not (Char(ord(FBuf^)) in [#32, #9, #10, #13]) then
nonws := True;
Inc(FBuf);
until False;
Result := FBuf^;
BufAppendChunk(ToFill, old, FBuf);
if Assigned(wsflag) then
wsflag^ := wsflag^ or nonws;
end;
function TXMLCharSource.Matches(const arg: DOMString): Boolean;
begin
Result := False;
if (FBufEnd >= FBuf + Length(arg)) or Reload then
Result := CompareMem(Pointer(arg), FBuf, Length(arg)*sizeof(DOMChar));
if Result then
begin
Inc(FBuf, Length(arg));
if FBuf >= FBufEnd then
Reload;
end;
end;
{ TXMLDecodingSource }
procedure TXMLDecodingSource.AfterConstruction;
begin
inherited AfterConstruction;
FBufStart := AllocMem(4096);
FBuf := FBufStart;
FBufEnd := FBuf;
LFPos := FBuf-1;
end;
destructor TXMLDecodingSource.Destroy;
begin
FreeMem(FBufStart);
if Assigned(FDecoder.Cleanup) then
FDecoder.Cleanup(FDecoder.Context);
inherited Destroy;
end;
procedure TXMLDecodingSource.FetchData;
begin
end;
procedure TXMLDecodingSource.DecodingError(const Msg: string);
begin
// count line endings to obtain correct error location
while FBuf < FBufEnd do
begin
if (FBuf^ = #10) or (FBuf^ = #13)
or (FXML11Rules and ((FBuf^ = #$85) or (FBuf^ = #$2028))) // ToDo #$2028
then begin
if (FBuf^ = #13) and (FBuf < FBufEnd-1) and
((FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85))) then
Inc(FBuf);
LFPos := FBuf;
Inc(FLineNo);
end;
Inc(FBuf);
end;
FReader.FatalError(Msg);
end;
function TXMLDecodingSource.Reload: Boolean;
var
Remainder: PtrInt;
r, inLeft: Cardinal;
rslt: Integer;
begin
if DTDSubsetType = dsInternal then
FReader.DTDReloadHook;
Remainder := FBufEnd - FBuf;
if Remainder > 0 then
Move(FBuf^, FBufStart^, Remainder * sizeof(DOMChar));
Dec(LFPos, FBuf-FBufStart);
FBuf := FBufStart;
FBufEnd := FBufStart + Remainder;
repeat
inLeft := FCharBufEnd - FCharBuf;
if inLeft < 4 then // may contain an incomplete char
begin
FetchData;
inLeft := FCharBufEnd - FCharBuf;
if inLeft <= 0 then
Break;
end;
r := FBufStart + FBufSize - FBufEnd;
if r = 0 then
Break;
rslt := FDecoder.Decode(FDecoder.Context, FCharBuf, inLeft, FBufEnd, r);
{ Sanity checks: r and inLeft must not increase. }
if inLeft + FCharBuf <= FCharBufEnd then
FCharBuf := FCharBufEnd - inLeft
else
DecodingError('Decoder error: input byte count out of bounds');
if r + FBufEnd <= FBufStart + FBufSize then
FBufEnd := FBufStart + FBufSize - r
else
DecodingError('Decoder error: output char count out of bounds');
if rslt = 0 then
Break
else if rslt < 0 then
DecodingError('Invalid character in input stream')
else
begin
Inc(FCharCount, rslt);
FReader.CheckMaxChars;
end;
until False;
FBufEnd^ := #0;
Result := FBuf < FBufEnd;
end;
const
XmlSign: array [0..4] of DOMChar = ('<', '?', 'x', 'm', 'l');
procedure TXMLDecodingSource.Initialize;
begin
inherited;
FLineNo := 1;
FXml11Rules := FReader.FXML11;
FDecoder.Decode := @Decode_UTF8;
FFixedUCS2 := '';
if FCharBufEnd-FCharBuf > 1 then
begin
if (FCharBuf[0] = #$FE) and (FCharBuf[1] = #$FF) then
begin
FFixedUCS2 := 'UTF-16BE';
FDecoder.Decode := {$IFNDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
end
else if (FCharBuf[0] = #$FF) and (FCharBuf[1] = #$FE) then
begin
FFixedUCS2 := 'UTF-16LE';
FDecoder.Decode := {$IFDEF ENDIAN_BIG} @Decode_UCS2_Swapped {$ELSE} @Decode_UCS2 {$ENDIF};
end;
end;
FBufSize := 6; // possible BOM and '<?xml'
Reload;
if FBuf^ = #$FEFF then
begin
FHasBOM := True;
Inc(FBuf);
end;
LFPos := FBuf-1;
if CompareMem(FBuf, @XmlSign[0], sizeof(XmlSign)) then
begin
FBufSize := 3; // don't decode past XML declaration
Inc(FBuf, Length(XmlSign));
FReader.ParseXmlOrTextDecl(FParent <> nil);
end;
FBufSize := 2047;
end;
function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean;
var
NewDecoder: TDecoder;
begin
Result := True;
if (FFixedUCS2 = '') and SameText(AEncoding, 'UTF-8') then
Exit;
if FFixedUCS2 <> '' then
begin
Result := SameText(AEncoding, FFixedUCS2) or
SameText(AEncoding, 'UTF-16') or
SameText(AEncoding, 'unicode');
Exit;
end;
// TODO: must fail when a byte-based stream is labeled as word-based.
// see rmt-e2e-61, it now fails but for a completely different reason.
FillChar(NewDecoder, sizeof(TDecoder), 0);
if Is_8859_1(AEncoding) then
FDecoder.Decode := @Decode_88591
else if FindDecoder(AEncoding, NewDecoder) then
FDecoder := NewDecoder
else
Result := False;
end;
procedure TXMLDecodingSource.NewLine;
begin
case FBuf^ of
#10: begin
Inc(FLineNo);
LFPos := FBuf;
end;
#13: begin
Inc(FLineNo);
LFPos := FBuf;
// Reload trashes the buffer, it should be consumed beforehand
if (FBufEnd >= FBuf+2) or Reload then
begin
if (FBuf[1] = #10) or (FXML11Rules and (FBuf[1] = #$85)) then
begin
Inc(FBuf);
Inc(LFPos);
end;
FBuf^ := #10;
end;
end;
#$85: // ToDo #$2028
if FXML11Rules then
begin
FBuf^ := #10;
Inc(FLineNo);
LFPos := FBuf;
end;
end;
end;
{ TXMLStreamInputSource }
const
Slack = 16;
constructor TXMLStreamInputSource.Create(AStream: TStream; AOwnStream: Boolean);
begin
FStream := AStream;
FCapacity := 4096;
GetMem(FAllocated, FCapacity+Slack);
FCharBuf := FAllocated+(Slack-4);
FCharBufEnd := FCharBuf;
FOwnStream := AOwnStream;
FetchData;
end;
destructor TXMLStreamInputSource.Destroy;
begin
FreeMem(FAllocated);
if FOwnStream then
FStream.Free;
inherited Destroy;
end;
procedure TXMLStreamInputSource.FetchData;
var
Remainder, BytesRead: Integer;
OldBuf: PChar;
begin
Assert(FCharBufEnd - FCharBuf < Slack-4);
if FEof then
Exit;
OldBuf := FCharBuf;
Remainder := FCharBufEnd - FCharBuf;
if Remainder < 0 then
Remainder := 0;
FCharBuf := FAllocated+Slack-4-Remainder;
if Remainder > 0 then
Move(OldBuf^, FCharBuf^, Remainder);
BytesRead := FStream.Read(FAllocated[Slack-4], FCapacity);
if BytesRead < FCapacity then
FEof := True;
FCharBufEnd := FAllocated + (Slack-4) + BytesRead;
{ Null-termination has been removed:
1) Built-in decoders don't need it because they respect the buffer length.
2) It was causing unaligned access errors on ARM CPUs.
}
//DOMPChar(FCharBufEnd)^ := #0;
end;
{ TXMLFileInputSource }
constructor TXMLFileInputSource.Create(var AFile: Text);
begin
FFile := @AFile;
SystemID := FilenameToURI(TTextRec(AFile).Name);
FetchData;
end;
procedure TXMLFileInputSource.FetchData;
var
Remainder: Integer;
begin
if not Eof(FFile^) then
begin
Remainder := FCharBufEnd - FCharBuf;
if Remainder > 0 then
SetString(FTmp, FCharBuf, Remainder);
ReadLn(FFile^, FString);
FString := FString + #10; // bad solution...
if Remainder > 0 then
Insert(FTmp, FString, 1);
FCharBuf := PChar(FString);
FCharBufEnd := FCharBuf + Length(FString);
end;
end;
{ helper that closes handle upon destruction }
type
THandleOwnerStream = class(THandleStream)
public
destructor Destroy; override;
end;
destructor THandleOwnerStream.Destroy;
begin
if Handle >= 0 then FileClose(Handle);
inherited Destroy;
end;
{ TXMLReader }
procedure TXMLReader.ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
begin
SrcOut := nil;
if Assigned(SrcIn) then
begin
if Assigned(SrcIn.FStream) then
SrcOut := TXMLStreamInputSource.Create(SrcIn.FStream, False)
else if SrcIn.FStringData <> '' then
SrcOut := TXMLStreamInputSource.Create(TStringStream.Create(SrcIn.FStringData), True)
else if (SrcIn.SystemID <> '') then
ResolveEntity(SrcIn.SystemID, SrcIn.PublicID, SrcIn.BaseURI, SrcOut);
end;
if (SrcOut = nil) and (FSource = nil) then
DoErrorPos(esFatal, 'No input source specified', NullLocation);
end;
procedure TXMLReader.StoreLocation(out Loc: TLocation);
begin
Loc.Line := FSource.FLineNo;
Loc.LinePos := FSource.FBuf-FSource.LFPos;
end;
function TXMLReader.ResolveEntity(const SystemID, PublicID, BaseURI: DOMString; out Source: TXMLCharSource): Boolean;
var
AbsSysID: DOMString;
Filename: string;
Stream: TStream;
fd: THandle;
begin
Source := nil;
Result := False;
if not ResolveRelativeURI(BaseURI, SystemID, AbsSysID) then
Exit;
{ TODO: alternative resolvers
These may be 'internal' resolvers or a handler set by application.
Internal resolvers should probably produce a TStream
( so that internal classes need not be exported ).
External resolver will produce TXMLInputSource that should be converted.
External resolver must NOT be called for root entity.
External resolver can return nil, in which case we do the default }
if URIToFilename(AbsSysID, Filename) then
begin
fd := FileOpen(Filename, fmOpenRead + fmShareDenyWrite);
if fd <> THandle(-1) then
begin
Stream := THandleOwnerStream.Create(fd);
Source := TXMLStreamInputSource.Create(Stream, True);
Source.SystemID := AbsSysID; // <- Revisit: Really need absolute sysID?
end;
end;
Result := Assigned(Source);
end;
procedure TXMLReader.Initialize(ASource: TXMLCharSource);
begin
ASource.FParent := FSource;
FSource := ASource;
FSource.FReader := Self;
FSource.FStartNesting := FNesting;
FSource.Initialize;
end;
procedure TXMLReader.FatalError(Expected: DOMChar);
begin
// FIX: don't output what is found - anything may be found, including exploits...
FatalError('Expected "%1s"', [string(Expected)]);
end;
procedure TXMLReader.FatalError(const descr: String; LineOffs: Integer);
begin
DoError(esFatal, descr, LineOffs);
end;
procedure TXMLReader.FatalError(const descr: string; const args: array of const; LineOffs: Integer);
begin
DoError(esFatal, Format(descr, args), LineOffs);
end;
procedure TXMLReader.ValidationError(const Msg: string; const Args: array of const; LineOffs: Integer);
begin
FDocNotValid := True;
if FValidate then
DoError(esError, Format(Msg, Args), LineOffs);
end;
procedure TXMLReader.DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer);
var
Loc: TLocation;
begin
StoreLocation(Loc);
if LineOffs >= 0 then
begin
Dec(Loc.LinePos, LineOffs);
DoErrorPos(Severity, descr, Loc);
end
else
DoErrorPos(Severity, descr, FTokenStart);
end;
procedure TXMLReader.DoErrorPos(Severity: TErrorSeverity; const descr: string; const ErrPos: TLocation);
var
E: EXMLReadError;
sysid: DOMString;
begin
if Assigned(FSource) then
begin
sysid := FSource.FSystemID;
if (sysid = '') and Assigned(FSource.FEntity) then
sysid := TDOMEntityEx(FSource.FEntity).FURI;
E := EXMLReadError.CreateFmt('In ''%s'' (line %d pos %d): %s', [sysid, ErrPos.Line, ErrPos.LinePos, descr]);
end
else
E := EXMLReadError.Create(descr);
E.FSeverity := Severity;
E.FErrorMessage := descr;
E.FLine := ErrPos.Line;
E.FLinePos := ErrPos.LinePos;
CallErrorHandler(E);
// No 'finally'! If user handler raises exception, control should not get here
// and the exception will be freed in CallErrorHandler (below)
E.Free;
end;
procedure TXMLReader.CheckMaxChars;
var
src: TXMLCharSource;
total: Cardinal;
begin
if FMaxChars = 0 then
Exit;
src := FSource;
total := 0;
repeat
Inc(total, src.FCharCount);
if total > FMaxChars then
FatalError('Exceeded character count limit');
src := src.FParent;
until src = nil;
end;
procedure TXMLReader.CallErrorHandler(E: EXMLReadError);
begin
try
if Assigned(FCtrl) and Assigned(FCtrl.FOnError) then
FCtrl.FOnError(E);
if E.Severity = esFatal then
raise E;
except
if ExceptObject <> E then
E.Free;
raise;
end;
end;
function TXMLReader.SkipWhitespace(PercentAloneIsOk: Boolean): Boolean;
begin
Result := False;
repeat
Result := SkipS or Result;
if FSource.FBuf^ = #0 then
begin
Result := True; // report whitespace upon exiting the PE
if not ContextPop then
Break;
end
else if FSource.FBuf^ = '%' then
begin
if not FRecognizePE then
Break;
// This is the only case where look-ahead is needed
if FSource.FBuf > FSource.FBufEnd-2 then
FSource.Reload;
if (not PercentAloneIsOk) or (Byte(FSource.FBuf[1]) in NamingBitmap[FNamePages^[$100+hi(Word(FSource.FBuf[1]))]]) or
(FXML11 and (FSource.FBuf[1] >= #$D800) and (FSource.FBuf[1] <= #$DB7F)) then
begin
Inc(FSource.FBuf); // skip '%'
CheckName;
ExpectChar(';');
StartPE;
Result := True; // report whitespace upon entering the PE
end
else Break;
end
else
Break;
until False;
end;
procedure TXMLReader.ExpectWhitespace;
begin
if not SkipWhitespace then
FatalError('Expected whitespace');
end;
function TXMLReader.SkipS(Required: Boolean): Boolean;
var
p: DOMPChar;
begin
Result := False;
repeat
p := FSource.FBuf;
repeat
if (p^ = #10) or (p^ = #13)
or (FXML11 and ((p^ = #$85) or (p^ = #$2028))) // ToDo #$2028
then begin
FSource.FBuf := p;
FSource.NewLine;
p := FSource.FBuf;
end
else if (p^ <> #32) and (p^ <> #9) then
Break;
Inc(p);
Result := True;
until False;
FSource.FBuf := p;
until (p^ <> #0) or (not FSource.Reload);
if (not Result) and Required then
FatalError('Expected whitespace');
end;
procedure TXMLReader.ExpectString(const s: String);
var
I: Integer;
begin
for I := 1 to Length(s) do
begin
if FSource.FBuf^ <> DOMChar(ord(s[i])) then
FatalError('Expected "%s"', [s], i-1);
FSource.NextChar;
end;
end;
function TXMLReader.CheckForChar(c: DOMChar): Boolean;
begin
Result := (FSource.FBuf^ = c);
if Result then
begin
Inc(FSource.FBuf);
if FSource.FBuf >= FSource.FBufEnd then
FSource.Reload;
end;
end;
procedure TXMLReader.SkipQuote(out Delim: DOMChar; required: Boolean);
begin
Delim := #0;
if (FSource.FBuf^ = '''') or (FSource.FBuf^ = '"') then
begin
Delim := FSource.FBuf^;
FSource.NextChar; // skip quote
StoreLocation(FTokenStart);
end
else if required then
FatalError('Expected single or double quote');
end;
const
PrefixDefault: array[0..4] of DOMChar = ('x','m','l','n','s');
constructor TXMLReader.Create;
begin
inherited Create;
BufAllocate(FName, 128);
BufAllocate(FValue, 512);
FIDRefs := TFPList.Create;
FNotationRefs := TFPList.Create;
FNSHelper := TNSSupport.Create;
FNsAttHash := TDblHashArray.Create;
SetLength(FWorkAtts, 16);
FStdPrefix_xml := FNSHelper.GetPrefix(@PrefixDefault, 3);
FStdPrefix_xmlns := FNSHelper.GetPrefix(@PrefixDefault, 5);
// Set char rules to XML 1.0
FNamePages := @NamePages;
SetLength(FValidator, 16);
end;
constructor TXMLReader.Create(AParser: TDOMParser);
begin
Create;
FCtrl := AParser;
FValidate := FCtrl.Options.Validate;
FPreserveWhitespace := FCtrl.Options.PreserveWhitespace;
FExpandEntities := FCtrl.Options.ExpandEntities;
FCDSectionsAsText := FCtrl.Options.CDSectionsAsText;
FIgnoreComments := FCtrl.Options.IgnoreComments;
FResolveExternals := FCtrl.Options.ResolveExternals;
FNamespaces := FCtrl.Options.Namespaces;
FDisallowDoctype := FCtrl.Options.DisallowDoctype;
FCanonical := FCtrl.Options.CanonicalForm;
FMaxChars := FCtrl.Options.MaxChars;
end;
destructor TXMLReader.Destroy;
begin
if Assigned(FEntityValue.Buffer) then
FreeMem(FEntityValue.Buffer);
FreeMem(FName.Buffer);
FreeMem(FValue.Buffer);
if Assigned(FSource) then
while ContextPop(True) do; // clean input stack
FSource.Free;
FPEMap.Free;
ClearRefs(FNotationRefs);
ClearRefs(FIDRefs);
FNsAttHash.Free;
FNSHelper.Free;
if FOwnsDoctype then
FDocType.Free;
FNotationRefs.Free;
FIDRefs.Free;
inherited Destroy;
end;
procedure TXMLReader.XML11_BuildTables;
begin
FNamePages := Xml11NamePages;
FXML11 := True;
FSource.FXml11Rules := True;
end;
procedure TXMLReader.ProcessXML(ASource: TXMLCharSource);
begin
doc := TXMLDocument.Create;
doc.documentURI := ASource.SystemID; // TODO: to be changed to URI or BaseURI
FCursor := doc;
FState := rsProlog;
FNesting := 0;
Initialize(ASource);
ParseContent;
if FState < rsRoot then
FatalError('Root element is missing');
if FValidate and Assigned(FDocType) then
ValidateIdRefs;
end;
procedure TXMLReader.ProcessFragment(ASource: TXMLCharSource; AOwner: TDOMNode);
begin
doc := AOwner.OwnerDocument;
FCursor := AOwner as TDOMNode_WithChildren;
FState := rsRoot;
Initialize(ASource);
FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1');
ParseContent;
end;
function TXMLReader.CheckName(aFlags: TCheckNameFlags): Boolean;
var
p: DOMPChar;
NameStartFlag: Boolean;
begin
p := FSource.FBuf;
FName.Length := 0;
FColonPos := -1;
NameStartFlag := not (cnToken in aFlags);
repeat
if NameStartFlag then
begin
if (Byte(p^) in NamingBitmap[FNamePages^[hi(Word(p^))]]) or
((p^ = ':') and (not FNamespaces)) then
Inc(p)
else if FXML11 and ((p^ >= #$D800) and (p^ <= #$DB7F) and
(p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
Inc(p, 2)
else
begin
// here we come either when first char of name is bad (it may be a colon),
// or when a colon is not followed by a valid NameStartChar
FSource.FBuf := p;
Result := False;
Break;
end;
NameStartFlag := False;
end;
if FXML11 then
repeat
if Byte(p^) in NamingBitmap[FNamePages^[$100+hi(Word(p^))]] then
Inc(p)
else if ((p^ >= #$D800) and (p^ <= #$DB7F) and
(p[1] >= #$DC00) and (p[1] <= #$DFFF)) then
Inc(p,2)
else
Break;
until False
else
while Byte(p^) in NamingBitmap[FNamePages^[$100+hi(Word(p^))]] do
Inc(p);
if p^ = ':' then
begin
if (cnToken in aFlags) or not FNamespaces then // colon has no specific meaning
begin
Inc(p);
if p^ <> #0 then Continue;
end
else if FColonPos = -1 then // this is the first colon, remember it
begin
FColonPos := p-FSource.FBuf+FName.Length;
NameStartFlag := True;
Inc(p);
if p^ <> #0 then Continue;
end;
end;
BufAppendChunk(FName, FSource.FBuf, p);
Result := (FName.Length > 0);
FSource.FBuf := p;
if (p^ <> #0) or not FSource.Reload then
Break;
p := FSource.FBuf;
until False;
if not (Result or (cnOptional in aFlags)) then
RaiseNameNotFound;
end;
procedure TXMLReader.CheckNCName;
begin
if FNamespaces and (FColonPos <> -1) then
FatalError('Names of entities, notations and processing instructions may not contain colons', FName.Length);
end;
procedure TXMLReader.RaiseNameNotFound;
begin
if FColonPos <> -1 then
FatalError('Bad QName syntax, local part is missing')
else
// Coming at no cost, this allows more user-friendly error messages
with FSource do
if (FBuf^ = #32) or (FBuf^ = #10) or (FBuf^ = #9) or (FBuf^ = #13) then
FatalError('Whitespace is not allowed here')
else
FatalError('Name starts with invalid character');
end;
function TXMLReader.ExpectName: DOMString;
begin
CheckName;
SetString(Result, FName.Buffer, FName.Length);
end;
function TXMLReader.ResolvePredefined: Boolean;
var
wc: DOMChar;
begin
Result := False;
with FName do
begin
if (Length = 2) and (Buffer[1] = 't') then
begin
if Buffer[0] = 'l' then
wc := '<'
else if Buffer[0] = 'g' then
wc := '>'
else Exit;
end
else if Buffer[0] = 'a' then
begin
if (Length = 3) and (Buffer[1] = 'm') and (Buffer[2] = 'p') then
wc := '&'
else if (Length = 4) and (Buffer[1] = 'p') and (Buffer[2] = 'o') and
(Buffer[3] = 's') then
wc := ''''
else Exit;
end
else if (Length = 4) and (Buffer[0] = 'q') and (Buffer[1] = 'u') and
(Buffer[2] = 'o') and (Buffer[3] ='t') then
wc := '"'
else
Exit;
end; // with
BufAppend(FValue, wc);
Result := True;
end;
function TXMLReader.ParseRef(var ToFill: TDOMCharBuf): Boolean; // [67]
var
Value: Integer;
begin
FSource.NextChar; // skip '&'
Result := CheckForChar('#');
if Result then
begin
Value := 0;
if CheckForChar('x') then
repeat
case FSource.FBuf^ of
'0'..'9': Value := Value * 16 + Ord(FSource.FBuf^) - Ord('0');
'a'..'f': Value := Value * 16 + Ord(FSource.FBuf^) - (Ord('a') - 10);
'A'..'F': Value := Value * 16 + Ord(FSource.FBuf^) - (Ord('A') - 10);
else
Break;
end;
FSource.NextChar;
until Value > $10FFFF
else
repeat
case FSource.FBuf^ of
'0'..'9': Value := Value * 10 + Ord(FSource.FBuf^) - Ord('0');
else
Break;
end;
FSource.NextChar;
until Value > $10FFFF;
case Value of
$01..$08, $0B..$0C, $0E..$1F:
if FXML11 then
BufAppend(ToFill, DOMChar(Value))
else
FatalError('Invalid character reference');
$09, $0A, $0D, $20..$7F:
BufAppend(ToFill, DOMChar(Value));
{$IFDEF UseUTF8}
$80..$7ff:
begin
BufAppend(ToFill, DOMChar(byte($c0 or (Value shr 6))));
BufAppend(ToFill, DOMChar(byte($80 or (Value and $3f))));
end;
$800..$ffff:
begin
BufAppend(ToFill, DOMChar(byte($e0 or (Value shr 12))));
BufAppend(ToFill, DOMChar(byte((Value shr 6) and $3f) or $80));
BufAppend(ToFill, DOMChar(byte(Value and $3f) or $80));
end;
$10000..$10ffff:
begin
BufAppend(ToFill, DOMChar(byte($f0 or (Value shr 18))));
BufAppend(ToFill, DOMChar(byte((Value shr 12) and $3f) or $80));
BufAppend(ToFill, DOMChar(byte((Value shr 6) and $3f) or $80));
BufAppend(ToFill, DOMChar(byte(Value and $3f) or $80));
end;
{$ENDIF}
{$IFDEF UseWideString}
$D7FF, $E000..$FFFD:
BufAppend(ToFill, DOMChar(Value));
$10000..$10FFFF:
begin
BufAppend(ToFill, DOMChar($D7C0 + (Value shr 10)));
BufAppend(ToFill, DOMChar($DC00 xor (Value and $3FF)));
end;
{$ENDIF}
else
FatalError('Invalid character reference');
end;
end
else CheckName;
ExpectChar(';');
end;
const
AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
GT_Delim: TSetOfChar = [#0, '>'];
procedure TXMLReader.ExpectAttValue;
var
wc: DOMChar;
Delim: DOMChar;
ent: TDOMEntityEx;
start: TObject;
begin
SkipQuote(Delim);
FValue.Length := 0;
start := FSource.FEntity;
repeat
wc := FSource.SkipUntil(FValue, AttrDelims, nil, xrfAllowSpecialCharsInAttributeValue in Flags);
if (wc = '<') and (not (xrfAllowLowerThanInAttributeValue in Flags)) then
FatalError('Character ''<'' is not allowed in attribute value')
else if wc = '&' then
begin
if ParseRef(FValue) or ResolvePredefined then
Continue;
ent := EntityCheck(True);
if (ent = nil) or (not FExpandEntities) then
begin
if FValue.Length > 0 then
begin
DoAttrText(FValue.Buffer, FValue.Length);
FValue.Length := 0;
end;
AppendReference(ent);
end
else
ContextPush(ent);
end
else if wc <> #0 then
begin
FSource.NextChar;
if (wc = Delim) and (FSource.FEntity = start) then
Break;
if (wc = #10) or (wc = #9) or (wc = #13) then
wc := #32;
BufAppend(FValue, wc);
end
else if (FSource.FEntity = start) or not ContextPop then // #0
FatalError('Literal has no closing quote', -1);
until False;
if FValue.Length > 0 then
DoAttrText(FValue.Buffer, FValue.Length);
FValue.Length := 0;
end;
const
PrefixChar: array[Boolean] of string = ('', '%');
function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean;
var
Src: TXMLCharSource;
begin
if AEntity.FOnStack then
FatalError('Entity ''%s%s'' recursively references itself', [PrefixChar[AEntity.FIsPE], AEntity.FName]);
if (AEntity.SystemID <> '') and not AEntity.FPrefetched then
begin
Result := ResolveEntity(AEntity.SystemID, AEntity.PublicID, AEntity.FURI, Src);
if not Result then
begin
// TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]);
Exit;
end;
end
else
begin
Src := TXMLCharSource.Create(AEntity.FReplacementText);
Src.FLineNo := AEntity.FStartLocation.Line;
Src.LFPos := Src.FBuf - AEntity.FStartLocation.LinePos;
// needed in case of prefetched external PE
if AEntity.SystemID <> '' then
Src.SystemID := AEntity.FURI;
end;
AEntity.FOnStack := True;
Src.FEntity := AEntity;
Initialize(Src);
Result := True;
end;
function TXMLReader.ContextPop(Forced: Boolean): Boolean;
var
Src: TXMLCharSource;
Error: Boolean;
begin
Result := Assigned(FSource.FParent) and (Forced or (FSource.DTDSubsetType = dsNone));
if Result then
begin
Src := FSource.FParent;
Error := False;
if Assigned(FSource.FEntity) then
begin
TDOMEntityEx(FSource.FEntity).FOnStack := False;
TDOMEntityEx(FSource.FEntity).FCharCount := FSource.FCharCount;
// [28a] PE that was started between MarkupDecls may not end inside MarkupDecl
Error := TDOMEntityEx(FSource.FEntity).FBetweenDecls and FInsideDecl;
end;
FSource.Free;
FSource := Src;
// correct position of this error is after PE reference
if Error then
BadPENesting(esFatal);
end;
end;
function TXMLReader.EntityCheck(NoExternals: Boolean): TDOMEntityEx;
var
RefName: DOMString;
cnt: Integer;
SaveCursor: TDOMNode_WithChildren;
SaveState: TXMLReadState;
SaveElDef: TDOMElementDef;
SaveValue: TDOMCharBuf;
begin
Result := nil;
SetString(RefName, FName.Buffer, FName.Length);
cnt := FName.Length+2;
if Assigned(FDocType) then
Result := FDocType.Entities.GetNamedItem(RefName) as TDOMEntityEx;
if Result = nil then
begin
if FStandalone or (FDocType = nil) or not (FHavePERefs or (FDocType.SystemID <> '')) then
FatalError('Reference to undefined entity ''%s''', [RefName], cnt)
else
ValidationError('Undefined entity ''%s'' referenced', [RefName], cnt);
Exit;
end;
if FStandalone and Result.FExternallyDeclared then
FatalError('Standalone constraint violation', cnt);
if Result.NotationName <> '' then
FatalError('Reference to unparsed entity ''%s''', [RefName], cnt);
if NoExternals and (Result.SystemID <> '') then
FatalError('External entity reference is not allowed in attribute value', cnt);
if not Result.FResolved then
begin
// To build children of the entity itself, we must parse it "out of context"
SaveCursor := FCursor;
SaveElDef := FValidator[FNesting].FElementDef;
SaveState := FState;
SaveValue := FValue;
if ContextPush(Result) then
try
FCursor := Result; // build child node tree for the entity
Result.SetReadOnly(False);
FState := rsRoot;
FValidator[FNesting].FElementDef := nil;
UpdateConstraints;
FSource.DTDSubsetType := dsExternal; // avoids ContextPop at the end
BufAllocate(FValue, 256);
ParseContent;
Result.FResolved := True;
finally
FreeMem(FValue.Buffer);
FValue := SaveValue;
Result.SetReadOnly(True);
ContextPop(True);
FCursor := SaveCursor;
FState := SaveState;
FValidator[FNesting].FElementDef := SaveElDef;
UpdateConstraints;
end;
end;
// at this point we know the charcount of the entity being included
Inc(FSource.FCharCount, Result.FCharCount - cnt);
CheckMaxChars;
end;
procedure TXMLReader.StartPE;
var
PEName: DOMString;
PEnt: TDOMEntityEx;
begin
SetString(PEName, FName.Buffer, FName.Length);
PEnt := nil;
if Assigned(FPEMap) then
PEnt := FPEMap.GetNamedItem(PEName) as TDOMEntityEx;
if PEnt = nil then
begin
ValidationError('Undefined parameter entity ''%s'' referenced', [PEName], FName.Length+2);
// cease processing declarations, unless document is standalone.
FDTDProcessed := FStandalone;
Exit;
end;
{ cache an external PE so it's only fetched once }
if (PEnt.SystemID <> '') and (not PEnt.FPrefetched) and (not PrefetchEntity(PEnt)) then
begin
FDTDProcessed := FStandalone;
Exit;
end;
Inc(FSource.FCharCount, PEnt.FCharCount);
CheckMaxChars;
PEnt.FBetweenDecls := not FInsideDecl;
ContextPush(PEnt);
FHavePERefs := True;
end;
function TXMLReader.PrefetchEntity(AEntity: TDOMEntityEx): Boolean;
begin
Result := ContextPush(AEntity);
if Result then
try
FValue.Length := 0;
FSource.SkipUntil(FValue, [#0]);
SetString(AEntity.FReplacementText, FValue.Buffer, FValue.Length);
AEntity.FCharCount := FValue.Length;
AEntity.FStartLocation.Line := 1;
AEntity.FStartLocation.LinePos := 1;
AEntity.FURI := FSource.SystemID; // replace base URI with absolute one
finally
ContextPop;
AEntity.FPrefetched := True;
FValue.Length := 0;
end;
end;
procedure Normalize(var Buf: TDOMCharBuf; Modified: PBoolean);
var
Dst, Src: Integer;
begin
Dst := 0;
Src := 0;
// skip leading space if any
while (Src < Buf.Length) and (Buf.Buffer[Src] = ' ') do
Inc(Src);
while Src < Buf.Length do
begin
if Buf.Buffer[Src] = ' ' then
begin
// Dst cannot be 0 here, because leading space is already skipped
if Buf.Buffer[Dst-1] <> ' ' then
begin
Buf.Buffer[Dst] := ' ';
Inc(Dst);
end;
end
else
begin
Buf.Buffer[Dst] := Buf.Buffer[Src];
Inc(Dst);
end;
Inc(Src);
end;
// trailing space (only one possible due to compression)
if (Dst > 0) and (Buf.Buffer[Dst-1] = ' ') then
Dec(Dst);
if Assigned(Modified) then
Modified^ := Dst <> Buf.Length;
Buf.Length := Dst;
end;
const
LiteralDelims: array[TLiteralType] of TSetOfChar = (
[#0, '''', '"'], // ltPlain
[#0, '<', '&', '''', '"', #9, #10, #13], // ltAttr
[#0, '<', '&', '''', '"', #9, #10, #13], // ltTokAttr
[#0, '''', '"', #13, #10], // ltPubid
[#0, '%', '&', '''', '"'] // ltEntity
);
function TXMLReader.ParseLiteral(var ToFill: TDOMCharBuf; aType: TLiteralType;
Required: Boolean; Normalized: PBoolean): Boolean;
var
start: TObject;
wc, Delim: DOMChar;
ent: TDOMEntityEx;
begin
SkipQuote(Delim, Required);
Result := (Delim <> #0);
if not Result then
Exit;
ToFill.Length := 0;
start := FSource.FEntity;
repeat
wc := FSource.SkipUntil(ToFill, LiteralDelims[aType]);
if wc = '%' then { ltEntity only }
begin
FSource.NextChar;
CheckName;
ExpectChar(';');
if FSource.DTDSubsetType = dsInternal then
FatalError('PE reference not allowed here in internal subset', FName.Length+2);
StartPE;
end
else if wc = '&' then { ltAttr, ltTokAttr, ltEntity }
begin
if ParseRef(ToFill) then // charRefs always expanded
Continue;
if aType = ltEntity then // bypass
begin
BufAppend(ToFill, '&');
BufAppendChunk(ToFill, FName.Buffer, FName.Buffer + FName.Length);
BufAppend(ToFill, ';');
end
else // include
begin
if ResolvePredefined then
Continue;
ent := EntityCheck(True);
if ent = nil then
Continue;
ContextPush(ent);
end;
end
else if wc = '<' then
FatalError('Character ''<'' is not allowed in attribute value')
else if wc <> #0 then
begin
FSource.NextChar;
if (wc = #10) or (wc = #13) or (wc = #9) then
wc := #32
// terminating delimiter must be in the same context as the starting one
else if (wc = Delim) and (start = FSource.FEntity) then
Break;
BufAppend(ToFill, wc);
end
else if (FSource.FEntity = start) or not ContextPop then // #0
FatalError('Literal has no closing quote', -1);
until False;
if aType in [ltTokAttr, ltPubid] then
Normalize(ToFill, Normalized);
end;
function TXMLReader.SkipUntilSeq(const Delim: TSetOfChar; c1: DOMChar; c2: DOMChar = #0): Boolean;
var
wc: DOMChar;
begin
Result := False;
FValue.Length := 0;
StoreLocation(FTokenStart);
repeat
wc := FSource.SkipUntil(FValue, Delim);
if wc <> #0 then
begin
FSource.NextChar;
if (FValue.Length > ord(c2 <> #0)) then
begin
if (FValue.Buffer[FValue.Length-1] = c1) and
((c2 = #0) or ((c2 <> #0) and (FValue.Buffer[FValue.Length-2] = c2))) then
begin
Dec(FValue.Length, ord(c2 <> #0) + 1);
Result := True;
Exit;
end;
end;
BufAppend(FValue, wc);
end;
until wc = #0;
end;
procedure TXMLReader.ParseComment; // [15]
begin
ExpectString('--');
if SkipUntilSeq([#0, '-'], '-') then
begin
ExpectChar('>');
DoComment(FValue.Buffer, FValue.Length);
end
else
FatalError('Unterminated comment', -1);
end;
procedure TXMLReader.ParsePI; // [16]
var
Name, Value: DOMString;
PINode: TDOMProcessingInstruction;
begin
FSource.NextChar; // skip '?'
Name := ExpectName;
CheckNCName;
with FName do
if (Length = 3) and
((Buffer[0] = 'X') or (Buffer[0] = 'x')) and
((Buffer[1] = 'M') or (Buffer[1] = 'm')) and
((Buffer[2] = 'L') or (Buffer[2] = 'l')) then
begin
if Name <> 'xml' then
FatalError('''xml'' is a reserved word; it must be lowercase', FName.Length)
else
FatalError('XML declaration is not allowed here', FName.Length);
end;
if FSource.FBuf^ <> '?' then
SkipS(True);
if SkipUntilSeq(GT_Delim, '?') then
begin
SetString(Value, FValue.Buffer, FValue.Length);
// SAX: ContentHandler.ProcessingInstruction(Name, Value);
if FCurrContentType = ctEmpty then
ValidationError('Processing instructions are not allowed within EMPTY elements', []);
PINode := Doc.CreateProcessingInstruction(Name, Value);
if Assigned(FCursor) then
FCursor.AppendChild(PINode)
else // to comply with certain tests, insert PI from DTD before DTD
Doc.InsertBefore(PINode, FDocType);
end
else
FatalError('Unterminated processing instruction', -1);
end;
const
verStr: array[Boolean] of DOMString = ('1.0', '1.1');
procedure TXMLReader.ParseXmlOrTextDecl(TextDecl: Boolean);
var
TmpStr: DOMString;
IsXML11: Boolean;
Delim: DOMChar;
buf: array[0..31] of DOMChar;
I: Integer;
begin
SkipS(True);
// [24] VersionInfo: optional in TextDecl, required in XmlDecl
if (not TextDecl) or (FSource.FBuf^ = 'v') then
begin
ExpectString('version');
ExpectEq;
SkipQuote(Delim);
I := 0;
while (I < 3) and (FSource.FBuf^ <> Delim) do
begin
buf[I] := FSource.FBuf^;
Inc(I);
FSource.NextChar;
end;
if (I <> 3) or (buf[0] <> '1') or (buf[1] <> '.') or
((buf[2] <> '0') and (buf[2] <> '1')) then
FatalError('Illegal version number', -1);
ExpectChar(Delim);
IsXML11 := buf[2] = '1';
if not TextDecl then
begin
if doc.InheritsFrom(TXMLDocument) then
TXMLDocument(doc).XMLVersion := verStr[IsXML11]; // buf[0..2] works with FPC only
end
else // parsing external entity
if IsXML11 and not FXML11 then
FatalError('XML 1.0 document cannot invoke XML 1.1 entities', -1);
if TextDecl or (FSource.FBuf^ <> '?') then
SkipS(True);
end;
// [80] EncodingDecl: required in TextDecl, optional in XmlDecl
if TextDecl or (FSource.FBuf^ = 'e') then
begin
ExpectString('encoding');
ExpectEq;
SkipQuote(Delim);
I := 0;
while (I < 30) and (FSource.FBuf^ <> Delim) and (FSource.FBuf^ < #127) and
((Char(ord(FSource.FBuf^)) in ['A'..'Z', 'a'..'z']) or
((I > 0) and (Char(ord(FSource.FBuf^)) in ['0'..'9', '.', '-', '_']))) do
begin
buf[I] := FSource.FBuf^;
Inc(I);
FSource.NextChar;
end;
if not CheckForChar(Delim) then
FatalError('Illegal encoding name', i);
SetString(TmpStr, buf, i);
if not FSource.SetEncoding(TmpStr) then // <-- Wide2Ansi conversion here
FatalError('Encoding ''%s'' is not supported', [TmpStr], i+1);
// getting here means that specified encoding is supported
// TODO: maybe assign the 'preferred' encoding name?
if not TextDecl and doc.InheritsFrom(TXMLDocument) then
TXMLDocument(doc).Encoding := TmpStr;
if FSource.FBuf^ <> '?' then
SkipS(not TextDecl);
end;
// [32] SDDecl: forbidden in TextDecl, optional in XmlDecl
if (not TextDecl) and (FSource.FBuf^ = 's') then
begin
ExpectString('standalone');
ExpectEq;
SkipQuote(Delim);
if FSource.Matches('yes') then
FStandalone := True
else if not FSource.Matches('no') then
FatalError('Only "yes" or "no" are permitted as values of "standalone"', -1);
ExpectChar(Delim);
SkipS;
end;
ExpectString('?>');
{ Switch to 1.1 rules only after declaration is parsed completely. This is to
ensure that NEL and LSEP within declaration are rejected (rmt-056, rmt-057) }
if (not TextDecl) and IsXML11 then
XML11_BuildTables;
end;
procedure TXMLReader.DTDReloadHook;
var
p: DOMPChar;
begin
{ FSource converts CR, NEL and LSEP linebreaks to LF, and CR-NEL sequences to CR-LF.
We must further remove the CR chars and have only LF's left. }
p := FDTDStartPos;
while p < FSource.FBuf do
begin
while (p < FSource.FBuf) and (p^ <> #13) do
Inc(p);
BufAppendChunk(FIntSubset, FDTDStartPos, p);
if p^ = #13 then
Inc(p);
FDTDStartPos := p;
end;
FDTDStartPos := TXMLDecodingSource(FSource).FBufStart;
end;
procedure TXMLReader.ParseDoctypeDecl; // [28]
var
Src: TXMLCharSource;
begin
if FState >= rsDTD then
FatalError('Markup declaration is not allowed here');
if FDisallowDoctype then
FatalError('Document type is prohibited by parser settings');
ExpectString('DOCTYPE');
SkipS(True);
FDocType := TDOMDocumentTypeEx(TDOMDocumentType.Create(doc));
FDTDProcessed := True; // assume success
FState := rsDTD;
try
FDocType.FName := ExpectName;
SkipS(True);
ParseExternalID(FDocType.FSystemID, FDocType.FPublicID, False);
SkipS;
finally
// DONE: append node after its name has been set; always append to avoid leak
if FCanonical then
FOwnsDoctype := True
else
Doc.AppendChild(FDocType);
FCursor := nil;
end;
if CheckForChar('[') then
begin
BufAllocate(FIntSubset, 256);
FSource.DTDSubsetType := dsInternal;
try
FDTDStartPos := FSource.FBuf;
ParseMarkupDecl;
DTDReloadHook; // fetch last chunk
SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length);
finally
FreeMem(FIntSubset.Buffer);
FSource.DTDSubsetType := dsNone;
end;
ExpectChar(']');
SkipS;
end;
ExpectChar('>');
if (FDocType.SystemID <> '') then
begin
if ResolveEntity(FDocType.SystemID, FDocType.PublicID, FSource.SystemID, Src) then
begin
Initialize(Src);
try
Src.DTDSubsetType := dsExternal;
ParseMarkupDecl;
finally
ContextPop(True);
end;
end
else
begin
ValidationError('Unable to resolve external DTD subset', []);
FDTDProcessed := FStandalone;
end;
end;
FCursor := Doc;
ValidateDTD;
FDocType.SetReadOnly(True);
end;
procedure TXMLReader.ExpectEq; // [25]
begin
if FSource.FBuf^ <> '=' then
SkipS;
if FSource.FBuf^ <> '=' then
FatalError('Expected "="');
FSource.NextChar;
SkipS;
end;
{ DTD stuff }
procedure TXMLReader.BadPENesting(S: TErrorSeverity);
begin
if (S = esFatal) or FValidate then
DoError(S, 'Parameter entities must be properly nested');
end;
procedure TXMLReader.StandaloneError(LineOffs: Integer);
begin
ValidationError('Standalone constriant violation', [], LineOffs);
end;
procedure TXMLReader.ParseQuantity(CP: TContentParticle);
begin
case FSource.FBuf^ of
'?': CP.CPQuant := cqZeroOrOnce;
'*': CP.CPQuant := cqZeroOrMore;
'+': CP.CPQuant := cqOnceOrMore;
else
Exit;
end;
FSource.NextChar;
end;
function TXMLReader.FindOrCreateElDef: TDOMElementDef;
var
p: PHashItem;
begin
CheckName;
p := doc.Names.FindOrAdd(FName.Buffer, FName.Length);
Result := TDOMElementDef(p^.Data);
if Result = nil then
begin
Result := TDOMElementDef.Create(doc);
Result.FNSI.QName := p;
p^.Data := Result;
end;
end;
procedure TXMLReader.ExpectChoiceOrSeq(CP: TContentParticle); // [49], [50]
var
Delim: DOMChar;
CurrentEntity: TObject;
CurrentCP: TContentParticle;
begin
Delim := #0;
repeat
CurrentCP := CP.Add;
SkipWhitespace;
if CheckForChar('(') then
begin
CurrentEntity := FSource.FEntity;
ExpectChoiceOrSeq(CurrentCP);
if CurrentEntity <> FSource.FEntity then
BadPENesting;
FSource.NextChar;
end
else
CurrentCP.Def := FindOrCreateElDef;
ParseQuantity(CurrentCP);
SkipWhitespace;
if FSource.FBuf^ = ')' then
Break;
if Delim = #0 then
begin
if (FSource.FBuf^ = '|') or (FSource.FBuf^ = ',') then
Delim := FSource.FBuf^
else
FatalError('Expected pipe or comma delimiter');
end
else
if FSource.FBuf^ <> Delim then
FatalError(Delim);
FSource.NextChar; // skip delimiter
until False;
if Delim = '|' then
CP.CPType := ctChoice
else
CP.CPType := ctSeq; // '(foo)' is a sequence!
end;
procedure TXMLReader.ParseElementDecl; // [45]
var
ElDef: TDOMElementDef;
CurrentEntity: TObject;
I: Integer;
CP: TContentParticle;
Typ: TElementContentType;
ExtDecl: Boolean;
begin
CP := nil;
Typ := ctUndeclared; // satisfy compiler
ExpectWhitespace;
ElDef := FindOrCreateElDef;
if ElDef.ContentType <> ctUndeclared then
ValidationError('Duplicate declaration of element ''%s''', [ElDef.TagName], FName.Length);
ExtDecl := FSource.DTDSubsetType <> dsInternal;
ExpectWhitespace;
if FSource.Matches('EMPTY') then
Typ := ctEmpty
else if FSource.Matches('ANY') then
Typ := ctAny
else if CheckForChar('(') then
begin
CP := TContentParticle.Create;
try
CurrentEntity := FSource.FEntity;
SkipWhitespace;
if FSource.Matches('#PCDATA') then // Mixed section [51]
begin
SkipWhitespace;
Typ := ctMixed;
while FSource.FBuf^ <> ')' do
begin
ExpectChar('|');
SkipWhitespace;
with CP.Add do
begin
Def := FindOrCreateElDef;
for I := CP.ChildCount-2 downto 0 do
if Def = CP.Children[I].Def then
ValidationError('Duplicate token in mixed section', [], FName.Length);
end;
SkipWhitespace;
end;
if CurrentEntity <> FSource.FEntity then
BadPENesting;
FSource.NextChar;
if (not CheckForChar('*')) and (CP.ChildCount > 0) then
FatalError(DOMChar('*'));
end
else // Children section [47]
begin
Typ := ctChildren;
ExpectChoiceOrSeq(CP);
if CurrentEntity <> FSource.FEntity then
BadPENesting;
FSource.NextChar;
ParseQuantity(CP);
end;
except
CP.Free;
raise;
end;
end
else
FatalError('Invalid content specification');
// SAX: DeclHandler.ElementDecl(name, model);
if FDTDProcessed and (ElDef.ContentType = ctUndeclared) then
begin
ElDef.FExternallyDeclared := ExtDecl;
ElDef.ContentType := Typ;
ElDef.RootCP := CP;
end
else
CP.Free;
end;
procedure TXMLReader.ParseNotationDecl; // [82]
var
Name, SysID, PubID: DOMString;
begin
ExpectWhitespace;
Name := ExpectName;
CheckNCName;
ExpectWhitespace;
if not ParseExternalID(SysID, PubID, True) then
FatalError('Expected external or public ID');
if FDTDProcessed then
DoNotationDecl(Name, PubID, SysID);
end;
const
AttrDataTypeNames: array[TAttrDataType] of DOMString = (
'CDATA',
'ID',
'IDREF',
'IDREFS',
'ENTITY',
'ENTITIES',
'NMTOKEN',
'NMTOKENS',
'NOTATION'
);
procedure TXMLReader.ParseAttlistDecl; // [52]
var
ElDef: TDOMElementDef;
AttDef: TDOMAttrDef;
dt: TAttrDataType;
Found, DiscardIt: Boolean;
Offsets: array [Boolean] of Integer;
begin
ExpectWhitespace;
ElDef := FindOrCreateElDef;
SkipWhitespace;
while FSource.FBuf^ <> '>' do
begin
CheckName;
ExpectWhitespace;
AttDef := doc.CreateAttributeDef(FName.Buffer, FName.Length);
try
AttDef.ExternallyDeclared := FSource.DTDSubsetType <> dsInternal;
// In case of duplicate declaration of the same attribute, we must discard it,
// not modifying ElDef, and suppressing certain validation errors.
DiscardIt := (not FDTDProcessed) or Assigned(ElDef.GetAttributeNode(AttDef.Name));
if not DiscardIt then
ElDef.SetAttributeNode(AttDef);
if CheckForChar('(') then // [59]
begin
AttDef.DataType := dtNmToken;
repeat
SkipWhitespace;
CheckName([cnToken]);
if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
ValidationError('Duplicate token in enumerated attibute declaration', [], FName.Length);
SkipWhitespace;
until not CheckForChar('|');
ExpectChar(')');
ExpectWhitespace;
end
else
begin
StoreLocation(FTokenStart);
// search topside-up so that e.g. NMTOKENS is matched before NMTOKEN
for dt := dtNotation downto dtCData do
begin
Found := FSource.Matches(AttrDataTypeNames[dt]);
if Found then
Break;
end;
if Found and SkipWhitespace then
begin
AttDef.DataType := dt;
if (dt = dtId) and not DiscardIt then
begin
if Assigned(ElDef.IDAttr) then
ValidationError('Only one attribute of type ID is allowed per element',[])
else
ElDef.IDAttr := AttDef;
end
else if dt = dtNotation then // no test cases for these ?!
begin
if not DiscardIt then
begin
if Assigned(ElDef.NotationAttr) then
ValidationError('Only one attribute of type NOTATION is allowed per element',[])
else
ElDef.NotationAttr := AttDef;
if ElDef.ContentType = ctEmpty then
ValidationError('NOTATION attributes are not allowed on EMPTY elements',[]);
end;
ExpectChar('(');
repeat
SkipWhitespace;
StoreLocation(FTokenStart);
CheckName;
CheckNCName;
if not AttDef.AddEnumToken(FName.Buffer, FName.Length) then
ValidationError('Duplicate token in NOTATION attribute declaration',[], FName.Length);
if not DiscardIt then
AddForwardRef(FNotationRefs, FName.Buffer, FName.Length);
SkipWhitespace;
until not CheckForChar('|');
ExpectChar(')');
ExpectWhitespace;
end;
end
else
begin
// don't report 'expected whitespace' if token does not match completely
Offsets[False] := 0;
Offsets[True] := Length(AttrDataTypeNames[dt]);
if Found and (FSource.FBuf^ < 'A') then
ExpectWhitespace
else
FatalError('Illegal attribute type for ''%s''', [AttDef.Name], Offsets[Found]);
end;
end;
StoreLocation(FTokenStart);
if FSource.Matches('#REQUIRED') then
AttDef.Default := adRequired
else if FSource.Matches('#IMPLIED') then
AttDef.Default := adImplied
else if FSource.Matches('#FIXED') then
begin
AttDef.Default := adFixed;
ExpectWhitespace;
end
else
AttDef.Default := adDefault;
if AttDef.Default in [adDefault, adFixed] then
begin
if AttDef.DataType = dtId then
ValidationError('An attribute of type ID cannot have a default value',[]);
FCursor := AttDef;
// See comments to valid-sa-094: PE expansion should be disabled in AttDef.
// ExpectAttValue() does not recognize PEs anyway, so setting FRecognizePEs isn't needed
// Saving/restoring FCursor is also redundant because it is always nil here.
ExpectAttValue;
FCursor := nil;
if not ValidateAttrSyntax(AttDef, AttDef.NodeValue) then
ValidationError('Default value for attribute ''%s'' has wrong syntax', [AttDef.Name]);
end;
// SAX: DeclHandler.AttributeDecl(...)
if DiscardIt then
AttDef.Free;
except
AttDef.Free;
raise;
end;
SkipWhitespace;
end;
end;
procedure TXMLReader.ParseEntityDecl; // [70]
var
IsPE: Boolean;
Entity: TDOMEntityEx;
Map: TDOMNamedNodeMap;
begin
if not SkipWhitespace(True) then
FatalError('Expected whitespace');
IsPE := False;
Map := FDocType.Entities;
if CheckForChar('%') then // [72]
begin
ExpectWhitespace;
IsPE := True;
if FPEMap = nil then
FPEMap := TDOMNamedNodeMap.Create(FDocType, ENTITY_NODE);
Map := FPEMap;
end;
Entity := TDOMEntityEx.Create(Doc);
Entity.SetReadOnly(True);
try
Entity.FExternallyDeclared := FSource.DTDSubsetType <> dsInternal;
Entity.FIsPE := IsPE;
Entity.FName := ExpectName;
CheckNCName;
ExpectWhitespace;
// remember where the entity is declared
Entity.FURI := FSource.SystemID;
if FEntityValue.Buffer = nil then
BufAllocate(FEntityValue, 256);
if ParseLiteral(FEntityValue, ltEntity, False) then
begin
SetString(Entity.FReplacementText, FEntityValue.Buffer, FEntityValue.Length);
Entity.FCharCount := FEntityValue.Length;
Entity.FStartLocation := FTokenStart;
end
else
begin
if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then
FatalError('Expected entity value or external ID');
if not IsPE then // [76]
begin
if FSource.FBuf^ <> '>' then
ExpectWhitespace;
if FSource.Matches('NDATA') then
begin
ExpectWhitespace;
StoreLocation(FTokenStart);
Entity.FNotationName := ExpectName;
AddForwardRef(FNotationRefs, FName.Buffer, FName.Length);
// SAX: DTDHandler.UnparsedEntityDecl(...);
end;
end;
end;
except
Entity.Free;
raise;
end;
// Repeated declarations of same entity are legal but must be ignored
if FDTDProcessed and (Map.GetNamedItem(Entity.FName) = nil) then
Map.SetNamedItem(Entity)
else
Entity.Free;
end;
procedure TXMLReader.ParseMarkupDecl; // [29]
var
IncludeLevel: Integer;
IgnoreLevel: Integer;
CurrentEntity: TObject;
IncludeLoc: TLocation;
IgnoreLoc: TLocation;
wc: DOMChar;
CondType: (ctUnknown, ctInclude, ctIgnore);
begin
IncludeLevel := 0;
IgnoreLevel := 0;
repeat
FRecognizePE := True; // PERef between declarations should always be recognized
SkipWhitespace;
FRecognizePE := False;
if (FSource.FBuf^ = ']') and (IncludeLevel > 0) then
begin
ExpectString(']]>');
Dec(IncludeLevel);
Continue;
end;
if not CheckForChar('<') then
Break;
CurrentEntity := FSource.FEntity;
if FSource.FBuf^ = '?' then
ParsePI
else
begin
ExpectChar('!');
if FSource.FBuf^ = '-' then
ParseComment
else if CheckForChar('[') then
begin
if FSource.DTDSubsetType = dsInternal then
FatalError('Conditional sections are not allowed in internal subset', 1);
FRecognizePE := True;
SkipWhitespace;
CondType := ctUnknown; // satisfy compiler
if FSource.Matches('INCLUDE') then
CondType := ctInclude
else if FSource.Matches('IGNORE') then
CondType := ctIgnore
else
FatalError('Expected "INCLUDE" or "IGNORE"');
SkipWhitespace;
if CurrentEntity <> FSource.FEntity then
BadPENesting;
ExpectChar('[');
if CondType = ctInclude then
begin
if IncludeLevel = 0 then
StoreLocation(IncludeLoc);
Inc(IncludeLevel);
end
else if CondType = ctIgnore then
begin
StoreLocation(IgnoreLoc);
IgnoreLevel := 1;
repeat
FValue.Length := 0;
wc := FSource.SkipUntil(FValue, [#0, '<', ']']);
if FSource.Matches('<![') then
Inc(IgnoreLevel)
else if FSource.Matches(']]>') then
Dec(IgnoreLevel)
else if wc <> #0 then
FSource.NextChar
else // PE's aren't recognized in ignore section, cannot ContextPop()
DoErrorPos(esFatal, 'IGNORE section is not closed', IgnoreLoc);
until IgnoreLevel=0;
end;
end
else
begin
FRecognizePE := FSource.DTDSubsetType <> dsInternal;
FInsideDecl := True;
if FSource.Matches('ELEMENT') then
ParseElementDecl
else if FSource.Matches('ENTITY') then
ParseEntityDecl
else if FSource.Matches('ATTLIST') then
ParseAttlistDecl
else if FSource.Matches('NOTATION') then
ParseNotationDecl
else
FatalError('Illegal markup declaration');
SkipWhitespace;
FRecognizePE := False;
if CurrentEntity <> FSource.FEntity then
BadPENesting;
ExpectChar('>');
FInsideDecl := False;
end;
end;
until False;
FRecognizePE := False;
if IncludeLevel > 0 then
DoErrorPos(esFatal, 'INCLUDE section is not closed', IncludeLoc);
if (FSource.DTDSubsetType = dsInternal) and (FSource.FBuf^ = ']') then
Exit;
if FSource.FBuf^ <> #0 then
FatalError('Illegal character in DTD');
end;
procedure TXMLReader.ProcessDTD(ASource: TXMLCharSource);
begin
doc := TXMLDocument.Create;
FDocType := TDOMDocumentTypeEx.Create(doc);
// TODO: DTD labeled version 1.1 will be rejected - must set FXML11 flag
// DONE: It's ok to have FCursor=nil now
doc.AppendChild(FDocType);
Initialize(ASource);
ParseMarkupDecl;
end;
procedure TXMLReader.AppendReference(AEntity: TDOMEntityEx);
var
s: DOMString;
begin
if AEntity = nil then
SetString(s, FName.Buffer, FName.Length)
else
s := AEntity.nodeName;
FCursor.AppendChild(doc.CreateEntityReference(s));
end;
// The code below does the bulk of the parsing, and must be as fast as possible.
// To minimize CPU cache effects, methods from different classes are kept together
function TXMLDecodingSource.SkipUntil(var ToFill: TDOMCharBuf; const Delim: TSetOfChar;
wsflag: PBoolean; AllowSpecialChars: boolean): DOMChar;
var
old: DOMPChar;
nonws: Boolean;
wc: DOMChar;
begin
nonws := False;
repeat
old := FBuf;
repeat
wc := FBuf^;
if (wc = #10) or (wc = #13)
or (FXML11Rules and ((wc = #$85) or (wc = #$2028))) // ToDo #$2028
then begin
// strictly this is needed only for 2-byte lineendings
BufAppendChunk(ToFill, old, FBuf);
NewLine;
old := FBuf;
wc := FBuf^
end
else if (not AllowSpecialChars)
and ( ((wc < #32) and (not ((wc = #0) and (FBuf >= FBufEnd))) and (wc <> #9))
or (wc > #$FFFD) or
(FXML11Rules and (wc >= #$7F) and (wc <= #$9F)) )
then
FReader.FatalError('Invalid character')
else if wc=#0 then
FReader.FatalError('Invalid character');
if (wc < #255) and (Char(ord(wc)) in Delim) then
Break;
// the checks above filter away everything below #32 that isn't a whitespace
if wc > #32 then
nonws := True;
Inc(FBuf);
until False;
Result := wc;
BufAppendChunk(ToFill, old, FBuf);
until (Result <> #0) or (not Reload);
if Assigned(wsflag) then
wsflag^ := wsflag^ or nonws;
end;
const
TextDelims: array[Boolean] of TSetOfChar = (
[#0, '<', '&', '>'],
[#0, '>']
);
procedure TXMLReader.ParseContent;
var
nonWs: Boolean;
wc: DOMChar;
ent: TDOMEntityEx;
InCDATA: Boolean;
begin
InCDATA := False;
StoreLocation(FTokenStart);
nonWs := False;
FValue.Length := 0;
repeat
wc := FSource.SkipUntil(FValue, TextDelims[InCDATA], @nonWs);
if wc = '<' then
begin
Inc(FSource.FBuf);
if FSource.FBufEnd < FSource.FBuf + 2 then
FSource.Reload;
if FSource.FBuf^ = '/' then
begin
DoText(FValue.Buffer, FValue.Length, not nonWs);
if FNesting <= FSource.FStartNesting then
FatalError('End-tag is not allowed here');
Inc(FSource.FBuf);
ParseEndTag;
end
else if CheckName([cnOptional]) then
begin
DoText(FValue.Buffer, FValue.Length, not nonWs);
ParseElement;
end
else if FSource.FBuf^ = '!' then
begin
Inc(FSource.FBuf);
if FSource.FBuf^ = '[' then
begin
ExpectString('[CDATA[');
if FState <> rsRoot then
FatalError('Illegal at document level');
StoreLocation(FTokenStart);
InCDATA := True;
if not FCDSectionsAsText then
DoText(FValue.Buffer, FValue.Length, not nonWs)
else
Continue;
end
else if FSource.FBuf^ = '-' then
begin
DoText(FValue.Buffer, FValue.Length, not nonWs);
ParseComment;
end
else
begin
DoText(FValue.Buffer, FValue.Length, not nonWs);
ParseDoctypeDecl;
end;
end
else if FSource.FBuf^ = '?' then
begin
DoText(FValue.Buffer, FValue.Length, not nonWs);
ParsePI;
end
else
RaiseNameNotFound;
end
else if wc = #0 then
begin
if InCDATA then
FatalError('Unterminated CDATA section', -1);
if FNesting > FSource.FStartNesting then
FatalError('End-tag is missing for ''%s''', [FValidator[FNesting].FElement.NSI.QName^.Key]);
if ContextPop then Continue;
Break;
end
else if wc = '>' then
begin
BufAppend(FValue, wc);
FSource.NextChar;
if (FValue.Length <= 2) or (FValue.Buffer[FValue.Length-2] <> ']') or
(FValue.Buffer[FValue.Length-3] <> ']') then Continue;
if InCData then // got a ']]>' separator
begin
Dec(FValue.Length, 3);
InCDATA := False;
if FCDSectionsAsText then
Continue;
DoCDSect(FValue.Buffer, FValue.Length);
end
else
FatalError('Literal '']]>'' is not allowed in text', 3);
end
else if wc = '&' then
begin
if FState <> rsRoot then
FatalError('Illegal at document level');
if FCurrContentType = ctEmpty then
ValidationError('References are illegal in EMPTY elements', []);
if ParseRef(FValue) or ResolvePredefined then
begin
nonWs := True; // CharRef to whitespace is not considered whitespace
Continue;
end
else
begin
ent := EntityCheck;
if (ent = nil) or (not FExpandEntities) then
begin
DoText(FValue.Buffer, FValue.Length, not nonWs);
AppendReference(ent);
end
else
begin
ContextPush(ent);
Continue;
end;
end;
end;
StoreLocation(FTokenStart);
FValue.Length := 0;
nonWs := False;
until False;
DoText(FValue.Buffer, FValue.Length, not nonWs);
end;
procedure TXMLCharSource.NextChar;
begin
Inc(FBuf);
if FBuf >= FBufEnd then
Reload;
end;
procedure TXMLReader.ExpectChar(wc: DOMChar);
begin
if FSource.FBuf^ = wc then
FSource.NextChar
else
FatalError(wc);
end;
// Element name already in FNameBuffer
procedure TXMLReader.ParseElement; // [39] [40] [44]
var
NewElem: TDOMElement;
ElDef: TDOMElementDef;
IsEmpty: Boolean;
ElName: PHashItem;
begin
if FState > rsRoot then
FatalError('Only one top-level element allowed', FName.Length)
else if FState < rsRoot then
begin
if FValidate then
ValidateRoot;
FState := rsRoot;
end;
NewElem := doc.CreateElementBuf(FName.Buffer, FName.Length);
FCursor.AppendChild(NewElem);
// we're about to process a new set of attributes
Inc(FAttrTag);
// Remember the hash entry, we'll need it often
ElName := NewElem.NSI.QName;
// Find declaration for this element
ElDef := TDOMElementDef(ElName^.Data);
if (ElDef = nil) or (ElDef.ContentType = ctUndeclared) then
ValidationError('Using undeclared element ''%s''',[ElName^.Key], FName.Length);
// Check if new element is allowed in current context
if FValidate and not FValidator[FNesting].IsElementAllowed(ElDef) then
ValidationError('Element ''%s'' is not allowed in this context',[ElName^.Key], FName.Length);
IsEmpty := False;
while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do
begin
SkipS(True);
if (FSource.FBuf^ = '>') or (FSource.FBuf^ = '/') then
Break;
ParseAttribute(NewElem, ElDef);
end;
if FSource.FBuf^ = '/' then
begin
IsEmpty := True;
FSource.NextChar;
end;
ExpectChar('>');
if Assigned(ElDef) and Assigned(ElDef.FAttributes) then
ProcessDefaultAttributes(NewElem, ElDef.FAttributes);
PushVC(NewElem, ElDef); // this increases FNesting
if FNamespaces then
ProcessNamespaceAtts(NewElem);
if not IsEmpty then
begin
FCursor := NewElem;
if not FPreserveWhitespace then // critical for testsuite compliance
SkipS;
end
else
DoEndElement(0);
end;
procedure TXMLReader.DoEndElement(ErrOffset: Integer);
var
NewElem: TDOMElement;
begin
NewElem := FValidator[FNesting].FElement;
TDOMNode(FCursor) := NewElem.ParentNode;
if FCursor = doc then
FState := rsEpilog;
if FValidate and FValidator[FNesting].Incomplete then
ValidationError('Element ''%s'' is missing required sub-elements', [NewElem.NSI.QName^.Key], ErrOffset);
if FNamespaces then
FNSHelper.EndElement;
PopVC;
end;
procedure TXMLReader.ParseEndTag; // [42]
var
ErrOffset: Integer;
ElName: PHashItem;
begin
ElName := FValidator[FNesting].FElement.NSI.QName;
CheckName;
if not BufEquals(FName, ElName^.Key) then
FatalError('Unmatching element end tag (expected "</%s>")', [ElName^.Key], FName.Length);
if FSource.FBuf^ = '>' then // this handles majority of cases
begin
ErrOffset := FName.Length+1;
FSource.NextChar;
end
else // but if closing '>' is preceded by whitespace,
begin // skipping it is likely to lose position info.
StoreLocation(FTokenStart);
Dec(FTokenStart.LinePos, FName.Length);
ErrOffset := -1;
SkipS;
ExpectChar('>');
end;
DoEndElement(ErrOffset);
end;
procedure TXMLReader.ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
var
attr: TDOMAttr;
AttDef: TDOMAttrDef;
OldAttr: TDOMNode;
procedure CheckValue;
var
AttValue, OldValue: DOMString;
begin
if FStandalone and AttDef.ExternallyDeclared then
begin
OldValue := Attr.Value;
Attr.DataType := AttDef.DataType;
AttValue := Attr.Value;
if AttValue <> OldValue then
StandaloneError(-1);
end
else
begin
Attr.DataType := AttDef.DataType;
AttValue := Attr.Value;
end;
// TODO: what about normalization of AttDef.Value? (Currently it IS normalized)
if (AttDef.Default = adFixed) and (AttDef.Value <> AttValue) then
ValidationError('Value of attribute ''%s'' does not match its #FIXED default',[AttDef.Name], -1);
if not ValidateAttrSyntax(AttDef, AttValue) then
ValidationError('Attribute ''%s'' type mismatch', [AttDef.Name], -1);
ValidateAttrValue(Attr, AttValue);
end;
begin
CheckName;
attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length);
if Assigned(ElDef) then
begin
AttDef := TDOMAttrDef(ElDef.GetAttributeNode(attr.NSI.QName^.Key));
if AttDef = nil then
ValidationError('Using undeclared attribute ''%s'' on element ''%s''',[attr.NSI.QName^.Key, Elem.NSI.QName^.Key], FName.Length)
else
AttDef.Tag := FAttrTag; // indicates that this one is specified
end
else
AttDef := nil;
// !!cannot use TDOMElement.SetAttributeNode because it will free old attribute
OldAttr := Elem.Attributes.SetNamedItem(Attr);
if Assigned(OldAttr) then
begin
OldAttr.Free;
FatalError('Duplicate attribute', FName.Length);
end;
ExpectEq;
FCursor := attr;
ExpectAttValue;
if Assigned(AttDef) and ((AttDef.DataType <> dtCdata) or (AttDef.Default = adFixed)) then
CheckValue;
end;
procedure TXMLReader.AddForwardRef(aList: TFPList; Buf: DOMPChar; Length: Integer);
var
w: PForwardRef;
begin
New(w);
SetString(w^.Value, Buf, Length);
w^.Loc := FTokenStart;
aList.Add(w);
end;
procedure TXMLReader.ClearRefs(aList: TFPList);
var
I: Integer;
begin
for I := 0 to aList.Count-1 do
Dispose(PForwardRef(aList.List^[I]));
aList.Clear;
end;
procedure TXMLReader.ValidateIdRefs;
var
I: Integer;
begin
for I := 0 to FIDRefs.Count-1 do
with PForwardRef(FIDRefs.List^[I])^ do
if Doc.GetElementById(Value) = nil then
DoErrorPos(esError, Format('The ID ''%s'' does not match any element', [Value]), Loc);
ClearRefs(FIDRefs);
end;
procedure TXMLReader.ProcessDefaultAttributes(Element: TDOMElement; Map: TDOMNamedNodeMap);
var
I: Integer;
AttDef: TDOMAttrDef;
Attr: TDOMAttr;
begin
for I := 0 to Map.Length-1 do
begin
AttDef := Map[I] as TDOMAttrDef;
if AttDef.Tag <> FAttrTag then // this one wasn't specified
begin
case AttDef.Default of
adDefault, adFixed: begin
if FStandalone and AttDef.ExternallyDeclared then
StandaloneError;
Attr := TDOMAttr(AttDef.CloneNode(True));
Element.SetAttributeNode(Attr);
ValidateAttrValue(Attr, Attr.Value);
end;
adRequired: ValidationError('Required attribute ''%s'' of element ''%s'' is missing',[AttDef.Name, Element.TagName], 0)
end;
end;
end;
end;
procedure TXMLReader.AddBinding(Attr: TDOMAttr; PrefixPtr: DOMPChar; PrefixLen: Integer);
var
nsUri: DOMString;
Prefix: PHashItem;
begin
nsUri := Attr.NodeValue;
Prefix := FNSHelper.GetPrefix(PrefixPtr, PrefixLen);
{ 'xml' is allowed to be bound to the correct namespace }
if ((nsUri = stduri_xml) <> (Prefix = FStdPrefix_xml)) or
(Prefix = FStdPrefix_xmlns) or
(nsUri = stduri_xmlns) then
begin
if (Prefix = FStdPrefix_xml) or (Prefix = FStdPrefix_xmlns) then
FatalError('Illegal usage of reserved prefix ''%s''', [Prefix^.Key])
else
FatalError('Illegal usage of reserved namespace URI ''%s''', [nsUri]);
end;
if (nsUri = '') and not (FXML11 or (Prefix^.Key = '')) then
FatalError('Illegal undefining of namespace'); { position - ? }
FNSHelper.BindPrefix(nsURI, Prefix);
end;
procedure TXMLReader.ProcessNamespaceAtts(Element: TDOMElement);
var
I, J: Integer;
Map: TDOMNamedNodeMap;
Prefix, AttrName: PHashItem;
Attr: TDOMAttr;
PrefixCount: Integer;
b: TBinding;
begin
FNSHelper.StartElement;
PrefixCount := 0;
if Element.HasAttributes then
begin
Map := Element.Attributes;
if Map.Length > LongWord(Length(FWorkAtts)) then
SetLength(FWorkAtts, Map.Length+10);
{ Pass 1, identify prefixed attrs and assign prefixes }
for I := 0 to Map.Length-1 do
begin
Attr := TDOMAttr(Map[I]);
AttrName := Attr.NSI.QName;
if Pos(DOMString('xmlns'), AttrName^.Key) = 1 then
begin
{ this is a namespace declaration }
if Length(AttrName^.Key) = 5 then
begin
// TODO: check all consequences of having zero PrefixLength
Attr.SetNSI(stduri_xmlns, 0);
AddBinding(Attr, nil, 0);
end
else if AttrName^.Key[6] = ':' then
begin
Attr.SetNSI(stduri_xmlns, 6);
AddBinding(Attr, @AttrName^.Key[7], Length(AttrName^.Key)-6);
end;
end
else
begin
J := Pos(DOMChar(':'), AttrName^.Key);
if J > 1 then
begin
FWorkAtts[PrefixCount].Attr := Attr;
FWorkAtts[PrefixCount].PrefixLen := J;
Inc(PrefixCount);
end;
end;
end;
end;
{ Pass 2, now all bindings are known, handle remaining prefixed attributes }
if PrefixCount > 0 then
begin
FNsAttHash.Init(PrefixCount);
for I := 0 to PrefixCount-1 do
begin
AttrName := FWorkAtts[I].Attr.NSI.QName;
if not FNSHelper.IsPrefixBound(DOMPChar(AttrName^.Key), FWorkAtts[I].PrefixLen-1, Prefix) then
FatalError('Unbound prefix "%s"', [Prefix^.Key]);
b := TBinding(Prefix^.Data);
{ detect duplicates }
J := FWorkAtts[I].PrefixLen+1;
if FNsAttHash.Locate(@b.uri, @AttrName^.Key[J], Length(AttrName^.Key) - J+1) then
FatalError('Duplicate prefixed attribute');
// convert Attr into namespaced one (by hack for the time being)
FWorkAtts[I].Attr.SetNSI(b.uri, J-1);
end;
end;
{ Finally, expand the element name }
J := Pos(DOMChar(':'), Element.NSI.QName^.Key);
if J > 1 then
begin
if not FNSHelper.IsPrefixBound(DOMPChar(Element.NSI.QName^.Key), J-1, Prefix) then
FatalError('Unbound prefix "%s"', [Prefix^.Key]);
b := TBinding(Prefix^.Data);
Element.SetNSI(b.uri, J);
end
else
begin
b := FNSHelper.DefaultNSBinding;
if Assigned(b) then
Element.SetNSI(b.uri, 0);
end;
end;
function TXMLReader.ParseExternalID(out SysID, PubID: DOMString; // [75]
SysIdOptional: Boolean): Boolean;
var
I: Integer;
wc: DOMChar;
begin
Result := False;
if FSource.Matches('SYSTEM') then
SysIdOptional := False
else if FSource.Matches('PUBLIC') then
begin
ExpectWhitespace;
ParseLiteral(FValue, ltPubid, True);
SetString(PubID, FValue.Buffer, FValue.Length);
for I := 1 to Length(PubID) do
begin
wc := PubID[I];
if (wc > #255) or not (Char(ord(wc)) in PubidChars) then
FatalError('Illegal Public ID literal', -1);
end;
end
else
Exit;
if SysIdOptional then
SkipWhitespace
else
ExpectWhitespace;
if ParseLiteral(FValue, ltPlain, not SysIdOptional) then
SetString(SysID, FValue.Buffer, FValue.Length);
Result := True;
end;
function TXMLReader.ValidateAttrSyntax(AttrDef: TDOMAttrDef; const aValue: DOMString): Boolean;
begin
case AttrDef.DataType of
dtId, dtIdRef, dtEntity: Result := IsXmlName(aValue, FXML11) and
((not FNamespaces) or (Pos(DOMChar(':'), aValue) = 0));
dtIdRefs, dtEntities: Result := IsXmlNames(aValue, FXML11) and
((not FNamespaces) or (Pos(DOMChar(':'), aValue) = 0));
dtNmToken: Result := IsXmlNmToken(aValue, FXML11) and AttrDef.HasEnumToken(aValue);
dtNmTokens: Result := IsXmlNmTokens(aValue, FXML11);
// IsXmlName() not necessary - enum is never empty and contains valid names
dtNotation: Result := AttrDef.HasEnumToken(aValue);
else
Result := True;
end;
end;
procedure TXMLReader.ValidateAttrValue(Attr: TDOMAttr; const aValue: DOMString);
var
L, StartPos, EndPos: Integer;
Entity: TDOMEntity;
begin
L := Length(aValue);
case Attr.DataType of
dtId: if not Doc.AddID(Attr) then
ValidationError('The ID ''%s'' is not unique', [aValue], -1);
dtIdRef, dtIdRefs: begin
StartPos := 1;
while StartPos <= L do
begin
EndPos := StartPos;
while (EndPos <= L) and (aValue[EndPos] <> #32) do
Inc(EndPos);
AddForwardRef(FIDRefs, @aValue[StartPos], EndPos-StartPos);
StartPos := EndPos + 1;
end;
end;
dtEntity, dtEntities: begin
StartPos := 1;
while StartPos <= L do
begin
EndPos := StartPos;
while (EndPos <= L) and (aValue[EndPos] <> #32) do
Inc(EndPos);
Entity := TDOMEntity(FDocType.Entities.GetNamedItem(Copy(aValue, StartPos, EndPos-StartPos)));
if (Entity = nil) or (Entity.NotationName = '') then
ValidationError('Attribute ''%s'' type mismatch', [Attr.Name], -1);
StartPos := EndPos + 1;
end;
end;
end;
end;
procedure TXMLReader.ValidateRoot;
begin
if Assigned(FDocType) then
begin
if not BufEquals(FName, FDocType.Name) then
ValidationError('Root element name does not match DTD', [], FName.Length);
end
else
ValidationError('Missing DTD', [], FName.Length);
end;
procedure TXMLReader.ValidateDTD;
var
I: Integer;
begin
if FValidate then
for I := 0 to FNotationRefs.Count-1 do
with PForwardRef(FNotationRefs[I])^ do
if FDocType.Notations.GetNamedItem(Value) = nil then
DoErrorPos(esError, Format('Notation ''%s'' is not declared', [Value]), Loc);
ClearRefs(FNotationRefs);
end;
procedure TXMLReader.DoText(ch: DOMPChar; Count: Integer; Whitespace: Boolean);
var
TextNode: TDOMText;
begin
if FState <> rsRoot then
if not Whitespace then
FatalError('Illegal at document level', -1)
else
Exit;
if (Whitespace and (not FPreserveWhitespace)) or (Count = 0) then
Exit;
// Validating filter part
case FCurrContentType of
ctChildren:
if not Whitespace then
ValidationError('Character data is not allowed in element-only content',[])
else
if FSaViolation then
StandaloneError(-1);
ctEmpty:
ValidationError('Character data is not allowed in EMPTY elements', []);
end;
// Document builder part
TextNode := Doc.CreateTextNodeBuf(ch, Count, Whitespace and (FCurrContentType = ctChildren));
FCursor.AppendChild(TextNode);
end;
procedure TXMLReader.DoAttrText(ch: DOMPChar; Count: Integer);
begin
FCursor.AppendChild(Doc.CreateTextNodeBuf(ch, Count, False));
end;
procedure TXMLReader.DoComment(ch: DOMPChar; Count: Integer);
var
Node: TDOMComment;
begin
// validation filter part
if FCurrContentType = ctEmpty then
ValidationError('Comments are not allowed within EMPTY elements', []);
// DOM builder part
if (not FIgnoreComments) and Assigned(FCursor) then
begin
Node := Doc.CreateCommentBuf(ch, Count);
FCursor.AppendChild(Node);
end;
end;
procedure TXMLReader.DoCDSect(ch: DOMPChar; Count: Integer);
var
s: DOMString;
begin
if FCurrContentType = ctChildren then
ValidationError('CDATA sections are not allowed in element-only content',[]);
if not FCDSectionsAsText then
begin
SetString(s, ch, Count);
// SAX: LexicalHandler.StartCDATA;
// SAX: ContentHandler.Characters(...);
FCursor.AppendChild(doc.CreateCDATASection(s));
// SAX: LexicalHandler.EndCDATA;
end
else
FCursor.AppendChild(doc.CreateTextNodeBuf(ch, Count, False));
end;
procedure TXMLReader.DoNotationDecl(const aName, aPubID, aSysID: DOMString);
var
Notation: TDOMNotationEx;
begin
if FDocType.Notations.GetNamedItem(aName) = nil then
begin
Notation := TDOMNotationEx(TDOMNotation.Create(doc));
Notation.FName := aName;
Notation.FPublicID := aPubID;
Notation.FSystemID := aSysID;
FDocType.Notations.SetNamedItem(Notation);
end
else
ValidationError('Duplicate notation declaration: ''%s''', [aName]);
end;
procedure TXMLReader.PushVC(aElement: TDOMElement; aElDef: TDOMElementDef);
begin
Inc(FNesting);
if FNesting >= Length(FValidator) then
SetLength(FValidator, FNesting * 2);
FValidator[FNesting].FElement := aElement;
FValidator[FNesting].FElementDef := aElDef;
FValidator[FNesting].FCurCP := nil;
FValidator[FNesting].FFailed := False;
UpdateConstraints;
end;
procedure TXMLReader.PopVC;
begin
if FNesting > 0 then Dec(FNesting);
UpdateConstraints;
end;
procedure TXMLReader.UpdateConstraints;
begin
if FValidate and Assigned(FValidator[FNesting].FElementDef) then
begin
FCurrContentType := FValidator[FNesting].FElementDef.ContentType;
FSaViolation := FStandalone and (FValidator[FNesting].FElementDef.FExternallyDeclared);
end
else
begin
FCurrContentType := ctAny;
FSaViolation := False;
end;
end;
{ TElementValidator }
function TElementValidator.IsElementAllowed(Def: TDOMElementDef): Boolean;
var
I: Integer;
Next: TContentParticle;
begin
Result := True;
// if element is not declared, non-validity has been already reported, no need to report again...
if Assigned(Def) and Assigned(FElementDef) then
begin
case FElementDef.ContentType of
ctMixed: begin
for I := 0 to FElementDef.RootCP.ChildCount-1 do
begin
if Def = FElementDef.RootCP.Children[I].Def then
Exit;
end;
Result := False;
end;
ctEmpty: Result := False;
ctChildren: begin
if FCurCP = nil then
Next := FElementDef.RootCP.FindFirst(Def)
else
Next := FCurCP.FindNext(Def, 0); { second arg ignored here }
Result := Assigned(Next);
if Result then
FCurCP := Next
else
FFailed := True; // used to prevent extra error at the end of element
end;
// ctAny, ctUndeclared: returns True by default
end;
end;
end;
function TElementValidator.Incomplete: Boolean;
begin
if Assigned(FElementDef) and (FElementDef.ContentType = ctChildren) and (not FFailed) then
begin
if FCurCP <> nil then
Result := FCurCP.MoreRequired(0) { arg ignored here }
else
Result := FElementDef.RootCP.IsRequired;
end
else
Result := False;
end;
{ TContentParticle }
function TContentParticle.Add: TContentParticle;
begin
if FChildren = nil then
FChildren := TFPList.Create;
Result := TContentParticle.Create;
Result.FParent := Self;
Result.FIndex := FChildren.Add(Result);
end;
destructor TContentParticle.Destroy;
var
I: Integer;
begin
if Assigned(FChildren) then
for I := FChildren.Count-1 downto 0 do
TObject(FChildren[I]).Free;
FChildren.Free;
inherited Destroy;
end;
function TContentParticle.GetChild(Index: Integer): TContentParticle;
begin
Result := TContentParticle(FChildren[Index]);
end;
function TContentParticle.GetChildCount: Integer;
begin
if Assigned(FChildren) then
Result := FChildren.Count
else
Result := 0;
end;
function TContentParticle.IsRequired: Boolean;
var
I: Integer;
begin
Result := (CPQuant = cqOnce) or (CPQuant = cqOnceOrMore);
// do not return True if all children are optional
if (CPType <> ctName) and Result then
begin
for I := 0 to ChildCount-1 do
begin
Result := Children[I].IsRequired;
if Result then Exit;
end;
end;
end;
function TContentParticle.MoreRequired(ChildIdx: Integer): Boolean;
var
I: Integer;
begin
Result := False;
if CPType = ctSeq then
begin
for I := ChildIdx + 1 to ChildCount-1 do
begin
Result := Children[I].IsRequired;
if Result then Exit;
end;
end;
if Assigned(FParent) then
Result := FParent.MoreRequired(FIndex);
end;
function TContentParticle.FindFirst(aDef: TDOMElementDef): TContentParticle;
var
I: Integer;
begin
Result := nil;
case CPType of
ctSeq:
for I := 0 to ChildCount-1 do with Children[I] do
begin
Result := FindFirst(aDef);
if Assigned(Result) or IsRequired then
Exit;
end;
ctChoice:
for I := 0 to ChildCount-1 do with Children[I] do
begin
Result := FindFirst(aDef);
if Assigned(Result) then
Exit;
end;
else // ctName
if aDef = Self.Def then
Result := Self
end;
end;
function TContentParticle.FindNext(aDef: TDOMElementDef;
ChildIdx: Integer): TContentParticle;
var
I: Integer;
begin
Result := nil;
if CPType = ctSeq then // search sequence to its end
begin
for I := ChildIdx + 1 to ChildCount-1 do with Children[I] do
begin
Result := FindFirst(aDef);
if (Result <> nil) or IsRequired then
Exit;
end;
end;
if (CPQuant = cqZeroOrMore) or (CPQuant = cqOnceOrMore) then
Result := FindFirst(aDef);
if (Result = nil) and Assigned(FParent) then
Result := FParent.FindNext(aDef, FIndex);
end;
{ TDOMElementDef }
destructor TDOMElementDef.Destroy;
begin
RootCP.Free;
inherited Destroy;
end;
{ plain calls }
procedure ReadXMLFile(out ADoc: TXMLDocument; var f: Text; Flags: TXMLReaderFlags);
var
Reader: TXMLReader;
Src: TXMLCharSource;
begin
ADoc := nil;
Src := TXMLFileInputSource.Create(f);
Reader := TXMLReader.Create;
try
Reader.Flags:=Flags;
Reader.ProcessXML(Src);
finally
ADoc := TXMLDocument(Reader.Doc);
Reader.Free;
end;
end;
procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; const ABaseURI: String;
Flags: TXMLReaderFlags);
var
Reader: TXMLReader;
Src: TXMLCharSource;
begin
ADoc := nil;
Reader := TXMLReader.Create;
try
Src := TXMLStreamInputSource.Create(f, False);
Src.SystemID := ABaseURI;
Reader.Flags:=Flags;
Reader.ProcessXML(Src);
finally
ADoc := TXMLDocument(Reader.doc);
Reader.Free;
end;
end;
procedure ReadXMLFile(out ADoc: TXMLDocument; f: TStream; Flags: TXMLReaderFlags);
begin
ReadXMLFile(ADoc, f, 'stream:', Flags);
end;
procedure ReadXMLFile(out ADoc: TXMLDocument; const AFilename: String;
Flags: TXMLReaderFlags);
var
FileStream: TStream;
begin
ADoc := nil;
FileStream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
try
ReadXMLFile(ADoc, FileStream, FilenameToURI(AFilename), Flags);
finally
FileStream.Free;
end;
end;
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: Text;
Flags: TXMLReaderFlags);
var
Reader: TXMLReader;
Src: TXMLCharSource;
begin
Reader := TXMLReader.Create;
try
Reader.Flags:=Flags;
Src := TXMLFileInputSource.Create(f);
Reader.ProcessFragment(Src, AParentNode);
finally
Reader.Free;
end;
end;
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream;
const ABaseURI: String; Flags: TXMLReaderFlags);
var
Reader: TXMLReader;
Src: TXMLCharSource;
begin
Reader := TXMLReader.Create;
try
Src := TXMLStreamInputSource.Create(f, False);
Src.SystemID := ABaseURI;
Reader.Flags:=Flags;
Reader.ProcessFragment(Src, AParentNode);
finally
Reader.Free;
end;
end;
procedure ReadXMLFragment(AParentNode: TDOMNode; var f: TStream;
Flags: TXMLReaderFlags);
begin
ReadXMLFragment(AParentNode, f, 'stream:', Flags);
end;
procedure ReadXMLFragment(AParentNode: TDOMNode; const AFilename: String;
Flags: TXMLReaderFlags);
var
Stream: TStream;
begin
Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
try
ReadXMLFragment(AParentNode, Stream, FilenameToURI(AFilename), Flags);
finally
Stream.Free;
end;
end;
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: Text);
var
Reader: TXMLReader;
Src: TXMLCharSource;
begin
ADoc := nil;
Reader := TXMLReader.Create;
try
Src := TXMLFileInputSource.Create(f);
Reader.ProcessDTD(Src);
finally
ADoc := TXMLDocument(Reader.doc);
Reader.Free;
end;
end;
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream; const ABaseURI: String);
var
Reader: TXMLReader;
Src: TXMLCharSource;
begin
ADoc := nil;
Reader := TXMLReader.Create;
try
Src := TXMLStreamInputSource.Create(f, False);
Src.SystemID := ABaseURI;
Reader.ProcessDTD(Src);
finally
ADoc := TXMLDocument(Reader.doc);
Reader.Free;
end;
end;
procedure ReadDTDFile(out ADoc: TXMLDocument; var f: TStream);
begin
ReadDTDFile(ADoc, f, 'stream:');
end;
procedure ReadDTDFile(out ADoc: TXMLDocument; const AFilename: String);
var
Stream: TStream;
begin
ADoc := nil;
Stream := TFileStream.Create(AFilename, fmOpenRead+fmShareDenyWrite);
try
ReadDTDFile(ADoc, Stream, FilenameToURI(AFilename));
finally
Stream.Free;
end;
end;
{ EXMLReadError }
function EXMLReadError.LineCol: TPoint;
begin
Result.Y:=Line;
Result.X:=LinePos;
end;
end.