* xmlread.pp: started implementing the streaming API, aka XmlTextReader from .net world.

git-svn-id: trunk@16079 -
This commit is contained in:
sergei 2010-10-05 07:35:00 +00:00
parent 1dc083438e
commit 6cc206ac77

View File

@ -284,7 +284,14 @@ type
property Children[Index: Integer]: TContentParticle read GetChild; property Children[Index: Integer]: TContentParticle read GetChild;
end; end;
TElementValidator = object PNodeData = ^TNodeData;
TNodeData = object
// generic members
FQName: PHashItem;
FDOMNode: TObject; // temporary
// validation-specific members
FElement: TDOMElement; FElement: TDOMElement;
FElementDef: TDOMElementDef; FElementDef: TDOMElementDef;
FCurCP: TContentParticle; FCurCP: TContentParticle;
@ -304,6 +311,9 @@ type
); );
TCheckNameFlags = set of (cnOptional, cnToken); TCheckNameFlags = set of (cnOptional, cnToken);
TXMLToken = (xtNone, xtEOF, xtText, xtWhitespace, xtElement, xtEndElement, xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd);
TPrefixedAttr = record TPrefixedAttr = record
Attr: TDOMAttr; Attr: TDOMAttr;
@ -337,6 +347,9 @@ type
FAttrTag: Cardinal; FAttrTag: Cardinal;
FOwnsDoctype: Boolean; FOwnsDoctype: Boolean;
FDTDProcessed: Boolean; FDTDProcessed: Boolean;
FToken: TXMLToken;
FNext: TXMLToken;
FCurrEntity: TDOMEntityEx;
FNSHelper: TNSSupport; FNSHelper: TNSSupport;
FWorkAtts: array of TPrefixedAttr; FWorkAtts: array of TPrefixedAttr;
@ -374,10 +387,13 @@ type
function FindOrCreateElDef: TDOMElementDef; function FindOrCreateElDef: TDOMElementDef;
function SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar; c2: WideChar = #0): Boolean; function SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar; c2: WideChar = #0): Boolean;
procedure CheckMaxChars; procedure CheckMaxChars;
function AllocNodeData(AIndex: Integer): PNodeData;
protected protected
FCursor: TDOMNode_WithChildren; FCursor: TDOMNode_WithChildren;
FNesting: Integer; FNesting: Integer;
FValidator: array of TElementValidator; FCurrNode: PNodeData;
FAttrCount: Integer;
FNodeStack: array of TNodeData;
procedure DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer=0); procedure DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer=0);
procedure DoErrorPos(Severity: TErrorSeverity; const descr: string; procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
@ -410,6 +426,7 @@ type
procedure DoEndElement(ErrOffset: Integer); procedure DoEndElement(ErrOffset: Integer);
procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef); procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
procedure ParseContent; // [43] procedure ParseContent; // [43]
function Read: Boolean;
function ResolvePredefined: Boolean; function ResolvePredefined: Boolean;
function EntityCheck(NoExternals: Boolean = False): TDOMEntityEx; function EntityCheck(NoExternals: Boolean = False): TDOMEntityEx;
procedure AppendReference(AEntity: TDOMEntityEx); procedure AppendReference(AEntity: TDOMEntityEx);
@ -1268,7 +1285,7 @@ begin
FStdPrefix_xmlns := FNSHelper.GetPrefix(@PrefixDefault, 5); FStdPrefix_xmlns := FNSHelper.GetPrefix(@PrefixDefault, 5);
// Set char rules to XML 1.0 // Set char rules to XML 1.0
FNamePages := @NamePages; FNamePages := @NamePages;
SetLength(FValidator, 16); SetLength(FNodeStack, 16);
end; end;
constructor TXMLReader.Create(AParser: TDOMParser); constructor TXMLReader.Create(AParser: TDOMParser);
@ -1325,6 +1342,8 @@ begin
FCursor := doc; FCursor := doc;
FState := rsProlog; FState := rsProlog;
FNesting := 0; FNesting := 0;
FCurrNode := @FNodeStack[0];
FCurrNode^.FDOMNode := doc;
Initialize(ASource); Initialize(ASource);
ParseContent; ParseContent;
@ -1340,6 +1359,8 @@ begin
doc := AOwner.OwnerDocument; doc := AOwner.OwnerDocument;
FCursor := AOwner as TDOMNode_WithChildren; FCursor := AOwner as TDOMNode_WithChildren;
FState := rsRoot; FState := rsRoot;
FNesting := 0;
FCurrNode := @FNodeStack[0];
FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1'); FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1');
Initialize(ASource); Initialize(ASource);
FDocType := TDOMDocumentTypeEx(doc.DocType); FDocType := TDOMDocumentTypeEx(doc.DocType);
@ -2714,21 +2735,33 @@ const
[#0, '>'] [#0, '>']
); );
type
TXMLToken = (xtNone, xtText, xtElement, xtEndElement, xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd);
procedure TXMLReader.ParseContent; procedure TXMLReader.ParseContent;
begin
FNext := xtText;
while Read do
begin
case FToken of
xtText, xtWhitespace:
DoText(FValue.Buffer, FValue.Length, FToken = xtWhitespace);
xtCDSect:
DoCDSect(FValue.Buffer, FValue.Length);
end;
end;
end;
function TXMLReader.Read: Boolean;
var var
nonWs: Boolean; nonWs: Boolean;
wc: WideChar; wc: WideChar;
ent: TDOMEntityEx;
InCDATA: Boolean; InCDATA: Boolean;
tok: TXMLToken; tok: TXMLToken;
begin begin
InCDATA := False; InCDATA := (FNext = xtCDSect);
StoreLocation(FTokenStart); StoreLocation(FTokenStart);
nonWs := False; nonWs := False;
FValue.Length := 0; FValue.Length := 0;
if FNext in [xtCDSect, xtText] then
repeat repeat
wc := FSource.SkipUntil(FValue, TextDelims[InCDATA], @nonWs); wc := FSource.SkipUntil(FValue, TextDelims[InCDATA], @nonWs);
if wc = '<' then if wc = '<' then
@ -2776,9 +2809,9 @@ begin
if InCDATA then if InCDATA then
FatalError('Unterminated CDATA section', -1); FatalError('Unterminated CDATA section', -1);
if FNesting > FSource.FStartNesting then if FNesting > FSource.FStartNesting then
FatalError('End-tag is missing for ''%s''', [FValidator[FNesting].FElement.NSI.QName^.Key]); FatalError('End-tag is missing for ''%s''', [FCurrNode^.FElement.NSI.QName^.Key]);
if ContextPop then Continue; if ContextPop then Continue;
Break; tok := xtEOF;
end end
else if wc = '>' then else if wc = '>' then
begin begin
@ -2794,7 +2827,10 @@ begin
InCDATA := False; InCDATA := False;
if FCDSectionsAsText then if FCDSectionsAsText then
Continue; Continue;
tok := xtText; FToken := xtCDSect;
FNext := xtText;
Result := True;
Exit;
end end
else else
FatalError('Literal '']]>'' is not allowed in text', 3); FatalError('Literal '']]>'' is not allowed in text', 3);
@ -2814,33 +2850,38 @@ begin
end end
else else
begin begin
ent := EntityCheck; FCurrEntity := EntityCheck;
if Assigned(ent) and FExpandEntities then if Assigned(FCurrEntity) and FExpandEntities then
begin begin
ContextPush(ent); ContextPush(FCurrEntity);
Continue; Continue;
end; end;
tok := xtEntity; tok := xtEntity;
end; end;
end; end;
// flush text accumulated this far if FValue.Length <> 0 then
if tok = xtText then begin
DoCDSect(FValue.Buffer, FValue.Length) if nonWs then FToken := xtText else FToken := xtWhitespace;
else FNext := tok;
DoText(FValue.Buffer, FValue.Length, not nonWs); Result := True;
case tok of Exit;
xtEntity: AppendReference(ent);
xtElement: ParseStartTag;
xtEndElement: ParseEndTag;
xtPI: ParsePI;
xtDoctype: ParseDoctypeDecl;
xtComment: ParseComment;
end; end;
StoreLocation(FTokenStart); Break;
FValue.Length := 0; until False
nonWs := False; else // not (FNext in [xtText, xtCDSect])
until False; tok := FNext;
DoText(FValue.Buffer, FValue.Length, not nonWs);
case tok of
xtEntity: AppendReference(FCurrEntity);
xtElement: ParseStartTag;
xtEndElement: ParseEndTag;
xtPI: ParsePI;
xtDoctype: ParseDoctypeDecl;
xtComment: ParseComment;
end;
FToken := tok;
FNext := xtText;
Result := tok <> xtEOF;
end; end;
procedure TXMLCharSource.NextChar; procedure TXMLCharSource.NextChar;
@ -2889,10 +2930,14 @@ begin
ValidationError('Using undeclared element ''%s''',[ElName^.Key], FName.Length); ValidationError('Using undeclared element ''%s''',[ElName^.Key], FName.Length);
// Check if new element is allowed in current context // Check if new element is allowed in current context
if FValidate and not FValidator[FNesting].IsElementAllowed(ElDef) then if FValidate and not FCurrNode^.IsElementAllowed(ElDef) then
ValidationError('Element ''%s'' is not allowed in this context',[ElName^.Key], FName.Length); ValidationError('Element ''%s'' is not allowed in this context',[ElName^.Key], FName.Length);
IsEmpty := False; IsEmpty := False;
FAttrCount := 0;
PushVC(NewElem, ElDef); // this increases FNesting
FCurrNode^.FQName := ElName;
while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do
begin begin
SkipS(True); SkipS(True);
@ -2910,7 +2955,7 @@ begin
if Assigned(ElDef) and Assigned(ElDef.FAttributes) then if Assigned(ElDef) and Assigned(ElDef.FAttributes) then
ProcessDefaultAttributes(NewElem, ElDef.FAttributes); ProcessDefaultAttributes(NewElem, ElDef.FAttributes);
PushVC(NewElem, ElDef); // this increases FNesting
if FNamespaces then if FNamespaces then
ProcessNamespaceAtts(NewElem); ProcessNamespaceAtts(NewElem);
@ -2928,12 +2973,12 @@ procedure TXMLReader.DoEndElement(ErrOffset: Integer);
var var
NewElem: TDOMElement; NewElem: TDOMElement;
begin begin
NewElem := FValidator[FNesting].FElement; NewElem := FCurrNode^.FElement;
TDOMNode(FCursor) := NewElem.ParentNode; TDOMNode(FCursor) := NewElem.ParentNode;
if FCursor = doc then if FCursor = doc then
FState := rsEpilog; FState := rsEpilog;
if FValidate and FValidator[FNesting].Incomplete then if FValidate and FCurrNode^.Incomplete then
ValidationError('Element ''%s'' is missing required sub-elements', [NewElem.NSI.QName^.Key], ErrOffset); ValidationError('Element ''%s'' is missing required sub-elements', [NewElem.NSI.QName^.Key], ErrOffset);
if FNamespaces then if FNamespaces then
@ -2949,7 +2994,7 @@ begin
FatalError('End-tag is not allowed here'); FatalError('End-tag is not allowed here');
Inc(FSource.FBuf); Inc(FSource.FBuf);
ElName := FValidator[FNesting].FElement.NSI.QName; ElName := FCurrNode^.FElement.NSI.QName;
CheckName; CheckName;
if not BufEquals(FName, ElName^.Key) then if not BufEquals(FName, ElName^.Key) then
@ -3002,6 +3047,7 @@ end;
begin begin
CheckName; CheckName;
Inc(FAttrCount);
attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length); attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length);
if Assigned(ElDef) then if Assigned(ElDef) then
@ -3388,30 +3434,38 @@ begin
ValidationError('Duplicate notation declaration: ''%s''', [aName]); ValidationError('Duplicate notation declaration: ''%s''', [aName]);
end; end;
function TXMLReader.AllocNodeData(AIndex: Integer): PNodeData;
begin
if AIndex >= Length(FNodeStack) then
SetLength(FNodeStack, AIndex * 2);
Result := @FNodeStack[AIndex];
end;
procedure TXMLReader.PushVC(aElement: TDOMElement; aElDef: TDOMElementDef); procedure TXMLReader.PushVC(aElement: TDOMElement; aElDef: TDOMElementDef);
begin begin
Inc(FNesting); Inc(FNesting);
if FNesting >= Length(FValidator) then FCurrNode := AllocNodeData(FNesting);
SetLength(FValidator, FNesting * 2); FCurrNode^.FElement := aElement;
FValidator[FNesting].FElement := aElement; FCurrNode^.FElementDef := aElDef;
FValidator[FNesting].FElementDef := aElDef; FCurrNode^.FCurCP := nil;
FValidator[FNesting].FCurCP := nil; FCurrNode^.FFailed := False;
FValidator[FNesting].FFailed := False;
UpdateConstraints; UpdateConstraints;
end; end;
procedure TXMLReader.PopVC; procedure TXMLReader.PopVC;
begin begin
if FNesting > 0 then Dec(FNesting); if FNesting > 0 then Dec(FNesting);
FCurrNode := @FNodeStack[FNesting];
UpdateConstraints; UpdateConstraints;
end; end;
procedure TXMLReader.UpdateConstraints; procedure TXMLReader.UpdateConstraints;
begin begin
if FValidate and Assigned(FValidator[FNesting].FElementDef) then if FValidate and Assigned(FCurrNode^.FElementDef) then
begin begin
FCurrContentType := FValidator[FNesting].FElementDef.ContentType; FCurrContentType := FCurrNode^.FElementDef.ContentType;
FSaViolation := FStandalone and (FValidator[FNesting].FElementDef.FExternallyDeclared); FSaViolation := FStandalone and (FCurrNode^.FElementDef.FExternallyDeclared);
end end
else else
begin begin
@ -3420,9 +3474,9 @@ begin
end; end;
end; end;
{ TElementValidator } { TNodeData }
function TElementValidator.IsElementAllowed(Def: TDOMElementDef): Boolean; function TNodeData.IsElementAllowed(Def: TDOMElementDef): Boolean;
var var
I: Integer; I: Integer;
Next: TContentParticle; Next: TContentParticle;
@ -3459,7 +3513,7 @@ begin
end; end;
end; end;
function TElementValidator.Incomplete: Boolean; function TNodeData.Incomplete: Boolean;
begin begin
if Assigned(FElementDef) and (FElementDef.ContentType = ctChildren) and (not FFailed) then if Assigned(FElementDef) and (FElementDef.ContentType = ctChildren) and (not FFailed) then
begin begin