From 6cc206ac77af95d0dda0d1e038cb651a5fa32c0f Mon Sep 17 00:00:00 2001 From: sergei Date: Tue, 5 Oct 2010 07:35:00 +0000 Subject: [PATCH] * xmlread.pp: started implementing the streaming API, aka XmlTextReader from .net world. git-svn-id: trunk@16079 - --- packages/fcl-xml/src/xmlread.pp | 150 ++++++++++++++++++++++---------- 1 file changed, 102 insertions(+), 48 deletions(-) diff --git a/packages/fcl-xml/src/xmlread.pp b/packages/fcl-xml/src/xmlread.pp index 49dea29791..ee8a11d5e5 100644 --- a/packages/fcl-xml/src/xmlread.pp +++ b/packages/fcl-xml/src/xmlread.pp @@ -284,7 +284,14 @@ type property Children[Index: Integer]: TContentParticle read GetChild; end; - TElementValidator = object + PNodeData = ^TNodeData; + TNodeData = object + // generic members + FQName: PHashItem; + FDOMNode: TObject; // temporary + + + // validation-specific members FElement: TDOMElement; FElementDef: TDOMElementDef; FCurCP: TContentParticle; @@ -304,6 +311,9 @@ type ); TCheckNameFlags = set of (cnOptional, cnToken); + + TXMLToken = (xtNone, xtEOF, xtText, xtWhitespace, xtElement, xtEndElement, xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd); + TPrefixedAttr = record Attr: TDOMAttr; @@ -337,6 +347,9 @@ type FAttrTag: Cardinal; FOwnsDoctype: Boolean; FDTDProcessed: Boolean; + FToken: TXMLToken; + FNext: TXMLToken; + FCurrEntity: TDOMEntityEx; FNSHelper: TNSSupport; FWorkAtts: array of TPrefixedAttr; @@ -374,10 +387,13 @@ type function FindOrCreateElDef: TDOMElementDef; function SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar; c2: WideChar = #0): Boolean; procedure CheckMaxChars; + function AllocNodeData(AIndex: Integer): PNodeData; protected FCursor: TDOMNode_WithChildren; FNesting: Integer; - FValidator: array of TElementValidator; + FCurrNode: PNodeData; + FAttrCount: Integer; + FNodeStack: array of TNodeData; procedure DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer=0); procedure DoErrorPos(Severity: TErrorSeverity; const descr: string; @@ -410,6 +426,7 @@ type procedure DoEndElement(ErrOffset: Integer); procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef); procedure ParseContent; // [43] + function Read: Boolean; function ResolvePredefined: Boolean; function EntityCheck(NoExternals: Boolean = False): TDOMEntityEx; procedure AppendReference(AEntity: TDOMEntityEx); @@ -1268,7 +1285,7 @@ begin FStdPrefix_xmlns := FNSHelper.GetPrefix(@PrefixDefault, 5); // Set char rules to XML 1.0 FNamePages := @NamePages; - SetLength(FValidator, 16); + SetLength(FNodeStack, 16); end; constructor TXMLReader.Create(AParser: TDOMParser); @@ -1325,6 +1342,8 @@ begin FCursor := doc; FState := rsProlog; FNesting := 0; + FCurrNode := @FNodeStack[0]; + FCurrNode^.FDOMNode := doc; Initialize(ASource); ParseContent; @@ -1340,6 +1359,8 @@ begin doc := AOwner.OwnerDocument; FCursor := AOwner as TDOMNode_WithChildren; FState := rsRoot; + FNesting := 0; + FCurrNode := @FNodeStack[0]; FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1'); Initialize(ASource); FDocType := TDOMDocumentTypeEx(doc.DocType); @@ -2714,21 +2735,33 @@ const [#0, '>'] ); -type - TXMLToken = (xtNone, xtText, xtElement, xtEndElement, xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd); - procedure TXMLReader.ParseContent; +begin + FNext := xtText; + while Read do + begin + case FToken of + xtText, xtWhitespace: + DoText(FValue.Buffer, FValue.Length, FToken = xtWhitespace); + xtCDSect: + DoCDSect(FValue.Buffer, FValue.Length); + end; + end; +end; + +function TXMLReader.Read: Boolean; var nonWs: Boolean; wc: WideChar; - ent: TDOMEntityEx; InCDATA: Boolean; tok: TXMLToken; begin - InCDATA := False; + InCDATA := (FNext = xtCDSect); StoreLocation(FTokenStart); nonWs := False; FValue.Length := 0; + + if FNext in [xtCDSect, xtText] then repeat wc := FSource.SkipUntil(FValue, TextDelims[InCDATA], @nonWs); if wc = '<' then @@ -2776,9 +2809,9 @@ begin if InCDATA then FatalError('Unterminated CDATA section', -1); if FNesting > FSource.FStartNesting then - FatalError('End-tag is missing for ''%s''', [FValidator[FNesting].FElement.NSI.QName^.Key]); + FatalError('End-tag is missing for ''%s''', [FCurrNode^.FElement.NSI.QName^.Key]); if ContextPop then Continue; - Break; + tok := xtEOF; end else if wc = '>' then begin @@ -2794,7 +2827,10 @@ begin InCDATA := False; if FCDSectionsAsText then Continue; - tok := xtText; + FToken := xtCDSect; + FNext := xtText; + Result := True; + Exit; end else FatalError('Literal '']]>'' is not allowed in text', 3); @@ -2814,33 +2850,38 @@ begin end else begin - ent := EntityCheck; - if Assigned(ent) and FExpandEntities then + FCurrEntity := EntityCheck; + if Assigned(FCurrEntity) and FExpandEntities then begin - ContextPush(ent); + ContextPush(FCurrEntity); Continue; end; tok := xtEntity; end; end; - // flush text accumulated this far - if tok = xtText then - DoCDSect(FValue.Buffer, FValue.Length) - else - DoText(FValue.Buffer, FValue.Length, not nonWs); - case tok of - xtEntity: AppendReference(ent); - xtElement: ParseStartTag; - xtEndElement: ParseEndTag; - xtPI: ParsePI; - xtDoctype: ParseDoctypeDecl; - xtComment: ParseComment; + if FValue.Length <> 0 then + begin + if nonWs then FToken := xtText else FToken := xtWhitespace; + FNext := tok; + Result := True; + Exit; end; - StoreLocation(FTokenStart); - FValue.Length := 0; - nonWs := False; - until False; - DoText(FValue.Buffer, FValue.Length, not nonWs); + Break; + until False + else // not (FNext in [xtText, xtCDSect]) + tok := FNext; + + case tok of + xtEntity: AppendReference(FCurrEntity); + xtElement: ParseStartTag; + xtEndElement: ParseEndTag; + xtPI: ParsePI; + xtDoctype: ParseDoctypeDecl; + xtComment: ParseComment; + end; + FToken := tok; + FNext := xtText; + Result := tok <> xtEOF; end; procedure TXMLCharSource.NextChar; @@ -2889,10 +2930,14 @@ begin ValidationError('Using undeclared element ''%s''',[ElName^.Key], FName.Length); // Check if new element is allowed in current context - if FValidate and not FValidator[FNesting].IsElementAllowed(ElDef) then + if FValidate and not FCurrNode^.IsElementAllowed(ElDef) then ValidationError('Element ''%s'' is not allowed in this context',[ElName^.Key], FName.Length); IsEmpty := False; + FAttrCount := 0; + PushVC(NewElem, ElDef); // this increases FNesting + FCurrNode^.FQName := ElName; + while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do begin SkipS(True); @@ -2910,7 +2955,7 @@ begin if Assigned(ElDef) and Assigned(ElDef.FAttributes) then ProcessDefaultAttributes(NewElem, ElDef.FAttributes); - PushVC(NewElem, ElDef); // this increases FNesting + if FNamespaces then ProcessNamespaceAtts(NewElem); @@ -2928,12 +2973,12 @@ procedure TXMLReader.DoEndElement(ErrOffset: Integer); var NewElem: TDOMElement; begin - NewElem := FValidator[FNesting].FElement; + NewElem := FCurrNode^.FElement; TDOMNode(FCursor) := NewElem.ParentNode; if FCursor = doc then FState := rsEpilog; - if FValidate and FValidator[FNesting].Incomplete then + if FValidate and FCurrNode^.Incomplete then ValidationError('Element ''%s'' is missing required sub-elements', [NewElem.NSI.QName^.Key], ErrOffset); if FNamespaces then @@ -2949,7 +2994,7 @@ begin FatalError('End-tag is not allowed here'); Inc(FSource.FBuf); - ElName := FValidator[FNesting].FElement.NSI.QName; + ElName := FCurrNode^.FElement.NSI.QName; CheckName; if not BufEquals(FName, ElName^.Key) then @@ -3002,6 +3047,7 @@ end; begin CheckName; + Inc(FAttrCount); attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length); if Assigned(ElDef) then @@ -3388,30 +3434,38 @@ begin ValidationError('Duplicate notation declaration: ''%s''', [aName]); end; +function TXMLReader.AllocNodeData(AIndex: Integer): PNodeData; +begin + if AIndex >= Length(FNodeStack) then + SetLength(FNodeStack, AIndex * 2); + + Result := @FNodeStack[AIndex]; +end; + procedure TXMLReader.PushVC(aElement: TDOMElement; aElDef: TDOMElementDef); begin Inc(FNesting); - if FNesting >= Length(FValidator) then - SetLength(FValidator, FNesting * 2); - FValidator[FNesting].FElement := aElement; - FValidator[FNesting].FElementDef := aElDef; - FValidator[FNesting].FCurCP := nil; - FValidator[FNesting].FFailed := False; + FCurrNode := AllocNodeData(FNesting); + FCurrNode^.FElement := aElement; + FCurrNode^.FElementDef := aElDef; + FCurrNode^.FCurCP := nil; + FCurrNode^.FFailed := False; UpdateConstraints; end; procedure TXMLReader.PopVC; begin if FNesting > 0 then Dec(FNesting); + FCurrNode := @FNodeStack[FNesting]; UpdateConstraints; end; procedure TXMLReader.UpdateConstraints; begin - if FValidate and Assigned(FValidator[FNesting].FElementDef) then + if FValidate and Assigned(FCurrNode^.FElementDef) then begin - FCurrContentType := FValidator[FNesting].FElementDef.ContentType; - FSaViolation := FStandalone and (FValidator[FNesting].FElementDef.FExternallyDeclared); + FCurrContentType := FCurrNode^.FElementDef.ContentType; + FSaViolation := FStandalone and (FCurrNode^.FElementDef.FExternallyDeclared); end else begin @@ -3420,9 +3474,9 @@ begin end; end; -{ TElementValidator } +{ TNodeData } -function TElementValidator.IsElementAllowed(Def: TDOMElementDef): Boolean; +function TNodeData.IsElementAllowed(Def: TDOMElementDef): Boolean; var I: Integer; Next: TContentParticle; @@ -3459,7 +3513,7 @@ begin end; end; -function TElementValidator.Incomplete: Boolean; +function TNodeData.Incomplete: Boolean; begin if Assigned(FElementDef) and (FElementDef.ContentType = ctChildren) and (not FFailed) then begin