mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-16 23:21:57 +02:00
* xmlread.pp, doing progress with streaming API:
+ state transitions needed to report start/end element events correctly + procedures for maintaining attribute data * excluded FCursor from attribute value parsing git-svn-id: trunk@16161 -
This commit is contained in:
parent
89df75c731
commit
d3bdf2577c
@ -164,6 +164,17 @@ type
|
||||
LinePos: Integer;
|
||||
end;
|
||||
|
||||
TXMLNodeType = (ntNone, ntElement, ntAttribute, ntText,
|
||||
ntCDATA, ntEntityReference, ntEntity, ntProcessingInstruction,
|
||||
ntComment, ntDocument, ntDocumentType, ntDocumentFragment,
|
||||
ntNotation,
|
||||
ntWhitespace,
|
||||
ntSignificantWhitespace,
|
||||
ntEndElement,
|
||||
ntEndEntity,
|
||||
ntXmlDeclaration
|
||||
);
|
||||
|
||||
TDOMEntityEx = class(TDOMEntity)
|
||||
protected
|
||||
FExternallyDeclared: Boolean;
|
||||
@ -287,9 +298,14 @@ type
|
||||
PNodeData = ^TNodeData;
|
||||
TNodeData = object
|
||||
// generic members
|
||||
FNext: PNodeData;
|
||||
FQName: PHashItem;
|
||||
FNodeType: TXMLNodeType;
|
||||
FDOMNode: TObject; // temporary
|
||||
|
||||
FValueStr: WideString;
|
||||
FValueStart: PWideChar;
|
||||
FValueLength: Integer;
|
||||
|
||||
// validation-specific members
|
||||
FElement: TDOMElement;
|
||||
@ -300,6 +316,8 @@ type
|
||||
function Incomplete: Boolean;
|
||||
end;
|
||||
|
||||
TNodeDataDynArray = array of TNodeData;
|
||||
|
||||
TXMLReadState = (rsProlog, rsDTD, rsRoot, rsEpilog);
|
||||
|
||||
TElementContentType = (
|
||||
@ -312,9 +330,10 @@ type
|
||||
|
||||
TCheckNameFlags = set of (cnOptional, cnToken);
|
||||
|
||||
TXMLToken = (xtNone, xtEOF, xtText, xtWhitespace, xtElement, xtEndElement, xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd);
|
||||
TXMLToken = (xtNone, xtEOF, xtText, xtWhitespace, xtElement, xtEndElement,
|
||||
xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd, xtPopElement,
|
||||
xtPopEmptyElement, xtPushElement);
|
||||
|
||||
|
||||
TPrefixedAttr = record
|
||||
Attr: TDOMAttr;
|
||||
PrefixLen: Integer; // to avoid recalculation
|
||||
@ -388,12 +407,19 @@ type
|
||||
function SkipUntilSeq(const Delim: TSetOfChar; c1: WideChar; c2: WideChar = #0): Boolean;
|
||||
procedure CheckMaxChars;
|
||||
function AllocNodeData(AIndex: Integer): PNodeData;
|
||||
function AllocAttributeData(AName: PHashItem): PNodeData;
|
||||
function AllocAttributeValueChunk(APrev: PNodeData): PNodeData;
|
||||
procedure CleanupAttributeData;
|
||||
procedure SetNodeInfoWithValue(typ: TXMLNodeType);
|
||||
protected
|
||||
FCursor: TDOMNode_WithChildren;
|
||||
FNesting: Integer;
|
||||
FCurrNode: PNodeData;
|
||||
FAttrCount: Integer;
|
||||
FNodeStack: array of TNodeData;
|
||||
FNodeStack: TNodeDataDynArray;
|
||||
FAttrChunks: TFPList;
|
||||
FFreeAttrChunk: PNodeData;
|
||||
FAttrCleanupFlag: Boolean;
|
||||
|
||||
procedure DoError(Severity: TErrorSeverity; const descr: string; LineOffs: Integer=0);
|
||||
procedure DoErrorPos(Severity: TErrorSeverity; const descr: string;
|
||||
@ -414,7 +440,7 @@ type
|
||||
function ExpectName: WideString; // [5]
|
||||
function ParseLiteral(var ToFill: TWideCharBuf; aType: TLiteralType;
|
||||
Required: Boolean; Normalized: PBoolean = nil): Boolean;
|
||||
procedure ExpectAttValue; // [10]
|
||||
procedure ExpectAttValue(attr: TDOMAttr); // [10]
|
||||
procedure ParseComment; // [15]
|
||||
procedure ParsePI; // [16]
|
||||
procedure ParseXmlOrTextDecl(TextDecl: Boolean);
|
||||
@ -429,7 +455,7 @@ type
|
||||
function Read: Boolean;
|
||||
function ResolvePredefined: Boolean;
|
||||
function EntityCheck(NoExternals: Boolean = False): TDOMEntityEx;
|
||||
procedure AppendReference(AEntity: TDOMEntityEx);
|
||||
procedure AppendReference(cur: TDOMNode; AEntity: TDOMEntityEx);
|
||||
function PrefetchEntity(AEntity: TDOMEntityEx): Boolean;
|
||||
procedure StartPE;
|
||||
function ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
|
||||
@ -453,7 +479,7 @@ type
|
||||
procedure ValidateDTD;
|
||||
procedure ValidateRoot;
|
||||
procedure ValidationError(const Msg: string; const args: array of const; LineOffs: Integer = -1);
|
||||
procedure DoAttrText(ch: PWideChar; Count: Integer);
|
||||
procedure DoAttrText(node: TDOMAttr; ch: PWideChar; Count: Integer);
|
||||
procedure DTDReloadHook;
|
||||
procedure ConvertSource(SrcIn: TXMLInputSource; out SrcOut: TXMLCharSource);
|
||||
// Some SAX-alike stuff (at a very early stage)
|
||||
@ -1278,6 +1304,7 @@ begin
|
||||
FNotationRefs := TFPList.Create;
|
||||
|
||||
FNSHelper := TNSSupport.Create;
|
||||
FAttrChunks := TFPList.Create;
|
||||
|
||||
FNsAttHash := TDblHashArray.Create;
|
||||
SetLength(FWorkAtts, 16);
|
||||
@ -1307,7 +1334,11 @@ begin
|
||||
end;
|
||||
|
||||
destructor TXMLReader.Destroy;
|
||||
var
|
||||
i: Integer;
|
||||
begin
|
||||
for i := FAttrChunks.Count-1 downto 0 do
|
||||
Dispose(PNodeData(FAttrChunks.List^[i]));
|
||||
if Assigned(FEntityValue.Buffer) then
|
||||
FreeMem(FEntityValue.Buffer);
|
||||
FreeMem(FName.Buffer);
|
||||
@ -1325,6 +1356,7 @@ begin
|
||||
|
||||
FNotationRefs.Free;
|
||||
FIDRefs.Free;
|
||||
FAttrChunks.Free;
|
||||
inherited Destroy;
|
||||
end;
|
||||
|
||||
@ -1361,6 +1393,7 @@ begin
|
||||
FState := rsRoot;
|
||||
FNesting := 0;
|
||||
FCurrNode := @FNodeStack[0];
|
||||
FCurrNode^.FDOMNode := FCursor;
|
||||
FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1');
|
||||
Initialize(ASource);
|
||||
FDocType := TDOMDocumentTypeEx(doc.DocType);
|
||||
@ -1550,11 +1583,16 @@ begin
|
||||
ExpectChar(';');
|
||||
end;
|
||||
|
||||
procedure TXMLReader.DoAttrText(node: TDOMAttr; ch: PWideChar; Count: Integer);
|
||||
begin
|
||||
node.InternalAppend(Doc.CreateTextNodeBuf(ch, Count, False));
|
||||
end;
|
||||
|
||||
const
|
||||
AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
|
||||
GT_Delim: TSetOfChar = [#0, '>'];
|
||||
|
||||
procedure TXMLReader.ExpectAttValue;
|
||||
procedure TXMLReader.ExpectAttValue(attr: TDOMAttr);
|
||||
var
|
||||
wc: WideChar;
|
||||
Delim: WideChar;
|
||||
@ -1578,10 +1616,10 @@ begin
|
||||
begin
|
||||
if FValue.Length > 0 then
|
||||
begin
|
||||
DoAttrText(FValue.Buffer, FValue.Length);
|
||||
DoAttrText(attr, FValue.Buffer, FValue.Length);
|
||||
FValue.Length := 0;
|
||||
end;
|
||||
AppendReference(ent);
|
||||
AppendReference(attr, ent);
|
||||
end
|
||||
else
|
||||
ContextPush(ent);
|
||||
@ -1599,7 +1637,7 @@ begin
|
||||
FatalError('Literal has no closing quote', -1);
|
||||
until False;
|
||||
if FValue.Length > 0 then
|
||||
DoAttrText(FValue.Buffer, FValue.Length);
|
||||
DoAttrText(attr, FValue.Buffer, FValue.Length);
|
||||
FValue.Length := 0;
|
||||
end;
|
||||
|
||||
@ -1706,12 +1744,14 @@ begin
|
||||
// To build children of the entity itself, we must parse it "out of context"
|
||||
InnerReader := TXMLReader.Create(FCtrl);
|
||||
try
|
||||
InnerReader.FAttrTag := FAttrTag;
|
||||
EntityToSource(Result, Src);
|
||||
Result.SetReadOnly(False);
|
||||
if Assigned(Src) then
|
||||
InnerReader.ProcessFragment(Src, Result);
|
||||
Result.FResolved := True;
|
||||
finally
|
||||
FAttrTag := InnerReader.FAttrTag;
|
||||
InnerReader.Free;
|
||||
Result.FOnStack := False;
|
||||
Result.SetReadOnly(True);
|
||||
@ -2464,12 +2504,10 @@ begin
|
||||
if AttDef.DataType = dtId then
|
||||
ValidationError('An attribute of type ID cannot have a default value',[]);
|
||||
|
||||
FCursor := AttDef;
|
||||
// See comments to valid-sa-094: PE expansion should be disabled in AttDef.
|
||||
// ExpectAttValue() does not recognize PEs anyway, so setting FRecognizePEs isn't needed
|
||||
// Saving/restoring FCursor is also redundant because it is always nil here.
|
||||
ExpectAttValue;
|
||||
FCursor := nil;
|
||||
ExpectAttValue(AttDef);
|
||||
if not ValidateAttrSyntax(AttDef, AttDef.NodeValue) then
|
||||
ValidationError('Default value for attribute ''%s'' has wrong syntax', [AttDef.Name]);
|
||||
end;
|
||||
@ -2675,7 +2713,7 @@ begin
|
||||
ParseMarkupDecl;
|
||||
end;
|
||||
|
||||
procedure TXMLReader.AppendReference(AEntity: TDOMEntityEx);
|
||||
procedure TXMLReader.AppendReference(cur: TDOMNode; AEntity: TDOMEntityEx);
|
||||
var
|
||||
s: WideString;
|
||||
begin
|
||||
@ -2683,7 +2721,7 @@ begin
|
||||
SetString(s, FName.Buffer, FName.Length)
|
||||
else
|
||||
s := AEntity.nodeName;
|
||||
FCursor.AppendChild(doc.CreateEntityReference(s));
|
||||
cur.AppendChild(doc.CreateEntityReference(s));
|
||||
end;
|
||||
|
||||
|
||||
@ -2735,6 +2773,11 @@ const
|
||||
[#0, '>']
|
||||
);
|
||||
|
||||
textNodeTypes: array[Boolean] of TXMLNodeType = (
|
||||
ntText,
|
||||
ntWhitespace
|
||||
);
|
||||
|
||||
procedure TXMLReader.ParseContent;
|
||||
begin
|
||||
FNext := xtText;
|
||||
@ -2745,6 +2788,8 @@ begin
|
||||
DoText(FValue.Buffer, FValue.Length, FToken = xtWhitespace);
|
||||
xtCDSect:
|
||||
DoCDSect(FValue.Buffer, FValue.Length);
|
||||
xtEndElement:
|
||||
DoEndElement(-1);
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
@ -2756,6 +2801,32 @@ var
|
||||
InCDATA: Boolean;
|
||||
tok: TXMLToken;
|
||||
begin
|
||||
if FNext = xtPopEmptyElement then
|
||||
begin
|
||||
FNext := xtPopElement;
|
||||
FToken := xtEndElement;
|
||||
FCurrNode^.FNodeType := ntEndElement;
|
||||
if FAttrCleanupFlag then
|
||||
CleanupAttributeData;
|
||||
FAttrCount := 0;
|
||||
Result := True;
|
||||
Exit;
|
||||
end;
|
||||
if FNext = xtPushElement then
|
||||
begin
|
||||
if FAttrCleanupFlag then
|
||||
CleanupAttributeData;
|
||||
FAttrCount := 0;
|
||||
FNext := xtText;
|
||||
end
|
||||
else if FNext = xtPopElement then
|
||||
begin
|
||||
if FNamespaces then
|
||||
FNSHelper.EndElement;
|
||||
PopVC;
|
||||
FNext := xtText;
|
||||
end;
|
||||
|
||||
InCDATA := (FNext = xtCDSect);
|
||||
StoreLocation(FTokenStart);
|
||||
nonWs := False;
|
||||
@ -2827,6 +2898,7 @@ begin
|
||||
InCDATA := False;
|
||||
if FCDSectionsAsText then
|
||||
Continue;
|
||||
SetNodeInfoWithValue(ntCDATA);
|
||||
FToken := xtCDSect;
|
||||
FNext := xtText;
|
||||
Result := True;
|
||||
@ -2861,6 +2933,7 @@ begin
|
||||
end;
|
||||
if FValue.Length <> 0 then
|
||||
begin
|
||||
SetNodeInfoWithValue(textNodeTypes[nonWs]);
|
||||
if nonWs then FToken := xtText else FToken := xtWhitespace;
|
||||
FNext := tok;
|
||||
Result := True;
|
||||
@ -2871,16 +2944,22 @@ begin
|
||||
else // not (FNext in [xtText, xtCDSect])
|
||||
tok := FNext;
|
||||
|
||||
FToken := tok;
|
||||
FNext := xtText;
|
||||
|
||||
case tok of
|
||||
xtEntity: AppendReference(FCurrEntity);
|
||||
xtEntity: AppendReference(FCursor, FCurrEntity);
|
||||
xtElement: ParseStartTag;
|
||||
xtEndElement: ParseEndTag;
|
||||
xtEndElement:
|
||||
begin
|
||||
ParseEndTag;
|
||||
FCurrNode^.FNodeType := ntEndElement;
|
||||
FNext := xtPopElement;
|
||||
end;
|
||||
xtPI: ParsePI;
|
||||
xtDoctype: ParseDoctypeDecl;
|
||||
xtComment: ParseComment;
|
||||
end;
|
||||
FToken := tok;
|
||||
FNext := xtText;
|
||||
Result := tok <> xtEOF;
|
||||
end;
|
||||
|
||||
@ -2920,6 +2999,8 @@ begin
|
||||
FCursor.AppendChild(NewElem);
|
||||
// we're about to process a new set of attributes
|
||||
Inc(FAttrTag);
|
||||
// can point to a child text/comment/PI node, so restore it
|
||||
FCurrNode := @FNodeStack[FNesting];
|
||||
|
||||
// Remember the hash entry, we'll need it often
|
||||
ElName := NewElem.NSI.QName;
|
||||
@ -2937,6 +3018,7 @@ begin
|
||||
FAttrCount := 0;
|
||||
PushVC(NewElem, ElDef); // this increases FNesting
|
||||
FCurrNode^.FQName := ElName;
|
||||
FCurrNode^.FNodeType := ntElement;
|
||||
|
||||
while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do
|
||||
begin
|
||||
@ -2945,6 +3027,8 @@ begin
|
||||
Break;
|
||||
ParseAttribute(NewElem, ElDef);
|
||||
end;
|
||||
// ParseAttribute might have reallocated FNodeStack, so restore FCurrNode once again
|
||||
FCurrNode := @FNodeStack[FNesting];
|
||||
|
||||
if FSource.FBuf^ = '/' then
|
||||
begin
|
||||
@ -2964,9 +3048,10 @@ begin
|
||||
FCursor := NewElem;
|
||||
if not FPreserveWhitespace then // critical for testsuite compliance
|
||||
SkipS;
|
||||
FNext := xtPushElement;
|
||||
end
|
||||
else
|
||||
DoEndElement(0);
|
||||
FNext := xtPopEmptyElement;
|
||||
end;
|
||||
|
||||
procedure TXMLReader.DoEndElement(ErrOffset: Integer);
|
||||
@ -2981,9 +3066,6 @@ begin
|
||||
if FValidate and FCurrNode^.Incomplete then
|
||||
ValidationError('Element ''%s'' is missing required sub-elements', [NewElem.NSI.QName^.Key], ErrOffset);
|
||||
|
||||
if FNamespaces then
|
||||
FNSHelper.EndElement;
|
||||
PopVC;
|
||||
end;
|
||||
|
||||
procedure TXMLReader.ParseEndTag; // [42]
|
||||
@ -2994,24 +3076,20 @@ begin
|
||||
FatalError('End-tag is not allowed here');
|
||||
Inc(FSource.FBuf);
|
||||
|
||||
ElName := FCurrNode^.FElement.NSI.QName;
|
||||
FCurrNode := @FNodeStack[FNesting]; // move off the possible child
|
||||
ElName := FCurrNode^.FQName;
|
||||
|
||||
CheckName;
|
||||
if not BufEquals(FName, ElName^.Key) then
|
||||
FatalError('Unmatching element end tag (expected "</%s>")', [ElName^.Key], FName.Length);
|
||||
if FSource.FBuf^ = '>' then // this handles majority of cases
|
||||
FSource.NextChar
|
||||
else
|
||||
begin
|
||||
FSource.NextChar;
|
||||
DoEndElement(FName.Length+1);
|
||||
end
|
||||
else // but if closing '>' is preceded by whitespace,
|
||||
begin // skipping it is likely to lose position info.
|
||||
StoreLocation(FTokenStart);
|
||||
Dec(FTokenStart.LinePos, FName.Length);
|
||||
SkipS;
|
||||
ExpectChar('>');
|
||||
DoEndElement(-1);
|
||||
end;
|
||||
Inc(FTokenStart.LinePos, 2); // move over '</' chars
|
||||
end;
|
||||
|
||||
procedure TXMLReader.ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
|
||||
@ -3068,9 +3146,9 @@ begin
|
||||
OldAttr.Free;
|
||||
FatalError('Duplicate attribute', FName.Length);
|
||||
end;
|
||||
|
||||
ExpectEq;
|
||||
FCursor := attr;
|
||||
ExpectAttValue;
|
||||
ExpectAttValue(attr);
|
||||
|
||||
if Assigned(AttDef) and ((AttDef.DataType <> dtCdata) or (AttDef.Default = adFixed)) then
|
||||
CheckValue;
|
||||
@ -3384,11 +3462,6 @@ begin
|
||||
FCursor.AppendChild(TextNode);
|
||||
end;
|
||||
|
||||
procedure TXMLReader.DoAttrText(ch: PWideChar; Count: Integer);
|
||||
begin
|
||||
FCursor.AppendChild(Doc.CreateTextNodeBuf(ch, Count, False));
|
||||
end;
|
||||
|
||||
procedure TXMLReader.DoComment(ch: PWideChar; Count: Integer);
|
||||
var
|
||||
Node: TDOMComment;
|
||||
@ -3434,14 +3507,70 @@ begin
|
||||
ValidationError('Duplicate notation declaration: ''%s''', [aName]);
|
||||
end;
|
||||
|
||||
function TXMLReader.AllocAttributeData(AName: PHashItem): PNodeData;
|
||||
begin
|
||||
Result := AllocNodeData(FNesting + FAttrCount + 1);
|
||||
Result^.FNodeType := ntAttribute;
|
||||
Result^.FQName := AName;
|
||||
Inc(FAttrCount);
|
||||
end;
|
||||
|
||||
function TXMLReader.AllocNodeData(AIndex: Integer): PNodeData;
|
||||
begin
|
||||
if AIndex >= Length(FNodeStack) then
|
||||
SetLength(FNodeStack, AIndex * 2);
|
||||
{make sure we have an extra slot to place child text/comment/etc}
|
||||
if AIndex >= Length(FNodeStack)-1 then
|
||||
SetLength(FNodeStack, AIndex * 2 + 2);
|
||||
|
||||
Result := @FNodeStack[AIndex];
|
||||
end;
|
||||
|
||||
function TXMLReader.AllocAttributeValueChunk(APrev: PNodeData): PNodeData;
|
||||
begin
|
||||
result := FFreeAttrChunk;
|
||||
if Assigned(result) then
|
||||
begin
|
||||
FFreeAttrChunk := result^.FNext;
|
||||
result^.FNext := nil;
|
||||
end
|
||||
else { no free chunks, create a new one }
|
||||
begin
|
||||
New(result);
|
||||
FillChar(result^, sizeof(TNodeData), 0);
|
||||
FAttrChunks.Add(result);
|
||||
end;
|
||||
APrev^.FNext := result;
|
||||
end;
|
||||
|
||||
procedure TXMLReader.CleanupAttributeData;
|
||||
var
|
||||
i: Integer;
|
||||
chunk, tmp: PNodeData;
|
||||
begin
|
||||
for i := 1 to FAttrCount do
|
||||
begin
|
||||
chunk := FNodeStack[FNesting+i].FNext;
|
||||
while Assigned(chunk) do
|
||||
begin
|
||||
tmp := chunk^.FNext;
|
||||
chunk^.FNext := FFreeAttrChunk;
|
||||
FFreeAttrChunk := chunk;
|
||||
chunk := tmp;
|
||||
end;
|
||||
FNodeStack[FNesting+i].FNext := nil;
|
||||
end;
|
||||
FAttrCleanupFlag := False;
|
||||
end;
|
||||
|
||||
procedure TXMLReader.SetNodeInfoWithValue(typ: TXMLNodeType);
|
||||
begin
|
||||
{FNesting+1 is available due to overallocation in AllocNodeData() }
|
||||
FCurrNode := @FNodeStack[FNesting+1];
|
||||
FCurrNode^.FNodeType := typ;
|
||||
FCurrNode^.FQName := nil;
|
||||
FCurrNode^.FValueStart := FValue.Buffer;
|
||||
FCurrNode^.FValueLength := FValue.Length;
|
||||
end;
|
||||
|
||||
procedure TXMLReader.PushVC(aElement: TDOMElement; aElDef: TDOMElementDef);
|
||||
begin
|
||||
Inc(FNesting);
|
||||
|
Loading…
Reference in New Issue
Block a user