* xmlread.pp, handle entity references in streaming style.

* Renamed TXMLCharSource.DTDSubsetType to Kind and changed its definition to reflect its purpose better.

git-svn-id: trunk@16932 -
This commit is contained in:
sergei 2011-02-18 15:27:35 +00:00
parent e5b156f29a
commit 58725cdedd

View File

@ -154,7 +154,7 @@ type
TDOMDocumentTypeEx = class(TDOMDocumentType); TDOMDocumentTypeEx = class(TDOMDocumentType);
TDOMTopNodeEx = class(TDOMNode_TopLevel); TDOMTopNodeEx = class(TDOMNode_TopLevel);
TDTDSubsetType = (dsNone, dsInternal, dsExternal); TXMLSourceKind = (skNone, skInternalSubset, skManualPop);
TLocation = xmlutils.TLocation; TLocation = xmlutils.TLocation;
@ -181,7 +181,7 @@ type
protected protected
function Reload: Boolean; virtual; function Reload: Boolean; virtual;
public public
DTDSubsetType: TDTDSubsetType; Kind: TXMLSourceKind;
constructor Create(const AData: WideString); constructor Create(const AData: WideString);
procedure NextChar; procedure NextChar;
procedure NewLine; virtual; procedure NewLine; virtual;
@ -263,7 +263,7 @@ type
TXMLToken = (xtNone, xtEOF, xtText, xtWhitespace, xtElement, xtEndElement, TXMLToken = (xtNone, xtEOF, xtText, xtWhitespace, xtElement, xtEndElement,
xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd, xtPopElement, xtCDSect, xtComment, xtPI, xtDoctype, xtEntity, xtEntityEnd, xtPopElement,
xtPopEmptyElement, xtPushElement); xtPopEmptyElement, xtPushElement, xtPushEntity, xtPopEntity);
TLiteralType = (ltPlain, ltPubid, ltEntity); TLiteralType = (ltPlain, ltPubid, ltEntity);
@ -385,13 +385,16 @@ type
procedure ParseEndTag; // [42] procedure ParseEndTag; // [42]
procedure DoStartElement; procedure DoStartElement;
procedure DoEndElement; procedure DoEndElement;
procedure HandleEntityStart;
procedure HandleEntityEnd;
procedure ResolveEntity;
procedure DoStartEntity;
procedure ParseAttribute(ElDef: TElementDecl); procedure ParseAttribute(ElDef: TElementDecl);
procedure ParseContent; // [43] procedure ParseContent; // [43]
function Read: Boolean; function Read: Boolean;
function ResolvePredefined: Boolean; function ResolvePredefined: Boolean;
function EntityCheck(NoExternals: Boolean = False): TEntityDecl; function EntityCheck(NoExternals: Boolean = False): TEntityDecl;
procedure LoadEntity(AEntity: TEntityDecl); procedure LoadEntity(AEntity: TEntityDecl);
procedure AppendReference(cur: TDOMNode; AEntity: TEntityDecl);
function PrefetchEntity(AEntity: TEntityDecl): Boolean; function PrefetchEntity(AEntity: TEntityDecl): Boolean;
procedure StartPE; procedure StartPE;
function ParseRef(var ToFill: TWideCharBuf): Boolean; // [67] function ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
@ -404,7 +407,7 @@ type
procedure ExpectChoiceOrSeq(CP: TContentParticle); procedure ExpectChoiceOrSeq(CP: TContentParticle);
procedure ParseElementDecl; procedure ParseElementDecl;
procedure ParseNotationDecl; procedure ParseNotationDecl;
function ResolveEntity(const ASystemID, APublicID, ABaseURI: WideString; out Source: TXMLCharSource): Boolean; function ResolveResource(const ASystemID, APublicID, ABaseURI: WideString; out Source: TXMLCharSource): Boolean;
procedure ProcessDefaultAttributes(ElDef: TElementDecl); procedure ProcessDefaultAttributes(ElDef: TElementDecl);
procedure ProcessNamespaceAtts; procedure ProcessNamespaceAtts;
function AddBinding(attrData: PNodeData): Boolean; function AddBinding(attrData: PNodeData): Boolean;
@ -423,6 +426,7 @@ type
procedure DoComment(ch: PWideChar; Count: Integer); procedure DoComment(ch: PWideChar; Count: Integer);
procedure DoCDSect(ch: PWideChar; Count: Integer); procedure DoCDSect(ch: PWideChar; Count: Integer);
procedure DoNotationDecl(const aName, aPubID, aSysID: WideString); procedure DoNotationDecl(const aName, aPubID, aSysID: WideString);
procedure DoEntityReference;
public public
doc: TDOMDocument; doc: TDOMDocument;
constructor Create; overload; constructor Create; overload;
@ -552,7 +556,7 @@ begin
ADoc := nil; ADoc := nil;
with TXMLReader.Create(Self) do with TXMLReader.Create(Self) do
try try
if ResolveEntity(URI, '', '', Src) then if ResolveResource(URI, '', '', Src) then
ProcessXML(Src) ProcessXML(Src)
else else
DoErrorPos(esFatal, 'The specified URI could not be resolved', NullLocation); DoErrorPos(esFatal, 'The specified URI could not be resolved', NullLocation);
@ -731,7 +735,7 @@ var
r, inLeft: Cardinal; r, inLeft: Cardinal;
rslt: Integer; rslt: Integer;
begin begin
if DTDSubsetType = dsInternal then if Kind = skInternalSubset then
FReader.DTDReloadHook; FReader.DTDReloadHook;
Remainder := FBufEnd - FBuf; Remainder := FBufEnd - FBuf;
if Remainder > 0 then if Remainder > 0 then
@ -968,7 +972,7 @@ begin
else if SrcIn.FStringData <> '' then else if SrcIn.FStringData <> '' then
SrcOut := TXMLStreamInputSource.Create(TStringStream.Create(SrcIn.FStringData), True) SrcOut := TXMLStreamInputSource.Create(TStringStream.Create(SrcIn.FStringData), True)
else if (SrcIn.SystemID <> '') then else if (SrcIn.SystemID <> '') then
ResolveEntity(SrcIn.SystemID, SrcIn.PublicID, SrcIn.BaseURI, SrcOut); ResolveResource(SrcIn.SystemID, SrcIn.PublicID, SrcIn.BaseURI, SrcOut);
end; end;
if (SrcOut = nil) and (FSource = nil) then if (SrcOut = nil) and (FSource = nil) then
DoErrorPos(esFatal, 'No input source specified', NullLocation); DoErrorPos(esFatal, 'No input source specified', NullLocation);
@ -980,7 +984,7 @@ begin
Loc.LinePos := FSource.FBuf-FSource.LFPos; Loc.LinePos := FSource.FBuf-FSource.LFPos;
end; end;
function TXMLReader.ResolveEntity(const ASystemID, APublicID, ABaseURI: WideString; out Source: TXMLCharSource): Boolean; function TXMLReader.ResolveResource(const ASystemID, APublicID, ABaseURI: WideString; out Source: TXMLCharSource): Boolean;
var var
AbsSysID: WideString; AbsSysID: WideString;
Filename: string; Filename: string;
@ -1133,7 +1137,7 @@ begin
end end
else if FSource.FBuf^ = '%' then else if FSource.FBuf^ = '%' then
begin begin
if (FState <> rsDTD) or ((FSource.DTDSubsetType = dsInternal) and FInsideDecl) then if (FState <> rsDTD) or ((FSource.Kind = skInternalSubset) and FInsideDecl) then
Break; Break;
// This is the only case where look-ahead is needed // This is the only case where look-ahead is needed
if FSource.FBuf > FSource.FBufEnd-2 then if FSource.FBuf > FSource.FBufEnd-2 then
@ -1639,7 +1643,7 @@ begin
if (AEntity.FSystemID <> '') and not AEntity.FPrefetched then if (AEntity.FSystemID <> '') and not AEntity.FPrefetched then
begin begin
if not ResolveEntity(AEntity.FSystemID, AEntity.FPublicID, AEntity.FURI, Src) then if not ResolveResource(AEntity.FSystemID, AEntity.FPublicID, AEntity.FURI, Src) then
begin begin
// TODO: a detailed message like SysErrorMessage(GetLastError) would be great here // TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]); ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]);
@ -1676,7 +1680,7 @@ var
Src: TXMLCharSource; Src: TXMLCharSource;
Error: Boolean; Error: Boolean;
begin begin
Result := Assigned(FSource.FParent) and (Forced or (FSource.DTDSubsetType = dsNone)); Result := Assigned(FSource.FParent) and (Forced or (FSource.Kind = skNone));
if Result then if Result then
begin begin
Src := FSource.FParent; Src := FSource.FParent;
@ -1813,7 +1817,7 @@ begin
FSource.NextChar; FSource.NextChar;
CheckName; CheckName;
ExpectChar(';'); ExpectChar(';');
if FSource.DTDSubsetType = dsInternal then if FSource.Kind = skInternalSubset then
FatalError('PE reference not allowed here in internal subset', FName.Length+2); FatalError('PE reference not allowed here in internal subset', FName.Length+2);
StartPE; StartPE;
end end
@ -2058,7 +2062,7 @@ begin
if CheckForChar('[') then if CheckForChar('[') then
begin begin
BufAllocate(FIntSubset, 256); BufAllocate(FIntSubset, 256);
FSource.DTDSubsetType := dsInternal; FSource.Kind := skInternalSubset;
try try
FDTDStartPos := FSource.FBuf; FDTDStartPos := FSource.FBuf;
ParseMarkupDecl; ParseMarkupDecl;
@ -2066,7 +2070,7 @@ begin
SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length); SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length);
finally finally
FreeMem(FIntSubset.Buffer); FreeMem(FIntSubset.Buffer);
FSource.DTDSubsetType := dsNone; FSource.Kind := skNone;
end; end;
ExpectChar(']'); ExpectChar(']');
SkipS; SkipS;
@ -2075,11 +2079,11 @@ begin
if (FDocType.FSystemID <> '') then if (FDocType.FSystemID <> '') then
begin begin
if ResolveEntity(FDocType.FSystemID, FDocType.FPublicID, FSource.SystemID, Src) then if ResolveResource(FDocType.FSystemID, FDocType.FPublicID, FSource.SystemID, Src) then
begin begin
Initialize(Src); Initialize(Src);
try try
Src.DTDSubsetType := dsExternal; Src.Kind := skManualPop;
ParseMarkupDecl; ParseMarkupDecl;
finally finally
ContextPop(True); ContextPop(True);
@ -2091,8 +2095,8 @@ begin
FDTDProcessed := FStandalone; FDTDProcessed := FStandalone;
end; end;
end; end;
ValidateDTD;
FState := rsAfterDTD; FState := rsAfterDTD;
FCurrNode^.FNodeType := ntDocumentType;
end; end;
procedure TXMLReader.ExpectEq; // [25] procedure TXMLReader.ExpectEq; // [25]
@ -2205,7 +2209,7 @@ begin
if ElDef.ContentType <> ctUndeclared then if ElDef.ContentType <> ctUndeclared then
ValidationErrorWithName('Duplicate declaration of element ''%s''', FName.Length); ValidationErrorWithName('Duplicate declaration of element ''%s''', FName.Length);
ExtDecl := FSource.DTDSubsetType <> dsInternal; ExtDecl := FSource.Kind <> skInternalSubset;
ExpectWhitespace; ExpectWhitespace;
if FSource.Matches('EMPTY') then if FSource.Matches('EMPTY') then
@ -2318,7 +2322,7 @@ begin
attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length); attrName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
AttDef := TAttributeDef.Create(attrName, FColonPos); AttDef := TAttributeDef.Create(attrName, FColonPos);
try try
AttDef.ExternallyDeclared := FSource.DTDSubsetType <> dsInternal; AttDef.ExternallyDeclared := FSource.Kind <> skInternalSubset;
// In case of duplicate declaration of the same attribute, we must discard it, // In case of duplicate declaration of the same attribute, we must discard it,
// not modifying ElDef, and suppressing certain validation errors. // not modifying ElDef, and suppressing certain validation errors.
DiscardIt := (not FDTDProcessed) or Assigned(ElDef.GetAttrDef(attrName)); DiscardIt := (not FDTDProcessed) or Assigned(ElDef.GetAttrDef(attrName));
@ -2454,7 +2458,7 @@ begin
Entity := TEntityDecl.Create; Entity := TEntityDecl.Create;
try try
Entity.ExternallyDeclared := FSource.DTDSubsetType <> dsInternal; Entity.ExternallyDeclared := FSource.Kind <> skInternalSubset;
Entity.FIsPE := IsPE; Entity.FIsPE := IsPE;
CheckName; CheckName;
CheckNCName; CheckNCName;
@ -2547,7 +2551,7 @@ begin
ParseComment(True) ParseComment(True)
else if CheckForChar('[') then else if CheckForChar('[') then
begin begin
if FSource.DTDSubsetType = dsInternal then if FSource.Kind = skInternalSubset then
FatalError('Conditional sections are not allowed in internal subset', 1); FatalError('Conditional sections are not allowed in internal subset', 1);
SkipWhitespace; SkipWhitespace;
@ -2613,7 +2617,7 @@ begin
until False; until False;
if IncludeLevel > 0 then if IncludeLevel > 0 then
DoErrorPos(esFatal, 'INCLUDE section is not closed', IncludeLoc); DoErrorPos(esFatal, 'INCLUDE section is not closed', IncludeLoc);
if (FSource.DTDSubsetType = dsInternal) and (FSource.FBuf^ = ']') then if (FSource.Kind = skInternalSubset) and (FSource.FBuf^ = ']') then
Exit; Exit;
if FSource.FBuf^ <> #0 then if FSource.FBuf^ <> #0 then
FatalError('Illegal character in DTD'); FatalError('Illegal character in DTD');
@ -2662,15 +2666,62 @@ begin
end; end;
end; end;
procedure TXMLReader.AppendReference(cur: TDOMNode; AEntity: TEntityDecl); procedure TXMLReader.DoEntityReference;
var
s: WideString;
begin begin
if AEntity = nil then FCursorStack[FNesting].AppendChild(doc.CreateEntityReference(FCurrNode^.FQName^.Key));
SetString(s, FName.Buffer, FName.Length) end;
procedure TXMLReader.HandleEntityStart;
begin
{ FNesting+1 is available due to overallocation in AllocNodeData() }
FCurrNode := @FNodeStack[FNesting+1];
FCurrNode^.FNodeType := ntEntityReference;
FCurrNode^.FQName := FNameTable.FindOrAdd(FName.Buffer, FName.Length);
FCurrNode^.FValueStart := nil;
FCurrNode^.FValueLength := 0;
end;
procedure TXMLReader.HandleEntityEnd;
begin
FValidators[FNesting-1] := FValidators[FNesting];
FCursorStack[FNesting-1] := FCursorStack[FNesting];
ContextPop(True);
PopVC;
FCurrNode := @FNodeStack[FNesting+1];
FCurrNode^.FNodeType := ntEndEntity;
// TODO: other properties of FCurrNode
end;
procedure TXMLReader.ResolveEntity;
begin
if FCurrNode^.FNodeType <> ntEntityReference then
raise EInvalidOperation.Create('Wrong node type');
{... here must actually call EntityCheck, but it's called in main loop}
FNext := xtPushEntity;
end;
procedure TXMLReader.DoStartEntity;
var
src: TXMLCharSource;
begin
PushVC(nil);
if Assigned(FCurrEntity) then
ContextPush(FCurrEntity)
else else
s := AEntity.FName; begin
cur.AppendChild(doc.CreateEntityReference(s)); // Undefined entity -- use a dummy inputsource, in order to get a matching EndEntity event
src := TXMLCharSource.Create('');
src.Kind := skManualPop;
Initialize(src);
end;
{ Compensate for an extra entry in node stack }
FValidators[FNesting] := FValidators[FNesting-1];
FCursorStack[FNesting] := FCursorStack[FNesting-1];
UpdateConstraints;
FNext := xtText;
end; end;
procedure TXMLReader.DoStartElement; procedure TXMLReader.DoStartElement;
@ -2754,24 +2805,27 @@ begin
FNext := xtText; FNext := xtText;
while Read do while Read do
begin begin
case FToken of case FCurrNode^.FNodeType of
xtText, xtWhitespace: ntText, ntWhitespace:
DoText(FValue.Buffer, FValue.Length, FToken = xtWhitespace); DoText(FValue.Buffer, FValue.Length, FToken = xtWhitespace);
xtCDSect: ntCDATA:
DoCDSect(FValue.Buffer, FValue.Length); DoCDSect(FValue.Buffer, FValue.Length);
xtPI: ntProcessingInstruction:
CreatePINode; CreatePINode;
xtComment: ntComment:
DoComment(FCurrNode^.FValueStart, FCurrNode^.FValueLength); DoComment(FCurrNode^.FValueStart, FCurrNode^.FValueLength);
xtElement: ntElement:
DoStartElement; DoStartElement;
xtEndElement: ntEndElement:
DoEndElement; DoEndElement;
xtDoctype: ntDocumentType:
begin begin
ValidateDTD;
if not FCanonical then if not FCanonical then
doc.AppendChild(TDOMDocumentType.Create(doc, FDocType)); doc.AppendChild(TDOMDocumentType.Create(doc, FDocType));
end; end;
ntEntityReference:
DoEntityReference;
end; end;
end; end;
end; end;
@ -2806,8 +2860,9 @@ begin
if FNamespaces then if FNamespaces then
FNSHelper.EndElement; FNSHelper.EndElement;
PopVC; PopVC;
FNext := xtText; end
end; else if FNext = xtPushEntity then
DoStartEntity;
InCDATA := (FNext = xtCDSect); InCDATA := (FNext = xtCDSect);
StoreLocation(FTokenStart); StoreLocation(FTokenStart);
@ -2868,8 +2923,16 @@ begin
FatalError('Unterminated CDATA section', -1); FatalError('Unterminated CDATA section', -1);
if FNesting > FSource.FStartNesting then if FNesting > FSource.FStartNesting then
FatalError('End-tag is missing for ''%s''', [FNodeStack[FNesting].FQName^.Key]); FatalError('End-tag is missing for ''%s''', [FNodeStack[FNesting].FQName^.Key]);
if ContextPop then Continue;
tok := xtEOF; if Assigned(FSource.FParent) then
begin
if FExpandEntities and ContextPop then
Continue
else
tok := xtEntityEnd;
end
else
tok := xtEOF;
end end
else if wc = '>' then else if wc = '>' then
begin begin
@ -2935,7 +2998,8 @@ begin
FNext := xtText; FNext := xtText;
case tok of case tok of
xtEntity: AppendReference(FCursorStack[FNesting], FCurrEntity); xtEntity: HandleEntityStart;
xtEntityEnd: HandleEntityEnd;
xtElement: ParseStartTag; xtElement: ParseStartTag;
xtEndElement: ParseEndTag; xtEndElement: ParseEndTag;
xtPI: ParsePI; xtPI: ParsePI;
@ -3634,6 +3698,7 @@ begin
if FNesting > 0 then Dec(FNesting); if FNesting > 0 then Dec(FNesting);
FCurrNode := @FNodeStack[FNesting]; FCurrNode := @FNodeStack[FNesting];
UpdateConstraints; UpdateConstraints;
FNext := xtText;
end; end;
procedure TXMLReader.UpdateConstraints; procedure TXMLReader.UpdateConstraints;