XML reader:

* Parse entities by creating another instance of TXMLReader. This is much more straightforward than saving/restoring context of the existing reader.
* Fixed the version-setting logic so that the ReadXMLFragment procedures are now suitable for reading entities:
 they accept streams conforming to the extParsedEnt production [78] of the XML specification, and correctly read fragments into documents having version=1.1.

git-svn-id: trunk@16046 -
This commit is contained in:
sergei 2010-09-26 04:50:55 +00:00
parent b5fadc3414
commit 347267dfe6

View File

@ -358,6 +358,7 @@ type
procedure SkipQuote(out Delim: WideChar; required: Boolean = True); procedure SkipQuote(out Delim: WideChar; required: Boolean = True);
procedure Initialize(ASource: TXMLCharSource); procedure Initialize(ASource: TXMLCharSource);
procedure EntityToSource(AEntity: TDOMEntityEx; out Src: TXMLCharSource);
function ContextPush(AEntity: TDOMEntityEx): Boolean; function ContextPush(AEntity: TDOMEntityEx): Boolean;
function ContextPop(Forced: Boolean = False): Boolean; function ContextPop(Forced: Boolean = False): Boolean;
procedure XML11_BuildTables; procedure XML11_BuildTables;
@ -817,8 +818,6 @@ procedure TXMLDecodingSource.Initialize;
begin begin
inherited; inherited;
FLineNo := 1; FLineNo := 1;
FXml11Rules := FReader.FXML11;
FDecoder.Decode := @Decode_UTF8; FDecoder.Decode := @Decode_UTF8;
FFixedUCS2 := ''; FFixedUCS2 := '';
@ -847,9 +846,11 @@ begin
begin begin
FBufSize := 3; // don't decode past XML declaration FBufSize := 3; // don't decode past XML declaration
Inc(FBuf, Length(XmlSign)); Inc(FBuf, Length(XmlSign));
FReader.ParseXmlOrTextDecl(FParent <> nil); FReader.ParseXmlOrTextDecl((FParent <> nil) or (FReader.FState <> rsProlog));
end; end;
FBufSize := 2047; FBufSize := 2047;
if FReader.FXML11 then
FReader.XML11_BuildTables;
end; end;
function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean; function TXMLDecodingSource.SetEncoding(const AEncoding: string): Boolean;
@ -1337,8 +1338,9 @@ begin
doc := AOwner.OwnerDocument; doc := AOwner.OwnerDocument;
FCursor := AOwner as TDOMNode_WithChildren; FCursor := AOwner as TDOMNode_WithChildren;
FState := rsRoot; FState := rsRoot;
Initialize(ASource);
FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1'); FXML11 := doc.InheritsFrom(TXMLDocument) and (TXMLDocument(doc).XMLVersion = '1.1');
Initialize(ASource);
FDocType := TDOMDocumentTypeEx(doc.DocType);
ParseContent; ParseContent;
end; end;
@ -1581,20 +1583,18 @@ end;
const const
PrefixChar: array[Boolean] of string = ('', '%'); PrefixChar: array[Boolean] of string = ('', '%');
function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean; procedure TXMLReader.EntityToSource(AEntity: TDOMEntityEx; out Src: TXMLCharSource);
var
Src: TXMLCharSource;
begin begin
if AEntity.FOnStack then if AEntity.FOnStack then
FatalError('Entity ''%s%s'' recursively references itself', [PrefixChar[AEntity.FIsPE], AEntity.FName]); FatalError('Entity ''%s%s'' recursively references itself', [PrefixChar[AEntity.FIsPE], AEntity.FName]);
if (AEntity.SystemID <> '') and not AEntity.FPrefetched then if (AEntity.SystemID <> '') and not AEntity.FPrefetched then
begin begin
Result := ResolveEntity(AEntity.SystemID, AEntity.PublicID, AEntity.FURI, Src); if not ResolveEntity(AEntity.SystemID, AEntity.PublicID, AEntity.FURI, Src) then
if not Result then
begin begin
// TODO: a detailed message like SysErrorMessage(GetLastError) would be great here // TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]); ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]);
Src := nil;
Exit; Exit;
end; end;
end end
@ -1610,9 +1610,16 @@ begin
AEntity.FOnStack := True; AEntity.FOnStack := True;
Src.FEntity := AEntity; Src.FEntity := AEntity;
end;
Initialize(Src); function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean;
Result := True; var
Src: TXMLCharSource;
begin
EntityToSource(AEntity, Src);
Result := Assigned(Src);
if Result then
Initialize(Src);
end; end;
function TXMLReader.ContextPop(Forced: Boolean): Boolean; function TXMLReader.ContextPop(Forced: Boolean): Boolean;
@ -1644,10 +1651,8 @@ function TXMLReader.EntityCheck(NoExternals: Boolean): TDOMEntityEx;
var var
RefName: WideString; RefName: WideString;
cnt: Integer; cnt: Integer;
SaveCursor: TDOMNode_WithChildren; InnerReader: TXMLReader;
SaveState: TXMLReadState; Src: TXMLCharSource;
SaveElDef: TDOMElementDef;
SaveValue: TWideCharBuf;
begin begin
Result := nil; Result := nil;
SetString(RefName, FName.Buffer, FName.Length); SetString(RefName, FName.Buffer, FName.Length);
@ -1676,30 +1681,17 @@ begin
if not Result.FResolved then if not Result.FResolved then
begin begin
// To build children of the entity itself, we must parse it "out of context" // To build children of the entity itself, we must parse it "out of context"
SaveCursor := FCursor; InnerReader := TXMLReader.Create;
SaveElDef := FValidator[FNesting].FElementDef;
SaveState := FState;
SaveValue := FValue;
if ContextPush(Result) then
try try
FCursor := Result; // build child node tree for the entity EntityToSource(Result, Src);
Result.SetReadOnly(False); Result.SetReadOnly(False);
FState := rsRoot; if Assigned(Src) then
FValidator[FNesting].FElementDef := nil; InnerReader.ProcessFragment(Src, Result);
UpdateConstraints;
FSource.DTDSubsetType := dsExternal; // avoids ContextPop at the end
BufAllocate(FValue, 256);
ParseContent;
Result.FResolved := True; Result.FResolved := True;
finally finally
FreeMem(FValue.Buffer); InnerReader.Free;
FValue := SaveValue; Result.FOnStack := False;
Result.SetReadOnly(True); Result.SetReadOnly(True);
ContextPop(True);
FCursor := SaveCursor;
FState := SaveState;
FValidator[FNesting].FElementDef := SaveElDef;
UpdateConstraints;
end; end;
end; end;
// at this point we know the charcount of the entity being included // at this point we know the charcount of the entity being included
@ -2042,8 +2034,8 @@ begin
ExpectString('?>'); ExpectString('?>');
{ Switch to 1.1 rules only after declaration is parsed completely. This is to { Switch to 1.1 rules only after declaration is parsed completely. This is to
ensure that NEL and LSEP within declaration are rejected (rmt-056, rmt-057) } ensure that NEL and LSEP within declaration are rejected (rmt-056, rmt-057) }
if (not TextDecl) and (Ver = xmlVersion11) then if Ver = xmlVersion11 then
XML11_BuildTables; FXML11 := True;
end; end;
procedure TXMLReader.DTDReloadHook; procedure TXMLReader.DTDReloadHook;
@ -2759,7 +2751,7 @@ begin
FatalError('Illegal at document level'); FatalError('Illegal at document level');
StoreLocation(FTokenStart); StoreLocation(FTokenStart);
InCDATA := True; InCDATA := True;
if FCDSectionsAsText then if FCDSectionsAsText or (FValue.Length = 0) then
Continue; Continue;
tok := xtCDSect; tok := xtCDSect;
end end