mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-18 13:49:12 +02:00
xmlread.pp: More on entity processing:
* General entities are now processed non-recursively;
* They are now re-parsed on each inclusion, enabling proper validation and ensuring SAX-compatible order of events. Also less dependent on DOM-specific calls like CloneNode.

git-svn-id: trunk@14232 -
This commit is contained in:
parent
1900d12a0c
commit
25a3cc09ef
@ -166,6 +166,7 @@ type
|
|||||||
TDOMEntityEx = class(TDOMEntity)
|
TDOMEntityEx = class(TDOMEntity)
|
||||||
protected
|
protected
|
||||||
FExternallyDeclared: Boolean;
|
FExternallyDeclared: Boolean;
|
||||||
|
FPrefetched: Boolean;
|
||||||
FResolved: Boolean;
|
FResolved: Boolean;
|
||||||
FOnStack: Boolean;
|
FOnStack: Boolean;
|
||||||
FBetweenDecls: Boolean;
|
FBetweenDecls: Boolean;
|
||||||
@ -196,6 +197,7 @@ type
|
|||||||
FXML11Rules: Boolean;
|
FXML11Rules: Boolean;
|
||||||
FSystemID: WideString;
|
FSystemID: WideString;
|
||||||
FCharCount: Cardinal;
|
FCharCount: Cardinal;
|
||||||
|
FStartNesting: Integer;
|
||||||
function GetSystemID: WideString;
|
function GetSystemID: WideString;
|
||||||
protected
|
protected
|
||||||
function Reload: Boolean; virtual;
|
function Reload: Boolean; virtual;
|
||||||
@ -362,7 +364,6 @@ type
|
|||||||
|
|
||||||
procedure SkipQuote(out Delim: WideChar; required: Boolean = True);
|
procedure SkipQuote(out Delim: WideChar; required: Boolean = True);
|
||||||
procedure Initialize(ASource: TXMLCharSource);
|
procedure Initialize(ASource: TXMLCharSource);
|
||||||
function DoParseAttValue(Delim: WideChar): Boolean;
|
|
||||||
function ContextPush(AEntity: TDOMEntityEx): Boolean;
|
function ContextPush(AEntity: TDOMEntityEx): Boolean;
|
||||||
function ContextPop: Boolean;
|
function ContextPop: Boolean;
|
||||||
procedure XML11_BuildTables;
|
procedure XML11_BuildTables;
|
||||||
@ -416,9 +417,10 @@ type
|
|||||||
procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
|
procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
|
||||||
procedure ParseContent; // [43]
|
procedure ParseContent; // [43]
|
||||||
function ResolvePredefined: Boolean;
|
function ResolvePredefined: Boolean;
|
||||||
function EntityCheck: TDOMEntityEx;
|
function EntityCheck(NoExternals: Boolean = False): TDOMEntityEx;
|
||||||
|
procedure AppendReference(AEntity: TDOMEntityEx);
|
||||||
|
procedure PrefetchEntity(AEntity: TDOMEntityEx);
|
||||||
procedure StartGE(AEntity: TDOMEntityEx);
|
procedure StartGE(AEntity: TDOMEntityEx);
|
||||||
procedure IncludeEntity(InAttr: Boolean);
|
|
||||||
procedure StartPE;
|
procedure StartPE;
|
||||||
function ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
|
function ParseRef(var ToFill: TWideCharBuf): Boolean; // [67]
|
||||||
function ParseExternalID(out SysID, PubID: WideString; // [75]
|
function ParseExternalID(out SysID, PubID: WideString; // [75]
|
||||||
@ -1703,11 +1705,16 @@ const
|
|||||||
AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
|
AttrDelims: TSetOfChar = [#0, '<', '&', '''', '"', #9, #10, #13];
|
||||||
GT_Delim: TSetOfChar = [#0, '>'];
|
GT_Delim: TSetOfChar = [#0, '>'];
|
||||||
|
|
||||||
function TXMLReader.DoParseAttValue(Delim: WideChar): Boolean;
|
procedure TXMLReader.ExpectAttValue;
|
||||||
var
|
var
|
||||||
wc: WideChar;
|
wc: WideChar;
|
||||||
|
Delim: WideChar;
|
||||||
|
ent: TDOMEntityEx;
|
||||||
|
start: TObject;
|
||||||
begin
|
begin
|
||||||
|
SkipQuote(Delim);
|
||||||
FValue.Length := 0;
|
FValue.Length := 0;
|
||||||
|
start := FSource.FEntity;
|
||||||
repeat
|
repeat
|
||||||
wc := FSource.SkipUntil(FValue, AttrDelims);
|
wc := FSource.SkipUntil(FValue, AttrDelims);
|
||||||
if wc = '<' then
|
if wc = '<' then
|
||||||
@ -1716,42 +1723,48 @@ begin
|
|||||||
begin
|
begin
|
||||||
if ParseRef(FValue) or ResolvePredefined then
|
if ParseRef(FValue) or ResolvePredefined then
|
||||||
Continue;
|
Continue;
|
||||||
// have to insert entity or reference
|
|
||||||
if FValue.Length > 0 then
|
ent := EntityCheck(True);
|
||||||
|
if (ent = nil) or (not FExpandEntities) then
|
||||||
begin
|
begin
|
||||||
DoAttrText(FValue.Buffer, FValue.Length);
|
if FValue.Length > 0 then
|
||||||
FValue.Length := 0;
|
begin
|
||||||
end;
|
DoAttrText(FValue.Buffer, FValue.Length);
|
||||||
IncludeEntity(True);
|
FValue.Length := 0;
|
||||||
|
end;
|
||||||
|
AppendReference(ent);
|
||||||
|
end
|
||||||
|
else
|
||||||
|
StartGE(ent);
|
||||||
end
|
end
|
||||||
else if wc <> #0 then
|
else if wc <> #0 then
|
||||||
begin
|
begin
|
||||||
FSource.NextChar;
|
FSource.NextChar;
|
||||||
if wc = Delim then
|
if (wc = Delim) and (FSource.FEntity = start) then
|
||||||
Break;
|
Break;
|
||||||
if (wc = #10) or (wc = #9) or (wc = #13) then
|
if (wc = #10) or (wc = #9) or (wc = #13) then
|
||||||
wc := #32;
|
wc := #32;
|
||||||
BufAppend(FValue, wc);
|
BufAppend(FValue, wc);
|
||||||
end;
|
end
|
||||||
until wc = #0;
|
else if (FSource.FEntity = start) or not ContextPop then // #0
|
||||||
// When processing the included entity, Delim = #0, so getting here isn't a error
|
FatalError('Literal has no closing quote', -1);
|
||||||
|
until False;
|
||||||
if FValue.Length > 0 then
|
if FValue.Length > 0 then
|
||||||
DoAttrText(FValue.Buffer, FValue.Length);
|
DoAttrText(FValue.Buffer, FValue.Length);
|
||||||
FValue.Length := 0;
|
FValue.Length := 0;
|
||||||
Result := wc <> #0;
|
|
||||||
end;
|
end;
|
||||||
|
|
||||||
function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean;
|
function TXMLReader.ContextPush(AEntity: TDOMEntityEx): Boolean;
|
||||||
var
|
var
|
||||||
Src: TXMLCharSource;
|
Src: TXMLCharSource;
|
||||||
begin
|
begin
|
||||||
if (AEntity.SystemID <> '') and not AEntity.FResolved then
|
if (AEntity.SystemID <> '') and not AEntity.FPrefetched then
|
||||||
begin
|
begin
|
||||||
Result := ResolveEntity(AEntity.SystemID, AEntity.PublicID, AEntity.FURI, Src);
|
Result := ResolveEntity(AEntity.SystemID, AEntity.PublicID, AEntity.FURI, Src);
|
||||||
if not Result then
|
if not Result then
|
||||||
begin
|
begin
|
||||||
// TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
|
// TODO: a detailed message like SysErrorMessage(GetLastError) would be great here
|
||||||
ValidationError('Unable to resolve external entity ''%s''', [AEntity.NodeName]);
|
ValidationError('Unable to resolve external entity ''%s''', [AEntity.FName]);
|
||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
end
|
end
|
||||||
@ -1797,10 +1810,14 @@ begin
|
|||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
function TXMLReader.EntityCheck: TDOMEntityEx;
|
function TXMLReader.EntityCheck(NoExternals: Boolean): TDOMEntityEx;
|
||||||
var
|
var
|
||||||
RefName: WideString;
|
RefName: WideString;
|
||||||
cnt: Integer;
|
cnt: Integer;
|
||||||
|
SaveCursor: TDOMNode_WithChildren;
|
||||||
|
SaveState: TXMLReadState;
|
||||||
|
SaveElDef: TDOMElementDef;
|
||||||
|
SaveValue: TWideCharBuf;
|
||||||
begin
|
begin
|
||||||
Result := nil;
|
Result := nil;
|
||||||
SetString(RefName, FName.Buffer, FName.Length);
|
SetString(RefName, FName.Buffer, FName.Length);
|
||||||
@ -1822,6 +1839,46 @@ begin
|
|||||||
FatalError('Standalone constraint violation', cnt);
|
FatalError('Standalone constraint violation', cnt);
|
||||||
if Result.NotationName <> '' then
|
if Result.NotationName <> '' then
|
||||||
FatalError('Reference to unparsed entity ''%s''', [RefName], cnt);
|
FatalError('Reference to unparsed entity ''%s''', [RefName], cnt);
|
||||||
|
|
||||||
|
if NoExternals and (Result.SystemID <> '') then
|
||||||
|
FatalError('External entity reference is not allowed in attribute value', cnt);
|
||||||
|
|
||||||
|
if not Result.FResolved then
|
||||||
|
begin
|
||||||
|
if Result.FOnStack then
|
||||||
|
FatalError('Entity ''%s'' recursively references itself', [RefName]);
|
||||||
|
|
||||||
|
// To build children of the entity itself, we must parse it "out of context"
|
||||||
|
SaveCursor := FCursor;
|
||||||
|
SaveElDef := FValidator[FNesting].FElementDef;
|
||||||
|
SaveState := FState;
|
||||||
|
SaveValue := FValue;
|
||||||
|
if ContextPush(Result) then
|
||||||
|
try
|
||||||
|
FCursor := Result; // build child node tree for the entity
|
||||||
|
Result.SetReadOnly(False);
|
||||||
|
FState := rsRoot;
|
||||||
|
FValidator[FNesting].FElementDef := nil;
|
||||||
|
UpdateConstraints;
|
||||||
|
FSource.DTDSubsetType := dsExternal; // avoids ContextPop at the end
|
||||||
|
BufAllocate(FValue, 256);
|
||||||
|
ParseContent;
|
||||||
|
Result.FResolved := True;
|
||||||
|
finally
|
||||||
|
FreeMem(FValue.Buffer);
|
||||||
|
FValue := SaveValue;
|
||||||
|
Result.SetReadOnly(True);
|
||||||
|
FSource.DTDSubsetType := dsNone;
|
||||||
|
ContextPop;
|
||||||
|
FCursor := SaveCursor;
|
||||||
|
FState := SaveState;
|
||||||
|
FValidator[FNesting].FElementDef := SaveElDef;
|
||||||
|
UpdateConstraints;
|
||||||
|
end;
|
||||||
|
end;
|
||||||
|
// at this point we know the charcount of the entity being included
|
||||||
|
Inc(FSource.FCharCount, Result.FCharCount - cnt);
|
||||||
|
CheckMaxChars;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TXMLReader.StartGE(AEntity: TDOMEntityEx);
|
procedure TXMLReader.StartGE(AEntity: TDOMEntityEx);
|
||||||
@ -1831,69 +1888,6 @@ begin
|
|||||||
ContextPush(AEntity);
|
ContextPush(AEntity);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TXMLReader.IncludeEntity(InAttr: Boolean);
|
|
||||||
var
|
|
||||||
AEntity: TDOMEntityEx;
|
|
||||||
RefName: WideString;
|
|
||||||
Child: TDOMNode;
|
|
||||||
SaveCursor: TDOMNode_WithChildren;
|
|
||||||
cnt: Cardinal;
|
|
||||||
begin
|
|
||||||
SetString(RefName, FName.Buffer, FName.Length);
|
|
||||||
cnt := FName.Length+2;
|
|
||||||
AEntity := EntityCheck;
|
|
||||||
|
|
||||||
if AEntity = nil then
|
|
||||||
begin
|
|
||||||
FCursor.AppendChild(doc.CreateEntityReference(RefName));
|
|
||||||
Exit;
|
|
||||||
end;
|
|
||||||
|
|
||||||
if InAttr and (AEntity.SystemID <> '') then
|
|
||||||
FatalError('External entity reference is not allowed in attribute value', cnt);
|
|
||||||
|
|
||||||
if not AEntity.FResolved then
|
|
||||||
begin
|
|
||||||
if AEntity.FOnStack then
|
|
||||||
FatalError('Entity ''%s'' recursively references itself', [RefName]);
|
|
||||||
|
|
||||||
if ContextPush(AEntity) then
|
|
||||||
begin
|
|
||||||
SaveCursor := FCursor;
|
|
||||||
FCursor := AEntity; // build child node tree for the entity
|
|
||||||
try
|
|
||||||
AEntity.SetReadOnly(False);
|
|
||||||
if InAttr then
|
|
||||||
DoParseAttValue(#0)
|
|
||||||
else
|
|
||||||
ParseContent;
|
|
||||||
AEntity.FResolved := True;
|
|
||||||
finally
|
|
||||||
AEntity.SetReadOnly(True);
|
|
||||||
ContextPop;
|
|
||||||
FCursor := SaveCursor;
|
|
||||||
FValue.Length := 0;
|
|
||||||
end;
|
|
||||||
end;
|
|
||||||
end;
|
|
||||||
// charcount of the entity included is known at this point
|
|
||||||
Inc(FSource.FCharCount, AEntity.FCharCount - cnt);
|
|
||||||
CheckMaxChars;
|
|
||||||
if (not FExpandEntities) or (not AEntity.FResolved) then
|
|
||||||
begin
|
|
||||||
// This will clone Entity children
|
|
||||||
FCursor.AppendChild(doc.CreateEntityReference(RefName));
|
|
||||||
Exit;
|
|
||||||
end;
|
|
||||||
|
|
||||||
Child := AEntity.FirstChild; // clone the entity node tree
|
|
||||||
while Assigned(Child) do
|
|
||||||
begin
|
|
||||||
FCursor.AppendChild(Child.CloneNode(True));
|
|
||||||
Child := Child.NextSibling;
|
|
||||||
end;
|
|
||||||
end;
|
|
||||||
|
|
||||||
procedure TXMLReader.StartPE;
|
procedure TXMLReader.StartPE;
|
||||||
var
|
var
|
||||||
PEName: WideString;
|
PEName: WideString;
|
||||||
@ -1910,26 +1904,11 @@ begin
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
if PEnt.FOnStack then
|
if PEnt.FOnStack then
|
||||||
FatalError('Entity ''%%%s'' recursively references itself', [PEnt.NodeName]);
|
FatalError('Entity ''%%%s'' recursively references itself', [PEnt.FName]);
|
||||||
|
|
||||||
{ cache an external PE so it's only fetched once }
|
{ cache an external PE so it's only fetched once }
|
||||||
if (PEnt.SystemID <> '') and not PEnt.FResolved then
|
if (PEnt.SystemID <> '') and not PEnt.FPrefetched then
|
||||||
begin
|
PrefetchEntity(PEnt);
|
||||||
if ContextPush(PEnt) then
|
|
||||||
try
|
|
||||||
FValue.Length := 0;
|
|
||||||
FSource.SkipUntil(FValue, [#0]);
|
|
||||||
SetString(PEnt.FReplacementText, FValue.Buffer, FValue.Length);
|
|
||||||
PEnt.FCharCount := FValue.Length;
|
|
||||||
PEnt.FStartLocation.Line := 1;
|
|
||||||
PEnt.FStartLocation.LinePos := 1;
|
|
||||||
PEnt.FURI := FSource.SystemID; // replace base URI with absolute one
|
|
||||||
finally
|
|
||||||
ContextPop;
|
|
||||||
PEnt.FResolved := True;
|
|
||||||
FValue.Length := 0;
|
|
||||||
end;
|
|
||||||
end;
|
|
||||||
|
|
||||||
Inc(FSource.FCharCount, PEnt.FCharCount);
|
Inc(FSource.FCharCount, PEnt.FCharCount);
|
||||||
CheckMaxChars;
|
CheckMaxChars;
|
||||||
@ -1939,13 +1918,22 @@ begin
|
|||||||
FHavePERefs := True;
|
FHavePERefs := True;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TXMLReader.ExpectAttValue; // [10]
|
procedure TXMLReader.PrefetchEntity(AEntity: TDOMEntityEx);
|
||||||
var
|
|
||||||
Delim: WideChar;
|
|
||||||
begin
|
begin
|
||||||
SkipQuote(Delim);
|
if ContextPush(AEntity) then
|
||||||
if not DoParseAttValue(Delim) then
|
try
|
||||||
FatalError('Literal has no closing quote',-1);
|
FValue.Length := 0;
|
||||||
|
FSource.SkipUntil(FValue, [#0]);
|
||||||
|
SetString(AEntity.FReplacementText, FValue.Buffer, FValue.Length);
|
||||||
|
AEntity.FCharCount := FValue.Length;
|
||||||
|
AEntity.FStartLocation.Line := 1;
|
||||||
|
AEntity.FStartLocation.LinePos := 1;
|
||||||
|
AEntity.FURI := FSource.SystemID; // replace base URI with absolute one
|
||||||
|
finally
|
||||||
|
ContextPop;
|
||||||
|
AEntity.FPrefetched := True;
|
||||||
|
FValue.Length := 0;
|
||||||
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure Normalize(var Buf: TWideCharBuf; Modified: PBoolean);
|
procedure Normalize(var Buf: TWideCharBuf; Modified: PBoolean);
|
||||||
@ -2032,11 +2020,9 @@ begin
|
|||||||
begin
|
begin
|
||||||
if ResolvePredefined then
|
if ResolvePredefined then
|
||||||
Continue;
|
Continue;
|
||||||
ent := EntityCheck;
|
ent := EntityCheck(True);
|
||||||
if ent = nil then
|
if ent = nil then
|
||||||
Continue;
|
Continue;
|
||||||
if ent.SystemID <> '' then
|
|
||||||
FatalError('External entity reference is not allowed in attribute value', FName.Length+2);
|
|
||||||
StartGE(ent);
|
StartGE(ent);
|
||||||
end;
|
end;
|
||||||
end
|
end
|
||||||
@ -2860,6 +2846,18 @@ begin
|
|||||||
FatalError('Unterminated CDATA section', -1);
|
FatalError('Unterminated CDATA section', -1);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
procedure TXMLReader.AppendReference(AEntity: TDOMEntityEx);
|
||||||
|
var
|
||||||
|
s: WideString;
|
||||||
|
begin
|
||||||
|
if AEntity = nil then
|
||||||
|
SetString(s, FName.Buffer, FName.Length)
|
||||||
|
else
|
||||||
|
s := AEntity.nodeName;
|
||||||
|
FCursor.AppendChild(doc.CreateEntityReference(s));
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
// The code below does the bulk of the parsing, and must be as fast as possible.
|
// The code below does the bulk of the parsing, and must be as fast as possible.
|
||||||
// To minimize CPU cache effects, methods from different classes are kept together
|
// To minimize CPU cache effects, methods from different classes are kept together
|
||||||
|
|
||||||
@ -2906,9 +2904,9 @@ procedure TXMLReader.ParseContent;
|
|||||||
var
|
var
|
||||||
nonWs: Boolean;
|
nonWs: Boolean;
|
||||||
wc: WideChar;
|
wc: WideChar;
|
||||||
StartNesting: Integer;
|
ent: TDOMEntityEx;
|
||||||
begin
|
begin
|
||||||
StartNesting := FNesting;
|
FSource.FStartNesting := FNesting;
|
||||||
repeat
|
repeat
|
||||||
if FSource.FBuf^ = '<' then
|
if FSource.FBuf^ = '<' then
|
||||||
begin
|
begin
|
||||||
@ -2917,7 +2915,7 @@ begin
|
|||||||
FSource.Reload;
|
FSource.Reload;
|
||||||
if FSource.FBuf^ = '/' then
|
if FSource.FBuf^ = '/' then
|
||||||
begin
|
begin
|
||||||
if FNesting <= StartNesting then
|
if FNesting <= FSource.FStartNesting then
|
||||||
FatalError('End-tag is not allowed here');
|
FatalError('End-tag is not allowed here');
|
||||||
Inc(FSource.FBuf);
|
Inc(FSource.FBuf);
|
||||||
ParseEndTag;
|
ParseEndTag;
|
||||||
@ -2939,6 +2937,12 @@ begin
|
|||||||
else
|
else
|
||||||
RaiseNameNotFound;
|
RaiseNameNotFound;
|
||||||
end
|
end
|
||||||
|
else if FSource.FBuf^ = #0 then
|
||||||
|
begin
|
||||||
|
if FNesting > FSource.FStartNesting then
|
||||||
|
FatalError('End-tag is missing for ''%s''', [FValidator[FNesting].FElement.NSI.QName^.Key]);
|
||||||
|
if not ContextPop then Break;
|
||||||
|
end
|
||||||
else
|
else
|
||||||
begin
|
begin
|
||||||
FValue.Length := 0;
|
FValue.Length := 0;
|
||||||
@ -2974,7 +2978,14 @@ begin
|
|||||||
DoText(FValue.Buffer, FValue.Length, not nonWs);
|
DoText(FValue.Buffer, FValue.Length, not nonWs);
|
||||||
FValue.Length := 0;
|
FValue.Length := 0;
|
||||||
end;
|
end;
|
||||||
IncludeEntity(False);
|
ent := EntityCheck;
|
||||||
|
if (ent = nil) or (not FExpandEntities) then
|
||||||
|
AppendReference(ent)
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
StartGE(ent);
|
||||||
|
FSource.FStartNesting := FNesting;
|
||||||
|
end;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
until False;
|
until False;
|
||||||
@ -2990,9 +3001,7 @@ begin
|
|||||||
else if nonWs then
|
else if nonWs then
|
||||||
FatalError('Illegal at document level', -1);
|
FatalError('Illegal at document level', -1);
|
||||||
end;
|
end;
|
||||||
until FSource.FBuf^ = #0;
|
until False;
|
||||||
if FNesting > StartNesting then
|
|
||||||
FatalError('End-tag is missing for ''%s''', [FValidator[FNesting].FElement.NSI.QName^.Key]);
|
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TXMLCharSource.NextChar;
|
procedure TXMLCharSource.NextChar;
|
||||||
|
Loading…
Reference in New Issue
Block a user