* Patch from Sergei Gorelkin

xmlutils.pp:
  + Added THashTable - a simple hashed container with WideString keys.

  dom.pp:
  * Use the hash table instead of a sorted list for storing document IDs.
  * Replaced all TLists by TFPList (which is smaller and faster).
  * Fixed TDOMElement.RemoveAttributeNode to throw NOT_FOUND_ERR when
    the requested node is not one of the element's attributes.
  + Added node read-only checks where required by the specs, this fixes
    about 50 DOM tests.

  xmlread.pp:

  * Got rid of TXMLCharSource.FReloadHook, the corresponding procedure may
    be called directly.
  * Used a separate buffer to store the entity value literals, this
    enables correct including of external PEs that have a text declaration
    at the beginning.
  * Some refactoring: ParseAttribute has been split into a separate
    procedure, ProcessTextAndRefs was merged into ParseContent.

git-svn-id: trunk@11942 -
This commit is contained in:
michael 2008-10-22 12:25:12 +00:00
parent 8fe91950e9
commit e632e754cf
3 changed files with 429 additions and 183 deletions

View File

@ -38,7 +38,7 @@ unit DOM;
interface interface
uses uses
SysUtils, Classes, AVL_Tree; SysUtils, Classes, AVL_Tree, xmlutils;
// ------------------------------------------------------- // -------------------------------------------------------
// DOMException // DOMException
@ -221,6 +221,8 @@ type
function GetPrefix: DOMString; virtual; function GetPrefix: DOMString; virtual;
procedure SetPrefix(const Value: DOMString); virtual; procedure SetPrefix(const Value: DOMString); virtual;
function GetOwnerDocument: TDOMDocument; virtual; function GetOwnerDocument: TDOMDocument; virtual;
procedure SetReadOnly(Value: Boolean);
procedure Changing;
public public
constructor Create(AOwner: TDOMDocument); constructor Create(AOwner: TDOMDocument);
destructor Destroy; override; destructor Destroy; override;
@ -299,7 +301,7 @@ type
protected protected
FNode: TDOMNode; FNode: TDOMNode;
FRevision: Integer; FRevision: Integer;
FList: TList; FList: TFPList;
function GetCount: LongWord; function GetCount: LongWord;
function GetItem(index: LongWord): TDOMNode; function GetItem(index: LongWord): TDOMNode;
procedure BuildList; virtual; procedure BuildList; virtual;
@ -333,7 +335,7 @@ type
protected protected
FOwner: TDOMNode; FOwner: TDOMNode;
FNodeType: Integer; FNodeType: Integer;
FList: TList; FList: TFPList;
function GetItem(index: LongWord): TDOMNode; function GetItem(index: LongWord): TDOMNode;
function GetLength: LongWord; function GetLength: LongWord;
function Find(const name: DOMString; out Index: LongWord): Boolean; function Find(const name: DOMString; out Index: LongWord): Boolean;
@ -415,7 +417,7 @@ type
TDOMDocument = class(TDOMNode_WithChildren) TDOMDocument = class(TDOMNode_WithChildren)
protected protected
FIDList: TList; FIDList: THashTable;
FRevision: Integer; FRevision: Integer;
FXML11: Boolean; FXML11: Boolean;
FImplementation: TDOMImplementation; FImplementation: TDOMImplementation;
@ -427,8 +429,6 @@ type
function GetOwnerDocument: TDOMDocument; override; function GetOwnerDocument: TDOMDocument; override;
procedure SetTextContent(const value: DOMString); override; procedure SetTextContent(const value: DOMString); override;
function IndexOfNS(const nsURI: DOMString): Integer; function IndexOfNS(const nsURI: DOMString): Integer;
function FindID(const aID: DOMString; out Index: LongWord): Boolean;
procedure ClearIDList;
procedure RemoveID(Elem: TDOMElement); procedure RemoveID(Elem: TDOMElement);
public public
property DocType: TDOMDocumentType read GetDocType; property DocType: TDOMDocumentType read GetDocType;
@ -713,16 +713,6 @@ type
implementation implementation
uses
xmlutils;
type
PIDItem = ^TIDItem;
TIDItem = record
ID: WideString;
Element: TDOMElement;
end;
constructor TRefClass.Create; constructor TRefClass.Create;
begin begin
inherited Create; inherited Create;
@ -858,12 +848,14 @@ end;
function TDOMNode.InsertBefore(NewChild, RefChild: TDOMNode): TDOMNode; function TDOMNode.InsertBefore(NewChild, RefChild: TDOMNode): TDOMNode;
begin begin
Changing; // merely to comply with core3/nodeinsertbefore14
raise EDOMHierarchyRequest.Create('Node.InsertBefore'); raise EDOMHierarchyRequest.Create('Node.InsertBefore');
Result:=nil; Result:=nil;
end; end;
function TDOMNode.ReplaceChild(NewChild, OldChild: TDOMNode): TDOMNode; function TDOMNode.ReplaceChild(NewChild, OldChild: TDOMNode): TDOMNode;
begin begin
Changing; // merely to comply with core3/nodereplacechild21
raise EDOMHierarchyRequest.Create('Node.ReplaceChild'); raise EDOMHierarchyRequest.Create('Node.ReplaceChild');
Result:=nil; Result:=nil;
end; end;
@ -1000,6 +992,36 @@ begin
Result := FOwnerDocument; Result := FOwnerDocument;
end; end;
procedure TDOMNode.SetReadOnly(Value: Boolean);
var
child: TDOMNode;
attrs: TDOMNamedNodeMap;
I: Integer;
begin
if Value then
Include(FFlags, nfReadOnly)
else
Exclude(FFlags, nfReadOnly);
child := FirstChild;
while Assigned(child) do
begin
child.SetReadOnly(Value);
child := child.NextSibling;
end;
attrs := Attributes;
if Assigned(attrs) then
begin
for I := 0 to attrs.Length-1 do
attrs[I].SetReadOnly(Value);
end;
end;
procedure TDOMNode.Changing;
begin
if nfReadOnly in FFlags then
raise EDOMError.Create(NO_MODIFICATION_ALLOWED_ERR, 'Node.CheckReadOnly');
end;
function CompareDOMStrings(const s1, s2: DOMPChar; l1, l2: integer): integer; function CompareDOMStrings(const s1, s2: DOMPChar; l1, l2: integer): integer;
var i: integer; var i: integer;
begin begin
@ -1082,6 +1104,7 @@ begin
Result := NewChild; Result := NewChild;
NewChildType := NewChild.NodeType; NewChildType := NewChild.NodeType;
Changing;
if NewChild.FOwnerDocument <> FOwnerDocument then if NewChild.FOwnerDocument <> FOwnerDocument then
begin begin
if (NewChildType <> DOCUMENT_TYPE_NODE) or if (NewChildType <> DOCUMENT_TYPE_NODE) or
@ -1171,6 +1194,8 @@ end;
function TDOMNode_WithChildren.DetachChild(OldChild: TDOMNode): TDOMNode; function TDOMNode_WithChildren.DetachChild(OldChild: TDOMNode): TDOMNode;
begin begin
Changing;
if OldChild.ParentNode <> Self then if OldChild.ParentNode <> Self then
raise EDOMNotFound.Create('NodeWC.RemoveChild'); raise EDOMNotFound.Create('NodeWC.RemoveChild');
@ -1266,6 +1291,7 @@ end;
procedure TDOMNode_WithChildren.SetTextContent(const AValue: DOMString); procedure TDOMNode_WithChildren.SetTextContent(const AValue: DOMString);
begin begin
Changing;
FreeChildren; FreeChildren;
if AValue <> '' then if AValue <> '' then
AppendChild(FOwnerDocument.CreateTextNode(AValue)); AppendChild(FOwnerDocument.CreateTextNode(AValue));
@ -1295,7 +1321,7 @@ begin
inherited Create; inherited Create;
FNode := ANode; FNode := ANode;
FRevision := ANode.GetRevision-1; // force BuildList at first access FRevision := ANode.GetRevision-1; // force BuildList at first access
FList := TList.Create; FList := TFPList.Create;
end; end;
destructor TDOMNodeList.Destroy; destructor TDOMNodeList.Destroy;
@ -1395,7 +1421,7 @@ begin
inherited Create; inherited Create;
FOwner := AOwner; FOwner := AOwner;
FNodeType := ANodeType; FNodeType := ANodeType;
FList := TList.Create; FList := TFPList.Create;
end; end;
destructor TDOMNamedNodeMap.Destroy; destructor TDOMNamedNodeMap.Destroy;
@ -1467,7 +1493,9 @@ var
AttrOwner: TDOMNode; AttrOwner: TDOMNode;
begin begin
Result := 0; Result := 0;
if arg.FOwnerDocument <> FOwner.FOwnerDocument then if nfReadOnly in FOwner.FFlags then
Result := NO_MODIFICATION_ALLOWED_ERR
else if arg.FOwnerDocument <> FOwner.FOwnerDocument then
Result := WRONG_DOCUMENT_ERR Result := WRONG_DOCUMENT_ERR
else if arg.NodeType <> FNodeType then else if arg.NodeType <> FNodeType then
Result := HIERARCHY_REQUEST_ERR Result := HIERARCHY_REQUEST_ERR
@ -1537,6 +1565,8 @@ end;
function TDOMNamedNodeMap.RemoveNamedItem(const name: DOMString): TDOMNode; function TDOMNamedNodeMap.RemoveNamedItem(const name: DOMString): TDOMNode;
begin begin
if nfReadOnly in FOwner.FFlags then
raise EDOMError.Create(NO_MODIFICATION_ALLOWED_ERR, 'NamedNodeMap.RemoveNamedItem');
Result := InternalRemove(name); Result := InternalRemove(name);
if Result = nil then if Result = nil then
raise EDOMNotFound.Create('NamedNodeMap.RemoveNamedItem'); raise EDOMNotFound.Create('NamedNodeMap.RemoveNamedItem');
@ -1544,6 +1574,8 @@ end;
function TDOMNamedNodeMap.RemoveNamedItemNS(const namespaceURI, localName: DOMString): TDOMNode; function TDOMNamedNodeMap.RemoveNamedItemNS(const namespaceURI, localName: DOMString): TDOMNode;
begin begin
if nfReadOnly in FOwner.FFlags then
raise EDOMError.Create(NO_MODIFICATION_ALLOWED_ERR, 'NamedNodeMap.RemoveNamedItemNS');
// TODO: Implement TDOMNamedNodeMap.RemoveNamedItemNS // TODO: Implement TDOMNamedNodeMap.RemoveNamedItemNS
Result := nil; Result := nil;
end; end;
@ -1565,6 +1597,7 @@ end;
procedure TDOMCharacterData.SetNodeValue(const AValue: DOMString); procedure TDOMCharacterData.SetNodeValue(const AValue: DOMString);
begin begin
Changing;
FNodeValue := AValue; FNodeValue := AValue;
end; end;
@ -1577,11 +1610,13 @@ end;
procedure TDOMCharacterData.AppendData(const arg: DOMString); procedure TDOMCharacterData.AppendData(const arg: DOMString);
begin begin
Changing;
FNodeValue := FNodeValue + arg; FNodeValue := FNodeValue + arg;
end; end;
procedure TDOMCharacterData.InsertData(offset: LongWord; const arg: DOMString); procedure TDOMCharacterData.InsertData(offset: LongWord; const arg: DOMString);
begin begin
Changing;
if offset > Length then if offset > Length then
raise EDOMIndexSize.Create('CharacterData.InsertData'); raise EDOMIndexSize.Create('CharacterData.InsertData');
Insert(arg, FNodeValue, offset+1); Insert(arg, FNodeValue, offset+1);
@ -1589,6 +1624,7 @@ end;
procedure TDOMCharacterData.DeleteData(offset, count: LongWord); procedure TDOMCharacterData.DeleteData(offset, count: LongWord);
begin begin
Changing;
if offset > Length then if offset > Length then
raise EDOMIndexSize.Create('CharacterData.DeleteData'); raise EDOMIndexSize.Create('CharacterData.DeleteData');
Delete(FNodeValue, offset+1, count); Delete(FNodeValue, offset+1, count);
@ -1685,86 +1721,61 @@ end;
destructor TDOMDocument.Destroy; destructor TDOMDocument.Destroy;
begin begin
ClearIDList;
FreeAndNil(FIDList); // set to nil before starting destroying chidlren FreeAndNil(FIDList); // set to nil before starting destroying chidlren
inherited Destroy; inherited Destroy;
end; end;
function TDOMDocument.AddID(Attr: TDOMAttr): Boolean; function TDOMDocument.AddID(Attr: TDOMAttr): Boolean;
var var
I: Cardinal; ID: DOMString;
Item: PIDItem; Exists: Boolean;
p: PHashItem;
begin begin
if FIDList = nil then if FIDList = nil then
FIDList := TList.Create; FIDList := THashTable.Create(256, False);
New(Item);
Item^.ID := Attr.Value; ID := Attr.Value;
Item^.Element := Attr.OwnerElement; p := FIDList.FindOrAdd(DOMPChar(ID), Length(ID), Exists);
if not FindID(Item^.ID, I) then if not Exists then
begin begin
FIDList.Insert(I, Item); p^.Data := Attr.OwnerElement;
Result := True; Result := True;
end end
else else
begin
Dispose(Item);
Result := False; Result := False;
end;
end; end;
// This shouldn't be called if document has no IDs, // This shouldn't be called if document has no IDs,
// or when it is being destroyed // or when it is being destroyed
// TODO: This could be much faster if removing ID happens
// upon modification of corresponding attribute value.
type
TempRec = record
Element: TDOMElement;
Entry: PHashItem;
end;
function CheckID(Entry: PHashItem; arg: Pointer): Boolean;
begin
if Entry^.Data = TempRec(arg^).Element then
begin
TempRec(arg^).Entry := Entry;
Result := False;
end
else
Result := True;
end;
procedure TDOMDocument.RemoveID(Elem: TDOMElement); procedure TDOMDocument.RemoveID(Elem: TDOMElement);
var var
I: Integer; hr: TempRec;
begin begin
for I := 0 to FIDList.Count-1 do hr.Element := Elem;
begin hr.Entry := nil;
if PIDItem(FIDList.List^[I])^.Element = Elem then FIDList.ForEach(@CheckID, @hr);
begin if Assigned(hr.Entry) then
Dispose(PIDItem(FIDList.List^[I])); FIDList.Remove(hr.Entry);
FIDList.Delete(I);
Exit;
end;
end;
end;
function TDOMDocument.FindID(const aID: DOMString; out Index: LongWord): Boolean;
var
L, H, I, C: Integer;
P: PIDItem;
begin
Result := False;
L := 0;
H := FIDList.Count - 1;
while L <= H do
begin
I := (L + H) shr 1;
P := PIDItem(FIDList.List^[I]);
C := CompareDOMStrings(PWideChar(aID), PWideChar(P^.ID), Length(aID), Length(P^.ID));
if C > 0 then L := I + 1 else
begin
H := I - 1;
if C = 0 then
begin
Result := True;
L := I;
end;
end;
end;
Index := L;
end;
procedure TDOMDocument.ClearIDList;
var
I: Integer;
begin
if Assigned(FIDList) then
begin
for I := 0 to FIDList.Count-1 do
Dispose(PIDItem(FIDList.List^[I]));
FIDList.Clear;
end;
end; end;
function TDOMDocument.GetNodeType: Integer; function TDOMDocument.GetNodeType: Integer;
@ -1924,13 +1935,10 @@ begin
end; end;
function TDOMDocument.GetElementById(const ElementID: DOMString): TDOMElement; function TDOMDocument.GetElementById(const ElementID: DOMString): TDOMElement;
var
I: Cardinal;
begin begin
if Assigned(FIDList) and FindID(ElementID, I) then
Result := PIDItem(FIDList.List^[I])^.Element
else
Result := nil; Result := nil;
if Assigned(FIDList) then
Result := TDOMElement(FIDList.Get(DOMPChar(ElementID), Length(ElementID)));
end; end;
function TDOMDocument.ImportNode(ImportedNode: TDOMNode; function TDOMDocument.ImportNode(ImportedNode: TDOMNode;
@ -1980,6 +1988,7 @@ begin
if Assigned(ent) then if Assigned(ent) then
ent.CloneChildren(Result, Self); ent.CloneChildren(Result, Self);
end; end;
Result.SetReadOnly(True);
end; end;
procedure TXMLDocument.SetXMLVersion(const aValue: DOMString); procedure TXMLDocument.SetXMLVersion(const aValue: DOMString);
@ -2119,6 +2128,7 @@ var
I: Cardinal; I: Cardinal;
attr: TDOMAttr; attr: TDOMAttr;
begin begin
Changing;
if Attributes.Find(name, I) then if Attributes.Find(name, I) then
Attr := FAttributes[I] as TDOMAttr Attr := FAttributes[I] as TDOMAttr
else else
@ -2132,6 +2142,7 @@ end;
procedure TDOMElement.RemoveAttribute(const name: DOMString); procedure TDOMElement.RemoveAttribute(const name: DOMString);
begin begin
Changing;
// (note) NamedNodeMap.RemoveNamedItem can raise NOT_FOUND_ERR and we should not. // (note) NamedNodeMap.RemoveNamedItem can raise NOT_FOUND_ERR and we should not.
if Assigned(FAttributes) then if Assigned(FAttributes) then
FAttributes.InternalRemove(name).Free; FAttributes.InternalRemove(name).Free;
@ -2140,6 +2151,7 @@ end;
procedure TDOMElement.RemoveAttributeNS(const nsURI, procedure TDOMElement.RemoveAttributeNS(const nsURI,
aLocalName: DOMString); aLocalName: DOMString);
begin begin
Changing;
// TODO: Implement TDOMElement.RemoveAttributeNS // TODO: Implement TDOMElement.RemoveAttributeNS
raise EDOMNotSupported.Create('TDOMElement.RemoveAttributeNS'); raise EDOMNotSupported.Create('TDOMElement.RemoveAttributeNS');
end; end;
@ -2202,14 +2214,18 @@ end;
function TDOMElement.RemoveAttributeNode(OldAttr: TDOMAttr): TDOMAttr; function TDOMElement.RemoveAttributeNode(OldAttr: TDOMAttr): TDOMAttr;
begin begin
Changing;
Result:=nil; Result:=nil;
if FAttributes=nil then exit;
// TODO: DOM 2: must raise NOT_FOUND_ERR if OldAttr is not ours. // TODO: DOM 2: must raise NOT_FOUND_ERR if OldAttr is not ours.
// -- but what is the purpose of return value then? // -- but what is the purpose of return value then?
// TODO: delegate to TNamedNodeMap? Nope, it does not have such method // TODO: delegate to TNamedNodeMap? Nope, it does not have such method
// (note) one way around is to remove by name // (note) one way around is to remove by name
if FAttributes.FList.Remove(OldAttr) > -1 then if Assigned(FAttributes) and (FAttributes.FList.Remove(OldAttr) > -1) then
begin
Result := OldAttr; Result := OldAttr;
end
else
raise EDOMNotFound.Create('Element.RemoveAttributeNode');
end; end;
function TDOMElement.GetElementsByTagName(const name: DOMString): TDOMNodeList; function TDOMElement.GetElementsByTagName(const name: DOMString): TDOMNodeList;
@ -2418,6 +2434,7 @@ begin
TDOMEntity(Result).FNotationName := FNotationName; TDOMEntity(Result).FNotationName := FNotationName;
if deep then if deep then
CloneChildren(Result, aCloneOwner); CloneChildren(Result, aCloneOwner);
Result.SetReadOnly(True);
end; end;
// ------------------------------------------------------- // -------------------------------------------------------
@ -2466,6 +2483,7 @@ end;
procedure TDOMProcessingInstruction.SetNodeValue(const AValue: DOMString); procedure TDOMProcessingInstruction.SetNodeValue(const AValue: DOMString);
begin begin
Changing;
FNodeValue := AValue; FNodeValue := AValue;
end; end;

View File

@ -171,7 +171,6 @@ type
FXML11Rules: Boolean; FXML11Rules: Boolean;
FSystemID: WideString; FSystemID: WideString;
FPublicID: WideString; FPublicID: WideString;
FReloadHook: procedure of object;
function GetSystemID: WideString; function GetSystemID: WideString;
function GetPublicID: WideString; function GetPublicID: WideString;
protected protected
@ -306,6 +305,7 @@ type
FInsideDecl: Boolean; FInsideDecl: Boolean;
FDocNotValid: Boolean; FDocNotValid: Boolean;
FValue: TWideCharBuf; FValue: TWideCharBuf;
FEntityValue: TWideCharBuf;
FName: TWideCharBuf; FName: TWideCharBuf;
FTokenStart: TLocation; FTokenStart: TLocation;
FStandalone: Boolean; // property of Doc ? FStandalone: Boolean; // property of Doc ?
@ -379,14 +379,14 @@ type
procedure ParseDoctypeDecl; // [28] procedure ParseDoctypeDecl; // [28]
procedure ParseMarkupDecl; // [29] procedure ParseMarkupDecl; // [29]
procedure ParseElement; // [39] procedure ParseElement; // [39]
procedure ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
procedure ParseContent; // [43] procedure ParseContent; // [43]
function ResolvePredefined: Boolean; function ResolvePredefined: Boolean;
procedure IncludeEntity(InAttr: Boolean); procedure IncludeEntity(InAttr: Boolean);
procedure StartPE; procedure StartPE;
function ParseCharRef: Boolean; // [66] function ParseCharRef(var ToFill: TWideCharBuf): Boolean; // [66]
function ParseExternalID(out SysID, PubID: WideString; // [75] function ParseExternalID(out SysID, PubID: WideString; // [75]
SysIdOptional: Boolean): Boolean; SysIdOptional: Boolean): Boolean;
procedure ProcessTextAndRefs;
procedure BadPENesting(S: TErrorSeverity = esError); procedure BadPENesting(S: TErrorSeverity = esError);
procedure ParseEntityDecl; procedure ParseEntityDecl;
@ -806,8 +806,8 @@ var
c: WideChar; c: WideChar;
r: Integer; r: Integer;
begin begin
if Assigned(FReloadHook) then if DTDSubsetType = dsInternal then
FReloadHook; FReader.DTDReloadHook;
r := FBufEnd - FBuf; r := FBufEnd - FBuf;
if r > 0 then if r > 0 then
Move(FBuf^, FBufStart^, r * sizeof(WideChar)); Move(FBuf^, FBufStart^, r * sizeof(WideChar));
@ -1260,6 +1260,8 @@ end;
destructor TXMLReader.Destroy; destructor TXMLReader.Destroy;
begin begin
if Assigned(FEntityValue.Buffer) then
FreeMem(FEntityValue.Buffer);
FreeMem(FName.Buffer); FreeMem(FName.Buffer);
FreeMem(FValue.Buffer); FreeMem(FValue.Buffer);
if Assigned(FSource) then if Assigned(FSource) then
@ -1425,7 +1427,7 @@ begin
Result := True; Result := True;
end; end;
function TXMLReader.ParseCharRef: Boolean; // [66] function TXMLReader.ParseCharRef(var ToFill: TWideCharBuf): Boolean; // [66]
var var
Value: Integer; Value: Integer;
begin begin
@ -1460,15 +1462,15 @@ begin
case Value of case Value of
$01..$08, $0B..$0C, $0E..$1F: $01..$08, $0B..$0C, $0E..$1F:
if FXML11 then if FXML11 then
BufAppend(FValue, WideChar(Value)) BufAppend(ToFill, WideChar(Value))
else else
FatalError('Invalid character reference'); FatalError('Invalid character reference');
$09, $0A, $0D, $20..$D7FF, $E000..$FFFD: $09, $0A, $0D, $20..$D7FF, $E000..$FFFD:
BufAppend(FValue, WideChar(Value)); BufAppend(ToFill, WideChar(Value));
$10000..$10FFFF: $10000..$10FFFF:
begin begin
BufAppend(FValue, WideChar($D7C0 + (Value shr 10))); BufAppend(ToFill, WideChar($D7C0 + (Value shr 10)));
BufAppend(FValue, WideChar($DC00 xor (Value and $3FF))); BufAppend(ToFill, WideChar($DC00 xor (Value and $3FF)));
end; end;
else else
FatalError('Invalid character reference'); FatalError('Invalid character reference');
@ -1495,7 +1497,7 @@ begin
end end
else else
begin begin
if ParseCharRef or ResolvePredefined then if ParseCharRef(FValue) or ResolvePredefined then
Continue; Continue;
// have to insert entity or reference // have to insert entity or reference
if FValue.Length > 0 then if FValue.Length > 0 then
@ -1622,12 +1624,14 @@ begin
SaveCursor := FCursor; SaveCursor := FCursor;
FCursor := AEntity; // build child node tree for the entity FCursor := AEntity; // build child node tree for the entity
try try
AEntity.SetReadOnly(False);
if InAttr then if InAttr then
DoParseAttValue(#0) DoParseAttValue(#0)
else else
DoParseFragment; DoParseFragment;
AEntity.FResolved := True; AEntity.FResolved := True;
finally finally
AEntity.SetReadOnly(True);
ContextPop; ContextPop;
FCursor := SaveCursor; FCursor := SaveCursor;
FValue.Length := 0; FValue.Length := 0;
@ -1672,60 +1676,6 @@ begin
FHavePERefs := True; FHavePERefs := True;
end; end;
procedure TXMLReader.ProcessTextAndRefs;
var
nonWs: Boolean;
begin
FValue.Length := 0;
nonWs := False;
StoreLocation(FTokenStart);
while (FCurChar <> '<') and (FCurChar <> #0) do
begin
if FCurChar <> '&' then
begin
if (FCurChar <> #32) and (FCurChar <> #10) and (FCurChar <> #9) and (FCurChar <> #13) then
nonWs := True;
BufAppend(FValue, FCurChar);
if FCurChar = '>' then
with FValue do
if (Length >= 3) and (Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then
FatalError('Literal '']]>'' is not allowed in text', 2);
GetChar;
end
else
begin
if FState <> rsRoot then
FatalError('Illegal at document level');
if FCurrContentType = ctEmpty then
ValidationError('References are illegal in EMPTY elements', []);
if ParseCharRef or ResolvePredefined then
nonWs := True // CharRef to whitespace is not considered whitespace
else
begin
if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then
begin
// 'Reference illegal at root' is checked above, no need to check here
DoText(FValue.Buffer, FValue.Length, not nonWs);
FValue.Length := 0;
end;
IncludeEntity(False);
end;
end;
end; // while
if FState = rsRoot then
begin
if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then
begin
DoText(FValue.Buffer, FValue.Length, not nonWs);
FValue.Length := 0;
end;
end
else if nonWs then
FatalError('Illegal at document level', -1);
end;
procedure TXMLReader.ExpectAttValue; // [10] procedure TXMLReader.ExpectAttValue; // [10]
var var
Delim: WideChar; Delim: WideChar;
@ -1955,14 +1905,12 @@ begin
begin begin
BufAllocate(FIntSubset, 256); BufAllocate(FIntSubset, 256);
FSource.DTDSubsetType := dsInternal; FSource.DTDSubsetType := dsInternal;
FSource.FReloadHook := {$IFDEF FPC}@{$ENDIF}DTDReloadHook;
try try
FDTDStartPos := FSource.FBuf; FDTDStartPos := FSource.FBuf;
ParseMarkupDecl; ParseMarkupDecl;
DTDReloadHook; // fetch last chunk DTDReloadHook; // fetch last chunk
SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length); SetString(FDocType.FInternalSubset, FIntSubset.Buffer, FIntSubset.Length);
finally finally
FSource.FReloadHook := nil;
FreeMem(FIntSubset.Buffer); FreeMem(FIntSubset.Buffer);
FSource.DTDSubsetType := dsNone; FSource.DTDSubsetType := dsNone;
end; end;
@ -1989,6 +1937,7 @@ begin
end; end;
FCursor := Doc; FCursor := Doc;
ValidateDTD; ValidateDTD;
FDocType.SetReadOnly(True);
end; end;
procedure TXMLReader.ExpectEq; // [25] procedure TXMLReader.ExpectEq; // [25]
@ -2324,7 +2273,9 @@ var
CurrentEntity: TObject; CurrentEntity: TObject;
begin begin
CurrentEntity := FSource.FEntity; CurrentEntity := FSource.FEntity;
FValue.Length := 0; if FEntityValue.Buffer = nil then
BufAllocate(FEntityValue, 256);
FEntityValue.Length := 0;
// "Included in literal": process until delimiter hit IN SAME context // "Included in literal": process until delimiter hit IN SAME context
while not ((FSource.FEntity = CurrentEntity) and CheckForChar(Delim)) do while not ((FSource.FEntity = CurrentEntity) and CheckForChar(Delim)) do
if CheckForChar('%') then if CheckForChar('%') then
@ -2337,16 +2288,16 @@ begin
end end
else if FCurChar = '&' then // CharRefs: include, EntityRefs: bypass else if FCurChar = '&' then // CharRefs: include, EntityRefs: bypass
begin begin
if not ParseCharRef then if not ParseCharRef(FEntityValue) then
begin begin
BufAppend(FValue, '&'); BufAppend(FEntityValue, '&');
BufAppendChunk(FValue, FName.Buffer, FName.Length); BufAppendChunk(FEntityValue, FName.Buffer, FName.Length);
BufAppend(FValue, ';'); BufAppend(FEntityValue, ';');
end; end;
end end
else if FCurChar <> #0 then // Regular character else if FCurChar <> #0 then // Regular character
begin begin
BufAppend(FValue, FCurChar); BufAppend(FEntityValue, FCurChar);
GetChar; GetChar;
end end
else if (FSource.FEntity = CurrentEntity) or not ContextPop then // #0 else if (FSource.FEntity = CurrentEntity) or not ContextPop then // #0
@ -2378,6 +2329,7 @@ begin
end; end;
Entity := TDOMEntityEx.Create(Doc); Entity := TDOMEntityEx.Create(Doc);
Entity.SetReadOnly(True);
try try
Entity.FExternallyDeclared := FSource.DTDSubsetType <> dsInternal; Entity.FExternallyDeclared := FSource.DTDSubsetType <> dsInternal;
Entity.FName := ExpectName; Entity.FName := ExpectName;
@ -2392,7 +2344,7 @@ begin
StoreLocation(Entity.FStartLocation); StoreLocation(Entity.FStartLocation);
if not ParseEntityDeclValue(Delim) then if not ParseEntityDeclValue(Delim) then
DoErrorPos(esFatal, 'Literal has no closing quote', Entity.FStartLocation); DoErrorPos(esFatal, 'Literal has no closing quote', Entity.FStartLocation);
SetString(Entity.FReplacementText, FValue.Buffer, FValue.Length); SetString(Entity.FReplacementText, FEntityValue.Buffer, FEntityValue.Length);
end end
else else
if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then if not ParseExternalID(Entity.FSystemID, Entity.FPublicID, False) then
@ -2575,6 +2527,8 @@ begin
end; end;
procedure TXMLReader.ParseContent; procedure TXMLReader.ParseContent;
var
nonWs: Boolean;
begin begin
repeat repeat
if FCurChar = '<' then if FCurChar = '<' then
@ -2600,7 +2554,56 @@ begin
RaiseNameNotFound; RaiseNameNotFound;
end end
else else
ProcessTextAndRefs; begin
FValue.Length := 0;
nonWs := False;
StoreLocation(FTokenStart);
while (FCurChar <> '<') and (FCurChar <> #0) do
begin
if FCurChar <> '&' then
begin
if (FCurChar <> #32) and (FCurChar <> #10) and (FCurChar <> #9) and (FCurChar <> #13) then
nonWs := True;
BufAppend(FValue, FCurChar);
if FCurChar = '>' then
with FValue do
if (Length >= 3) and (Buffer[Length-2] = ']') and (Buffer[Length-3] = ']') then
FatalError('Literal '']]>'' is not allowed in text', 2);
GetChar;
end
else
begin
if FState <> rsRoot then
FatalError('Illegal at document level');
if FCurrContentType = ctEmpty then
ValidationError('References are illegal in EMPTY elements', []);
if ParseCharRef(FValue) or ResolvePredefined then
nonWs := True // CharRef to whitespace is not considered whitespace
else
begin
if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then
begin
// 'Reference illegal at root' is checked above, no need to check here
DoText(FValue.Buffer, FValue.Length, not nonWs);
FValue.Length := 0;
end;
IncludeEntity(False);
end;
end;
end; // while
if FState = rsRoot then
begin
if (nonWs or FPreserveWhitespace) and (FValue.Length > 0) then
begin
DoText(FValue.Buffer, FValue.Length, not nonWs);
FValue.Length := 0;
end;
end
else if nonWs then
FatalError('Illegal at document level', -1);
end;
until FCurChar = #0; until FCurChar = #0;
end; end;
@ -2610,8 +2613,6 @@ var
NewElem: TDOMElement; NewElem: TDOMElement;
ElDef: TDOMElementDef; ElDef: TDOMElementDef;
IsEmpty: Boolean; IsEmpty: Boolean;
attr: TDOMAttr;
OldAttr: TDOMNode;
begin begin
if FState > rsRoot then if FState > rsRoot then
FatalError('Only one top-level element allowed', FName.Length) FatalError('Only one top-level element allowed', FName.Length)
@ -2639,28 +2640,15 @@ begin
ValidationError('Element ''%s'' is not allowed in this context',[NewElem.TagName], FName.Length); ValidationError('Element ''%s'' is not allowed in this context',[NewElem.TagName], FName.Length);
IsEmpty := False; IsEmpty := False;
if SkipS then while (FSource.FBuf^ <> '>') and (FSource.FBuf^ <> '/') do
begin begin
while (FCurChar <> '>') and (FCurChar <> '/') do SkipS(True);
begin if (FSource.FBuf^ = '>') or (FSource.FBuf^ = '/') then
CheckName; Break;
attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length); ParseAttribute(NewElem, ElDef);
// !!cannot use TDOMElement.SetAttributeNode because it will free old attribute
OldAttr := NewElem.Attributes.SetNamedItem(Attr);
if Assigned(OldAttr) then
begin
OldAttr.Free;
FatalError('Duplicate attribute', FName.Length);
end;
ExpectEq;
FCursor := attr;
ExpectAttValue;
if (FCurChar <> '>') and (FCurChar <> '/') then
SkipS(True);
end; // while
end; end;
if FCurChar = '/' then
if FSource.FBuf^ = '/' then
begin begin
IsEmpty := True; IsEmpty := True;
GetChar; GetChar;
@ -2706,6 +2694,25 @@ begin
PopVC; PopVC;
end; end;
procedure TXMLReader.ParseAttribute(Elem: TDOMElement; ElDef: TDOMElementDef);
var
attr: TDOMAttr;
OldAttr: TDOMNode;
begin
CheckName;
attr := doc.CreateAttributeBuf(FName.Buffer, FName.Length);
// !!cannot use TDOMElement.SetAttributeNode because it will free old attribute
OldAttr := Elem.Attributes.SetNamedItem(Attr);
if Assigned(OldAttr) then
begin
OldAttr.Free;
FatalError('Duplicate attribute', FName.Length);
end;
ExpectEq;
FCursor := attr;
ExpectAttValue;
end;
procedure TXMLReader.AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer); procedure TXMLReader.AddForwardRef(aList: TFPList; Buf: PWideChar; Length: Integer);
var var
w: PForwardRef; w: PForwardRef;

View File

@ -30,6 +30,42 @@ function IsXmlNmTokens(const Value: WideString; Xml11: Boolean = False): Boolean
function IsValidXmlEncoding(const Value: WideString): Boolean; function IsValidXmlEncoding(const Value: WideString): Boolean;
function Xml11NamePages: PByteArray; function Xml11NamePages: PByteArray;
procedure NormalizeSpaces(var Value: WideString); procedure NormalizeSpaces(var Value: WideString);
function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
{ a simple hash table with WideString keys }
type
PPHashItem = ^PHashItem;
PHashItem = ^THashItem;
THashItem = record
Key: WideString;
HashValue: LongWord;
Next: PHashItem;
Data: TObject;
end;
THashForEach = function(Entry: PHashItem; arg: Pointer): Boolean;
THashTable = class(TObject)
private
FCount: LongWord;
FBucketCount: LongWord;
FBucket: PPHashItem;
FOwnsObjects: Boolean;
function Lookup(Key: PWideChar; KeyLength: Integer; var Found: Boolean; CanCreate: Boolean): PHashItem;
procedure Resize(NewCapacity: LongWord);
public
constructor Create(InitSize: Integer; OwnObjects: Boolean);
destructor Destroy; override;
procedure Clear;
function Find(Key: PWideChar; KeyLen: Integer): PHashItem;
function FindOrAdd(Key: PWideChar; KeyLen: Integer; var Found: Boolean): PHashItem; overload;
function FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem; overload;
function Get(Key: PWideChar; KeyLen: Integer): TObject;
function Remove(Entry: PHashItem): Boolean;
procedure ForEach(proc: THashForEach; arg: Pointer);
property Count: LongWord read FCount;
end;
{$i names.inc} {$i names.inc}
@ -239,6 +275,191 @@ begin
end; end;
end; end;
function Hash(InitValue: LongWord; Key: PWideChar; KeyLen: Integer): LongWord;
begin
Result := InitValue;
while KeyLen <> 0 do
begin
Result := Result * $F4243 xor ord(Key^);
Inc(Key);
Dec(KeyLen);
end;
end;
function KeyCompare(const Key1: WideString; Key2: Pointer; Key2Len: Integer): Boolean;
begin
Result := (Length(Key1)=Key2Len) and (CompareWord(Pointer(Key1)^, Key2^, Key2Len) = 0);
end;
{ THashTable }
constructor THashTable.Create(InitSize: Integer; OwnObjects: Boolean);
var
I: Integer;
begin
inherited Create;
FOwnsObjects := OwnObjects;
I := 256;
while I < InitSize do I := I shl 1;
FBucketCount := I;
FBucket := AllocMem(I * sizeof(PHashItem));
end;
destructor THashTable.Destroy;
begin
Clear;
FreeMem(FBucket);
inherited Destroy;
end;
procedure THashTable.Clear;
var
I: Integer;
item, next: PHashItem;
begin
for I := 0 to FBucketCount-1 do
begin
item := FBucket[I];
while Assigned(item) do
begin
next := item^.Next;
if FOwnsObjects then
item^.Data.Free;
Dispose(item);
item := next;
end;
end;
FillChar(FBucket^, FBucketCount * sizeof(PHashItem), 0);
end;
function THashTable.Find(Key: PWideChar; KeyLen: Integer): PHashItem;
var
Dummy: Boolean;
begin
Result := Lookup(Key, KeyLen, Dummy, False);
end;
function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer;
var Found: Boolean): PHashItem;
begin
Result := Lookup(Key, KeyLen, Found, True);
end;
function THashTable.FindOrAdd(Key: PWideChar; KeyLen: Integer): PHashItem;
var
Dummy: Boolean;
begin
Result := Lookup(Key, KeyLen, Dummy, True);
end;
function THashTable.Get(Key: PWideChar; KeyLen: Integer): TObject;
var
e: PHashItem;
Dummy: Boolean;
begin
e := Lookup(Key, KeyLen, Dummy, False);
if Assigned(e) then
Result := e^.Data
else
Result := nil;
end;
function THashTable.Lookup(Key: PWideChar; KeyLength: Integer;
var Found: Boolean; CanCreate: Boolean): PHashItem;
var
Entry: PPHashItem;
h: LongWord;
begin
h := Hash(0, Key, KeyLength);
Entry := @FBucket[h mod FBucketCount];
while Assigned(Entry^) and not ((Entry^^.HashValue = h) and KeyCompare(Entry^^.Key, Key, KeyLength) ) do
Entry := @Entry^^.Next;
Found := Assigned(Entry^);
if Found or (not CanCreate) then
begin
Result := Entry^;
Exit;
end;
if FCount > FBucketCount then { arbitrary limit, probably too high }
begin
Resize(FBucketCount * 2);
Result := Lookup(Key, KeyLength, Found, CanCreate);
end
else
begin
New(Result);
SetString(Result^.Key, Key, KeyLength);
Result^.HashValue := h;
Result^.Data := nil;
Result^.Next := nil;
Inc(FCount);
Entry^ := Result;
end;
end;
procedure THashTable.Resize(NewCapacity: LongWord);
var
p, chain: PPHashItem;
i: Integer;
e, n: PHashItem;
begin
p := AllocMem(NewCapacity * sizeof(PHashItem));
for i := 0 to FBucketCount-1 do
begin
e := FBucket[i];
while Assigned(e) do
begin
chain := @p[e^.HashValue mod NewCapacity];
n := e^.Next;
e^.Next := chain^;
chain^ := e;
e := n;
end;
end;
FBucketCount := NewCapacity;
FreeMem(FBucket);
FBucket := p;
end;
function THashTable.Remove(Entry: PHashItem): Boolean;
var
chain: PPHashItem;
begin
chain := @FBucket[Entry^.HashValue mod FBucketCount];
while Assigned(chain^) do
begin
if chain^ = Entry then
begin
chain^ := Entry^.Next;
if FOwnsObjects then
Entry^.Data.Free;
Dispose(Entry);
Dec(FCount);
Result := True;
Exit;
end;
chain := @chain^^.Next;
end;
Result := False;
end;
procedure THashTable.ForEach(proc: THashForEach; arg: Pointer);
var
i: Integer;
e: PHashItem;
begin
for i := 0 to FBucketCount-1 do
begin
e := FBucket[i];
while Assigned(e) do
begin
if not proc(e, arg) then
Exit;
e := e^.Next;
end;
end;
end;
initialization initialization
finalization finalization