mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-17 19:29:18 +02:00
XPath, use a perfect hash to recognize all possible keywords.
git-svn-id: trunk@15638 -
This commit is contained in:
parent
829f8164fc
commit
0f5795baaf
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -2323,6 +2323,7 @@ packages/fcl-xml/src/xmlstreaming.pp svneol=native#text/plain
|
|||||||
packages/fcl-xml/src/xmlutils.pp svneol=native#text/plain
|
packages/fcl-xml/src/xmlutils.pp svneol=native#text/plain
|
||||||
packages/fcl-xml/src/xmlwrite.pp svneol=native#text/plain
|
packages/fcl-xml/src/xmlwrite.pp svneol=native#text/plain
|
||||||
packages/fcl-xml/src/xpath.pp svneol=native#text/plain
|
packages/fcl-xml/src/xpath.pp svneol=native#text/plain
|
||||||
|
packages/fcl-xml/src/xpathkw.inc svneol=native#text/plain
|
||||||
packages/fcl-xml/tests/README.txt svneol=native#text/plain
|
packages/fcl-xml/tests/README.txt svneol=native#text/plain
|
||||||
packages/fcl-xml/tests/README_DOM.txt svneol=native#text/plain
|
packages/fcl-xml/tests/README_DOM.txt svneol=native#text/plain
|
||||||
packages/fcl-xml/tests/api.xml svneol=native#text/plain
|
packages/fcl-xml/tests/api.xml svneol=native#text/plain
|
||||||
|
@ -95,6 +95,23 @@ type
|
|||||||
tkPipe // "|"
|
tkPipe // "|"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
TXPathKeyword = (
    { Identifier that is not a keyword. The XPathKeywords table in
      xpathkw.inc is indexed by this type, so the member order here and
      the entry order there must stay in step. }
    xkNone,

    { Axis names. Contiguous: used as the subrange xkAncestor..xkSelf. }
    xkAncestor,
    xkAncestorOrSelf,
    xkAttribute,
    xkChild,
    xkDescendant,
    xkDescendantOrSelf,
    xkFollowing,
    xkFollowingSibling,
    xkNamespace,
    xkParent,
    xkPreceding,
    xkPrecedingSibling,
    xkSelf,

    { Node tests. Contiguous: used as the subrange xkComment..xkNode. }
    xkComment,
    xkText,
    xkProcessingInstruction,
    xkNode,

    { Operators spelled as names. }
    xkAnd,
    xkOr,
    xkDiv,
    xkMod,

    { Standard functions. Contiguous: used as the subrange xkLast..xkRound. }
    xkLast,
    xkPosition,
    xkCount,
    xkId,
    xkLocalName,
    xkNamespaceUri,
    xkName,
    xkString,
    xkConcat,
    xkStartsWith,
    xkContains,
    xkSubstringBefore,
    xkSubstringAfter,
    xkSubstring,
    xkStringLength,
    xkNormalizeSpace,
    xkTranslate,
    xkBoolean,
    xkNot,
    xkTrue,
    xkFalse,
    xkLang,
    xkNumber,
    xkSum,
    xkFloor,
    xkCeiling,
    xkRound
  );
|
||||||
|
|
||||||
{ XPath expression parse tree }
|
{ XPath expression parse tree }
|
||||||
|
|
||||||
@ -347,6 +364,7 @@ type
|
|||||||
FTokenStart: DOMPChar;
|
FTokenStart: DOMPChar;
|
||||||
FTokenLength: Integer;
|
FTokenLength: Integer;
|
||||||
FPrefixLength: Integer;
|
FPrefixLength: Integer;
|
||||||
|
FTokenId: TXPathKeyword;
|
||||||
FResolver: TXPathNSResolver;
|
FResolver: TXPathNSResolver;
|
||||||
procedure Error(const Msg: String);
|
procedure Error(const Msg: String);
|
||||||
procedure ParsePredicates(var Dest: TXPathNodeArray);
|
procedure ParsePredicates(var Dest: TXPathNodeArray);
|
||||||
@ -485,6 +503,23 @@ implementation
|
|||||||
|
|
||||||
uses Math, xmlutils;
|
uses Math, xmlutils;
|
||||||
|
|
||||||
|
{$i xpathkw.inc}
|
||||||
|
|
||||||
|
const
  { Keywords that are valid axis names, and the axis each one denotes.
    AxisNameMap is indexed by the keyword itself, so its entries must
    follow the declaration order of xkAncestor..xkSelf. }
  AxisNameKeywords = [xkAncestor..xkSelf];
  AxisNameMap: array[xkAncestor..xkSelf] of TAxis = (
    axisAncestor,
    axisAncestorOrSelf,
    axisAttribute,
    axisChild,
    axisDescendant,
    axisDescendantOrSelf,
    axisFollowing,
    axisFollowingSibling,
    axisNamespace,
    axisParent,
    axisPreceding,
    axisPrecedingSibling,
    axisSelf
  );

  { Keywords that are node-type tests, and the test each one denotes.
    Entry order follows xkComment..xkNode. }
  NodeTestKeywords = [xkComment..xkNode];
  NodeTestMap: array[xkComment..xkNode] of TNodeTestType = (
    ntCommentNode,
    ntTextNode,
    ntPINode,
    ntAnyNode
  );

  { Keywords that name standard functions. }
  FunctionKeywords = [xkLast..xkRound];
|
||||||
|
|
||||||
{ Helper functions }
|
{ Helper functions }
|
||||||
|
|
||||||
function NodeToText(Node: TDOMNode): DOMString;
|
function NodeToText(Node: TDOMNode): DOMString;
|
||||||
@ -1593,6 +1628,10 @@ begin
|
|||||||
FCurToken := Result;
|
FCurToken := Result;
|
||||||
if Result in [tkIdentifier, tkNSNameTest, tkNumber, tkString, tkVariable] then
|
if Result in [tkIdentifier, tkNSNameTest, tkNumber, tkString, tkVariable] then
|
||||||
SetString(FCurTokenString, FTokenStart, FTokenLength);
|
SetString(FCurTokenString, FTokenStart, FTokenLength);
|
||||||
|
if Result = tkIdentifier then
|
||||||
|
FTokenId := LookupXPathKeyword(FTokenStart, FTokenLength)
|
||||||
|
else
|
||||||
|
FTokenId := xkNone;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
function TXPathScanner.SkipToken(tok: TXPathToken): Boolean; { inline? }
|
function TXPathScanner.SkipToken(tok: TXPathToken): Boolean; { inline? }
|
||||||
@ -1832,36 +1871,10 @@ begin
|
|||||||
end
|
end
|
||||||
else if (CurToken = tkIdentifier) and (PeekToken = tkColonColon) then // [5] AxisName '::'
|
else if (CurToken = tkIdentifier) and (PeekToken = tkColonColon) then // [5] AxisName '::'
|
||||||
begin
|
begin
|
||||||
// Check for [6] AxisName
|
if FTokenId in AxisNameKeywords then
|
||||||
if CurTokenString = 'ancestor' then
|
Axis := AxisNameMap[FTokenId]
|
||||||
Axis := axisAncestor
|
|
||||||
else if CurTokenString = 'ancestor-or-self' then
|
|
||||||
Axis := axisAncestorOrSelf
|
|
||||||
else if CurTokenString = 'attribute' then
|
|
||||||
Axis := axisAttribute
|
|
||||||
else if CurTokenString = 'child' then
|
|
||||||
Axis := axisChild
|
|
||||||
else if CurTokenString = 'descendant' then
|
|
||||||
Axis := axisDescendant
|
|
||||||
else if CurTokenString = 'descendant-or-self' then
|
|
||||||
Axis := axisDescendantOrSelf
|
|
||||||
else if CurTokenString = 'following' then
|
|
||||||
Axis := axisFollowing
|
|
||||||
else if CurTokenString = 'following-sibling' then
|
|
||||||
Axis := axisFollowingSibling
|
|
||||||
else if CurTokenString = 'namespace' then
|
|
||||||
Axis := axisNamespace
|
|
||||||
else if CurTokenString = 'parent' then
|
|
||||||
Axis := axisParent
|
|
||||||
else if CurTokenString = 'preceding' then
|
|
||||||
Axis := axisPreceding
|
|
||||||
else if CurTokenString = 'preceding-sibling' then
|
|
||||||
Axis := axisPrecedingSibling
|
|
||||||
else if CurTokenString = 'self' then
|
|
||||||
Axis := axisSelf
|
|
||||||
else
|
else
|
||||||
Error(SParserBadAxisName);
|
Error(SParserBadAxisName);
|
||||||
|
|
||||||
NextToken; // skip identifier and the '::'
|
NextToken; // skip identifier and the '::'
|
||||||
NextToken;
|
NextToken;
|
||||||
end
|
end
|
||||||
@ -1874,15 +1887,6 @@ begin
|
|||||||
end;
|
end;
|
||||||
|
|
||||||
function TXPathScanner.ParseNodeTest(Axis: TAxis): TStep; // [7]
|
function TXPathScanner.ParseNodeTest(Axis: TAxis): TStep; // [7]
|
||||||
|
|
||||||
procedure NeedBrackets;
|
|
||||||
begin
|
|
||||||
NextToken;
|
|
||||||
if NextToken <> tkRightBracket then
|
|
||||||
Error(SParserExpectedRightBracket);
|
|
||||||
NextToken;
|
|
||||||
end;
|
|
||||||
|
|
||||||
var
|
var
|
||||||
nodeType: TNodeTestType;
|
nodeType: TNodeTestType;
|
||||||
nodeName: DOMString;
|
nodeName: DOMString;
|
||||||
@ -1910,33 +1914,26 @@ begin
|
|||||||
// Check for case [38] NodeType
|
// Check for case [38] NodeType
|
||||||
if PeekToken = tkLeftBracket then
|
if PeekToken = tkLeftBracket then
|
||||||
begin
|
begin
|
||||||
if CurTokenString = 'comment' then
|
if FTokenId in NodeTestKeywords then
|
||||||
begin
|
begin
|
||||||
NeedBrackets;
|
nodeType := NodeTestMap[FTokenId];
|
||||||
nodeType := ntCommentNode;
|
if FTokenId = xkProcessingInstruction then
|
||||||
end
|
|
||||||
else if CurTokenString = 'text' then
|
|
||||||
begin
|
|
||||||
NeedBrackets;
|
|
||||||
nodeType := ntTextNode;
|
|
||||||
end
|
|
||||||
else if CurTokenString = 'processing-instruction' then
|
|
||||||
begin
|
|
||||||
NextToken; { skip '('; we know it's there }
|
|
||||||
if NextToken = tkString then
|
|
||||||
begin
|
begin
|
||||||
nodeName := CurTokenString;
|
NextToken;
|
||||||
|
if NextToken = tkString then
|
||||||
|
begin
|
||||||
|
nodeName := CurTokenString;
|
||||||
|
NextToken;
|
||||||
|
end;
|
||||||
|
end
|
||||||
|
else
|
||||||
|
begin
|
||||||
|
NextToken;
|
||||||
NextToken;
|
NextToken;
|
||||||
end;
|
end;
|
||||||
if CurToken <> tkRightBracket then
|
if CurToken <> tkRightBracket then
|
||||||
Error(SParserExpectedRightBracket);
|
Error(SParserExpectedRightBracket);
|
||||||
NextToken;
|
NextToken;
|
||||||
nodeType := ntPINode;
|
|
||||||
end
|
|
||||||
else if CurTokenString = 'node' then
|
|
||||||
begin
|
|
||||||
NeedBrackets;
|
|
||||||
nodeType := ntAnyNode;
|
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
Error(SParserBadNodeType);
|
Error(SParserBadNodeType);
|
||||||
@ -2029,10 +2026,7 @@ begin
|
|||||||
Result := nil;
|
Result := nil;
|
||||||
// Try to detect whether a LocationPath [1] or a FilterExpr [20] follows
|
// Try to detect whether a LocationPath [1] or a FilterExpr [20] follows
|
||||||
if ((CurToken = tkIdentifier) and (PeekToken = tkLeftBracket) and
|
if ((CurToken = tkIdentifier) and (PeekToken = tkLeftBracket) and
|
||||||
(CurTokenString <> 'comment') and
|
not (FTokenId in NodeTestKeywords)) or
|
||||||
(CurTokenString <> 'text') and
|
|
||||||
(CurTokenString <> 'processing-instruction') and
|
|
||||||
(CurTokenString <> 'node')) or
|
|
||||||
(CurToken in [tkVariable, tkLeftBracket, tkString, tkNumber]) then
|
(CurToken in [tkVariable, tkLeftBracket, tkString, tkNumber]) then
|
||||||
begin
|
begin
|
||||||
// second, third or fourth case of [19]
|
// second, third or fourth case of [19]
|
||||||
@ -2083,7 +2077,7 @@ end;
|
|||||||
function TXPathScanner.ParseOrExpr: TXPathExprNode; // [21]
|
function TXPathScanner.ParseOrExpr: TXPathExprNode; // [21]
|
||||||
begin
|
begin
|
||||||
Result := ParseAndExpr;
|
Result := ParseAndExpr;
|
||||||
while (CurToken = tkIdentifier) and (CurTokenString = 'or') do
|
while FTokenId = xkOr do
|
||||||
begin
|
begin
|
||||||
NextToken;
|
NextToken;
|
||||||
Result := TXPathBooleanOpNode.Create(opOr, Result, ParseAndExpr);
|
Result := TXPathBooleanOpNode.Create(opOr, Result, ParseAndExpr);
|
||||||
@ -2093,7 +2087,7 @@ end;
|
|||||||
function TXPathScanner.ParseAndExpr: TXPathExprNode; // [22]
|
function TXPathScanner.ParseAndExpr: TXPathExprNode; // [22]
|
||||||
begin
|
begin
|
||||||
Result := ParseEqualityExpr;
|
Result := ParseEqualityExpr;
|
||||||
while (CurToken = tkIdentifier) and (CurTokenString = 'and') do
|
while FTokenId = xkAnd do
|
||||||
begin
|
begin
|
||||||
NextToken;
|
NextToken;
|
||||||
Result := TXPathBooleanOpNode.Create(opAnd, Result, ParseEqualityExpr);
|
Result := TXPathBooleanOpNode.Create(opAnd, Result, ParseEqualityExpr);
|
||||||
@ -2163,9 +2157,9 @@ begin
|
|||||||
tkAsterisk:
|
tkAsterisk:
|
||||||
op := opMultiply;
|
op := opMultiply;
|
||||||
tkIdentifier:
|
tkIdentifier:
|
||||||
if CurTokenString = 'div' then
|
if FTokenId = xkDiv then
|
||||||
op := opDivide
|
op := opDivide
|
||||||
else if CurTokenString = 'mod' then
|
else if FTokenId = xkMod then
|
||||||
op := opMod
|
op := opMod
|
||||||
else
|
else
|
||||||
break;
|
break;
|
||||||
|
162
packages/fcl-xml/src/xpathkw.inc
Normal file
162
packages/fcl-xml/src/xpathkw.inc
Normal file
@ -0,0 +1,162 @@
|
|||||||
|
{
|
||||||
|
This file is part of the Free Component Library
|
||||||
|
|
||||||
|
A perfect hash for XPath keywords
|
||||||
|
|
||||||
|
See the file COPYING.FPC, included in this distribution,
|
||||||
|
for details about the copyright.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||||
|
|
||||||
|
**********************************************************************}
|
||||||
|
|
||||||
|
const
  { Keyword spellings, indexed by TXPathKeyword. Each entry is
    length-prefixed: its first character holds the keyword length and the
    keyword text follows. The xkNone entry is the empty string. }
  XPathKeywords: array [TXPathKeyword] of PWideChar = (
    // xkNone
    '',
    // axis names
    #08'ancestor',
    #16'ancestor-or-self',
    #09'attribute',
    #05'child',
    #10'descendant',
    #18'descendant-or-self',
    #09'following',
    #17'following-sibling',
    #09'namespace',
    #06'parent',
    #09'preceding',
    #17'preceding-sibling',
    #04'self',
    // node tests
    #07'comment',
    #04'text',
    #22'processing-instruction',
    #04'node',
    // operators
    #03'and',
    #02'or',
    #03'div',
    #03'mod',
    // standard functions
    #04'last',
    #08'position',
    #05'count',
    #02'id',
    #10'local-name',
    #13'namespace-uri',
    #04'name',
    #06'string',
    #06'concat',
    #11'starts-with',
    #08'contains',
    #16'substring-before',
    #15'substring-after',
    #09'substring',
    #13'string-length',
    #15'normalize-space',
    #09'translate',
    #07'boolean',
    #03'not',
    #04'true',
    #05'false',
    #04'lang',
    #06'number',
    #03'sum',
    #05'floor',
    #07'ceiling',
    #05'round'
  );

  { The tables below were ported by hand from C code produced by gperf,
    which makes them awkward to maintain: short of a gperf-like tool that
    emits Pascal, any change has to be redone manually. On the bright side,
    there should never be a need to change them. }

  { Number of slots in KeywordIndex; valid slot numbers are 0..MaxHash-1. }
  MaxHash = 55;

  { Hash value -> keyword occupying that slot (xkNone for an empty slot).
    The leading comment on each row is the slot number of its first entry. }
  KeywordIndex: array[0..MaxHash-1] of TXPathKeyword = (
    { 0} xkNone, xkNone, xkId, xkNone, xkNone,
    { 5} xkNone, xkString, xkSum, xkParent, xkSubstring,
    {10} xkNone, xkComment, xkName, xkStringLength, xkNumber,
    {15} xkSubstringAfter, xkSubstringBefore, xkNamespace, xkFloor, xkNormalizeSpace,
    {20} xkSelf, xkNamespaceUri, xkPreceding, xkOr, xkPosition,
    {25} xkText, xkProcessingInstruction, xkConcat, xkLast, xkContains,
    {30} xkPrecedingSibling, xkAncestor, xkFalse, xkLocalName, xkCount,
    {35} xkLang, xkFollowing, xkDescendant, xkNode, xkAncestorOrSelf,
    {40} xkBoolean, xkNot, xkStartsWith, xkAnd, xkFollowingSibling,
    {45} xkDescendantOrSelf, xkChild, xkTrue, xkCeiling, xkMod,
    {50} xkDiv, xkRound, xkNone, xkAttribute, xkTranslate
  );

  { Per-character hash contributions, indexed by character code
    97..122 (the letters 'a'..'z'). }
  AssoValues: array[97..122] of Byte = (
    { a..j } 10, 31,  0, 13, 30, 11, 55, 55,  0, 41,
    { k..t } 55, 10, 16,  4, 21,  2, 55, 17,  0, 14,
    { u..z } 34, 29, 34, 55,  7, 55
  );
|
||||||
|
|
||||||
|
{ Looks up an identifier in the XPath keyword table using a perfect hash.

  p   - first character of the identifier (need not be zero-terminated).
  Len - number of characters in the identifier.

  Returns the matching TXPathKeyword, or xkNone if the identifier is not
  a keyword.

  The hash is Len, plus the AssoValues entry of the first character, plus
  (for identifiers longer than two characters) the AssoValues entry of the
  character code following the third character. Identifiers whose hashed
  characters fall outside 'a'..'y' cannot be keywords and are rejected at
  once. }
function LookupXPathKeyword(p: PWideChar; Len: Integer): TXPathKeyword;
var
  hash: Integer;
  p1: PWideChar;
begin
  Result := xkNone;
  hash := Len;
  if Len >= 1 then
  begin
    if (p^ >= 'a') and (p^ <= 'y') then
      Inc(hash, AssoValues[ord(p^)])
    else
      Exit;
    if Len > 2 then
      if (p[2] >= 'a') and (p[2] <= 'y') then
        // the +1 is part of the generated hash; 'y'+1 = 'z' is still in range
        Inc(hash, AssoValues[ord(p[2])+1])
      else
        Exit;
  end;
  { KeywordIndex has slots 0..MaxHash-1 only. Bound the index by the array
    itself: a hash equal to MaxHash is reachable (for example a 14-character
    identifier starting with 'j' whose third character is 'b') and must not
    index past the end of the table. }
  if (hash >= 0) and (hash <= High(KeywordIndex)) then
  begin
    p1 := XPathKeywords[KeywordIndex[hash]];
    // entries are length-prefixed: compare the length first, then the text
    if (ord(p1^) = Len) and
      CompareMem(p, p1+1, Len*sizeof(WideChar)) then
      Result := KeywordIndex[hash];
  end;
end;
|
Loading…
Reference in New Issue
Block a user