XPath, use a perfect hash to recognize all possible keywords.

git-svn-id: trunk@15638 -
This commit is contained in:
sergei 2010-07-26 13:49:46 +00:00
parent 829f8164fc
commit 0f5795baaf
3 changed files with 222 additions and 65 deletions

1
.gitattributes vendored
View File

@ -2323,6 +2323,7 @@ packages/fcl-xml/src/xmlstreaming.pp svneol=native#text/plain
packages/fcl-xml/src/xmlutils.pp svneol=native#text/plain
packages/fcl-xml/src/xmlwrite.pp svneol=native#text/plain
packages/fcl-xml/src/xpath.pp svneol=native#text/plain
packages/fcl-xml/src/xpathkw.inc svneol=native#text/plain
packages/fcl-xml/tests/README.txt svneol=native#text/plain
packages/fcl-xml/tests/README_DOM.txt svneol=native#text/plain
packages/fcl-xml/tests/api.xml svneol=native#text/plain

View File

@ -95,6 +95,23 @@ type
tkPipe // "|"
);
// Identifies every word that the XPath scanner recognizes by name.
// The declaration order is significant: subranges of this enumeration are
// used as sets and array index types (xkAncestor..xkSelf, xkComment..xkNode,
// xkLast..xkRound), and the XPathKeywords table in xpathkw.inc is indexed by
// the whole type. Do not reorder members without updating those tables.
TXPathKeyword = (
// xkNone means "not a keyword"; all other members on the next three lines
// are axis names
xkNone, xkAncestor, xkAncestorOrSelf, xkAttribute, xkChild,
xkDescendant, xkDescendantOrSelf, xkFollowing, xkFollowingSibling,
xkNamespace, xkParent, xkPreceding, xkPrecedingSibling, xkSelf,
// node tests
xkComment, xkText, xkProcessingInstruction, xkNode,
// operators
xkAnd, xkOr, xkDiv, xkMod,
// standard functions
xkLast, xkPosition, xkCount, xkId, xkLocalName, xkNamespaceUri,
xkName, xkString, xkConcat, xkStartsWith, xkContains,
xkSubstringBefore, xkSubstringAfter, xkSubstring,
xkStringLength, xkNormalizeSpace, xkTranslate, xkBoolean,
xkNot, xkTrue, xkFalse, xkLang, xkNumber, xkSum, xkFloor,
xkCeiling, xkRound
);
{ XPath expression parse tree }
@ -347,6 +364,7 @@ type
FTokenStart: DOMPChar;
FTokenLength: Integer;
FPrefixLength: Integer;
FTokenId: TXPathKeyword;
FResolver: TXPathNSResolver;
procedure Error(const Msg: String);
procedure ParsePredicates(var Dest: TXPathNodeArray);
@ -485,6 +503,23 @@ implementation
uses Math, xmlutils;
{$i xpathkw.inc}
// Classification of TXPathKeyword values. Every set and array below is
// defined as a contiguous subrange of the enumeration, so these declarations
// depend on the member order of TXPathKeyword.
const
// Keywords that are valid axis names.
AxisNameKeywords = [xkAncestor..xkSelf];
// Axis denoted by each axis-name keyword; entries are listed in the same
// order as the keywords xkAncestor..xkSelf.
AxisNameMap: array[xkAncestor..xkSelf] of TAxis = (
axisAncestor, axisAncestorOrSelf, axisAttribute, axisChild,
axisDescendant, axisDescendantOrSelf, axisFollowing,
axisFollowingSibling, axisNamespace, axisParent, axisPreceding,
axisPrecedingSibling, axisSelf
);
// Keywords that are node tests: comment, text, processing-instruction, node.
NodeTestKeywords = [xkComment..xkNode];
// Node test type for each node-test keyword, in the order
// xkComment, xkText, xkProcessingInstruction, xkNode.
NodeTestMap: array[xkComment..xkNode] of TNodeTestType = (
ntCommentNode, ntTextNode, ntPINode, ntAnyNode
);
// Keywords that name the standard functions.
FunctionKeywords = [xkLast..xkRound];
{ Helper functions }
function NodeToText(Node: TDOMNode): DOMString;
@ -1593,6 +1628,10 @@ begin
FCurToken := Result;
if Result in [tkIdentifier, tkNSNameTest, tkNumber, tkString, tkVariable] then
SetString(FCurTokenString, FTokenStart, FTokenLength);
if Result = tkIdentifier then
FTokenId := LookupXPathKeyword(FTokenStart, FTokenLength)
else
FTokenId := xkNone;
end;
function TXPathScanner.SkipToken(tok: TXPathToken): Boolean; { inline? }
@ -1832,36 +1871,10 @@ begin
end
else if (CurToken = tkIdentifier) and (PeekToken = tkColonColon) then // [5] AxisName '::'
begin
// Check for [6] AxisName
if CurTokenString = 'ancestor' then
Axis := axisAncestor
else if CurTokenString = 'ancestor-or-self' then
Axis := axisAncestorOrSelf
else if CurTokenString = 'attribute' then
Axis := axisAttribute
else if CurTokenString = 'child' then
Axis := axisChild
else if CurTokenString = 'descendant' then
Axis := axisDescendant
else if CurTokenString = 'descendant-or-self' then
Axis := axisDescendantOrSelf
else if CurTokenString = 'following' then
Axis := axisFollowing
else if CurTokenString = 'following-sibling' then
Axis := axisFollowingSibling
else if CurTokenString = 'namespace' then
Axis := axisNamespace
else if CurTokenString = 'parent' then
Axis := axisParent
else if CurTokenString = 'preceding' then
Axis := axisPreceding
else if CurTokenString = 'preceding-sibling' then
Axis := axisPrecedingSibling
else if CurTokenString = 'self' then
Axis := axisSelf
if FTokenId in AxisNameKeywords then
Axis := AxisNameMap[FTokenId]
else
Error(SParserBadAxisName);
NextToken; // skip identifier and the '::'
NextToken;
end
@ -1874,15 +1887,6 @@ begin
end;
function TXPathScanner.ParseNodeTest(Axis: TAxis): TStep; // [7]
procedure NeedBrackets;
begin
NextToken;
if NextToken <> tkRightBracket then
Error(SParserExpectedRightBracket);
NextToken;
end;
var
nodeType: TNodeTestType;
nodeName: DOMString;
@ -1910,33 +1914,26 @@ begin
// Check for case [38] NodeType
if PeekToken = tkLeftBracket then
begin
if CurTokenString = 'comment' then
if FTokenId in NodeTestKeywords then
begin
NeedBrackets;
nodeType := ntCommentNode;
end
else if CurTokenString = 'text' then
begin
NeedBrackets;
nodeType := ntTextNode;
end
else if CurTokenString = 'processing-instruction' then
begin
NextToken; { skip '('; we know it's there }
if NextToken = tkString then
nodeType := NodeTestMap[FTokenId];
if FTokenId = xkProcessingInstruction then
begin
nodeName := CurTokenString;
NextToken;
if NextToken = tkString then
begin
nodeName := CurTokenString;
NextToken;
end;
end
else
begin
NextToken;
NextToken;
end;
if CurToken <> tkRightBracket then
Error(SParserExpectedRightBracket);
NextToken;
nodeType := ntPINode;
end
else if CurTokenString = 'node' then
begin
NeedBrackets;
nodeType := ntAnyNode;
end
else
Error(SParserBadNodeType);
@ -2029,10 +2026,7 @@ begin
Result := nil;
// Try to detect whether a LocationPath [1] or a FilterExpr [20] follows
if ((CurToken = tkIdentifier) and (PeekToken = tkLeftBracket) and
(CurTokenString <> 'comment') and
(CurTokenString <> 'text') and
(CurTokenString <> 'processing-instruction') and
(CurTokenString <> 'node')) or
not (FTokenId in NodeTestKeywords)) or
(CurToken in [tkVariable, tkLeftBracket, tkString, tkNumber]) then
begin
// second, third or fourth case of [19]
@ -2083,7 +2077,7 @@ end;
function TXPathScanner.ParseOrExpr: TXPathExprNode; // [21]
begin
Result := ParseAndExpr;
while (CurToken = tkIdentifier) and (CurTokenString = 'or') do
while FTokenId = xkOr do
begin
NextToken;
Result := TXPathBooleanOpNode.Create(opOr, Result, ParseAndExpr);
@ -2093,7 +2087,7 @@ end;
function TXPathScanner.ParseAndExpr: TXPathExprNode; // [22]
begin
Result := ParseEqualityExpr;
while (CurToken = tkIdentifier) and (CurTokenString = 'and') do
while FTokenId = xkAnd do
begin
NextToken;
Result := TXPathBooleanOpNode.Create(opAnd, Result, ParseEqualityExpr);
@ -2163,9 +2157,9 @@ begin
tkAsterisk:
op := opMultiply;
tkIdentifier:
if CurTokenString = 'div' then
if FTokenId = xkDiv then
op := opDivide
else if CurTokenString = 'mod' then
else if FTokenId = xkMod then
op := opMod
else
break;

View File

@ -0,0 +1,162 @@
{
This file is part of the Free Component Library
A perfect hash for XPath keywords
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
const
{ Spelling of every keyword, indexed by TXPathKeyword (entries must stay in
  the declaration order of that type).
  Each literal is length-prefixed: its first character is a control
  character whose ordinal value equals the number of characters in the
  keyword that follows (e.g. #08 for 'ancestor'). LookupXPathKeyword compares
  that ordinal with the candidate length before comparing the text that
  starts at the second character. The xkNone entry is the empty string. }
XPathKeywords: array [TXPathKeyword] of PWideChar = (
'',
#08'ancestor',
#16'ancestor-or-self',
#09'attribute',
#05'child',
#10'descendant',
#18'descendant-or-self',
#09'following',
#17'following-sibling',
#09'namespace',
#06'parent',
#09'preceding',
#17'preceding-sibling',
#04'self',
#07'comment',
#04'text',
#22'processing-instruction',
#04'node',
#03'and',
#02'or',
#03'div',
#03'mod',
#04'last',
#08'position',
#05'count',
#02'id',
#10'local-name',
#13'namespace-uri',
#04'name',
#06'string',
#06'concat',
#11'starts-with',
#08'contains',
#16'substring-before',
#15'substring-after',
#09'substring',
#13'string-length',
#15'normalize-space',
#09'translate',
#07'boolean',
#03'not',
#04'true',
#05'false',
#04'lang',
#06'number',
#03'sum',
#05'floor',
#07'ceiling',
#05'round'
);
{ The following code is not very maintainable because it was hand-ported from
C code generated by gperf. Unless a tool like gperf is ported or modified to
generate Pascal, modifying it will be painful.
The good side is that one shouldn't ever need to modify it.

How the tables fit together (as used by LookupXPathKeyword):
  hash = Len + AssoValues[first char]
             + AssoValues[third char + 1]   (third char only when Len > 2)
and KeywordIndex[hash] is the single keyword that can have that hash value. }
// Number of slots in KeywordIndex. The valid slot indexes are 0..MaxHash-1,
// so MaxHash itself is NOT a valid index.
MaxHash = 55;
// Maps a hash value to its only candidate keyword; xkNone marks a slot that
// no keyword hashes to.
KeywordIndex: array[0..MaxHash-1] of TXPathKeyword = (
xkNone, xkNone,
xkId,
xkNone, xkNone, xkNone,
xkString,
xkSum,
xkParent,
xkSubstring,
xkNone,
xkComment,
xkName,
xkStringLength,
xkNumber,
xkSubstringAfter,
xkSubstringBefore,
xkNamespace,
xkFloor,
xkNormalizeSpace,
xkSelf,
xkNamespaceUri,
xkPreceding,
xkOr,
xkPosition,
xkText,
xkProcessingInstruction,
xkConcat,
xkLast,
xkContains,
xkPrecedingSibling,
xkAncestor,
xkFalse,
xkLocalName,
xkCount,
xkLang,
xkFollowing,
xkDescendant,
xkNode,
xkAncestorOrSelf,
xkBoolean,
xkNot,
xkStartsWith,
xkAnd,
xkFollowingSibling,
xkDescendantOrSelf,
xkChild,
xkTrue,
xkCeiling,
xkMod,
xkDiv,
xkRound,
xkNone,
xkAttribute,
xkTranslate
);
// Per-character hash contribution, indexed by character code 97..122
// ('a'..'z'); each row below holds ten consecutive letters. The value 55
// (equal to MaxHash) is carried by g, h, k, q, x and z, none of which occurs
// at a hashed position of any keyword.
AssoValues: array[97..122] of Byte = (
10, 31, 0, 13, 30, 11, 55, 55, 0, 41,
55, 10, 16, 4, 21, 2, 55, 17, 0, 14,
34, 29, 34, 55, 7, 55
);
{ Returns the keyword spelled by the Len characters starting at p, or xkNone
  when that text is not an XPath keyword.

  p   - first character of the identifier; it need not be zero-terminated,
        only p[0..Len-1] is examined.
  Len - number of characters in the identifier.

  A perfect hash is computed from the length, the first character and (for
  identifiers longer than two characters) the third character. The hash
  selects at most one candidate in KeywordIndex, which is then confirmed by
  a length check and a full comparison against XPathKeywords. }
function LookupXPathKeyword(p: PWideChar; Len: Integer): TXPathKeyword;
var
  hash: Integer;
  p1: PWideChar;
begin
  Result := xkNone;
  hash := Len;
  if Len >= 1 then
  begin
    // AssoValues covers 'a'..'z' only; any other first character cannot
    // start a keyword. ('z' is rejected as well: no keyword starts with it.)
    if (p^ >= 'a') and (p^ <= 'y') then
      Inc(hash, AssoValues[ord(p^)])
    else
      Exit;
    if Len > 2 then
      // The third character is looked up at ord+1, hence the upper limit of
      // 'y' to stay inside AssoValues. The "else" pairs with this inner "if".
      if (p[2] >= 'a') and (p[2] <= 'y') then
        Inc(hash, AssoValues[ord(p[2])+1])
      else
        Exit;
  end;
  // KeywordIndex is declared as [0..MaxHash-1], so the hash must be checked
  // against the real array bounds. Comparing with "<= MaxHash" let a hash of
  // exactly MaxHash through (e.g. a 14-character identifier that starts with
  // 'j' and has 'b' as its third character) and read one element past the
  // end of the table.
  if (hash >= Low(KeywordIndex)) and (hash <= High(KeywordIndex)) then
  begin
    p1 := XPathKeywords[KeywordIndex[hash]];
    // p1^ holds the keyword length; the keyword text itself starts at p1+1.
    if (ord(p1^) = Len) and
      CompareMem(p, p1+1, Len*sizeof(WideChar)) then
      Result := KeywordIndex[hash];
  end;
end;