XPath, use a perfect hash to recognize all possible keywords.

git-svn-id: trunk@15638 -
This commit is contained in:
sergei 2010-07-26 13:49:46 +00:00
parent 829f8164fc
commit 0f5795baaf
3 changed files with 222 additions and 65 deletions

1
.gitattributes vendored
View File

@ -2323,6 +2323,7 @@ packages/fcl-xml/src/xmlstreaming.pp svneol=native#text/plain
packages/fcl-xml/src/xmlutils.pp svneol=native#text/plain packages/fcl-xml/src/xmlutils.pp svneol=native#text/plain
packages/fcl-xml/src/xmlwrite.pp svneol=native#text/plain packages/fcl-xml/src/xmlwrite.pp svneol=native#text/plain
packages/fcl-xml/src/xpath.pp svneol=native#text/plain packages/fcl-xml/src/xpath.pp svneol=native#text/plain
packages/fcl-xml/src/xpathkw.inc svneol=native#text/plain
packages/fcl-xml/tests/README.txt svneol=native#text/plain packages/fcl-xml/tests/README.txt svneol=native#text/plain
packages/fcl-xml/tests/README_DOM.txt svneol=native#text/plain packages/fcl-xml/tests/README_DOM.txt svneol=native#text/plain
packages/fcl-xml/tests/api.xml svneol=native#text/plain packages/fcl-xml/tests/api.xml svneol=native#text/plain

View File

@ -95,6 +95,23 @@ type
tkPipe // "|" tkPipe // "|"
); );
{ Identifiers for the names that the XPath scanner recognizes as keywords.
  xkNone is the "not a keyword" value.

  The ORDER of the members is significant:
  - the subranges xkAncestor..xkSelf, xkComment..xkNode and xkLast..xkRound
    are used as set constants and as array index ranges in the
    implementation section (AxisNameKeywords/AxisNameMap,
    NodeTestKeywords/NodeTestMap, FunctionKeywords);
  - the XPathKeywords table in xpathkw.inc is indexed by this type, so it
    must list the keyword spellings in exactly the same order. }
TXPathKeyword = (
// xkNone (no keyword), followed by the axis names
xkNone, xkAncestor, xkAncestorOrSelf, xkAttribute, xkChild,
xkDescendant, xkDescendantOrSelf, xkFollowing, xkFollowingSibling,
xkNamespace, xkParent, xkPreceding, xkPrecedingSibling, xkSelf,
// node tests
xkComment, xkText, xkProcessingInstruction, xkNode,
// operators
xkAnd, xkOr, xkDiv, xkMod,
// standard functions
xkLast, xkPosition, xkCount, xkId, xkLocalName, xkNamespaceUri,
xkName, xkString, xkConcat, xkStartsWith, xkContains,
xkSubstringBefore, xkSubstringAfter, xkSubstring,
xkStringLength, xkNormalizeSpace, xkTranslate, xkBoolean,
xkNot, xkTrue, xkFalse, xkLang, xkNumber, xkSum, xkFloor,
xkCeiling, xkRound
);
{ XPath expression parse tree } { XPath expression parse tree }
@ -347,6 +364,7 @@ type
FTokenStart: DOMPChar; FTokenStart: DOMPChar;
FTokenLength: Integer; FTokenLength: Integer;
FPrefixLength: Integer; FPrefixLength: Integer;
FTokenId: TXPathKeyword;
FResolver: TXPathNSResolver; FResolver: TXPathNSResolver;
procedure Error(const Msg: String); procedure Error(const Msg: String);
procedure ParsePredicates(var Dest: TXPathNodeArray); procedure ParsePredicates(var Dest: TXPathNodeArray);
@ -485,6 +503,23 @@ implementation
uses Math, xmlutils; uses Math, xmlutils;
{$i xpathkw.inc}
const
// Keywords that are valid axis names (the part before '::').
AxisNameKeywords = [xkAncestor..xkSelf];
// Translates an axis-name keyword into the corresponding TAxis value.
// The entries are positional: they must stay in the same order as the
// xkAncestor..xkSelf members of TXPathKeyword.
AxisNameMap: array[xkAncestor..xkSelf] of TAxis = (
axisAncestor, axisAncestorOrSelf, axisAttribute, axisChild,
axisDescendant, axisDescendantOrSelf, axisFollowing,
axisFollowingSibling, axisNamespace, axisParent, axisPreceding,
axisPrecedingSibling, axisSelf
);
// Keywords that name a node type test: comment, text,
// processing-instruction and node.
NodeTestKeywords = [xkComment..xkNode];
// Translates a node-test keyword into the corresponding TNodeTestType.
// Positional as well: order must match xkComment..xkNode.
NodeTestMap: array[xkComment..xkNode] of TNodeTestType = (
ntCommentNode, ntTextNode, ntPINode, ntAnyNode
);
// Keywords in the "standard functions" group of TXPathKeyword.
FunctionKeywords = [xkLast..xkRound];
{ Helper functions } { Helper functions }
function NodeToText(Node: TDOMNode): DOMString; function NodeToText(Node: TDOMNode): DOMString;
@ -1593,6 +1628,10 @@ begin
FCurToken := Result; FCurToken := Result;
if Result in [tkIdentifier, tkNSNameTest, tkNumber, tkString, tkVariable] then if Result in [tkIdentifier, tkNSNameTest, tkNumber, tkString, tkVariable] then
SetString(FCurTokenString, FTokenStart, FTokenLength); SetString(FCurTokenString, FTokenStart, FTokenLength);
if Result = tkIdentifier then
FTokenId := LookupXPathKeyword(FTokenStart, FTokenLength)
else
FTokenId := xkNone;
end; end;
function TXPathScanner.SkipToken(tok: TXPathToken): Boolean; { inline? } function TXPathScanner.SkipToken(tok: TXPathToken): Boolean; { inline? }
@ -1832,36 +1871,10 @@ begin
end end
else if (CurToken = tkIdentifier) and (PeekToken = tkColonColon) then // [5] AxisName '::' else if (CurToken = tkIdentifier) and (PeekToken = tkColonColon) then // [5] AxisName '::'
begin begin
// Check for [6] AxisName if FTokenId in AxisNameKeywords then
if CurTokenString = 'ancestor' then Axis := AxisNameMap[FTokenId]
Axis := axisAncestor
else if CurTokenString = 'ancestor-or-self' then
Axis := axisAncestorOrSelf
else if CurTokenString = 'attribute' then
Axis := axisAttribute
else if CurTokenString = 'child' then
Axis := axisChild
else if CurTokenString = 'descendant' then
Axis := axisDescendant
else if CurTokenString = 'descendant-or-self' then
Axis := axisDescendantOrSelf
else if CurTokenString = 'following' then
Axis := axisFollowing
else if CurTokenString = 'following-sibling' then
Axis := axisFollowingSibling
else if CurTokenString = 'namespace' then
Axis := axisNamespace
else if CurTokenString = 'parent' then
Axis := axisParent
else if CurTokenString = 'preceding' then
Axis := axisPreceding
else if CurTokenString = 'preceding-sibling' then
Axis := axisPrecedingSibling
else if CurTokenString = 'self' then
Axis := axisSelf
else else
Error(SParserBadAxisName); Error(SParserBadAxisName);
NextToken; // skip identifier and the '::' NextToken; // skip identifier and the '::'
NextToken; NextToken;
end end
@ -1874,15 +1887,6 @@ begin
end; end;
function TXPathScanner.ParseNodeTest(Axis: TAxis): TStep; // [7] function TXPathScanner.ParseNodeTest(Axis: TAxis): TStep; // [7]
procedure NeedBrackets;
begin
NextToken;
if NextToken <> tkRightBracket then
Error(SParserExpectedRightBracket);
NextToken;
end;
var var
nodeType: TNodeTestType; nodeType: TNodeTestType;
nodeName: DOMString; nodeName: DOMString;
@ -1910,33 +1914,26 @@ begin
// Check for case [38] NodeType // Check for case [38] NodeType
if PeekToken = tkLeftBracket then if PeekToken = tkLeftBracket then
begin begin
if CurTokenString = 'comment' then if FTokenId in NodeTestKeywords then
begin begin
NeedBrackets; nodeType := NodeTestMap[FTokenId];
nodeType := ntCommentNode; if FTokenId = xkProcessingInstruction then
end
else if CurTokenString = 'text' then
begin
NeedBrackets;
nodeType := ntTextNode;
end
else if CurTokenString = 'processing-instruction' then
begin
NextToken; { skip '('; we know it's there }
if NextToken = tkString then
begin begin
nodeName := CurTokenString; NextToken;
if NextToken = tkString then
begin
nodeName := CurTokenString;
NextToken;
end;
end
else
begin
NextToken;
NextToken; NextToken;
end; end;
if CurToken <> tkRightBracket then if CurToken <> tkRightBracket then
Error(SParserExpectedRightBracket); Error(SParserExpectedRightBracket);
NextToken; NextToken;
nodeType := ntPINode;
end
else if CurTokenString = 'node' then
begin
NeedBrackets;
nodeType := ntAnyNode;
end end
else else
Error(SParserBadNodeType); Error(SParserBadNodeType);
@ -2029,10 +2026,7 @@ begin
Result := nil; Result := nil;
// Try to detect whether a LocationPath [1] or a FilterExpr [20] follows // Try to detect whether a LocationPath [1] or a FilterExpr [20] follows
if ((CurToken = tkIdentifier) and (PeekToken = tkLeftBracket) and if ((CurToken = tkIdentifier) and (PeekToken = tkLeftBracket) and
(CurTokenString <> 'comment') and not (FTokenId in NodeTestKeywords)) or
(CurTokenString <> 'text') and
(CurTokenString <> 'processing-instruction') and
(CurTokenString <> 'node')) or
(CurToken in [tkVariable, tkLeftBracket, tkString, tkNumber]) then (CurToken in [tkVariable, tkLeftBracket, tkString, tkNumber]) then
begin begin
// second, third or fourth case of [19] // second, third or fourth case of [19]
@ -2083,7 +2077,7 @@ end;
function TXPathScanner.ParseOrExpr: TXPathExprNode; // [21] function TXPathScanner.ParseOrExpr: TXPathExprNode; // [21]
begin begin
Result := ParseAndExpr; Result := ParseAndExpr;
while (CurToken = tkIdentifier) and (CurTokenString = 'or') do while FTokenId = xkOr do
begin begin
NextToken; NextToken;
Result := TXPathBooleanOpNode.Create(opOr, Result, ParseAndExpr); Result := TXPathBooleanOpNode.Create(opOr, Result, ParseAndExpr);
@ -2093,7 +2087,7 @@ end;
function TXPathScanner.ParseAndExpr: TXPathExprNode; // [22] function TXPathScanner.ParseAndExpr: TXPathExprNode; // [22]
begin begin
Result := ParseEqualityExpr; Result := ParseEqualityExpr;
while (CurToken = tkIdentifier) and (CurTokenString = 'and') do while FTokenId = xkAnd do
begin begin
NextToken; NextToken;
Result := TXPathBooleanOpNode.Create(opAnd, Result, ParseEqualityExpr); Result := TXPathBooleanOpNode.Create(opAnd, Result, ParseEqualityExpr);
@ -2163,9 +2157,9 @@ begin
tkAsterisk: tkAsterisk:
op := opMultiply; op := opMultiply;
tkIdentifier: tkIdentifier:
if CurTokenString = 'div' then if FTokenId = xkDiv then
op := opDivide op := opDivide
else if CurTokenString = 'mod' then else if FTokenId = xkMod then
op := opMod op := opMod
else else
break; break;

View File

@ -0,0 +1,162 @@
{
This file is part of the Free Component Library
A perfect hash for XPath keywords
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
const
{ Spelling of every XPath keyword, indexed by TXPathKeyword (so the entries
  must be listed in the declaration order of that enumeration).

  Each entry is length-prefixed: its first character is the character whose
  code equals the number of characters in the keyword (e.g. #08 for
  'ancestor'), followed by the keyword text itself. LookupXPathKeyword
  compares that first character against the candidate length before
  comparing the text. The xkNone entry is the empty string. }
XPathKeywords: array [TXPathKeyword] of PWideChar = (
'',
#08'ancestor',
#16'ancestor-or-self',
#09'attribute',
#05'child',
#10'descendant',
#18'descendant-or-self',
#09'following',
#17'following-sibling',
#09'namespace',
#06'parent',
#09'preceding',
#17'preceding-sibling',
#04'self',
#07'comment',
#04'text',
#22'processing-instruction',
#04'node',
#03'and',
#02'or',
#03'div',
#03'mod',
#04'last',
#08'position',
#05'count',
#02'id',
#10'local-name',
#13'namespace-uri',
#04'name',
#06'string',
#06'concat',
#11'starts-with',
#08'contains',
#16'substring-before',
#15'substring-after',
#09'substring',
#13'string-length',
#15'normalize-space',
#09'translate',
#07'boolean',
#03'not',
#04'true',
#05'false',
#04'lang',
#06'number',
#03'sum',
#05'floor',
#07'ceiling',
#05'round'
);
{ The following code is not very maintainable because it was hand-ported from
C code generated by gperf. Unless a tool like gperf is ported or modified to
generate Pascal, modifying it will be painful.
The good side is that one shouldn't ever need to modify it. }
// Number of slots in the hash table below; valid hash values (and valid
// indices into KeywordIndex) are 0..MaxHash-1.
MaxHash = 55;
// Hash table: maps a hash value computed by LookupXPathKeyword to the one
// keyword that may have that hash. Slots holding xkNone are hash values
// that no keyword produces.
KeywordIndex: array[0..MaxHash-1] of TXPathKeyword = (
xkNone, xkNone,
xkId,
xkNone, xkNone, xkNone,
xkString,
xkSum,
xkParent,
xkSubstring,
xkNone,
xkComment,
xkName,
xkStringLength,
xkNumber,
xkSubstringAfter,
xkSubstringBefore,
xkNamespace,
xkFloor,
xkNormalizeSpace,
xkSelf,
xkNamespaceUri,
xkPreceding,
xkOr,
xkPosition,
xkText,
xkProcessingInstruction,
xkConcat,
xkLast,
xkContains,
xkPrecedingSibling,
xkAncestor,
xkFalse,
xkLocalName,
xkCount,
xkLang,
xkFollowing,
xkDescendant,
xkNode,
xkAncestorOrSelf,
xkBoolean,
xkNot,
xkStartsWith,
xkAnd,
xkFollowingSibling,
xkDescendantOrSelf,
xkChild,
xkTrue,
xkCeiling,
xkMod,
xkDiv,
xkRound,
xkNone,
xkAttribute,
xkTranslate
);
// Per-character contribution to the keyword hash, indexed by character code
// 97..122 (the lowercase letters 'a'..'z'). LookupXPathKeyword adds the
// value for the first character and, for names longer than two characters,
// the value at (code of the third character + 1) to the name length.
// The value 55 equals MaxHash: a letter carrying it pushes the hash of any
// non-empty name past the last slot of KeywordIndex.
AssoValues: array[97..122] of Byte = (
10, 31, 0, 13, 30, 11, 55, 55, 0, 41,
55, 10, 16, 4, 21, 2, 55, 17, 0, 14,
34, 29, 34, 55, 7, 55
);
{ Determines whether the name of Len characters starting at p is an XPath
  keyword.

  Parameters:
    p   - pointer to the first character of the name (need not be
          null-terminated; only Len characters are examined);
    Len - number of characters in the name.

  Returns the matching TXPathKeyword, or xkNone when the name is not a
  keyword.

  The hash is Len plus the AssoValues entry of the first character plus,
  for names longer than two characters, the AssoValues entry at
  (third character + 1). The hash selects at most one candidate in
  KeywordIndex, which is then verified by a full length + text comparison. }
function LookupXPathKeyword(p: PWideChar; Len: Integer): TXPathKeyword;
var
  hash: Integer;
  p1: PWideChar;
begin
  result := xkNone;
  hash := Len;
  if Len >= 1 then
  begin
    // Only 'a'..'y' can start a keyword; this also keeps the AssoValues
    // index inside 97..122.
    if (p^ >= 'a') and (p^ <= 'y') then
      Inc(hash, AssoValues[ord(p^)])
    else
      Exit;
    if Len > 2 then
      // The upper limit 'y' matters here: the index is ord(p[2])+1, so 'z'
      // would address element 123, past the end of AssoValues.
      if (p[2] >= 'a') and (p[2] <= 'y') then
        Inc(hash, AssoValues[ord(p[2])+1])
      else
        Exit;
  end;
  // KeywordIndex has MaxHash entries, indexed 0..MaxHash-1. The upper bound
  // must therefore be High(KeywordIndex), not MaxHash: a non-keyword name
  // that hashes to exactly MaxHash (e.g. 'uaqa': 4 + 34 + 17 = 55) would
  // otherwise read one element past the end of the table and use that
  // garbage to index XPathKeywords.
  if (hash >= Low(KeywordIndex)) and (hash <= High(KeywordIndex)) then
  begin
    p1 := XPathKeywords[KeywordIndex[hash]];
    // First character of the table entry is the keyword length; the text
    // follows it.
    if (ord(p1^) = Len) and
      CompareMem(p, p1+1, Len*sizeof(WideChar)) then
      Result := KeywordIndex[hash];
  end;
end;