XPath, use a perfect hash to recognize all possible keywords.

git-svn-id: trunk@15638 -
2025-08-17 19:29:18 +02:00 · 2010-07-26 13:49:46 +00:00 · 2010-07-26 13:49:46 +00:00 · 0f5795baaf
commit 0f5795baaf
parent 829f8164fc
3 changed files with 222 additions and 65 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -2323,6 +2323,7 @@ packages/fcl-xml/src/xmlstreaming.pp svneol=native#text/plain
 packages/fcl-xml/src/xmlutils.pp svneol=native#text/plain
 packages/fcl-xml/src/xmlwrite.pp svneol=native#text/plain
 packages/fcl-xml/src/xpath.pp svneol=native#text/plain
 packages/fcl-xml/src/xpathkw.inc svneol=native#text/plain
 packages/fcl-xml/tests/README.txt svneol=native#text/plain
 packages/fcl-xml/tests/README_DOM.txt svneol=native#text/plain
 packages/fcl-xml/tests/api.xml svneol=native#text/plain
--- a/packages/fcl-xml/src/xpath.pp
+++ b/packages/fcl-xml/src/xpath.pp
@ -95,6 +95,23 @@ type
    tkPipe                      // "|"
  );
  { Identifies a keyword found in an XPath expression; xkNone is the value
    used when a token is not a keyword.
    The declaration order is significant: the implementation section builds
    sets and array index types from the subranges xkAncestor..xkSelf,
    xkComment..xkNode and xkLast..xkRound, and the XPathKeywords table in
    xpathkw.inc is indexed by this type, so its entries must stay in the
    same order as the values below. }
  TXPathKeyword = (
    // axis names (xkNone comes first and is not an axis name)
    xkNone, xkAncestor,  xkAncestorOrSelf,  xkAttribute,  xkChild,
    xkDescendant, xkDescendantOrSelf, xkFollowing, xkFollowingSibling,
    xkNamespace, xkParent, xkPreceding, xkPrecedingSibling, xkSelf,
    // node tests
    xkComment, xkText, xkProcessingInstruction, xkNode,
    // operators
    xkAnd, xkOr, xkDiv, xkMod,
    // standard functions
    xkLast, xkPosition, xkCount, xkId, xkLocalName, xkNamespaceUri,
    xkName, xkString, xkConcat, xkStartsWith, xkContains,
    xkSubstringBefore, xkSubstringAfter, xkSubstring,
    xkStringLength, xkNormalizeSpace, xkTranslate, xkBoolean,
    xkNot, xkTrue, xkFalse, xkLang, xkNumber, xkSum, xkFloor,
    xkCeiling, xkRound
  );
 { XPath expression parse tree }
@ -347,6 +364,7 @@ type
    FTokenStart: DOMPChar;
    FTokenLength: Integer;
    FPrefixLength: Integer;
    FTokenId: TXPathKeyword;
    FResolver: TXPathNSResolver;
    procedure Error(const Msg: String);
    procedure ParsePredicates(var Dest: TXPathNodeArray);
@ -485,6 +503,23 @@ implementation
 uses Math, xmlutils;
 {$i xpathkw.inc}
 const
  // Keyword ids that are axis names.
  AxisNameKeywords = [xkAncestor..xkSelf];

  // Axis selected by each axis-name keyword. The entries are listed in the
  // declaration order of xkAncestor..xkSelf, so the two must be kept in sync.
  AxisNameMap: array[xkAncestor..xkSelf] of TAxis = (
    axisAncestor, axisAncestorOrSelf, axisAttribute, axisChild,
    axisDescendant, axisDescendantOrSelf, axisFollowing,
    axisFollowingSibling, axisNamespace, axisParent, axisPreceding,
    axisPrecedingSibling, axisSelf
  );

  // Keyword ids that are node type tests.
  NodeTestKeywords = [xkComment..xkNode];

  // Node test type for each node-test keyword, in the declaration order of
  // xkComment..xkNode (comment, text, processing-instruction, node).
  NodeTestMap: array[xkComment..xkNode] of TNodeTestType = (
    ntCommentNode, ntTextNode, ntPINode, ntAnyNode
  );

  // Keyword ids that are standard function names.
  FunctionKeywords = [xkLast..xkRound];
 { Helper functions }
 function NodeToText(Node: TDOMNode): DOMString;
@ -1593,6 +1628,10 @@ begin
  FCurToken := Result;
  if Result in [tkIdentifier, tkNSNameTest, tkNumber, tkString, tkVariable] then
    SetString(FCurTokenString, FTokenStart, FTokenLength);
  if Result = tkIdentifier then
    FTokenId := LookupXPathKeyword(FTokenStart, FTokenLength)
  else
    FTokenId := xkNone;
 end;
 function TXPathScanner.SkipToken(tok: TXPathToken): Boolean; { inline? }
@ -1832,36 +1871,10 @@ begin
    end
    else if (CurToken = tkIdentifier) and (PeekToken = tkColonColon) then  // [5] AxisName '::'
    begin
-      // Check for [6] AxisName
+      if FTokenId in AxisNameKeywords then
-      if CurTokenString = 'ancestor' then
+        Axis := AxisNameMap[FTokenId]
        Axis := axisAncestor
      else if CurTokenString = 'ancestor-or-self' then
        Axis := axisAncestorOrSelf
      else if CurTokenString = 'attribute' then
        Axis := axisAttribute
      else if CurTokenString = 'child' then
        Axis := axisChild
      else if CurTokenString = 'descendant' then
        Axis := axisDescendant
      else if CurTokenString = 'descendant-or-self' then
        Axis := axisDescendantOrSelf
      else if CurTokenString = 'following' then
        Axis := axisFollowing
      else if CurTokenString = 'following-sibling' then
        Axis := axisFollowingSibling
      else if CurTokenString = 'namespace' then
        Axis := axisNamespace
      else if CurTokenString = 'parent' then
        Axis := axisParent
      else if CurTokenString = 'preceding' then
        Axis := axisPreceding
      else if CurTokenString = 'preceding-sibling' then
        Axis := axisPrecedingSibling
      else if CurTokenString = 'self' then
        Axis := axisSelf
      else
        Error(SParserBadAxisName);
      NextToken;  // skip identifier and the '::'
      NextToken;
    end
@ -1874,15 +1887,6 @@ begin
 end;
 function TXPathScanner.ParseNodeTest(Axis: TAxis): TStep; // [7]
  procedure NeedBrackets;
  begin
    NextToken;
    if NextToken <> tkRightBracket then
       Error(SParserExpectedRightBracket);
    NextToken;
  end;
 var
  nodeType: TNodeTestType;
  nodeName: DOMString;
@ -1910,33 +1914,26 @@ begin
    // Check for case [38] NodeType
    if PeekToken = tkLeftBracket then
    begin
-      if CurTokenString = 'comment' then
+      if FTokenId in NodeTestKeywords then
      begin
-        NeedBrackets;
+        nodeType := NodeTestMap[FTokenId];
-        nodeType := ntCommentNode;
+        if FTokenId = xkProcessingInstruction then
      end
      else if CurTokenString = 'text' then
      begin
        NeedBrackets;
        nodeType := ntTextNode;
      end
      else if CurTokenString = 'processing-instruction' then
      begin
        NextToken;   { skip '('; we know it's there }
        if NextToken = tkString then
        begin
-          nodeName := CurTokenString;
+          NextToken;
          if NextToken = tkString then
          begin
            nodeName := CurTokenString;
            NextToken;
          end;
        end
        else
        begin
          NextToken;
          NextToken;
        end;
        if CurToken <> tkRightBracket then
          Error(SParserExpectedRightBracket);
        NextToken;
        nodeType := ntPINode;
      end
      else if CurTokenString = 'node' then
      begin
        NeedBrackets;
        nodeType := ntAnyNode;
      end
      else
        Error(SParserBadNodeType);
@ -2029,10 +2026,7 @@ begin
  Result := nil;
  // Try to detect whether a LocationPath [1] or a FilterExpr [20] follows
  if ((CurToken = tkIdentifier) and (PeekToken = tkLeftBracket) and
-    (CurTokenString <> 'comment') and
+    not (FTokenId in NodeTestKeywords)) or
    (CurTokenString <> 'text') and
    (CurTokenString <> 'processing-instruction') and
    (CurTokenString <> 'node')) or
    (CurToken in [tkVariable, tkLeftBracket, tkString, tkNumber]) then
  begin
    // second, third or fourth case of [19]
@ -2083,7 +2077,7 @@ end;
 function TXPathScanner.ParseOrExpr: TXPathExprNode;  // [21]
 begin
  Result := ParseAndExpr;
-  while (CurToken = tkIdentifier) and (CurTokenString = 'or') do
+  while FTokenId = xkOr do
  begin
    NextToken;
    Result := TXPathBooleanOpNode.Create(opOr, Result, ParseAndExpr);
@ -2093,7 +2087,7 @@ end;
 function TXPathScanner.ParseAndExpr: TXPathExprNode;  // [22]
 begin
  Result := ParseEqualityExpr;
-  while (CurToken = tkIdentifier) and (CurTokenString = 'and') do
+  while FTokenId = xkAnd do
  begin
    NextToken;
    Result := TXPathBooleanOpNode.Create(opAnd, Result, ParseEqualityExpr);
@ -2163,9 +2157,9 @@ begin
      tkAsterisk:
        op := opMultiply;
      tkIdentifier:
-        if CurTokenString = 'div' then
+        if FTokenId = xkDiv then
          op := opDivide
-        else if CurTokenString = 'mod' then
+        else if FTokenId = xkMod then
          op := opMod
        else
          break;
--- a/packages/fcl-xml/src/xpathkw.inc
+++ b/packages/fcl-xml/src/xpathkw.inc
@ -0,0 +1,162 @@
 {
    This file is part of the Free Component Library
    A perfect hash for XPath keywords
    See the file COPYING.FPC, included in this distribution,
    for details about the copyright.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 **********************************************************************}
 const
  { Spelling of every keyword, indexed by TXPathKeyword; the entries must be
    kept in the declaration order of that type.
    Each non-empty entry is length-prefixed: its first character has the
    keyword length as its character code (e.g. #08 for 'ancestor') and the
    keyword text follows. LookupXPathKeyword compares that leading code with
    the candidate length before comparing the text itself.
    The entry for xkNone is the empty string. }
  XPathKeywords: array [TXPathKeyword] of PWideChar = (
    '',
    #08'ancestor',
    #16'ancestor-or-self',
    #09'attribute',
    #05'child',
    #10'descendant',
    #18'descendant-or-self',
    #09'following',
    #17'following-sibling',
    #09'namespace',
    #06'parent',
    #09'preceding',
    #17'preceding-sibling',
    #04'self',
    #07'comment',
    #04'text',
    #22'processing-instruction',
    #04'node',
    #03'and',
    #02'or',
    #03'div',
    #03'mod',
    #04'last',
    #08'position',
    #05'count',
    #02'id',
    #10'local-name',
    #13'namespace-uri',
    #04'name',
    #06'string',
    #06'concat',
    #11'starts-with',
    #08'contains',
    #16'substring-before',
    #15'substring-after',
    #09'substring',
    #13'string-length',
    #15'normalize-space',
    #09'translate',
    #07'boolean',
    #03'not',
    #04'true',
    #05'false',
    #04'lang',
    #06'number',
    #03'sum',
    #05'floor',
    #07'ceiling',
    #05'round'
  );
 { The tables and the lookup function below are not very maintainable because
  they were hand-ported from C code generated by gperf. Unless a tool like
  gperf is ported or modified to generate Pascal, modifying them will be
  painful. The good side is that one shouldn't ever need to modify them. }

  // Number of slots in KeywordIndex. Valid hash values are 0..MaxHash-1;
  // MaxHash itself is NOT a valid index into the table.
  MaxHash = 55;

  // Hash slot -> keyword id. Slots that no keyword hashes to hold xkNone.
  // A slot only names a candidate: the caller still has to compare the
  // candidate's spelling with the input text.
  KeywordIndex: array[0..MaxHash-1] of TXPathKeyword = (
    xkNone, xkNone,
    xkId,
    xkNone, xkNone, xkNone,
    xkString,
    xkSum,
    xkParent,
    xkSubstring,
    xkNone,
    xkComment,
    xkName,
    xkStringLength,
    xkNumber,
    xkSubstringAfter,
    xkSubstringBefore,
    xkNamespace,
    xkFloor,
    xkNormalizeSpace,
    xkSelf,
    xkNamespaceUri,
    xkPreceding,
    xkOr,
    xkPosition,
    xkText,
    xkProcessingInstruction,
    xkConcat,
    xkLast,
    xkContains,
    xkPrecedingSibling,
    xkAncestor,
    xkFalse,
    xkLocalName,
    xkCount,
    xkLang,
    xkFollowing,
    xkDescendant,
    xkNode,
    xkAncestorOrSelf,
    xkBoolean,
    xkNot,
    xkStartsWith,
    xkAnd,
    xkFollowingSibling,
    xkDescendantOrSelf,
    xkChild,
    xkTrue,
    xkCeiling,
    xkMod,
    xkDiv,
    xkRound,
    xkNone,
    xkAttribute,
    xkTranslate
  );
  { Per-character weights of the hash function, indexed by character code
    97..122 ('a'..'z'), ten values per row starting at 'a'.
    LookupXPathKeyword adds the weight of the first character and, for
    inputs longer than two characters, the weight stored at
    (code of the third character) + 1. }
  AssoValues: array[97..122] of Byte = (
    10, 31,  0, 13, 30, 11, 55, 55, 0, 41,
    55, 10, 16,  4, 21,  2, 55, 17, 0, 14,
    34, 29, 34, 55,  7, 55
  );
 { Looks up the Len WideChars starting at p in the XPath keyword table.

   Parameters:
     p   - first character of the candidate text (need not be terminated).
     Len - number of characters in the candidate.
   Returns the matching TXPathKeyword, or xkNone when the text is not a
   keyword (including Len <= 0).

   The hash is Len + AssoValues[first char] + AssoValues[third char + 1]
   (the last term only when Len > 2). It selects one slot of KeywordIndex;
   the spelling stored for that slot is then compared with the input. }
 function LookupXPathKeyword(p: PWideChar; Len: Integer): TXPathKeyword;
 var
  hash: Integer;
  p1: PWideChar;
 begin
  Result := xkNone;
  hash := Len;
  if Len >= 1 then
  begin
    // Characters outside 'a'..'y' are rejected up front; this also keeps
    // every AssoValues index (including the "+1" one below) within 97..122.
    if (p^ >= 'a') and (p^ <= 'y') then
      Inc(hash, AssoValues[ord(p^)])
    else
      Exit;
    if Len > 2 then
    begin
      if (p[2] >= 'a') and (p[2] <= 'y') then
        Inc(hash, AssoValues[ord(p[2])+1])
      else
        Exit;
    end;
  end;
  // Bound the hash by the table's real index range. The previous test
  // "hash <= MaxHash" let hash = MaxHash through, one past the last slot of
  // KeywordIndex (array[0..MaxHash-1]); e.g. 'oxa' hashes to 3+21+31 = 55.
  if (hash >= Low(KeywordIndex)) and (hash <= High(KeywordIndex)) then
  begin
    p1 := XPathKeywords[KeywordIndex[hash]];
    // Entries are length-prefixed: check the length first, then the text.
    if (ord(p1^) = Len) and
      CompareMem(p, p1+1, Len*sizeof(WideChar)) then
        Result := KeywordIndex[hash];
  end;
 end;