From 795365eee7a472c40e68d662e53787104a2b0c50 Mon Sep 17 00:00:00 2001
From: michael <michael@freepascal.org>
Date: Fri, 3 Jul 2020 19:37:41 +0000
Subject: [PATCH] * Patch from Pawel Dmitruk to add CDATA and comment support

git-svn-id: trunk@45724 -
---
 packages/fcl-xml/src/sax_xml.pp | 77 ++++++++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 2 deletions(-)

diff --git a/packages/fcl-xml/src/sax_xml.pp b/packages/fcl-xml/src/sax_xml.pp
index 4cfb3af187..29ddff1be2 100644
--- a/packages/fcl-xml/src/sax_xml.pp
+++ b/packages/fcl-xml/src/sax_xml.pp
@@ -31,6 +31,8 @@ type
     scUnknown,
     scWhitespace,       // within whitespace
     scText,             // within text
+    scCData,            // within cdata section
+    scComment,          // within comment
     scEntityReference,  // within entity reference ("&...;")
     scTag);             // within a start tag or end tag
 
@@ -59,7 +61,7 @@ type
 
 { TXMLToDOMConverter }
 
-  TXMLNodeType = (ntWhitespace, ntText, ntEntityReference, ntTag);
+  TXMLNodeType = (ntWhitespace, ntText, ntEntityReference, ntTag, ntComment);
 
   TXMLNodeInfo = class
     NodeType: TXMLNodeType;
@@ -77,6 +79,8 @@ type
 
     procedure ReaderCharacters(Sender: TObject; const ch: PSAXChar;
       Start, Count: Integer);
+    procedure ReaderComment(Sender: TObject; const ch: PSAXChar;
+      Start, Count: Integer);
     procedure ReaderIgnorableWhitespace(Sender: TObject; const ch: PSAXChar;
       Start, Count: Integer);
     procedure ReaderSkippedEntity(Sender: TObject; const Name: SAXString);
@@ -171,6 +175,17 @@ begin
             '<':
               begin
                 Inc(BufferPos);
+                if (Buffer[BufferPos]='!') and (Buffer[BufferPos + 1]='[') then 
+                begin
+                  Inc(BufferPos, 8);
+                  EnterNewScannerContext(scCData);
+                end
+                else if (Buffer[BufferPos]='!') and (Buffer[BufferPos + 1]='-') then 
+                begin
+                  Inc(BufferPos, 3);
+                  EnterNewScannerContext(scComment);
+                end
+                else
                 EnterNewScannerContext(scTag);
               end;
             else
@@ -191,6 +206,17 @@ begin
             '<':
               begin
                 Inc(BufferPos);
+                if (Buffer[BufferPos]='!') and (Buffer[BufferPos + 1]='[') then 
+                begin
+                  Inc(BufferPos, 8);
+                  EnterNewScannerContext(scCData);
+                end
+                else if (Buffer[BufferPos]='!') and (Buffer[BufferPos + 1]='-') then 
+                begin
+                  Inc(BufferPos, 3);
+                  EnterNewScannerContext(scComment);
+                end
+                else
                 EnterNewScannerContext(scTag);
               end;
             else
@@ -206,6 +232,17 @@ begin
             '<':
               begin
                 Inc(BufferPos);
+                if (Buffer[BufferPos]='!') and (Buffer[BufferPos + 1]='[') then 
+                begin
+                  Inc(BufferPos, 8);
+                  EnterNewScannerContext(scCData);
+                end
+                else if (Buffer[BufferPos]='!') and (Buffer[BufferPos + 1]='-') then 
+                begin
+                  Inc(BufferPos, 3);
+                  EnterNewScannerContext(scComment);
+                end
+                else
                 EnterNewScannerContext(scTag);
               end;
             else
@@ -214,6 +251,28 @@ begin
               Inc(BufferPos);
             end;
           end;
+        scCData:
+          if (Buffer[BufferPos] = ']') and (Buffer[BufferPos + 1]=']') and (Buffer[BufferPos + 2]='>') then 
+          begin
+            Inc(BufferPos, 3);
+            EnterNewScannerContext(scUnknown);
+          end
+          else
+          begin
+            FRawTokenText := FRawTokenText + Buffer[BufferPos];
+            Inc(BufferPos);
+          end;
+        scComment:
+          if (Buffer[BufferPos] = '-') and (Buffer[BufferPos + 1]='-') and (Buffer[BufferPos + 2]='>') then  
+          begin
+            Inc(BufferPos, 3);
+            EnterNewScannerContext(scUnknown);
+          end
+          else
+          begin
+            FRawTokenText := FRawTokenText + Buffer[BufferPos];
+            Inc(BufferPos);
+          end;
         scEntityReference:
           if Buffer[BufferPos] = ';' then
           begin
@@ -353,8 +412,11 @@ begin
   case ScannerContext of
     scWhitespace:
       DoIgnorableWhitespace(PSAXChar(TokenText), 0, Length(TokenText));
-    scText:
+    scText,
+    scCData:
       DoCharacters(PSAXChar(TokenText), 0, Length(TokenText));
+    scComment:
+      DoComment(PSAXChar(TokenText), 0, Length(TokenText));
     scEntityReference:
       begin
         if (Length(TokenText) >= 2) and (TokenText[1] = '#') and
@@ -459,6 +521,17 @@ begin
   FNodeBuffer.Add(NodeInfo);
 end;
 
+procedure TXMLToDOMConverter.ReaderComment(Sender: TObject; // Sender is not used in the body
+  const ch: PSAXChar; Start, Count: Integer); // buffers one DOM comment node built from Count chars of ch beginning at offset Start
+var
+  NodeInfo: TXMLNodeInfo; // buffer entry wrapping the new comment node
+begin // NOTE(review): presumably installed as the reader's comment callback - the wiring is not visible here, confirm
+  NodeInfo := TXMLNodeInfo.Create;
+  NodeInfo.NodeType := ntComment; // tags the entry as a comment for whatever later consumes FNodeBuffer
+  NodeInfo.DOMNode := FDocument.CreateCommentBuf(ch + Start, Count); // honour Start: the text begins Start chars into ch (identical result when Start = 0)
+  FNodeBuffer.Add(NodeInfo); // NodeInfo is handed over to FNodeBuffer; this routine does not free it
+end;
+
 procedure TXMLToDOMConverter.ReaderIgnorableWhitespace(Sender: TObject;
   const ch: PSAXChar; Start, Count: Integer);
 var