{
Reads a HTML Document
License: The same modified LGPL as the Free Pascal RTL
         See the file COPYING.modifiedLGPL for more details
AUTHORS: Felipe Monteiro de Carvalho
}
unit htmlvectorialreader;
{$mode objfpc}{$H+}
interface
uses
  Classes, SysUtils, math, contnrs,
  fpimage, fpcanvas, laz2_xmlread, laz2_dom, fgl, lazfileutils,
  // image data formats
  fpreadpng,
  // HTML can contain SVG
  svgvectorialreader,
  fpvectorial, fpvutils, lazutf8, TypInfo;
type
  { TvHTMLVectorialReader }
  TvHTMLVectorialReader = class(TvCustomVectorialReader)
  private
    FPointSeparator, FCommaSeparator: TFormatSettings;
    //
    function GetTextContentFromNode(ANode: TDOMNode): string;
    //
    function ReadEntityFromNode(ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
    function ReadHeaderFromNode(ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
    procedure ReadParagraphFromNode(ADest: TvParagraph; ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument);
    function ReadSVGFromNode(ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
    function ReadSVGFromNode_WithEmbDoc(ANode: TDOMNode; AEmbDoc: TvEmbeddedVectorialDoc; ADoc: TvVectorialDocument): TvEntity;
    function ReadMathFromNode(ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
    function ReadTableFromNode(ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
    function ReadTableRowNode(ATable: TvTable; ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
    function ReadUListFromNode(ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
  public
    { General reading methods }
    constructor Create; override;
    Destructor Destroy; override;
    procedure ReadFromStrings(AStrings: TStrings; AData: TvVectorialDocument); override;
    procedure ReadFromXML(Doc: TXMLDocument; AData: TvVectorialDocument);
    class function IsSupportedRasterImage(AFileName: string): Boolean;
  end;
implementation
const
  // SVG requires hardcoding a DPI value
  // The Opera Browser and Inkscape use 90 DPI, so we follow that
  // 1 Inch = 25.4 milimiters
  // 90 inches per pixel = (1 / 90) * 25.4 = 0.2822
  // FLOAT_MILIMETERS_PER_PIXEL = 0.3528; // DPI 72 = 1 / 72 inches per pixel
  FLOAT_MILIMETERS_PER_PIXEL = 5*0.2822; // DPI 90 = 1 / 90 inches per pixel => Actually I changed the value by this factor! Because otherwise it looks ugly!
  FLOAT_PIXELS_PER_MILIMETER = 1 / FLOAT_MILIMETERS_PER_PIXEL; // DPI 90 = 1 / 90 inches per pixel
{ TvHTMLVectorialReader }
function TvHTMLVectorialReader.GetTextContentFromNode(ANode: TDOMNode): string;
var
  i: Integer;
begin
  Result := '';
  for i := 0 to ANode.ChildNodes.Count-1 do
  begin
    if ANode.ChildNodes.Item[i] is TDOMText then
      Result := ANode.ChildNodes.Item[i].NodeValue;
  end;
end;
// if something is returned, it will be added to the base document
// if nothing is returned, either nothing was written, or it was already added
function TvHTMLVectorialReader.ReadEntityFromNode(ANode: TDOMNode;
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
var
  lEntityName, lTextValue: DOMString;
  lPara: TvParagraph;
begin
  Result := nil;
  lEntityName := LowerCase(ANode.NodeName);
  case lEntityName of
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6': Result := ReadHeaderFromNode(ANode, AData, ADoc);
    'p':
    begin
      lPara := AData.AddParagraph();
      ReadParagraphFromNode(lPara, ANode, AData, ADoc);
      Result := nil;
    end;
    'svg': Result := ReadSVGFromNode(ANode, AData, ADoc);
    'math': Result := ReadMathFromNode(ANode, AData, ADoc);
    'table': Result := ReadTableFromNode(ANode, AData, ADoc);
    'br':
    begin
      AData.AddParagraph().AddText(LineEnding);
      Result := nil;
    end;
    'ul': Result := ReadUListFromNode(ANode, AData, ADoc);
  end;
  // Raw text
  if (ANode is TDOMText) and (lEntityName = '#text') then
  begin
    lTextValue := RemoveLineEndingsAndTrim(ANode.NodeValue);
    if (lTextValue <> '') then
    begin
      AData.AddParagraph().AddText(lTextValue);
      Result := nil;
    end;
  end;
end;
function TvHTMLVectorialReader.ReadHeaderFromNode(ANode: TDOMNode;
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
var
  CurParagraph: TvParagraph;
  lText: TvText;
  lTextStr: string;
  lHeaderType: DOMString;
begin
  Result := nil;
  CurParagraph := AData.AddParagraph();
  CurParagraph.Style := ADoc.StyleTextBody;
  lTextStr := ANode.FirstChild.NodeValue;
  lText := CurParagraph.AddText(lTextStr);
  lHeaderType := LowerCase(ANode.NodeName);
  case lHeaderType of
    'h1': lText.Style := ADoc.StyleHeading1;
    'h2': lText.Style := ADoc.StyleHeading2;
    'h3': lText.Style := ADoc.StyleHeading3;
    'h4': lText.Style := ADoc.StyleHeading4;
    'h5': lText.Style := ADoc.StyleHeading5;
    'h6': lText.Style := ADoc.StyleHeading6;
  end;
end;
procedure TvHTMLVectorialReader.ReadParagraphFromNode(ADest: TvParagraph; ANode: TDOMNode;
  AData: TvTextPageSequence; ADoc: TvVectorialDocument);
var
  lText: TvText = nil;
  lTextStr: string = '';
  lCurNode: TDOMNode;
  lNodeName, lNodeValue, lAttrName, lAttrValue: DOMString;
  lCurAttr: TDOMNode;
  lRasterImage: TvRasterImage;
  lEmbVecImg: TvEmbeddedVectorialDoc = nil;
  i: Integer;
  lWidth, lHeight: Double;
  lAltText: string;
  // xlink:href
  lx, ly, lw, lh: Double;
  lImageDataParts: TStringList;
  lImageDataBase64: string;
  lImageData: array of Byte;
  lImageDataStream: TMemoryStream;
  lImageReader: TFPCustomImageReader;
  procedure TextMerging();
  begin
    if lTextStr <> '' then
    begin
      if lText = nil then
        lText := ADest.AddText(lTextStr)
      else
        lText.Value.Add(lTextStr);
      lTextStr := '';
    end;
  end;
begin
  ADest.Style := ADoc.StyleTextBody;
  lCurNode := ANode.FirstChild;
  while Assigned(lCurNode) do
  begin
    lNodeName := LowerCase(lCurNode.NodeName);
    lNodeValue := lCurNode.NodeValue;
    if (lCurNode is TDOMText) then
    begin
      lTextStr += RemoveLineEndingsAndTrim(lNodeValue);
      lCurNode := lCurNode.NextSibling;
      Continue;
    end;
    // text merging
    TextMerging();
    // reset text merging
    if lNodeName <> 'br' then
      lText := nil;
    case lNodeName of
    // 
    //  'img', 'image':
    begin
      lRasterImage := nil;
      lEmbVecImg := nil;
      lWidth := -1;
      lHeight := -1;
      lAltText := '';
      for i := 0 to lCurNode.Attributes.Length - 1 do
      begin
        lCurAttr := lCurNode.Attributes.Item[i];
        lAttrName := lCurAttr.NodeName;
        lAttrValue := lCurAttr.NodeValue;
        case lAttrName of
        'alt':
        begin
          lAltText := lAttrValue;
        end;
        'src':
        begin
          lAttrValue := lCurAttr.NodeValue;
          lAttrValue := ExtractFilePath(FFilename) + lAttrValue;
          if TvHTMLVectorialReader.IsSupportedRasterImage(lAttrValue) then
          begin
            if not FileExists(lAttrValue) then Continue;
            lRasterImage := ADest.AddRasterImage();
            lRasterImage.CreateImageFromFile(lAttrValue);
          end
          else if TvVectorialDocument.GetFormatFromExtension(lAttrValue, False) <> vfUnknown then
          begin
            lEmbVecImg := ADest.AddEmbeddedVectorialDoc();
            if FileExistsUTF8(lAttrValue) then
              lEmbVecImg.Document.ReadFromFile(lAttrValue);
          end;
        end;
        'xlink:href':
        begin
          lRasterImage := ADest.AddRasterImage();
          lImageDataParts := TvSVGVectorialReader.ReadSpaceSeparatedStrings(lNodeValue, ':;,');
          try
            if (lImageDataParts.Strings[0] = 'data') and
               (lImageDataParts.Strings[1] = 'image/png') and
               (lImageDataParts.Strings[2] = 'base64') then
            begin
              lImageReader := TFPReaderPNG.Create;
              lImageDataStream := TMemoryStream.Create;
              try
                lImageDataBase64 := lImageDataParts.Strings[3];
                DecodeBase64(lImageDataBase64, lImageDataStream);
                lImageDataStream.Position := 0;
                lRasterImage.CreateRGB888Image(10, 10);
                lRasterImage.RasterImage.LoadFromStream(lImageDataStream, lImageReader);
              finally
                lImageDataStream.Free;
                lImageReader.Free;
              end;
            end
            else
              raise Exception.Create('[TvSVGVectorialReader.ReadImageFromNode] Unimplemented image format');
          finally
            lImageDataParts.Free;
          end;
        end;
        'width':
        begin
          lWidth := StrToInt(lAttrValue);
        end;
        'height':
        begin
          lHeight := StrToInt(lAttrValue);
        end;
        end;
      end;
      if lRasterImage <> nil then
      begin
        if lWidth <= 0 then
          lWidth := lRasterImage.RasterImage.Width;
        if lHeight <= 0 then
          lHeight := lRasterImage.RasterImage.Height;
        lRasterImage.Width := lWidth;
        lRasterImage.Height := lHeight;
        lRasterImage.AltText := lAltText;
      end
      else if (lEmbVecImg <> nil) and (lWidth > 0) and (lHeight > 0) then
      begin
        lEmbVecImg.SetWidth(lWidth);
        lEmbVecImg.SetHeight(lHeight);
      end;
    end;
    'svg':
    begin
      lEmbVecImg := ADest.AddEmbeddedVectorialDoc();
      ReadSVGFromNode_WithEmbDoc(lCurNode, lEmbVecImg, ADoc);
    end;
    end;
    lCurNode := lCurNode.NextSibling;
  end;
  TextMerging();
end;
function TvHTMLVectorialReader.ReadSVGFromNode(ANode: TDOMNode;
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
var
  CurSVG: TvEmbeddedVectorialDoc;
  lText: TvText;
  lDoc: TXMLDocument;
  lImportedNode: TDOMNode;
begin
  Result := nil;
  CurSVG := AData.AddEmbeddedVectorialDoc();
  lDoc := TXMLDocument.Create;
  try
    lImportedNode := lDoc.ImportNode(ANode, True);
    lDoc.AppendChild(lImportedNode);
    CurSVG.Document.ReadFromXML(lDoc, vfSVG);
  finally
    lDoc.Free;
  end;
end;
function TvHTMLVectorialReader.ReadSVGFromNode_WithEmbDoc(ANode: TDOMNode;
  AEmbDoc: TvEmbeddedVectorialDoc; ADoc: TvVectorialDocument): TvEntity;
var
  CurSVG: TvEmbeddedVectorialDoc;
  lText: TvText;
  lDoc: TXMLDocument;
  lImportedNode: TDOMNode;
begin
  Result := nil;
  CurSVG := AEmbDoc;
  lDoc := TXMLDocument.Create;
  try
    lImportedNode := lDoc.ImportNode(ANode, True);
    lDoc.AppendChild(lImportedNode);
    CurSVG.Document.ReadFromXML(lDoc, vfSVG);
  finally
    lDoc.Free;
  end;
end;
function TvHTMLVectorialReader.ReadMathFromNode(ANode: TDOMNode;
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
var
  CurSVG: TvEmbeddedVectorialDoc;
  lText: TvText;
  lDoc: TXMLDocument;
  lImportedNode: TDOMNode;
begin
  Result := nil;
  CurSVG := AData.AddEmbeddedVectorialDoc();
  lDoc := TXMLDocument.Create;
  try
    lImportedNode := lDoc.ImportNode(ANode, True);
    lDoc.AppendChild(lImportedNode);
    CurSVG.Document.ReadFromXML(lDoc, vfMathML);
  finally
    lDoc.Free;
  end;
end;
function TvHTMLVectorialReader.ReadTableFromNode(ANode: TDOMNode;
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
var
  CurTable: TvTable;
  lCurNode, lCurSubnode: TDOMNode;
  lNodeName, lNodeValue: DOMString;
  CurRow: TvTableRow;
  Caption_Cell: TvTableCell = nil;
  CurCellPara: TvParagraph;
  // attributes
  i, lBorderNr: Integer;
  lAttrName, lAttrValue: DOMString;
  procedure SetBorderLineType(AType: TvTableBorderType);
  begin
    CurTable.Borders.Left.LineType := AType;
    CurTable.Borders.Right.LineType := AType;
    CurTable.Borders.Top.LineType := AType;
    CurTable.Borders.Bottom.LineType := AType;
    CurTable.Borders.InsideHoriz.LineType := AType;
    CurTable.Borders.InsideVert.LineType := AType;
  end;
begin
  Result := nil;
  CurTable := AData.AddTable();
  CurTable.CellSpacingLeft := 3;
  CurTable.CellSpacingTop := 2;
  // Default to no border without "border" attribute
  SetBorderLineType(tbtNone);
  // table attributes
  for i := 0 to ANode.Attributes.Length - 1 do
  begin
    lAttrName := ANode.Attributes.Item[i].NodeName;
    lAttrValue := ANode.Attributes.Item[i].NodeValue;
    case lAttrName of
    'border':
    begin
      lBorderNr := StrToInt(lAttrValue);
      SetBorderLineType(tbtSingle);
      CurTable.Borders.Left.Width := lBorderNr;
      CurTable.Borders.Right.Width := lBorderNr;
      CurTable.Borders.Top.Width := lBorderNr;
      CurTable.Borders.Bottom.Width := lBorderNr;
      CurTable.Borders.InsideHoriz.Width := lBorderNr;
      CurTable.Borders.InsideVert.Width := lBorderNr;
      if lBorderNr = 0 then
        SetBorderLineType(tbtNone);
    end;
    end;
  end;
  // table child nodes
  lCurNode := ANode.FirstChild;
  while Assigned(lCurNode) do
  begin
    lNodeName := lCurNode.NodeName;
    lNodeValue := lCurNode.NodeValue;
    case lNodeName of
    'caption':
    begin
      CurRow := CurTable.AddRow();
      Caption_Cell := CurRow.AddCell();
      {Caption_Cell.Borders.Left.LineType := tbtNone;
      Caption_Cell.Borders.Top.LineType := tbtNone;
      Caption_Cell.Borders.Right.LineType := tbtNone;
      Caption_Cell.Borders.Bottom.LineType := tbtNone;}
      CurCellPara := Caption_Cell.AddParagraph();
      CurCellPara.Style := ADoc.StyleTextBodyCentralized;
      CurCellPara.AddText(GetTextContentFromNode(lCurNode));
    end;
    'tbody':
    begin
      lCurSubnode := lCurNode.FirstChild;
      while Assigned(lCurSubnode) do
      begin
        ReadTableRowNode(CurTable, lCurSubnode, AData, ADoc);
        lCurSubnode := lCurSubnode.NextSibling;
      end;
    end;
    end;
    lCurNode := lCurNode.NextSibling;
  end;
  // the caption spans all columns
  if Caption_Cell <> nil then
    Caption_Cell.SpannedCols := CurTable.GetColCount();
end;
function TvHTMLVectorialReader.ReadTableRowNode(ATable: TvTable; ANode: TDOMNode;
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
var
  lCurNode: TDOMNode;
  lNodeName, lNodeValue: DOMString;
  CurRow: TvTableRow;
  CurCell: TvTableCell;
  CurCellPara: TvParagraph;
begin
  Result := nil;
  CurRow := ATable.AddRow();
  lCurNode := ANode.FirstChild;
  while Assigned(lCurNode) do
  begin
    lNodeName := lCurNode.NodeName;
    lNodeValue := lCurNode.NodeValue;
    case lNodeName of
    'th':
    begin
      CurCell := CurRow.AddCell();
      CurCellPara := CurCell.AddParagraph();
      CurCellPara.Style := ADoc.StyleTextBodyBold;
      CurCellPara.AddText(GetTextContentFromNode(lCurNode));
    end;
    'td':
    begin
      CurCell := CurRow.AddCell();
      CurCellPara := CurCell.AddParagraph();
      Self.ReadParagraphFromNode(CurCellPara, lCurNode, AData, ADoc);
    end;
    end;
    lCurNode := lCurNode.NextSibling;
  end;
end;
function TvHTMLVectorialReader.ReadUListFromNode(ANode: TDOMNode;
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
var
  lCurNode: TDOMNode;
  lNodeName, lNodeValue: DOMString;
  lNodeText: string;
  //
  lList: TvList;
  lCurPara: TvParagraph;
begin
  Result := nil;
  lList := AData.AddList();
  lCurNode := ANode.FirstChild;
  while Assigned(lCurNode) do
  begin
    lNodeName := lCurNode.NodeName;
    lNodeValue := lCurNode.NodeValue;
    case lNodeName of
    'li':
    begin
      lNodeText := GetTextContentFromNode(lCurNode);
      lCurPara := lList.AddParagraph(lNodeText);
    end;
    end;
    lCurNode := lCurNode.NextSibling;
  end;
end;
constructor TvHTMLVectorialReader.Create;
begin
  inherited Create;
  FPointSeparator := DefaultFormatSettings;
  FPointSeparator.DecimalSeparator := '.';
  FPointSeparator.ThousandSeparator := '#';// disable the thousand separator
end;
destructor TvHTMLVectorialReader.Destroy;
begin
  inherited Destroy;
end;
procedure TvHTMLVectorialReader.ReadFromStrings(AStrings: TStrings;
  AData: TvVectorialDocument);
var
  Doc: TXMLDocument = nil;
  lStream: TMemoryStream;
  lTmp: String;
begin
  lStream := TMemoryStream.Create();
  try
    // Remove the  0 then
      AStrings.Delete(0);
    // Create a header
    AStrings.Insert(0, '');
    lTmp := AStrings.Text;
    // Flush it back to a stream
    AStrings.SaveToStream(lStream);
    lStream.Position := 0;
    // HTML is not XML, but might be compatible enough... a dedicated reader will be complex, but eventually necessary
    ReadXMLFile(Doc, lStream);
    lStream.Free; // Release as soon as unnecessary
    lStream := nil;
    //
    ReadFromXML(Doc, AData);
  finally
    Doc.Free;
    lStream.Free;
  end;
end;
procedure TvHTMLVectorialReader.ReadFromXML(Doc: TXMLDocument;
  AData: TvVectorialDocument);
var
  lCurNode, lCurSubnode: TDOMNode;
  lPage: TvTextPageSequence;
  lNodeName, lNodeValue: DOMString;
  ANode: TDOMElement;
  i: Integer;
  lCurEntity: TvEntity;
begin
  {ANode := Doc.DocumentElement;
  for i := 0 to ANode.Attributes.Length - 1 do
  begin
    lNodeName := ANode.Attributes.Item[i].NodeName;
    lNodeValue := ANode.Attributes.Item[i].NodeValue;
  end;}
  AData.AddStandardTextDocumentStyles(vfHTML);
  // ----------------
  // Now process the elements
  // ----------------
  lCurNode := Doc.DocumentElement.FirstChild;
  lPage := AData.AddTextPageSequence();
  //lPage.Width := AData.Width;
  //lPage.Height := AData.Height;
  while Assigned(lCurNode) do
  begin
    lNodeName := lCurNode.NodeName;
    if lNodeName = 'body' then
    begin
      lCurSubnode := lCurNode.FirstChild;
      while Assigned(lCurSubnode) do
      begin
        lCurEntity := ReadEntityFromNode(lCurSubnode, lPage, AData);
        if lCurEntity <> nil then
          lPage.AddEntity(lCurEntity);
        lCurSubnode := lCurSubnode.NextSibling;
      end;
    end;
    lCurNode := lCurNode.NextSibling;
  end;
end;
class function TvHTMLVectorialReader.IsSupportedRasterImage(AFileName: string): Boolean;
var
  lExt: string;
begin
  Result := False;
  lExt := LowerCase(ExtractFileExt(AFileName));
  case lExt of
  '.png', '.jpg', '.jpeg', '.bmp', '.xpm', '.gif':
    Result := True
  end;
end;
initialization
  RegisterVectorialReader(TvHTMLVectorialReader, vfHTML);
end.
    'img', 'image':
    begin
      lRasterImage := nil;
      lEmbVecImg := nil;
      lWidth := -1;
      lHeight := -1;
      lAltText := '';
      for i := 0 to lCurNode.Attributes.Length - 1 do
      begin
        lCurAttr := lCurNode.Attributes.Item[i];
        lAttrName := lCurAttr.NodeName;
        lAttrValue := lCurAttr.NodeValue;
        case lAttrName of
        'alt':
        begin
          lAltText := lAttrValue;
        end;
        'src':
        begin
          lAttrValue := lCurAttr.NodeValue;
          lAttrValue := ExtractFilePath(FFilename) + lAttrValue;
          if TvHTMLVectorialReader.IsSupportedRasterImage(lAttrValue) then
          begin
            if not FileExists(lAttrValue) then Continue;
            lRasterImage := ADest.AddRasterImage();
            lRasterImage.CreateImageFromFile(lAttrValue);
          end
          else if TvVectorialDocument.GetFormatFromExtension(lAttrValue, False) <> vfUnknown then
          begin
            lEmbVecImg := ADest.AddEmbeddedVectorialDoc();
            if FileExistsUTF8(lAttrValue) then
              lEmbVecImg.Document.ReadFromFile(lAttrValue);
          end;
        end;
        'xlink:href':
        begin
          lRasterImage := ADest.AddRasterImage();
          lImageDataParts := TvSVGVectorialReader.ReadSpaceSeparatedStrings(lNodeValue, ':;,');
          try
            if (lImageDataParts.Strings[0] = 'data') and
               (lImageDataParts.Strings[1] = 'image/png') and
               (lImageDataParts.Strings[2] = 'base64') then
            begin
              lImageReader := TFPReaderPNG.Create;
              lImageDataStream := TMemoryStream.Create;
              try
                lImageDataBase64 := lImageDataParts.Strings[3];
                DecodeBase64(lImageDataBase64, lImageDataStream);
                lImageDataStream.Position := 0;
                lRasterImage.CreateRGB888Image(10, 10);
                lRasterImage.RasterImage.LoadFromStream(lImageDataStream, lImageReader);
              finally
                lImageDataStream.Free;
                lImageReader.Free;
              end;
            end
            else
              raise Exception.Create('[TvSVGVectorialReader.ReadImageFromNode] Unimplemented image format');
          finally
            lImageDataParts.Free;
          end;
        end;
        'width':
        begin
          lWidth := StrToInt(lAttrValue);
        end;
        'height':
        begin
          lHeight := StrToInt(lAttrValue);
        end;
        end;
      end;
      if lRasterImage <> nil then
      begin
        if lWidth <= 0 then
          lWidth := lRasterImage.RasterImage.Width;
        if lHeight <= 0 then
          lHeight := lRasterImage.RasterImage.Height;
        lRasterImage.Width := lWidth;
        lRasterImage.Height := lHeight;
        lRasterImage.AltText := lAltText;
      end
      else if (lEmbVecImg <> nil) and (lWidth > 0) and (lHeight > 0) then
      begin
        lEmbVecImg.SetWidth(lWidth);
        lEmbVecImg.SetHeight(lHeight);
      end;
    end;
    'svg':
    begin
      lEmbVecImg := ADest.AddEmbeddedVectorialDoc();
      ReadSVGFromNode_WithEmbDoc(lCurNode, lEmbVecImg, ADoc);
    end;
    end;
    lCurNode := lCurNode.NextSibling;
  end;
  TextMerging();
end;
function TvHTMLVectorialReader.ReadSVGFromNode(ANode: TDOMNode;
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
var
  CurSVG: TvEmbeddedVectorialDoc;
  lText: TvText;
  lDoc: TXMLDocument;
  lImportedNode: TDOMNode;
begin
  Result := nil;
  CurSVG := AData.AddEmbeddedVectorialDoc();
  lDoc := TXMLDocument.Create;
  try
    lImportedNode := lDoc.ImportNode(ANode, True);
    lDoc.AppendChild(lImportedNode);
    CurSVG.Document.ReadFromXML(lDoc, vfSVG);
  finally
    lDoc.Free;
  end;
end;
function TvHTMLVectorialReader.ReadSVGFromNode_WithEmbDoc(ANode: TDOMNode;
  AEmbDoc: TvEmbeddedVectorialDoc; ADoc: TvVectorialDocument): TvEntity;
var
  CurSVG: TvEmbeddedVectorialDoc;
  lText: TvText;
  lDoc: TXMLDocument;
  lImportedNode: TDOMNode;
begin
  Result := nil;
  CurSVG := AEmbDoc;
  lDoc := TXMLDocument.Create;
  try
    lImportedNode := lDoc.ImportNode(ANode, True);
    lDoc.AppendChild(lImportedNode);
    CurSVG.Document.ReadFromXML(lDoc, vfSVG);
  finally
    lDoc.Free;
  end;
end;
function TvHTMLVectorialReader.ReadMathFromNode(ANode: TDOMNode;
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
var
  CurSVG: TvEmbeddedVectorialDoc;
  lText: TvText;
  lDoc: TXMLDocument;
  lImportedNode: TDOMNode;
begin
  Result := nil;
  CurSVG := AData.AddEmbeddedVectorialDoc();
  lDoc := TXMLDocument.Create;
  try
    lImportedNode := lDoc.ImportNode(ANode, True);
    lDoc.AppendChild(lImportedNode);
    CurSVG.Document.ReadFromXML(lDoc, vfMathML);
  finally
    lDoc.Free;
  end;
end;
function TvHTMLVectorialReader.ReadTableFromNode(ANode: TDOMNode;
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
var
  CurTable: TvTable;
  lCurNode, lCurSubnode: TDOMNode;
  lNodeName, lNodeValue: DOMString;
  CurRow: TvTableRow;
  Caption_Cell: TvTableCell = nil;
  CurCellPara: TvParagraph;
  // attributes
  i, lBorderNr: Integer;
  lAttrName, lAttrValue: DOMString;
  procedure SetBorderLineType(AType: TvTableBorderType);
  begin
    CurTable.Borders.Left.LineType := AType;
    CurTable.Borders.Right.LineType := AType;
    CurTable.Borders.Top.LineType := AType;
    CurTable.Borders.Bottom.LineType := AType;
    CurTable.Borders.InsideHoriz.LineType := AType;
    CurTable.Borders.InsideVert.LineType := AType;
  end;
begin
  Result := nil;
  CurTable := AData.AddTable();
  CurTable.CellSpacingLeft := 3;
  CurTable.CellSpacingTop := 2;
  // Default to no border without "border" attribute
  SetBorderLineType(tbtNone);
  // table attributes
  for i := 0 to ANode.Attributes.Length - 1 do
  begin
    lAttrName := ANode.Attributes.Item[i].NodeName;
    lAttrValue := ANode.Attributes.Item[i].NodeValue;
    case lAttrName of
    'border':
    begin
      lBorderNr := StrToInt(lAttrValue);
      SetBorderLineType(tbtSingle);
      CurTable.Borders.Left.Width := lBorderNr;
      CurTable.Borders.Right.Width := lBorderNr;
      CurTable.Borders.Top.Width := lBorderNr;
      CurTable.Borders.Bottom.Width := lBorderNr;
      CurTable.Borders.InsideHoriz.Width := lBorderNr;
      CurTable.Borders.InsideVert.Width := lBorderNr;
      if lBorderNr = 0 then
        SetBorderLineType(tbtNone);
    end;
    end;
  end;
  // table child nodes
  lCurNode := ANode.FirstChild;
  while Assigned(lCurNode) do
  begin
    lNodeName := lCurNode.NodeName;
    lNodeValue := lCurNode.NodeValue;
    case lNodeName of
    'caption':
    begin
      CurRow := CurTable.AddRow();
      Caption_Cell := CurRow.AddCell();
      {Caption_Cell.Borders.Left.LineType := tbtNone;
      Caption_Cell.Borders.Top.LineType := tbtNone;
      Caption_Cell.Borders.Right.LineType := tbtNone;
      Caption_Cell.Borders.Bottom.LineType := tbtNone;}
      CurCellPara := Caption_Cell.AddParagraph();
      CurCellPara.Style := ADoc.StyleTextBodyCentralized;
      CurCellPara.AddText(GetTextContentFromNode(lCurNode));
    end;
    'tbody':
    begin
      lCurSubnode := lCurNode.FirstChild;
      while Assigned(lCurSubnode) do
      begin
        ReadTableRowNode(CurTable, lCurSubnode, AData, ADoc);
        lCurSubnode := lCurSubnode.NextSibling;
      end;
    end;
    end;
    lCurNode := lCurNode.NextSibling;
  end;
  // the caption spans all columns
  if Caption_Cell <> nil then
    Caption_Cell.SpannedCols := CurTable.GetColCount();
end;
function TvHTMLVectorialReader.ReadTableRowNode(ATable: TvTable; ANode: TDOMNode;
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
var
  lCurNode: TDOMNode;
  lNodeName, lNodeValue: DOMString;
  CurRow: TvTableRow;
  CurCell: TvTableCell;
  CurCellPara: TvParagraph;
begin
  Result := nil;
  CurRow := ATable.AddRow();
  lCurNode := ANode.FirstChild;
  while Assigned(lCurNode) do
  begin
    lNodeName := lCurNode.NodeName;
    lNodeValue := lCurNode.NodeValue;
    case lNodeName of
    'th':
    begin
      CurCell := CurRow.AddCell();
      CurCellPara := CurCell.AddParagraph();
      CurCellPara.Style := ADoc.StyleTextBodyBold;
      CurCellPara.AddText(GetTextContentFromNode(lCurNode));
    end;
    'td':
    begin
      CurCell := CurRow.AddCell();
      CurCellPara := CurCell.AddParagraph();
      Self.ReadParagraphFromNode(CurCellPara, lCurNode, AData, ADoc);
    end;
    end;
    lCurNode := lCurNode.NextSibling;
  end;
end;
function TvHTMLVectorialReader.ReadUListFromNode(ANode: TDOMNode;
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
var
  lCurNode: TDOMNode;
  lNodeName, lNodeValue: DOMString;
  lNodeText: string;
  //
  lList: TvList;
  lCurPara: TvParagraph;
begin
  Result := nil;
  lList := AData.AddList();
  lCurNode := ANode.FirstChild;
  while Assigned(lCurNode) do
  begin
    lNodeName := lCurNode.NodeName;
    lNodeValue := lCurNode.NodeValue;
    case lNodeName of
    'li':
    begin
      lNodeText := GetTextContentFromNode(lCurNode);
      lCurPara := lList.AddParagraph(lNodeText);
    end;
    end;
    lCurNode := lCurNode.NextSibling;
  end;
end;
constructor TvHTMLVectorialReader.Create;
begin
  inherited Create;
  FPointSeparator := DefaultFormatSettings;
  FPointSeparator.DecimalSeparator := '.';
  FPointSeparator.ThousandSeparator := '#';// disable the thousand separator
end;
destructor TvHTMLVectorialReader.Destroy;
begin
  inherited Destroy;
end;
procedure TvHTMLVectorialReader.ReadFromStrings(AStrings: TStrings;
  AData: TvVectorialDocument);
var
  Doc: TXMLDocument = nil;
  lStream: TMemoryStream;
  lTmp: String;
begin
  lStream := TMemoryStream.Create();
  try
    // Remove the  0 then
      AStrings.Delete(0);
    // Create a header
    AStrings.Insert(0, '');
    lTmp := AStrings.Text;
    // Flush it back to a stream
    AStrings.SaveToStream(lStream);
    lStream.Position := 0;
    // HTML is not XML, but might be compatible enough... a dedicated reader will be complex, but eventually necessary
    ReadXMLFile(Doc, lStream);
    lStream.Free; // Release as soon as unnecessary
    lStream := nil;
    //
    ReadFromXML(Doc, AData);
  finally
    Doc.Free;
    lStream.Free;
  end;
end;
procedure TvHTMLVectorialReader.ReadFromXML(Doc: TXMLDocument;
  AData: TvVectorialDocument);
var
  lCurNode, lCurSubnode: TDOMNode;
  lPage: TvTextPageSequence;
  lNodeName, lNodeValue: DOMString;
  ANode: TDOMElement;
  i: Integer;
  lCurEntity: TvEntity;
begin
  {ANode := Doc.DocumentElement;
  for i := 0 to ANode.Attributes.Length - 1 do
  begin
    lNodeName := ANode.Attributes.Item[i].NodeName;
    lNodeValue := ANode.Attributes.Item[i].NodeValue;
  end;}
  AData.AddStandardTextDocumentStyles(vfHTML);
  // ----------------
  // Now process the elements
  // ----------------
  lCurNode := Doc.DocumentElement.FirstChild;
  lPage := AData.AddTextPageSequence();
  //lPage.Width := AData.Width;
  //lPage.Height := AData.Height;
  while Assigned(lCurNode) do
  begin
    lNodeName := lCurNode.NodeName;
    if lNodeName = 'body' then
    begin
      lCurSubnode := lCurNode.FirstChild;
      while Assigned(lCurSubnode) do
      begin
        lCurEntity := ReadEntityFromNode(lCurSubnode, lPage, AData);
        if lCurEntity <> nil then
          lPage.AddEntity(lCurEntity);
        lCurSubnode := lCurSubnode.NextSibling;
      end;
    end;
    lCurNode := lCurNode.NextSibling;
  end;
end;
class function TvHTMLVectorialReader.IsSupportedRasterImage(AFileName: string): Boolean;
var
  lExt: string;
begin
  Result := False;
  lExt := LowerCase(ExtractFileExt(AFileName));
  case lExt of
  '.png', '.jpg', '.jpeg', '.bmp', '.xpm', '.gif':
    Result := True
  end;
end;
initialization
  RegisterVectorialReader(TvHTMLVectorialReader, vfHTML);
end.