mirror of
				https://gitlab.com/freepascal.org/lazarus/lazarus.git
				synced 2025-11-04 15:32:00 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			640 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
			
		
		
	
	
			640 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			ObjectPascal
		
	
	
	
	
	
{
 | 
						|
Reads a HTML Document
 | 
						|
 | 
						|
License: The same modified LGPL as the Free Pascal RTL
 | 
						|
         See the file COPYING.modifiedLGPL for more details
 | 
						|
 | 
						|
AUTHORS: Felipe Monteiro de Carvalho
 | 
						|
}
 | 
						|
unit htmlvectorialreader;
 | 
						|
 | 
						|
{$mode objfpc}{$H+}
 | 
						|
 | 
						|
interface
 | 
						|
 | 
						|
uses
 | 
						|
  Classes, SysUtils, math, contnrs,
 | 
						|
  fpimage, fpcanvas, laz2_xmlread, laz2_dom, fgl, lazfileutils,
 | 
						|
  // image data formats
 | 
						|
  fpreadpng,
 | 
						|
  // HTML can contain SVG
 | 
						|
  svgvectorialreader,
 | 
						|
  fpvectorial, fpvutils, lazutf8, TypInfo;
 | 
						|
 | 
						|
type
 | 
						|
  { TvHTMLVectorialReader }
 | 
						|
 | 
						|
  TvHTMLVectorialReader = class(TvCustomVectorialReader)
 | 
						|
  private
 | 
						|
    FPointSeparator, FCommaSeparator: TFormatSettings;
 | 
						|
    //
 | 
						|
    function GetTextContentFromNode(ANode: TDOMNode): string;
 | 
						|
    //
 | 
						|
    function ReadEntityFromNode(ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
    function ReadHeaderFromNode(ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
    procedure ReadParagraphFromNode(ADest: TvParagraph; ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument);
 | 
						|
    function ReadSVGFromNode(ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
    function ReadSVGFromNode_WithEmbDoc(ANode: TDOMNode; AEmbDoc: TvEmbeddedVectorialDoc; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
    function ReadMathFromNode(ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
    function ReadTableFromNode(ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
    function ReadTableRowNode(ATable: TvTable; ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
    function ReadUListFromNode(ANode: TDOMNode; AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
  public
 | 
						|
    { General reading methods }
 | 
						|
    constructor Create; override;
 | 
						|
    Destructor Destroy; override;
 | 
						|
    procedure ReadFromStrings(AStrings: TStrings; AData: TvVectorialDocument); override;
 | 
						|
    procedure ReadFromXML(Doc: TXMLDocument; AData: TvVectorialDocument);
 | 
						|
    class function IsSupportedRasterImage(AFileName: string): Boolean;
 | 
						|
  end;
 | 
						|
 | 
						|
implementation
 | 
						|
 | 
						|
const
 | 
						|
  // SVG requires hardcoding a DPI value
 | 
						|
 | 
						|
  // The Opera Browser and Inkscape use 90 DPI, so we follow that
 | 
						|
 | 
						|
  // 1 Inch = 25.4 milimiters
 | 
						|
  // 90 inches per pixel = (1 / 90) * 25.4 = 0.2822
 | 
						|
  // FLOAT_MILIMETERS_PER_PIXEL = 0.3528; // DPI 72 = 1 / 72 inches per pixel
 | 
						|
 | 
						|
  FLOAT_MILIMETERS_PER_PIXEL = 5*0.2822; // DPI 90 = 1 / 90 inches per pixel => Actually I changed the value by this factor! Because otherwise it looks ugly!
 | 
						|
  FLOAT_PIXELS_PER_MILIMETER = 1 / FLOAT_MILIMETERS_PER_PIXEL; // DPI 90 = 1 / 90 inches per pixel
 | 
						|
 | 
						|
{ TvHTMLVectorialReader }
 | 
						|
 | 
						|
function TvHTMLVectorialReader.GetTextContentFromNode(ANode: TDOMNode): string;
 | 
						|
var
 | 
						|
  i: Integer;
 | 
						|
begin
 | 
						|
  Result := '';
 | 
						|
  for i := 0 to ANode.ChildNodes.Count-1 do
 | 
						|
  begin
 | 
						|
    if ANode.ChildNodes.Item[i] is TDOMText then
 | 
						|
      Result := ANode.ChildNodes.Item[i].NodeValue;
 | 
						|
  end;
 | 
						|
end;
 | 
						|
 | 
						|
// if something is returned, it will be added to the base document
 | 
						|
// if nothing is returned, either nothing was written, or it was already added
 | 
						|
function TvHTMLVectorialReader.ReadEntityFromNode(ANode: TDOMNode;
 | 
						|
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
var
 | 
						|
  lEntityName, lTextValue: DOMString;
 | 
						|
  lPara: TvParagraph;
 | 
						|
begin
 | 
						|
  Result := nil;
 | 
						|
  lEntityName := LowerCase(ANode.NodeName);
 | 
						|
  case lEntityName of
 | 
						|
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6': Result := ReadHeaderFromNode(ANode, AData, ADoc);
 | 
						|
    'p':
 | 
						|
    begin
 | 
						|
      lPara := AData.AddParagraph();
 | 
						|
      ReadParagraphFromNode(lPara, ANode, AData, ADoc);
 | 
						|
      Result := nil;
 | 
						|
    end;
 | 
						|
    'svg': Result := ReadSVGFromNode(ANode, AData, ADoc);
 | 
						|
    'math': Result := ReadMathFromNode(ANode, AData, ADoc);
 | 
						|
    'table': Result := ReadTableFromNode(ANode, AData, ADoc);
 | 
						|
    'br':
 | 
						|
    begin
 | 
						|
      AData.AddParagraph().AddText(LineEnding);
 | 
						|
      Result := nil;
 | 
						|
    end;
 | 
						|
    'ul': Result := ReadUListFromNode(ANode, AData, ADoc);
 | 
						|
  end;
 | 
						|
  // Raw text
 | 
						|
  if (ANode is TDOMText) and (lEntityName = '#text') then
 | 
						|
  begin
 | 
						|
    lTextValue := RemoveLineEndingsAndTrim(ANode.NodeValue);
 | 
						|
    if (lTextValue <> '') then
 | 
						|
    begin
 | 
						|
      AData.AddParagraph().AddText(lTextValue);
 | 
						|
      Result := nil;
 | 
						|
    end;
 | 
						|
  end;
 | 
						|
end;
 | 
						|
 | 
						|
function TvHTMLVectorialReader.ReadHeaderFromNode(ANode: TDOMNode;
 | 
						|
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
var
 | 
						|
  CurParagraph: TvParagraph;
 | 
						|
  lText: TvText;
 | 
						|
  lTextStr: string;
 | 
						|
  lHeaderType: DOMString;
 | 
						|
begin
 | 
						|
  Result := nil;
 | 
						|
  CurParagraph := AData.AddParagraph();
 | 
						|
  CurParagraph.Style := ADoc.StyleTextBody;
 | 
						|
  lTextStr := ANode.FirstChild.NodeValue;
 | 
						|
  lText := CurParagraph.AddText(lTextStr);
 | 
						|
  lHeaderType := LowerCase(ANode.NodeName);
 | 
						|
  case lHeaderType of
 | 
						|
    'h1': lText.Style := ADoc.StyleHeading1;
 | 
						|
    'h2': lText.Style := ADoc.StyleHeading2;
 | 
						|
    'h3': lText.Style := ADoc.StyleHeading3;
 | 
						|
    'h4': lText.Style := ADoc.StyleHeading4;
 | 
						|
    'h5': lText.Style := ADoc.StyleHeading5;
 | 
						|
    'h6': lText.Style := ADoc.StyleHeading6;
 | 
						|
  end;
 | 
						|
end;
 | 
						|
 | 
						|
procedure TvHTMLVectorialReader.ReadParagraphFromNode(ADest: TvParagraph; ANode: TDOMNode;
 | 
						|
  AData: TvTextPageSequence; ADoc: TvVectorialDocument);
 | 
						|
var
 | 
						|
  lText: TvText = nil;
 | 
						|
  lTextStr: string = '';
 | 
						|
  lCurNode: TDOMNode;
 | 
						|
  lNodeName, lNodeValue, lAttrName, lAttrValue: DOMString;
 | 
						|
  lCurAttr: TDOMNode;
 | 
						|
  lRasterImage: TvRasterImage;
 | 
						|
  lEmbVecImg: TvEmbeddedVectorialDoc = nil;
 | 
						|
  i: Integer;
 | 
						|
  lWidth, lHeight: Double;
 | 
						|
  lAltText: string;
 | 
						|
  // xlink:href
 | 
						|
  lx, ly, lw, lh: Double;
 | 
						|
  lImageDataParts: TStringList;
 | 
						|
  lImageDataBase64: string;
 | 
						|
  lImageData: array of Byte;
 | 
						|
  lImageDataStream: TMemoryStream;
 | 
						|
  lImageReader: TFPCustomImageReader;
 | 
						|
 | 
						|
  procedure TextMerging();
 | 
						|
  begin
 | 
						|
    if lTextStr <> '' then
 | 
						|
    begin
 | 
						|
      if lText = nil then
 | 
						|
        lText := ADest.AddText(lTextStr)
 | 
						|
      else
 | 
						|
        lText.Value.Add(lTextStr);
 | 
						|
      lTextStr := '';
 | 
						|
    end;
 | 
						|
  end;
 | 
						|
 | 
						|
begin
 | 
						|
  ADest.Style := ADoc.StyleTextBody;
 | 
						|
 | 
						|
  lCurNode := ANode.FirstChild;
 | 
						|
  while Assigned(lCurNode) do
 | 
						|
  begin
 | 
						|
    lNodeName := LowerCase(lCurNode.NodeName);
 | 
						|
    lNodeValue := lCurNode.NodeValue;
 | 
						|
 | 
						|
    if (lCurNode is TDOMText) then
 | 
						|
    begin
 | 
						|
      lTextStr += RemoveLineEndingsAndTrim(lNodeValue);
 | 
						|
      lCurNode := lCurNode.NextSibling;
 | 
						|
      Continue;
 | 
						|
    end;
 | 
						|
 | 
						|
    // text merging
 | 
						|
    TextMerging();
 | 
						|
    // reset text merging
 | 
						|
    if lNodeName <> 'br' then
 | 
						|
      lText := nil;
 | 
						|
 | 
						|
    case lNodeName of
 | 
						|
    // <image width="100" height="100" xlink:href="....QSK5CYII="/>
 | 
						|
    // <img src="images/noimage.gif" width="100" height="100" alt="No image" />
 | 
						|
    'img', 'image':
 | 
						|
    begin
 | 
						|
      lRasterImage := nil;
 | 
						|
      lEmbVecImg := nil;
 | 
						|
      lWidth := -1;
 | 
						|
      lHeight := -1;
 | 
						|
      lAltText := '';
 | 
						|
 | 
						|
      for i := 0 to lCurNode.Attributes.Length - 1 do
 | 
						|
      begin
 | 
						|
        lCurAttr := lCurNode.Attributes.Item[i];
 | 
						|
        lAttrName := lCurAttr.NodeName;
 | 
						|
        lAttrValue := lCurAttr.NodeValue;
 | 
						|
 | 
						|
        case lAttrName of
 | 
						|
        'alt':
 | 
						|
        begin
 | 
						|
          lAltText := lAttrValue;
 | 
						|
        end;
 | 
						|
        'src':
 | 
						|
        begin
 | 
						|
          lAttrValue := lCurAttr.NodeValue;
 | 
						|
          lAttrValue := ExtractFilePath(FFilename) + lAttrValue;
 | 
						|
 | 
						|
          if TvHTMLVectorialReader.IsSupportedRasterImage(lAttrValue) then
 | 
						|
          begin
 | 
						|
            if not FileExists(lAttrValue) then Continue;
 | 
						|
 | 
						|
            lRasterImage := ADest.AddRasterImage();
 | 
						|
            lRasterImage.CreateImageFromFile(lAttrValue);
 | 
						|
          end
 | 
						|
          else if TvVectorialDocument.GetFormatFromExtension(lAttrValue, False) <> vfUnknown then
 | 
						|
          begin
 | 
						|
            lEmbVecImg := ADest.AddEmbeddedVectorialDoc();
 | 
						|
            if FileExistsUTF8(lAttrValue) then
 | 
						|
              lEmbVecImg.Document.ReadFromFile(lAttrValue);
 | 
						|
          end;
 | 
						|
        end;
 | 
						|
        'xlink:href':
 | 
						|
        begin
 | 
						|
          lRasterImage := ADest.AddRasterImage();
 | 
						|
          lImageDataParts := TvSVGVectorialReader.ReadSpaceSeparatedStrings(lNodeValue, ':;,');
 | 
						|
          try
 | 
						|
            if (lImageDataParts.Strings[0] = 'data') and
 | 
						|
               (lImageDataParts.Strings[1] = 'image/png') and
 | 
						|
               (lImageDataParts.Strings[2] = 'base64') then
 | 
						|
            begin
 | 
						|
              lImageReader := TFPReaderPNG.Create;
 | 
						|
              lImageDataStream := TMemoryStream.Create;
 | 
						|
              try
 | 
						|
                lImageDataBase64 := lImageDataParts.Strings[3];
 | 
						|
                DecodeBase64(lImageDataBase64, lImageDataStream);
 | 
						|
                lImageDataStream.Position := 0;
 | 
						|
                lRasterImage.CreateRGB888Image(10, 10);
 | 
						|
                lRasterImage.RasterImage.LoadFromStream(lImageDataStream, lImageReader);
 | 
						|
              finally
 | 
						|
                lImageDataStream.Free;
 | 
						|
                lImageReader.Free;
 | 
						|
              end;
 | 
						|
            end
 | 
						|
            else
 | 
						|
              raise Exception.Create('[TvSVGVectorialReader.ReadImageFromNode] Unimplemented image format');
 | 
						|
          finally
 | 
						|
            lImageDataParts.Free;
 | 
						|
          end;
 | 
						|
        end;
 | 
						|
        'width':
 | 
						|
        begin
 | 
						|
          lWidth := StrToInt(lAttrValue);
 | 
						|
        end;
 | 
						|
        'height':
 | 
						|
        begin
 | 
						|
          lHeight := StrToInt(lAttrValue);
 | 
						|
        end;
 | 
						|
        end;
 | 
						|
      end;
 | 
						|
 | 
						|
      if lRasterImage <> nil then
 | 
						|
      begin
 | 
						|
        if lWidth <= 0 then
 | 
						|
          lWidth := lRasterImage.RasterImage.Width;
 | 
						|
        if lHeight <= 0 then
 | 
						|
          lHeight := lRasterImage.RasterImage.Height;
 | 
						|
 | 
						|
        lRasterImage.Width := lWidth;
 | 
						|
        lRasterImage.Height := lHeight;
 | 
						|
        lRasterImage.AltText := lAltText;
 | 
						|
      end
 | 
						|
      else if (lEmbVecImg <> nil) and (lWidth > 0) and (lHeight > 0) then
 | 
						|
      begin
 | 
						|
        lEmbVecImg.SetWidth(lWidth);
 | 
						|
        lEmbVecImg.SetHeight(lHeight);
 | 
						|
      end;
 | 
						|
    end;
 | 
						|
    'svg':
 | 
						|
    begin
 | 
						|
      lEmbVecImg := ADest.AddEmbeddedVectorialDoc();
 | 
						|
      ReadSVGFromNode_WithEmbDoc(lCurNode, lEmbVecImg, ADoc);
 | 
						|
    end;
 | 
						|
    end;
 | 
						|
 | 
						|
    lCurNode := lCurNode.NextSibling;
 | 
						|
  end;
 | 
						|
 | 
						|
  TextMerging();
 | 
						|
end;
 | 
						|
 | 
						|
function TvHTMLVectorialReader.ReadSVGFromNode(ANode: TDOMNode;
 | 
						|
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
var
 | 
						|
  CurSVG: TvEmbeddedVectorialDoc;
 | 
						|
  lText: TvText;
 | 
						|
  lDoc: TXMLDocument;
 | 
						|
  lImportedNode: TDOMNode;
 | 
						|
begin
 | 
						|
  Result := nil;
 | 
						|
  CurSVG := AData.AddEmbeddedVectorialDoc();
 | 
						|
  lDoc := TXMLDocument.Create;
 | 
						|
  try
 | 
						|
    lImportedNode := lDoc.ImportNode(ANode, True);
 | 
						|
    lDoc.AppendChild(lImportedNode);
 | 
						|
    CurSVG.Document.ReadFromXML(lDoc, vfSVG);
 | 
						|
  finally
 | 
						|
    lDoc.Free;
 | 
						|
  end;
 | 
						|
end;
 | 
						|
 | 
						|
function TvHTMLVectorialReader.ReadSVGFromNode_WithEmbDoc(ANode: TDOMNode;
 | 
						|
  AEmbDoc: TvEmbeddedVectorialDoc; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
var
 | 
						|
  CurSVG: TvEmbeddedVectorialDoc;
 | 
						|
  lText: TvText;
 | 
						|
  lDoc: TXMLDocument;
 | 
						|
  lImportedNode: TDOMNode;
 | 
						|
begin
 | 
						|
  Result := nil;
 | 
						|
  CurSVG := AEmbDoc;
 | 
						|
  lDoc := TXMLDocument.Create;
 | 
						|
  try
 | 
						|
    lImportedNode := lDoc.ImportNode(ANode, True);
 | 
						|
    lDoc.AppendChild(lImportedNode);
 | 
						|
    CurSVG.Document.ReadFromXML(lDoc, vfSVG);
 | 
						|
  finally
 | 
						|
    lDoc.Free;
 | 
						|
  end;
 | 
						|
end;
 | 
						|
 | 
						|
function TvHTMLVectorialReader.ReadMathFromNode(ANode: TDOMNode;
 | 
						|
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
var
 | 
						|
  CurSVG: TvEmbeddedVectorialDoc;
 | 
						|
  lText: TvText;
 | 
						|
  lDoc: TXMLDocument;
 | 
						|
  lImportedNode: TDOMNode;
 | 
						|
begin
 | 
						|
  Result := nil;
 | 
						|
  CurSVG := AData.AddEmbeddedVectorialDoc();
 | 
						|
  lDoc := TXMLDocument.Create;
 | 
						|
  try
 | 
						|
    lImportedNode := lDoc.ImportNode(ANode, True);
 | 
						|
    lDoc.AppendChild(lImportedNode);
 | 
						|
    CurSVG.Document.ReadFromXML(lDoc, vfMathML);
 | 
						|
  finally
 | 
						|
    lDoc.Free;
 | 
						|
  end;
 | 
						|
end;
 | 
						|
 | 
						|
function TvHTMLVectorialReader.ReadTableFromNode(ANode: TDOMNode;
 | 
						|
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
var
 | 
						|
  CurTable: TvTable;
 | 
						|
  lCurNode, lCurSubnode: TDOMNode;
 | 
						|
  lNodeName, lNodeValue: DOMString;
 | 
						|
  CurRow: TvTableRow;
 | 
						|
  Caption_Cell: TvTableCell = nil;
 | 
						|
  CurCellPara: TvParagraph;
 | 
						|
  // attributes
 | 
						|
  i, lBorderNr: Integer;
 | 
						|
  lAttrName, lAttrValue: DOMString;
 | 
						|
 | 
						|
  procedure SetBorderLineType(AType: TvTableBorderType);
 | 
						|
  begin
 | 
						|
    CurTable.Borders.Left.LineType := AType;
 | 
						|
    CurTable.Borders.Right.LineType := AType;
 | 
						|
    CurTable.Borders.Top.LineType := AType;
 | 
						|
    CurTable.Borders.Bottom.LineType := AType;
 | 
						|
    CurTable.Borders.InsideHoriz.LineType := AType;
 | 
						|
    CurTable.Borders.InsideVert.LineType := AType;
 | 
						|
  end;
 | 
						|
 | 
						|
begin
 | 
						|
  Result := nil;
 | 
						|
  CurTable := AData.AddTable();
 | 
						|
  CurTable.CellSpacingLeft := 3;
 | 
						|
  CurTable.CellSpacingTop := 2;
 | 
						|
 | 
						|
  // Default to no border without "border" attribute
 | 
						|
  SetBorderLineType(tbtNone);
 | 
						|
 | 
						|
  // table attributes
 | 
						|
  for i := 0 to ANode.Attributes.Length - 1 do
 | 
						|
  begin
 | 
						|
    lAttrName := ANode.Attributes.Item[i].NodeName;
 | 
						|
    lAttrValue := ANode.Attributes.Item[i].NodeValue;
 | 
						|
 | 
						|
    case lAttrName of
 | 
						|
    'border':
 | 
						|
    begin
 | 
						|
      lBorderNr := StrToInt(lAttrValue);
 | 
						|
 | 
						|
      SetBorderLineType(tbtSingle);
 | 
						|
      CurTable.Borders.Left.Width := lBorderNr;
 | 
						|
      CurTable.Borders.Right.Width := lBorderNr;
 | 
						|
      CurTable.Borders.Top.Width := lBorderNr;
 | 
						|
      CurTable.Borders.Bottom.Width := lBorderNr;
 | 
						|
      CurTable.Borders.InsideHoriz.Width := lBorderNr;
 | 
						|
      CurTable.Borders.InsideVert.Width := lBorderNr;
 | 
						|
 | 
						|
      if lBorderNr = 0 then
 | 
						|
        SetBorderLineType(tbtNone);
 | 
						|
    end;
 | 
						|
    end;
 | 
						|
  end;
 | 
						|
 | 
						|
  // table child nodes
 | 
						|
  lCurNode := ANode.FirstChild;
 | 
						|
  while Assigned(lCurNode) do
 | 
						|
  begin
 | 
						|
    lNodeName := lCurNode.NodeName;
 | 
						|
    lNodeValue := lCurNode.NodeValue;
 | 
						|
    case lNodeName of
 | 
						|
    'caption':
 | 
						|
    begin
 | 
						|
      CurRow := CurTable.AddRow();
 | 
						|
      Caption_Cell := CurRow.AddCell();
 | 
						|
      {Caption_Cell.Borders.Left.LineType := tbtNone;
 | 
						|
      Caption_Cell.Borders.Top.LineType := tbtNone;
 | 
						|
      Caption_Cell.Borders.Right.LineType := tbtNone;
 | 
						|
      Caption_Cell.Borders.Bottom.LineType := tbtNone;}
 | 
						|
      CurCellPara := Caption_Cell.AddParagraph();
 | 
						|
      CurCellPara.Style := ADoc.StyleTextBodyCentralized;
 | 
						|
      CurCellPara.AddText(GetTextContentFromNode(lCurNode));
 | 
						|
    end;
 | 
						|
    'tbody':
 | 
						|
    begin
 | 
						|
      lCurSubnode := lCurNode.FirstChild;
 | 
						|
      while Assigned(lCurSubnode) do
 | 
						|
      begin
 | 
						|
        ReadTableRowNode(CurTable, lCurSubnode, AData, ADoc);
 | 
						|
 | 
						|
        lCurSubnode := lCurSubnode.NextSibling;
 | 
						|
      end;
 | 
						|
    end;
 | 
						|
    end;
 | 
						|
 | 
						|
    lCurNode := lCurNode.NextSibling;
 | 
						|
  end;
 | 
						|
 | 
						|
  // the caption spans all columns
 | 
						|
  if Caption_Cell <> nil then
 | 
						|
    Caption_Cell.SpannedCols := CurTable.GetColCount();
 | 
						|
end;
 | 
						|
 | 
						|
function TvHTMLVectorialReader.ReadTableRowNode(ATable: TvTable; ANode: TDOMNode;
 | 
						|
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
var
 | 
						|
  lCurNode: TDOMNode;
 | 
						|
  lNodeName, lNodeValue: DOMString;
 | 
						|
  CurRow: TvTableRow;
 | 
						|
  CurCell: TvTableCell;
 | 
						|
  CurCellPara: TvParagraph;
 | 
						|
begin
 | 
						|
  Result := nil;
 | 
						|
  CurRow := ATable.AddRow();
 | 
						|
 | 
						|
  lCurNode := ANode.FirstChild;
 | 
						|
  while Assigned(lCurNode) do
 | 
						|
  begin
 | 
						|
    lNodeName := lCurNode.NodeName;
 | 
						|
    lNodeValue := lCurNode.NodeValue;
 | 
						|
    case lNodeName of
 | 
						|
    'th':
 | 
						|
    begin
 | 
						|
      CurCell := CurRow.AddCell();
 | 
						|
      CurCellPara := CurCell.AddParagraph();
 | 
						|
      CurCellPara.Style := ADoc.StyleTextBodyBold;
 | 
						|
      CurCellPara.AddText(GetTextContentFromNode(lCurNode));
 | 
						|
    end;
 | 
						|
    'td':
 | 
						|
    begin
 | 
						|
      CurCell := CurRow.AddCell();
 | 
						|
 | 
						|
      CurCellPara := CurCell.AddParagraph();
 | 
						|
      Self.ReadParagraphFromNode(CurCellPara, lCurNode, AData, ADoc);
 | 
						|
    end;
 | 
						|
    end;
 | 
						|
 | 
						|
    lCurNode := lCurNode.NextSibling;
 | 
						|
  end;
 | 
						|
end;
 | 
						|
 | 
						|
function TvHTMLVectorialReader.ReadUListFromNode(ANode: TDOMNode;
 | 
						|
  AData: TvTextPageSequence; ADoc: TvVectorialDocument): TvEntity;
 | 
						|
var
 | 
						|
  lCurNode: TDOMNode;
 | 
						|
  lNodeName, lNodeValue: DOMString;
 | 
						|
  lNodeText: string;
 | 
						|
  //
 | 
						|
  lList: TvList;
 | 
						|
  lCurPara: TvParagraph;
 | 
						|
begin
 | 
						|
  Result := nil;
 | 
						|
  lList := AData.AddList();
 | 
						|
 | 
						|
  lCurNode := ANode.FirstChild;
 | 
						|
  while Assigned(lCurNode) do
 | 
						|
  begin
 | 
						|
    lNodeName := lCurNode.NodeName;
 | 
						|
    lNodeValue := lCurNode.NodeValue;
 | 
						|
    case lNodeName of
 | 
						|
    'li':
 | 
						|
    begin
 | 
						|
      lNodeText := GetTextContentFromNode(lCurNode);
 | 
						|
      lCurPara := lList.AddParagraph(lNodeText);
 | 
						|
    end;
 | 
						|
    end;
 | 
						|
 | 
						|
    lCurNode := lCurNode.NextSibling;
 | 
						|
  end;
 | 
						|
end;
 | 
						|
 | 
						|
constructor TvHTMLVectorialReader.Create;
 | 
						|
begin
 | 
						|
  inherited Create;
 | 
						|
 | 
						|
  FPointSeparator := DefaultFormatSettings;
 | 
						|
  FPointSeparator.DecimalSeparator := '.';
 | 
						|
  FPointSeparator.ThousandSeparator := '#';// disable the thousand separator
 | 
						|
end;
 | 
						|
 | 
						|
destructor TvHTMLVectorialReader.Destroy;
 | 
						|
begin
 | 
						|
  inherited Destroy;
 | 
						|
end;
 | 
						|
 | 
						|
procedure TvHTMLVectorialReader.ReadFromStrings(AStrings: TStrings;
 | 
						|
  AData: TvVectorialDocument);
 | 
						|
var
 | 
						|
  Doc: TXMLDocument = nil;
 | 
						|
  lStream: TMemoryStream;
 | 
						|
  lTmp: String;
 | 
						|
begin
 | 
						|
  lStream := TMemoryStream.Create();
 | 
						|
  try
 | 
						|
    // Remove the <!DOCTYPE line
 | 
						|
    if Pos('<!DOCTYPE', AStrings.Strings[0]) <> 0 then
 | 
						|
      AStrings.Delete(0);
 | 
						|
    // Create a header
 | 
						|
    AStrings.Insert(0, '<?xml version="1.0"?>');
 | 
						|
    lTmp := AStrings.Text;
 | 
						|
    // Flush it back to a stream
 | 
						|
    AStrings.SaveToStream(lStream);
 | 
						|
    lStream.Position := 0;
 | 
						|
    // HTML is not XML, but might be compatible enough... a dedicated reader will be complex, but eventually necessary
 | 
						|
    ReadXMLFile(Doc, lStream);
 | 
						|
    lStream.Free; // Release as soon as unnecessary
 | 
						|
    lStream := nil;
 | 
						|
    //
 | 
						|
    ReadFromXML(Doc, AData);
 | 
						|
  finally
 | 
						|
    Doc.Free;
 | 
						|
    lStream.Free;
 | 
						|
  end;
 | 
						|
end;
 | 
						|
 | 
						|
procedure TvHTMLVectorialReader.ReadFromXML(Doc: TXMLDocument;
 | 
						|
  AData: TvVectorialDocument);
 | 
						|
var
 | 
						|
  lCurNode, lCurSubnode: TDOMNode;
 | 
						|
  lPage: TvTextPageSequence;
 | 
						|
  lNodeName, lNodeValue: DOMString;
 | 
						|
  ANode: TDOMElement;
 | 
						|
  i: Integer;
 | 
						|
  lCurEntity: TvEntity;
 | 
						|
begin
 | 
						|
  {ANode := Doc.DocumentElement;
 | 
						|
  for i := 0 to ANode.Attributes.Length - 1 do
 | 
						|
  begin
 | 
						|
    lNodeName := ANode.Attributes.Item[i].NodeName;
 | 
						|
    lNodeValue := ANode.Attributes.Item[i].NodeValue;
 | 
						|
  end;}
 | 
						|
 | 
						|
  AData.AddStandardTextDocumentStyles(vfHTML);
 | 
						|
 | 
						|
  // ----------------
 | 
						|
  // Now process the elements
 | 
						|
  // ----------------
 | 
						|
  lCurNode := Doc.DocumentElement.FirstChild;
 | 
						|
  lPage := AData.AddTextPageSequence();
 | 
						|
  //lPage.Width := AData.Width;
 | 
						|
  //lPage.Height := AData.Height;
 | 
						|
  while Assigned(lCurNode) do
 | 
						|
  begin
 | 
						|
    lNodeName := lCurNode.NodeName;
 | 
						|
    if lNodeName = 'body' then
 | 
						|
    begin
 | 
						|
      lCurSubnode := lCurNode.FirstChild;
 | 
						|
      while Assigned(lCurSubnode) do
 | 
						|
      begin
 | 
						|
        lCurEntity := ReadEntityFromNode(lCurSubnode, lPage, AData);
 | 
						|
        if lCurEntity <> nil then
 | 
						|
          lPage.AddEntity(lCurEntity);
 | 
						|
 | 
						|
        lCurSubnode := lCurSubnode.NextSibling;
 | 
						|
      end;
 | 
						|
    end;
 | 
						|
 | 
						|
    lCurNode := lCurNode.NextSibling;
 | 
						|
  end;
 | 
						|
end;
 | 
						|
 | 
						|
class function TvHTMLVectorialReader.IsSupportedRasterImage(AFileName: string): Boolean;
 | 
						|
var
 | 
						|
  lExt: string;
 | 
						|
begin
 | 
						|
  Result := False;
 | 
						|
  lExt := LowerCase(ExtractFileExt(AFileName));
 | 
						|
  case lExt of
 | 
						|
  '.png', '.jpg', '.jpeg', '.bmp', '.xpm', '.gif':
 | 
						|
    Result := True
 | 
						|
  end;
 | 
						|
end;
 | 
						|
 | 
						|
initialization
 | 
						|
 | 
						|
  RegisterVectorialReader(TvHTMLVectorialReader, vfHTML);
 | 
						|
 | 
						|
end.
 | 
						|
 |