fpvectorial: html reader structural code

git-svn-id: trunk@45993 -
This commit is contained in:
sekelsenmat 2014-07-30 14:39:21 +00:00
parent fc22ff99e3
commit 3567ca39fc
6 changed files with 188 additions and 5 deletions

1
.gitattributes vendored
View File

@ -1425,6 +1425,7 @@ components/fpvectorial/fpvectorialpkg.lpk svneol=native#text/plain
components/fpvectorial/fpvectorialpkg.pas svneol=native#text/plain
components/fpvectorial/fpvtocanvas.pas svneol=native#text/plain
components/fpvectorial/fpvutils.pas svneol=native#text/plain
components/fpvectorial/htmlvectorialreader.pas svneol=native#text/pascal
components/fpvectorial/lasvectorialreader.pas svneol=native#text/plain
components/fpvectorial/lazvectorialreader.pas svneol=native#text/plain
components/fpvectorial/mathmlvectorialreader.pas svneol=native#text/plain

View File

@ -54,7 +54,7 @@ type
{ Formula formats }
vfMathML,
{ Text Document formats }
vfODT, vfDOCX,
vfODT, vfDOCX, vfHTML,
{ Raster Image formats }
vfRAW
);
@ -89,6 +89,7 @@ const
STR_ODG_EXTENSION = '.odg';
STR_ODT_EXTENSION = '.odt';
STR_DOCX_EXTENSION = '.docx';
STR_HTML_EXTENSION = '.html';
STR_FPVECTORIAL_TEXT_HEIGHT_SAMPLE = 'Ćą';
@ -6613,6 +6614,7 @@ begin
else if AnsiCompareText(lExt, STR_MATHML_EXTENSION) = 0 then Result := vfMathML
else if AnsiCompareText(lExt, STR_ODG_EXTENSION) = 0 then Result := vfODG
else if AnsiCompareText(lExt, STR_DOCX_EXTENSION) = 0 then Result := vfDOCX
else if AnsiCompareText(lExt, STR_HTML_EXTENSION) = 0 then Result := vfHTML
else
raise Exception.Create('TvVectorialDocument.GetFormatFromExtension: The extension (' + lExt + ') doesn''t match any supported formats.');
end;

View File

@ -1,4 +1,4 @@
<?xml version="1.0"?>
<?xml version="1.0" encoding="UTF-8"?>
<CONFIG>
<Package Version="4">
<Name Value="fpvectorialpkg"/>
@ -12,7 +12,7 @@
<CompilerPath Value="$(CompPath)"/>
</Other>
</CompilerOptions>
<Files Count="20">
<Files Count="21">
<Item1>
<Filename Value="fpvectorial.pas"/>
<UnitName Value="fpvectorial"/>
@ -93,6 +93,10 @@
<Filename Value="docxvectorialwriter.pas"/>
<UnitName Value="docxvectorialwriter"/>
</Item20>
<Item21>
<Filename Value="htmlvectorialreader.pas"/>
<UnitName Value="htmlvectorialreader"/>
</Item21>
</Files>
<Type Value="RunAndDesignTime"/>
<RequiredPkgs Count="2">

View File

@ -13,7 +13,7 @@ uses
lazvectorialreader, mathmlvectorialreader, odgvectorialreader,
rawvectorialreadwrite, svgvectorialreader, svgvectorialwriter,
svgzvectorialreader, odtvectorialwriter, docxvectorialwriter,
LazarusPackageIntf;
htmlvectorialreader, LazarusPackageIntf;
implementation

View File

@ -0,0 +1,176 @@
{
Reads an HTML Document
License: The same modified LGPL as the Free Pascal RTL
See the file COPYING.modifiedLGPL for more details
AUTHORS: Felipe Monteiro de Carvalho
}
unit htmlvectorialreader;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, math, contnrs,
fpimage, fpcanvas, laz2_xmlread, laz2_dom, fgl,
// image data formats
fpreadpng,
// HTML can contain SVG
svgvectorialreader,
fpvectorial, fpvutils, lazutf8, TypInfo;
type
{ TvHTMLVectorialReader }
// Reader class registered for the vfHTML format in this unit's initialization section.
// The input is parsed with the XML reader and the visited elements are dispatched by tag name.
TvHTMLVectorialReader = class(TvCustomVectorialReader)
private
// FPointSeparator is set up in Create with '.' as the decimal separator.
// FCommaSeparator is declared but never assigned in this unit.
FPointSeparator, FCommaSeparator: TFormatSettings;
//
// Dispatches on the lower-cased node name (h1..h6, p, svg) to one of the readers below;
// returns nil for any other node name.
function ReadEntityFromNode(ANode: TDOMNode; AData: TvVectorialPage; ADoc: TvVectorialDocument): TvEntity;
// Element readers for headings, paragraphs and embedded SVG; they have no implementation yet.
function ReadHeaderFromNode(ANode: TDOMNode; AData: TvVectorialPage; ADoc: TvVectorialDocument): TvEntity;
function ReadParagraphFromNode(ANode: TDOMNode; AData: TvVectorialPage; ADoc: TvVectorialDocument): TvEntity;
function ReadSVGFromNode(ANode: TDOMNode; AData: TvVectorialPage; ADoc: TvVectorialDocument): TvEntity;
public
{ General reading methods }
constructor Create; override;
Destructor Destroy; override;
// Takes the document as lines of text, drops a leading <!DOCTYPE line, prepends an XML header,
// parses the result as XML and passes it on to ReadFromXML.
procedure ReadFromStrings(AStrings: TStrings; AData: TvVectorialDocument); override;
// Adds one page to AData and hands each direct child of the document element to ReadEntityFromNode;
// every non-nil entity returned is added to that page.
procedure ReadFromXML(Doc: TXMLDocument; AData: TvVectorialDocument);
end;
implementation
const
// SVG requires hardcoding a DPI value
// The Opera Browser and Inkscape use 90 DPI, so we follow that
// 1 inch = 25.4 millimeters
// At 90 DPI one pixel is 1/90 inch = (1 / 90) * 25.4 mm = 0.2822 mm
// FLOAT_MILIMETERS_PER_PIXEL = 0.3528; // DPI 72 = 1 / 72 inches per pixel
// NOTE: the 90 DPI value (0.2822 mm per pixel) is deliberately multiplied by 5 below,
// because the unscaled value looked ugly when rendered
FLOAT_MILIMETERS_PER_PIXEL = 5*0.2822; // millimeters per pixel: 5 times the 90 DPI value
FLOAT_PIXELS_PER_MILIMETER = 1 / FLOAT_MILIMETERS_PER_PIXEL; // pixels per millimeter (inverse of the constant above)
{ TvHTMLVectorialReader }
{ Routes a DOM node to the reader that matches its tag name.
  The node name is lower-cased before comparison.

  ANode - node to inspect
  AData - page that is passed through to the selected reader
  ADoc  - document that is passed through to the selected reader

  Returns the value of the selected reader, or nil when the tag is not
  one of h1..h6, p or svg. }
function TvHTMLVectorialReader.ReadEntityFromNode(ANode: TDOMNode;
  AData: TvVectorialPage; ADoc: TvVectorialDocument): TvEntity;
var
  lTag: DOMString;
begin
  lTag := LowerCase(ANode.NodeName);

  if (lTag = 'h1') or (lTag = 'h2') or (lTag = 'h3') or
     (lTag = 'h4') or (lTag = 'h5') or (lTag = 'h6') then
    Result := ReadHeaderFromNode(ANode, AData, ADoc)
  else if lTag = 'p' then
    Result := ReadParagraphFromNode(ANode, AData, ADoc)
  else if lTag = 'svg' then
    Result := ReadSVGFromNode(ANode, AData, ADoc)
  else
    Result := nil; // unhandled tag
end;
{ Reads a heading element (h1..h6).
  Not implemented yet: always returns nil so that the caller, which only
  adds non-nil entities to the page, never receives an undefined pointer. }
function TvHTMLVectorialReader.ReadHeaderFromNode(ANode: TDOMNode;
  AData: TvVectorialPage; ADoc: TvVectorialDocument): TvEntity;
begin
  // An empty function body leaves Result undefined; make the stub explicit
  Result := nil;
end;
{ Reads a paragraph element (p).
  Not implemented yet: always returns nil so that the caller, which only
  adds non-nil entities to the page, never receives an undefined pointer. }
function TvHTMLVectorialReader.ReadParagraphFromNode(ANode: TDOMNode;
  AData: TvVectorialPage; ADoc: TvVectorialDocument): TvEntity;
begin
  // An empty function body leaves Result undefined; make the stub explicit
  Result := nil;
end;
{ Reads an embedded svg element.
  Not implemented yet: always returns nil so that the caller, which only
  adds non-nil entities to the page, never receives an undefined pointer. }
function TvHTMLVectorialReader.ReadSVGFromNode(ANode: TDOMNode;
  AData: TvVectorialPage; ADoc: TvVectorialDocument): TvEntity;
begin
  // An empty function body leaves Result undefined; make the stub explicit
  Result := nil;
end;
{ Creates the reader and prepares FPointSeparator: a copy of the default
  format settings that uses '.' as the decimal separator. }
constructor TvHTMLVectorialReader.Create;
var
  lSettings: TFormatSettings;
begin
  inherited Create;

  lSettings := DefaultFormatSettings;
  lSettings.DecimalSeparator := '.';
  // '#' is used as a placeholder to disable the thousand separator
  lSettings.ThousandSeparator := '#';

  FPointSeparator := lSettings;
end;
{ Destroys the reader. The class holds no resources of its own, so this
  only chains to the inherited destructor. }
destructor TvHTMLVectorialReader.Destroy;
begin
  inherited;
end;
{ Reads an HTML document supplied as lines of text.

  The text is made digestible for the XML parser by dropping a leading
  <!DOCTYPE line (matched case-insensitively) and prepending an XML header.
  The parsed document is then handed to ReadFromXML.

  AStrings - the HTML source; it is copied and never modified
  AData    - the vectorial document that receives the content

  Any exception raised by the XML parser or by ReadFromXML propagates to
  the caller; all temporary objects are released in every case. }
procedure TvHTMLVectorialReader.ReadFromStrings(AStrings: TStrings;
  AData: TvVectorialDocument);
var
  Doc: TXMLDocument = nil;
  lStream: TMemoryStream = nil;
  lLines: TStringList = nil;
begin
  try
    // Work on a private copy so the caller's list is left untouched
    lLines := TStringList.Create;
    lLines.Assign(AStrings);

    // Remove the <!DOCTYPE line. The Count check avoids an index error on
    // empty input, and HTML allows the keyword in any letter case.
    if (lLines.Count > 0) and
       (Pos('<!DOCTYPE', UpperCase(lLines.Strings[0])) <> 0) then
      lLines.Delete(0);

    // Create a header
    lLines.Insert(0, '<?xml version="1.0"?>');

    // Flush it to a stream for the XML parser
    lStream := TMemoryStream.Create();
    lLines.SaveToStream(lStream);
    FreeAndNil(lLines); // Release as soon as unnecessary
    lStream.Position := 0;

    // HTML is not XML, but might be compatible enough... a dedicated reader will be complex, but eventually necessary
    ReadXMLFile(Doc, lStream);
    FreeAndNil(lStream); // Release as soon as unnecessary

    ReadFromXML(Doc, AData);
  finally
    // Free is nil-safe, so objects already released above are skipped
    Doc.Free;
    lStream.Free;
    lLines.Free;
  end;
end;
{ Converts a parsed document into entities on a new page of AData.

  Doc   - the parsed document; must have a document element
  AData - the vectorial document that receives one new page

  Each direct child of the document element is passed to ReadEntityFromNode
  and every non-nil entity returned is added to the page.

  Raises Exception when Doc is nil or has no document element. The check
  is done before the page is added, so AData is untouched in that case.

  NOTE(review): only direct children of the root are visited. For a regular
  HTML document the root is <html>, whose children are <head> and <body>,
  so headings and paragraphs nested inside <body> are not reached yet. }
procedure TvHTMLVectorialReader.ReadFromXML(Doc: TXMLDocument;
  AData: TvVectorialDocument);
var
  lCurNode: TDOMNode;
  lPage: TvVectorialPage;
  lCurEntity: TvEntity;
begin
  // This method is public, so do not rely on the caller having parsed successfully
  if (Doc = nil) or (Doc.DocumentElement = nil) then
    raise Exception.Create('TvHTMLVectorialReader.ReadFromXML: The document has no root element.');

  // ----------------
  // Now process the elements
  // ----------------
  lCurNode := Doc.DocumentElement.FirstChild;
  lPage := AData.AddPage();
  //lPage.Width := AData.Width;
  //lPage.Height := AData.Height;
  while Assigned(lCurNode) do
  begin
    lCurEntity := ReadEntityFromNode(lCurNode, lPage, AData);
    if lCurEntity <> nil then
      lPage.AddEntity(lCurEntity);

    lCurNode := lCurNode.NextSibling;
  end;
end;
initialization
// Runs when the unit is loaded: registers TvHTMLVectorialReader as the reader class for the vfHTML format
RegisterVectorialReader(TvHTMLVectorialReader, vfHTML);
end.

View File

@ -2889,7 +2889,7 @@ end;
procedure TvSVGVectorialReader.ReadFromStream(AStream: TStream;
AData: TvVectorialDocument);
var
Doc: TXMLDocument;
Doc: TXMLDocument = nil;
begin
try
// Read in xml file from the stream