fpspreadsheet: New demo htmlread_http (reading html table from the internet). HTML reader now detects hyperlinks (only one per cell, though - like in xls).

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@4239 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz 2015-08-03 15:21:33 +00:00
parent 126fe87ad1
commit acaacf81d4
4 changed files with 172 additions and 4 deletions

View File

@ -0,0 +1,66 @@
<?xml version="1.0" encoding="UTF-8"?>
<CONFIG>
<ProjectOptions>
<Version Value="9"/>
<PathDelim Value="\"/>
<General>
<Flags>
<LRSInOutputDirectory Value="False"/>
</Flags>
<SessionStorage Value="InProjectDir"/>
<MainUnit Value="0"/>
<Title Value="htmlread_http"/>
<UseAppBundle Value="False"/>
</General>
<VersionInfo>
<StringTable ProductVersion=""/>
</VersionInfo>
<BuildModes Count="1">
<Item1 Name="default" Default="True"/>
</BuildModes>
<PublishOptions>
<Version Value="2"/>
<IgnoreBinaries Value="False"/>
<IncludeFileFilter Value="*.(pas|pp|inc|lfm|lpr|lrs|lpi|lpk|sh|xml)"/>
<ExcludeFileFilter Value="*.(bak|ppu|ppw|o|so);*~;backup"/>
</PublishOptions>
<RunParams>
<local>
<FormatVersion Value="1"/>
<LaunchingApplication PathPlusParams="\usr\X11R6\bin\xterm -T 'Lazarus Run Output' -e $(LazarusDir)\tools\runwait.sh $(TargetCmdLine)"/>
</local>
</RunParams>
<RequiredPackages Count="1">
<Item1>
<PackageName Value="LazUtils"/>
</Item1>
</RequiredPackages>
<Units Count="1">
<Unit0>
<Filename Value="htmlread_http.lpr"/>
<IsPartOfProject Value="True"/>
</Unit0>
</Units>
</ProjectOptions>
<CompilerOptions>
<Version Value="11"/>
<PathDelim Value="\"/>
<Target>
<Filename Value="htmlread_http"/>
</Target>
<SearchPaths>
<OtherUnitFiles Value="..\..\.."/>
<UnitOutputDirectory Value="..\..\lib\$(TargetCPU)-$(TargetOS)"/>
</SearchPaths>
<Parsing>
<SyntaxOptions>
<UseAnsiStrings Value="False"/>
</SyntaxOptions>
</Parsing>
<Linking>
<Debugging>
<DebugInfoType Value="dsDwarf2Set"/>
</Debugging>
</Linking>
</CompilerOptions>
</CONFIG>

View File

@ -0,0 +1,78 @@
{
htmlread_http.dpr
Demonstrates how to read a html file from the internet using the fpspreadsheet library.
}
program htmlread_http;
{$mode delphi}{$H+}
uses
Classes, SysUtils, LazUTF8, fphttpclient, fpstypes, fpsutils, fpspreadsheet, fpshtml;
var
MyWorkbook: TsWorkbook;
MyWorksheet: TsWorksheet;
i: Integer;
CurCell: PCell;
stream: TMemoryStream;
const
// url = 'http://unicode.e-workers.de/entities.php';
url = 'http://www.freepascal.org/docs.var';
begin
stream := TMemoryStream.Create;
try
// Get file from the internet
with TFPHttpClient.Create(nil) do
try
Get(url, stream);
finally
Free;
end;
// Parameters
HTMLParams.TableIndex := 0;
// Create the spreadsheet
MyWorkbook := TsWorkbook.Create;
try
MyWorkbook.Options := MyWorkbook.Options + [boReadFormulas];
MyWorkbook.ReadFromStream(stream, sfHTML);
MyWorksheet := MyWorkbook.GetFirstWorksheet;
// Write all cells with contents to the console
WriteLn('');
WriteLn('Contents of the first worksheet of the file:');
WriteLn('');
for CurCell in MyWorksheet.Cells do
begin
Write(
'Row: ', CurCell^.Row,
' Col: ', CurCell^.Col,
' Value: ', UTF8ToConsole(MyWorkSheet.ReadAsUTF8Text(CurCell^.Row, CurCell^.Col))
);
if MyWorksheet.HasHyperlink(CurCell) then
Write(' Hyperlink: ', MyWorksheet.ReadHyperlink(CurCell).Target);
WriteLn;
end;
finally
// Finalization
MyWorkbook.Free;
end;
finally
stream.Free;
end;
{$IFDEF MSWINDOWS}
WriteLn;
WriteLn('Press ENTER to exit.');
ReadLn;
{$ENDIF}
end.

View File

@ -31,6 +31,8 @@ type
FCelLText: String;
FAttrList: TsHTMLAttrList;
FColSpan, FRowSpan: Integer;
FHRef: String;
procedure ExtractHRef;
procedure ExtractMergedRange;
procedure TagFoundHandler(NoCaseTag, ActualTag: string);
procedure TextFoundHandler(AText: String);
@ -444,8 +446,17 @@ begin
cell := FWorksheet.AddCell(ARow, ACol);
// Merged cells
if (FColSpan > 0) or (FRowSpan > 0) then
if (FColSpan > 0) or (FRowSpan > 0) then begin
FWorksheet.MergeCells(ARow, ACol, ARow + FRowSpan, ACol + FColSpan);
FRowSpan := 0;
FColSpan := 0;
end;
// Hyperlink
if FHRef <> '' then begin
FWorksheet.WriteHyperlink(cell, FHRef);
FHRef := '';
end;
// Do not try to interpret the strings. --> everything is a LABEL cell.
if not HTMLParams.DetectContentType then
@ -486,6 +497,16 @@ begin
FWorksheet.WriteUTF8Text(cell, AText);
end;
procedure TsHTMLReader.ExtractHRef;
var
idx: Integer;
begin
FHRef := '';
idx := FAttrList.IndexOfName('href');
if idx > -1 then
FHRef := FAttrList[idx].Value;
end;
procedure TsHTMLReader.ExtractMergedRange;
var
idx: Integer;
@ -566,6 +587,7 @@ begin
if ((NoCaseTag = '<TH>') or (pos('<TH ', NoCaseTag) = 1)) and FInTable then
begin
FInCell := true;
inc(FCurrCol);
FCellText := '';
end else
if pos('<SPAN', NoCaseTag) = 1 then
@ -573,10 +595,11 @@ begin
if FInCell then
FInSpan := true;
end else
if pos('<A', NoCaseTag) = 1 then
if (pos('<A', NoCaseTag) = 1) and FInCell then
begin
if FInCell then
FInA := true
FInA := true;
FAttrList.Parse(ActualTag);
ExtractHRef;
end else
if (pos('<H', NoCaseTag) = 1) and (NoCaseTag[3] in ['1', '2', '3', '4', '5', '6']) then
begin

View File

@ -6795,6 +6795,7 @@ begin
inc(FLockCount);
try
ok := false;
AStream.Position := 0;
AReader.ReadFromStream(AStream);
ok := true;
finally