fpspreadsheet: New demo htmlread_http (reading html table from the internet). HTML reader now detects hyperlinks (only one per cell, though - like in xls).
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@4239 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
parent
126fe87ad1
commit
acaacf81d4
@ -0,0 +1,66 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<CONFIG>
|
||||||
|
<ProjectOptions>
|
||||||
|
<Version Value="9"/>
|
||||||
|
<PathDelim Value="\"/>
|
||||||
|
<General>
|
||||||
|
<Flags>
|
||||||
|
<LRSInOutputDirectory Value="False"/>
|
||||||
|
</Flags>
|
||||||
|
<SessionStorage Value="InProjectDir"/>
|
||||||
|
<MainUnit Value="0"/>
|
||||||
|
<Title Value="htmlread_http"/>
|
||||||
|
<UseAppBundle Value="False"/>
|
||||||
|
</General>
|
||||||
|
<VersionInfo>
|
||||||
|
<StringTable ProductVersion=""/>
|
||||||
|
</VersionInfo>
|
||||||
|
<BuildModes Count="1">
|
||||||
|
<Item1 Name="default" Default="True"/>
|
||||||
|
</BuildModes>
|
||||||
|
<PublishOptions>
|
||||||
|
<Version Value="2"/>
|
||||||
|
<IgnoreBinaries Value="False"/>
|
||||||
|
<IncludeFileFilter Value="*.(pas|pp|inc|lfm|lpr|lrs|lpi|lpk|sh|xml)"/>
|
||||||
|
<ExcludeFileFilter Value="*.(bak|ppu|ppw|o|so);*~;backup"/>
|
||||||
|
</PublishOptions>
|
||||||
|
<RunParams>
|
||||||
|
<local>
|
||||||
|
<FormatVersion Value="1"/>
|
||||||
|
<LaunchingApplication PathPlusParams="\usr\X11R6\bin\xterm -T 'Lazarus Run Output' -e $(LazarusDir)\tools\runwait.sh $(TargetCmdLine)"/>
|
||||||
|
</local>
|
||||||
|
</RunParams>
|
||||||
|
<RequiredPackages Count="1">
|
||||||
|
<Item1>
|
||||||
|
<PackageName Value="LazUtils"/>
|
||||||
|
</Item1>
|
||||||
|
</RequiredPackages>
|
||||||
|
<Units Count="1">
|
||||||
|
<Unit0>
|
||||||
|
<Filename Value="htmlread_http.lpr"/>
|
||||||
|
<IsPartOfProject Value="True"/>
|
||||||
|
</Unit0>
|
||||||
|
</Units>
|
||||||
|
</ProjectOptions>
|
||||||
|
<CompilerOptions>
|
||||||
|
<Version Value="11"/>
|
||||||
|
<PathDelim Value="\"/>
|
||||||
|
<Target>
|
||||||
|
<Filename Value="htmlread_http"/>
|
||||||
|
</Target>
|
||||||
|
<SearchPaths>
|
||||||
|
<OtherUnitFiles Value="..\..\.."/>
|
||||||
|
<UnitOutputDirectory Value="..\..\lib\$(TargetCPU)-$(TargetOS)"/>
|
||||||
|
</SearchPaths>
|
||||||
|
<Parsing>
|
||||||
|
<SyntaxOptions>
|
||||||
|
<UseAnsiStrings Value="False"/>
|
||||||
|
</SyntaxOptions>
|
||||||
|
</Parsing>
|
||||||
|
<Linking>
|
||||||
|
<Debugging>
|
||||||
|
<DebugInfoType Value="dsDwarf2Set"/>
|
||||||
|
</Debugging>
|
||||||
|
</Linking>
|
||||||
|
</CompilerOptions>
|
||||||
|
</CONFIG>
|
@ -0,0 +1,78 @@
|
|||||||
|
{
|
||||||
|
htmlread_http.dpr
|
||||||
|
|
||||||
|
Demonstrates how to read a html file from the internet using the fpspreadsheet library.
|
||||||
|
}
|
||||||
|
|
||||||
|
program htmlread_http;
|
||||||
|
|
||||||
|
{$mode delphi}{$H+}
|
||||||
|
|
||||||
|
uses
|
||||||
|
Classes, SysUtils, LazUTF8, fphttpclient, fpstypes, fpsutils, fpspreadsheet, fpshtml;
|
||||||
|
|
||||||
|
var
|
||||||
|
MyWorkbook: TsWorkbook;
|
||||||
|
MyWorksheet: TsWorksheet;
|
||||||
|
i: Integer;
|
||||||
|
CurCell: PCell;
|
||||||
|
stream: TMemoryStream;
|
||||||
|
|
||||||
|
const
|
||||||
|
// url = 'http://unicode.e-workers.de/entities.php';
|
||||||
|
url = 'http://www.freepascal.org/docs.var';
|
||||||
|
|
||||||
|
begin
|
||||||
|
stream := TMemoryStream.Create;
|
||||||
|
try
|
||||||
|
// Get file from the internet
|
||||||
|
with TFPHttpClient.Create(nil) do
|
||||||
|
try
|
||||||
|
Get(url, stream);
|
||||||
|
finally
|
||||||
|
Free;
|
||||||
|
end;
|
||||||
|
|
||||||
|
// Parameters
|
||||||
|
HTMLParams.TableIndex := 0;
|
||||||
|
|
||||||
|
// Create the spreadsheet
|
||||||
|
MyWorkbook := TsWorkbook.Create;
|
||||||
|
try
|
||||||
|
MyWorkbook.Options := MyWorkbook.Options + [boReadFormulas];
|
||||||
|
MyWorkbook.ReadFromStream(stream, sfHTML);
|
||||||
|
|
||||||
|
MyWorksheet := MyWorkbook.GetFirstWorksheet;
|
||||||
|
|
||||||
|
// Write all cells with contents to the console
|
||||||
|
WriteLn('');
|
||||||
|
WriteLn('Contents of the first worksheet of the file:');
|
||||||
|
WriteLn('');
|
||||||
|
|
||||||
|
for CurCell in MyWorksheet.Cells do
|
||||||
|
begin
|
||||||
|
Write(
|
||||||
|
'Row: ', CurCell^.Row,
|
||||||
|
' Col: ', CurCell^.Col,
|
||||||
|
' Value: ', UTF8ToConsole(MyWorkSheet.ReadAsUTF8Text(CurCell^.Row, CurCell^.Col))
|
||||||
|
);
|
||||||
|
if MyWorksheet.HasHyperlink(CurCell) then
|
||||||
|
Write(' Hyperlink: ', MyWorksheet.ReadHyperlink(CurCell).Target);
|
||||||
|
WriteLn;
|
||||||
|
end;
|
||||||
|
|
||||||
|
finally
|
||||||
|
// Finalization
|
||||||
|
MyWorkbook.Free;
|
||||||
|
end;
|
||||||
|
finally
|
||||||
|
stream.Free;
|
||||||
|
end;
|
||||||
|
|
||||||
|
{$IFDEF MSWINDOWS}
|
||||||
|
WriteLn;
|
||||||
|
WriteLn('Press ENTER to exit.');
|
||||||
|
ReadLn;
|
||||||
|
{$ENDIF}
|
||||||
|
end.
|
||||||
|
|
@ -31,6 +31,8 @@ type
|
|||||||
FCelLText: String;
|
FCelLText: String;
|
||||||
FAttrList: TsHTMLAttrList;
|
FAttrList: TsHTMLAttrList;
|
||||||
FColSpan, FRowSpan: Integer;
|
FColSpan, FRowSpan: Integer;
|
||||||
|
FHRef: String;
|
||||||
|
procedure ExtractHRef;
|
||||||
procedure ExtractMergedRange;
|
procedure ExtractMergedRange;
|
||||||
procedure TagFoundHandler(NoCaseTag, ActualTag: string);
|
procedure TagFoundHandler(NoCaseTag, ActualTag: string);
|
||||||
procedure TextFoundHandler(AText: String);
|
procedure TextFoundHandler(AText: String);
|
||||||
@ -444,8 +446,17 @@ begin
|
|||||||
cell := FWorksheet.AddCell(ARow, ACol);
|
cell := FWorksheet.AddCell(ARow, ACol);
|
||||||
|
|
||||||
// Merged cells
|
// Merged cells
|
||||||
if (FColSpan > 0) or (FRowSpan > 0) then
|
if (FColSpan > 0) or (FRowSpan > 0) then begin
|
||||||
FWorksheet.MergeCells(ARow, ACol, ARow + FRowSpan, ACol + FColSpan);
|
FWorksheet.MergeCells(ARow, ACol, ARow + FRowSpan, ACol + FColSpan);
|
||||||
|
FRowSpan := 0;
|
||||||
|
FColSpan := 0;
|
||||||
|
end;
|
||||||
|
|
||||||
|
// Hyperlink
|
||||||
|
if FHRef <> '' then begin
|
||||||
|
FWorksheet.WriteHyperlink(cell, FHRef);
|
||||||
|
FHRef := '';
|
||||||
|
end;
|
||||||
|
|
||||||
// Do not try to interpret the strings. --> everything is a LABEL cell.
|
// Do not try to interpret the strings. --> everything is a LABEL cell.
|
||||||
if not HTMLParams.DetectContentType then
|
if not HTMLParams.DetectContentType then
|
||||||
@ -486,6 +497,16 @@ begin
|
|||||||
FWorksheet.WriteUTF8Text(cell, AText);
|
FWorksheet.WriteUTF8Text(cell, AText);
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
procedure TsHTMLReader.ExtractHRef;
|
||||||
|
var
|
||||||
|
idx: Integer;
|
||||||
|
begin
|
||||||
|
FHRef := '';
|
||||||
|
idx := FAttrList.IndexOfName('href');
|
||||||
|
if idx > -1 then
|
||||||
|
FHRef := FAttrList[idx].Value;
|
||||||
|
end;
|
||||||
|
|
||||||
procedure TsHTMLReader.ExtractMergedRange;
|
procedure TsHTMLReader.ExtractMergedRange;
|
||||||
var
|
var
|
||||||
idx: Integer;
|
idx: Integer;
|
||||||
@ -566,6 +587,7 @@ begin
|
|||||||
if ((NoCaseTag = '<TH>') or (pos('<TH ', NoCaseTag) = 1)) and FInTable then
|
if ((NoCaseTag = '<TH>') or (pos('<TH ', NoCaseTag) = 1)) and FInTable then
|
||||||
begin
|
begin
|
||||||
FInCell := true;
|
FInCell := true;
|
||||||
|
inc(FCurrCol);
|
||||||
FCellText := '';
|
FCellText := '';
|
||||||
end else
|
end else
|
||||||
if pos('<SPAN', NoCaseTag) = 1 then
|
if pos('<SPAN', NoCaseTag) = 1 then
|
||||||
@ -573,10 +595,11 @@ begin
|
|||||||
if FInCell then
|
if FInCell then
|
||||||
FInSpan := true;
|
FInSpan := true;
|
||||||
end else
|
end else
|
||||||
if pos('<A', NoCaseTag) = 1 then
|
if (pos('<A', NoCaseTag) = 1) and FInCell then
|
||||||
begin
|
begin
|
||||||
if FInCell then
|
FInA := true;
|
||||||
FInA := true
|
FAttrList.Parse(ActualTag);
|
||||||
|
ExtractHRef;
|
||||||
end else
|
end else
|
||||||
if (pos('<H', NoCaseTag) = 1) and (NoCaseTag[3] in ['1', '2', '3', '4', '5', '6']) then
|
if (pos('<H', NoCaseTag) = 1) and (NoCaseTag[3] in ['1', '2', '3', '4', '5', '6']) then
|
||||||
begin
|
begin
|
||||||
|
@ -6795,6 +6795,7 @@ begin
|
|||||||
inc(FLockCount);
|
inc(FLockCount);
|
||||||
try
|
try
|
||||||
ok := false;
|
ok := false;
|
||||||
|
AStream.Position := 0;
|
||||||
AReader.ReadFromStream(AStream);
|
AReader.ReadFromStream(AStream);
|
||||||
ok := true;
|
ok := true;
|
||||||
finally
|
finally
|
||||||
|
Loading…
Reference in New Issue
Block a user