From acaacf81d4d692dcd7f1f5ddfaa9caf9989cf3ca Mon Sep 17 00:00:00 2001 From: wp_xxyyzz Date: Mon, 3 Aug 2015 15:21:33 +0000 Subject: [PATCH] fpspreadsheet: New demo htmlread_http (reading html table from the internet). HTML reader now detects hyperlinks (only one per cell, though - like in xls). git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@4239 8e941d3f-bd1b-0410-a28a-d453659cc2b4 --- .../read_write/htmldemo/htmlread_http.lpi | 66 ++++++++++++++++ .../read_write/htmldemo/htmlread_http.lpr | 78 +++++++++++++++++++ components/fpspreadsheet/fpshtml.pas | 31 +++++++- components/fpspreadsheet/fpspreadsheet.pas | 1 + 4 files changed, 172 insertions(+), 4 deletions(-) create mode 100644 components/fpspreadsheet/examples/read_write/htmldemo/htmlread_http.lpi create mode 100644 components/fpspreadsheet/examples/read_write/htmldemo/htmlread_http.lpr diff --git a/components/fpspreadsheet/examples/read_write/htmldemo/htmlread_http.lpi b/components/fpspreadsheet/examples/read_write/htmldemo/htmlread_http.lpi new file mode 100644 index 000000000..a24466c24 --- /dev/null +++ b/components/fpspreadsheet/examples/read_write/htmldemo/htmlread_http.lpi @@ -0,0 +1,66 @@ + + + + + + + + + + + + + <UseAppBundle Value="False"/> + </General> + <VersionInfo> + <StringTable ProductVersion=""/> + </VersionInfo> + <BuildModes Count="1"> + <Item1 Name="default" Default="True"/> + </BuildModes> + <PublishOptions> + <Version Value="2"/> + <IgnoreBinaries Value="False"/> + <IncludeFileFilter Value="*.(pas|pp|inc|lfm|lpr|lrs|lpi|lpk|sh|xml)"/> + <ExcludeFileFilter Value="*.(bak|ppu|ppw|o|so);*~;backup"/> + </PublishOptions> + <RunParams> + <local> + <FormatVersion Value="1"/> + <LaunchingApplication PathPlusParams="\usr\X11R6\bin\xterm -T 'Lazarus Run Output' -e $(LazarusDir)\tools\runwait.sh $(TargetCmdLine)"/> + </local> + </RunParams> + <RequiredPackages Count="1"> + <Item1> + <PackageName Value="LazUtils"/> + </Item1> + </RequiredPackages> + <Units Count="1"> + <Unit0> + <Filename Value="htmlread_http.lpr"/> + <IsPartOfProject Value="True"/> + </Unit0> + </Units> + </ProjectOptions> + <CompilerOptions> + <Version Value="11"/> + <PathDelim Value="\"/> + <Target> + <Filename Value="htmlread_http"/> + </Target> + <SearchPaths> + <OtherUnitFiles Value="..\..\.."/> + <UnitOutputDirectory Value="..\..\lib\$(TargetCPU)-$(TargetOS)"/> + </SearchPaths> + <Parsing> + <SyntaxOptions> + <UseAnsiStrings Value="False"/> + </SyntaxOptions> + </Parsing> + <Linking> + <Debugging> + <DebugInfoType Value="dsDwarf2Set"/> + </Debugging> + </Linking> + </CompilerOptions> +</CONFIG> diff --git a/components/fpspreadsheet/examples/read_write/htmldemo/htmlread_http.lpr b/components/fpspreadsheet/examples/read_write/htmldemo/htmlread_http.lpr new file mode 100644 index 000000000..6406d943b --- /dev/null +++ b/components/fpspreadsheet/examples/read_write/htmldemo/htmlread_http.lpr @@ -0,0 +1,78 @@ +{ +htmlread_http.dpr + +Demonstrates how to read a html file from the internet using the fpspreadsheet library. +} + +program htmlread_http; + +{$mode delphi}{$H+} + +uses + Classes, SysUtils, LazUTF8, fphttpclient, fpstypes, fpsutils, fpspreadsheet, fpshtml; + +var + MyWorkbook: TsWorkbook; + MyWorksheet: TsWorksheet; + i: Integer; + CurCell: PCell; + stream: TMemoryStream; + +const +// url = 'http://unicode.e-workers.de/entities.php'; + url = 'http://www.freepascal.org/docs.var'; + +begin + stream := TMemoryStream.Create; + try + // Get file from the internet + with TFPHttpClient.Create(nil) do + try + Get(url, stream); + finally + Free; + end; + + // Parameters + HTMLParams.TableIndex := 0; + + // Create the spreadsheet + MyWorkbook := TsWorkbook.Create; + try + MyWorkbook.Options := MyWorkbook.Options + [boReadFormulas]; + MyWorkbook.ReadFromStream(stream, sfHTML); + + MyWorksheet := MyWorkbook.GetFirstWorksheet; + + // Write all cells with contents to the console + WriteLn(''); + WriteLn('Contents of the first worksheet of the file:'); + WriteLn(''); + + for CurCell in MyWorksheet.Cells do + begin + Write( + 'Row: ', CurCell^.Row, + ' Col: ', CurCell^.Col, + ' Value: ', UTF8ToConsole(MyWorkSheet.ReadAsUTF8Text(CurCell^.Row, CurCell^.Col)) + ); + if MyWorksheet.HasHyperlink(CurCell) then + Write(' Hyperlink: ', MyWorksheet.ReadHyperlink(CurCell).Target); + WriteLn; + end; + + finally + // Finalization + MyWorkbook.Free; + end; + finally + stream.Free; + end; + + {$IFDEF MSWINDOWS} + WriteLn; + WriteLn('Press ENTER to exit.'); + ReadLn; + {$ENDIF} +end. + diff --git a/components/fpspreadsheet/fpshtml.pas b/components/fpspreadsheet/fpshtml.pas index b542e4401..bed134042 100644 --- a/components/fpspreadsheet/fpshtml.pas +++ b/components/fpspreadsheet/fpshtml.pas @@ -31,6 +31,8 @@ type FCelLText: String; FAttrList: TsHTMLAttrList; FColSpan, FRowSpan: Integer; + FHRef: String; + procedure ExtractHRef; procedure ExtractMergedRange; procedure TagFoundHandler(NoCaseTag, ActualTag: string); procedure TextFoundHandler(AText: String); @@ -444,8 +446,17 @@ begin cell := FWorksheet.AddCell(ARow, ACol); // Merged cells - if (FColSpan > 0) or (FRowSpan > 0) then + if (FColSpan > 0) or (FRowSpan > 0) then begin FWorksheet.MergeCells(ARow, ACol, ARow + FRowSpan, ACol + FColSpan); + FRowSpan := 0; + FColSpan := 0; + end; + + // Hyperlink + if FHRef <> '' then begin + FWorksheet.WriteHyperlink(cell, FHRef); + FHRef := ''; + end; // Do not try to interpret the strings. --> everything is a LABEL cell. if not HTMLParams.DetectContentType then @@ -486,6 +497,16 @@ begin FWorksheet.WriteUTF8Text(cell, AText); end; +procedure TsHTMLReader.ExtractHRef; +var + idx: Integer; +begin + FHRef := ''; + idx := FAttrList.IndexOfName('href'); + if idx > -1 then + FHRef := FAttrList[idx].Value; +end; + procedure TsHTMLReader.ExtractMergedRange; var idx: Integer; @@ -566,6 +587,7 @@ begin if ((NoCaseTag = '<TH>') or (pos('<TH ', NoCaseTag) = 1)) and FInTable then begin FInCell := true; + inc(FCurrCol); FCellText := ''; end else if pos('<SPAN', NoCaseTag) = 1 then @@ -573,10 +595,11 @@ begin if FInCell then FInSpan := true; end else - if pos('<A', NoCaseTag) = 1 then + if (pos('<A', NoCaseTag) = 1) and FInCell then begin - if FInCell then - FInA := true + FInA := true; + FAttrList.Parse(ActualTag); + ExtractHRef; end else if (pos('<H', NoCaseTag) = 1) and (NoCaseTag[3] in ['1', '2', '3', '4', '5', '6']) then begin diff --git a/components/fpspreadsheet/fpspreadsheet.pas b/components/fpspreadsheet/fpspreadsheet.pas index 3fe7d95ed..d72d613c0 100755 --- a/components/fpspreadsheet/fpspreadsheet.pas +++ b/components/fpspreadsheet/fpspreadsheet.pas @@ -6795,6 +6795,7 @@ begin inc(FLockCount); try ok := false; + AStream.Position := 0; AReader.ReadFromStream(AStream); ok := true; finally