fpsreadsheet: Introduce "virtual writing mode" where the writer does not get its data from the spreadsheet, but from an event ("OnNeedCellData"). Introduce stream switching for xlsxooxml. Both feature allow to write HUGE spreadsheet files. Add example "test_virtualmode".

git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@3306 8e941d3f-bd1b-0410-a28a-d453659cc2b4
This commit is contained in:
wp_xxyyzz 2014-07-10 20:43:46 +00:00
parent ddd6902edf
commit e2391c142b
5 changed files with 388 additions and 61 deletions

View File

@ -0,0 +1,98 @@
<?xml version="1.0" encoding="UTF-8"?>
<CONFIG>
<ProjectOptions>
<Version Value="9"/>
<PathDelim Value="\"/>
<General>
<Flags>
<MainUnitHasCreateFormStatements Value="False"/>
<MainUnitHasTitleStatement Value="False"/>
</Flags>
<SessionStorage Value="InProjectDir"/>
<MainUnit Value="0"/>
<Title Value="test_virtualmode"/>
<UseAppBundle Value="False"/>
<ResourceType Value="res"/>
</General>
<i18n>
<EnableI18N LFM="False"/>
</i18n>
<VersionInfo>
<StringTable ProductVersion=""/>
</VersionInfo>
<BuildModes Count="2">
<Item1 Name="Debug" Default="True"/>
<Item2 Name="Release">
<CompilerOptions>
<Version Value="11"/>
<PathDelim Value="\"/>
<SearchPaths>
<IncludeFiles Value="$(ProjOutDir)"/>
</SearchPaths>
<CodeGeneration>
<SmartLinkUnit Value="True"/>
</CodeGeneration>
<Linking>
<Debugging>
<StripSymbols Value="True"/>
</Debugging>
<LinkSmart Value="True"/>
<Options>
<Win32>
<GraphicApplication Value="True"/>
</Win32>
</Options>
</Linking>
</CompilerOptions>
</Item2>
</BuildModes>
<PublishOptions>
<Version Value="2"/>
</PublishOptions>
<RunParams>
<local>
<FormatVersion Value="1"/>
</local>
</RunParams>
<RequiredPackages Count="1">
<Item1>
<PackageName Value="laz_fpspreadsheet"/>
</Item1>
</RequiredPackages>
<Units Count="1">
<Unit0>
<Filename Value="test_virtualmode.lpr"/>
<IsPartOfProject Value="True"/>
<UnitName Value="test_virtualmode"/>
</Unit0>
</Units>
</ProjectOptions>
<CompilerOptions>
<Version Value="11"/>
<PathDelim Value="\"/>
<SearchPaths>
<IncludeFiles Value="$(ProjOutDir)"/>
</SearchPaths>
<CodeGeneration>
<SmartLinkUnit Value="True"/>
</CodeGeneration>
<Linking>
<Debugging>
<DebugInfoType Value="dsDwarf2Set"/>
</Debugging>
</Linking>
</CompilerOptions>
<Debugging>
<Exceptions Count="3">
<Item1>
<Name Value="EAbort"/>
</Item1>
<Item2>
<Name Value="ECodetoolError"/>
</Item2>
<Item3>
<Name Value="EFOpenError"/>
</Item3>
</Exceptions>
</Debugging>
</CONFIG>

View File

@ -0,0 +1,82 @@
program test_virtualmode;
{$mode objfpc}{$H+}
uses
{$IFDEF UNIX}{$IFDEF UseCThreads}
cthreads,
{$ENDIF}{$ENDIF}
Classes, laz_fpspreadsheet,
{ you can add units after this }
SysUtils, variants, fpspreadsheet, xlsxooxml;
type
TDataProvider = class
procedure NeedCellData(Sender: TObject; ARow,ACol: Cardinal; var AData: variant);
end;
procedure TDataProvider.NeedCellData(Sender: TObject; ARow, ACol: Cardinal;
var AData: variant);
{ This is just a sample using random data. Normally, in case of a database,
you would read a record and return its field values, such as:
Dataset.Fields[ACol].AsVariant := AData;
if ACol = Dataset.FieldCount then Dataset.Next;
// NOTE: you have to take care of advancing the database cursor!
}
var
s: String;
n: Double;
begin
if odd(random(10)) then begin
s := Format('R=%d-C=%d', [ARow, ACol]);
AData := s;
end else
AData := 10000*ARow + ACol;
// you can use the OnNeedData also to provide feedback on how the process
// progresses.
if (ACol = 0) and (ARow mod 1000 = 0) then
WriteLn('Writing row ', ARow, '...');
end;
var
workbook: TsWorkbook;
worksheet: TsWorksheet;
dataprovider: TDataProvider;
begin
dataprovider := TDataProvider.Create;
try
workbook := TsWorkbook.Create;
try
worksheet := workbook.AddWorksheet('Sheet1');
{ These are the essential commands to activate virtual mode: }
workbook.WritingOptions := [woVirtualMode, woSaveMemory];
// woSaveMemory can be omitted, but is essential for large files: it causes
// writing temporaray data to a file stream instead of to a memory stream.
workbook.VirtualRowCount := 10000;
workbook.VirtualColCount := 100;
// These two numbers define the size of virtual spreadsheet.
// In case of a database, VirtualRowCount is the RecordCount, VirtualColCount
// the number of fields to be written to the spreadsheet file
workbook.OnNeedCellData := @dataprovider.NeedCellData;
// This links the worksheet to the method from which it gets the
// data to write.
// In case of a database, you would open the dataset before calling this:
workbook.WriteToFile('test_virtual.xlsx', sfOOXML, true);
finally
workbook.Free;
end;
WriteLn('Press [ENTER] to quit...');
ReadLn;
finally
dataprovider.Free;
end;
end.

View File

@ -442,12 +442,15 @@ type
{@@ Pointer to a TCol record }
PCol = ^TCol;
{@@ User interface options:
{@@ WSorksheet user interface options:
@param soShowGridLines Show or hide the grid lines in the spreadsheet
@param soShowHeaders Show or hide the column or row headers of the spreadsheet
@param soHasFrozenPanes If set a number of rows and columns of the spreadsheet
is fixed and does not scroll. The number is defined by
LeftPaneWidth and TopPaneHeight. }
LeftPaneWidth and TopPaneHeight.
@param soCalcBeforeSaving Calculates formulas before saving the file. Otherwise
there are no results when the file is loaded back by
fpspreadsheet. }
TsSheetOption = (soShowGridLines, soShowHeaders, soHasFrozenPanes,
soCalcBeforeSaving);
@ -689,6 +692,23 @@ type
property OnChangeFont: TsCellEvent read FOnChangeFont write FOnChangeFont;
end;
{@@
Options considered when writing a workbook
@param woVirtualMode If in virtual mode date are not taken from cells
when a spreadsheet is written to file, but are
provided by means of the event OnNeedCellData.
@param woSaveMemory When this option is set temporary files are not
written to memory streams but to file streams using
temporary files. }
TsWorkbookWritingOption = (woVirtualMode, woSaveMemory);
{@@
Options considered when writing a workbook }
TsWorkbookWritingOptions = set of TsWorkbookWritingOption;
TsWorkbookNeedCellDataEvent = procedure(Sender: TObject; ARow, ACol: Cardinal;
var AValue: variant) of object;
{@@
The workbook contains the worksheets and provides methods for reading from
@ -706,8 +726,17 @@ type
FReadFormulas: Boolean;
FDefaultColWidth: Single; // in "characters". Excel uses the width of char "0" in 1st font
FDefaultRowHeight: Single; // in "character heights", i.e. line count
FVirtualColCount: Cardinal;
FVirtualRowCount: Cardinal;
FWriting: Boolean;
FWritingOptions: TsWorkbookWritingOptions;
FOnNeedCellData: TsWorkbookNeedCellDataEvent;
FFileName: String;
{ Setter/Getter }
procedure SetVirtualColCount(AValue: Cardinal);
procedure SetVirtualRowCount(AValue: Cardinal);
{ Internal methods }
procedure PrepareBeforeSaving;
procedure RemoveWorksheetsCallback(data, arg: pointer);
@ -787,6 +816,13 @@ type
precaution since formulas not correctly implemented by fpspreadsheet
could crash the reading operation. }
property ReadFormulas: Boolean read FReadFormulas write FReadFormulas;
property VirtualColCount: cardinal read FVirtualColCount write SetVirtualColCount;
property VirtualRowCount: cardinal read FVirtualRowCount write SetVirtualRowCount;
property WritingOptions: TsWorkbookWritingOptions read FWritingOptions write FWritingOptions;
{@@ This event allows to provide external cell data for writing to file,
standard cells are ignored. Intended for converting large database files
to s spreadsheet format. Requires WritingOption woVirtualMode to be set. }
property OnNeedCellData: TsWorkbookNeedCellDataEvent read FOnNeedCellData write FOnNeedCellData;
end;
{@@ Contents of a number format record }
@ -934,18 +970,18 @@ type
procedure WriteCellCallback(ACell: PCell; AStream: TStream);
procedure WriteCellsToStream(AStream: TStream; ACells: TAVLTree);
{ Record writing methods }
{@@ abstract method for writing a blank cell. Must be overridden by descendent classes. }
{@@ Abstract method for writing a blank cell. Must be overridden by descendent classes. }
procedure WriteBlank(AStream: TStream; const ARow, ACol: Cardinal; ACell: PCell); virtual; abstract;
{@@ abstract method for a date/time value to a cell. Must be overridden by descendent classes. }
{@@ Abstract method for writing a date/time value to a cell. Must be overridden by descendent classes. }
procedure WriteDateTime(AStream: TStream; const ARow, ACol: Cardinal; const AValue: TDateTime; ACell: PCell); virtual; abstract;
{@@ abstract method for a formula to a cell. Must be overridden by descendent classes. }
{@@ Abstract method for writing a formula to a cell. Must be overridden by descendent classes. }
procedure WriteFormula(AStream: TStream; const ARow, ACol: Cardinal; const AFormula: TsFormula; ACell: PCell); virtual;
{@@ abstract method for am RPN formula to a cell. Must be overridden by descendent classes. }
{@@ Abstract method for writing an RPN formula to a cell. Must be overridden by descendent classes. }
procedure WriteRPNFormula(AStream: TStream; const ARow, ACol: Cardinal;
const AFormula: TsRPNFormula; ACell: PCell); virtual;
{@@ abstract method for a string to a cell. Must be overridden by descendent classes. }
{@@ Abstract method for writing a string to a cell. Must be overridden by descendent classes. }
procedure WriteLabel(AStream: TStream; const ARow, ACol: Cardinal; const AValue: string; ACell: PCell); virtual; abstract;
{@@ abstract method for a number value to a cell. Must be overridden by descendent classes. }
{@@ Abstract method for writing a number value to a cell. Must be overridden by descendent classes. }
procedure WriteNumber(AStream: TStream; const ARow, ACol: Cardinal; const AValue: double; ACell: PCell); virtual; abstract;
public
@ -4156,6 +4192,18 @@ begin
end;
end;
procedure TsWorkbook.SetVirtualColCount(AValue: Cardinal);
begin
if FWriting then exit;
FVirtualColCount := AValue;
end;
procedure TsWorkbook.SetVirtualRowCount(AValue: Cardinal);
begin
if FWriting then exit;
FVirtualRowCount := AValue;
end;
{@@
Writes the document to a file. If the file doesn't exist, it will be created.
@ -4173,9 +4221,11 @@ begin
AWriter := CreateSpreadWriter(AFormat);
try
FFileName := AFileName;
FWriting := true;
PrepareBeforeSaving;
AWriter.WriteToFile(AFileName, AOverwriteExisting);
finally
FWriting := false;
AWriter.Free;
end;
end;
@ -4213,9 +4263,11 @@ var
begin
AWriter := CreateSpreadWriter(AFormat);
try
FWriting := true;
PrepareBeforeSaving;
AWriter.WriteToStream(AStream);
finally
FWriting := false;
AWriter.Free;
end;
end;

View File

@ -34,8 +34,6 @@ type
}
TFormatDateTimeOptions = set of TFormatDateTimeOption;
TsStreamClass = class of TStream;
const
{@@ Date formatting string for unambiguous date/time display as strings
Can be used for text output when date/time cell support is not available }

View File

@ -66,10 +66,10 @@ type
procedure CreateNumFormatList; override;
procedure CreateStreams;
procedure DestroyStreams;
procedure ResetStreams;
function GetStyleIndex(ACell: PCell): Cardinal;
protected
{ Streams with the contents of files }
FStreamClass: TsStreamClass;
FSContentTypes: TStream;
FSRelsRels: TStream;
FSWorkbook: TStream;
@ -101,6 +101,9 @@ type
implementation
uses
variants;
const
{ OOXML general XML constants }
XML_HEADER = '<?xml version="1.0" encoding="utf-8" ?>';
@ -353,12 +356,19 @@ var
LCell: TCell;
AVLNode: TAVLTreeNode;
CellPosText: string;
// S: String;
value: Variant;
fn: String;
begin
FCurSheetNum := Length(FSSheets);
SetLength(FSSheets, FCurSheetNum + 1);
FSSheets[FCurSheetNum] := FStreamClass.Create; // create the stream
// Create the stream
if (woSaveMemory in Workbook.WritingOptions) then begin
fn := IncludeTrailingPathDelimiter(GetTempDir);
fn := GetTempFileName(fn, Format('fpsSH%d-', [FCurSheetNum+1]));
FSSheets[FCurSheetNum] := TFileStream.Create(fn, fmCreate);
end else
FSSheets[FCurSheetNum] := TMemoryStream.Create;
// Header
AppendToStream(FSSheets[FCurSheetNum],
@ -374,31 +384,72 @@ begin
AppendToStream(FSSheets[FCurSheetNum],
'<sheetData>');
// The cells need to be written in order, row by row, cell by cell
LastColIndex := CurSheet.GetLastColIndex;
for r := 0 to CurSheet.GetLastRowIndex do begin
AppendToStream(FSSheets[FCurSheetNum], Format(
'<row r="%d" spans="1:%d">', [r+1, LastColIndex+1]));
// Write cells belonging to this row.
for c := 0 to LastColIndex do
begin
LCell.Row := r;
LCell.Col := c;
AVLNode := CurSheet.Cells.Find(@LCell);
if Assigned(AVLNode) then
WriteCellCallback(PCell(AVLNode.Data), nil)
else
begin
if (woVirtualMode in Workbook.WritingOptions) and Assigned(Workbook.OnNeedCellData)
then begin
for r := 0 to Workbook.VirtualRowCount-1 do begin
AppendToStream(FSSheets[FCurSheetNum], Format(
'<row r="%d" spans="1:%d">', [r+1, Workbook.VirtualRowCount]));
for c := 0 to Workbook.VirtualColCount-1 do begin
CellPosText := CurSheet.CellPosToText(r, c);
AppendToStream(FSSheets[FCurSheetNum], Format(
'<c r="%s">', [CellPosText]),
'<v></v>',
'</c>');
value := varNull;
Workbook.OnNeedCellData(Workbook, r, c, value);
if VarIsNull(value) then
AppendToStream(FSSheets[FCurSheetNum], Format(
'<c r="%s"', [CellPosText]),
'<v></v>',
'</c>')
else begin
lCell.Row := r;
lCell.Col := c;
if VarIsNumeric(value) then begin
lCell.ContentType := cctNumber;
lCell.NumberValue := value;
end
{
else if VarIsDateTime(value) then begin
lCell.ContentType := cctNumber;
lCell.DateTimeValue := value;
end
}
else if VarIsStr(value) then begin
lCell.ContentType := cctUTF8String;
lCell.UTF8StringValue := VarToStrDef(value, '');
end else
if VarIsBool(value) then begin
lCell.ContentType := cctBool;
lCell.BoolValue := value <> 0;
end;
WriteCellCallback(@lCell, nil);
end;
end;
AppendToStream(FSSheets[FCurSheetNum],
'</row>');
end;
end else
begin
// The cells need to be written in order, row by row, cell by cell
LastColIndex := CurSheet.GetLastColIndex;
for r := 0 to CurSheet.GetLastRowIndex do begin
AppendToStream(FSSheets[FCurSheetNum], Format(
'<row r="%d" spans="1:%d">', [r+1, LastColIndex+1]));
// Write cells belonging to this row.
for c := 0 to LastColIndex do begin
LCell.Row := r;
LCell.Col := c;
AVLNode := CurSheet.Cells.Find(@LCell);
if Assigned(AVLNode) then
WriteCellCallback(PCell(AVLNode.Data), nil)
else begin
CellPosText := CurSheet.CellPosToText(r, c);
AppendToStream(FSSheets[FCurSheetNum], Format(
'<c r="%s">', [CellPosText]),
'<v></v>',
'</c>');
end;
end;
AppendToStream(FSSheets[FCurSheetNum],
'</row>');
end;
AppendToStream(FSSheets[FCurSheetNum],
'</row>');
end;
// Footer
@ -417,8 +468,6 @@ end;
constructor TsSpreadOOXMLWriter.Create(AWorkbook: TsWorkbook);
begin
inherited Create(AWorkbook);
FStreamClass := TMemoryStream;
FPointSeparatorSettings := DefaultFormatSettings;
FPointSeparatorSettings.DecimalSeparator := '.';
end;
@ -430,18 +479,29 @@ begin
end;
{ Creates the streams for the individual data files. Will be zipped into a
single xlsx file.
We use the variable FStreamClass here to be able to easily switch from a
memory stream to a file stream for very big files. }
single xlsx file. }
procedure TsSpreadOOXMLWriter.CreateStreams;
var
dir: String;
begin
FSContentTypes := FStreamClass.Create;
FSRelsRels := FStreamClass.Create;
FSWorkbookRels := FStreamClass.Create;
FSWorkbook := FStreamClass.Create;
FSStyles := FStreamClass.Create;
FSSharedStrings := FStreamClass.Create;
FSSharedStrings_complete := FStreamClass.Create;
if (woSaveMemory in Workbook.WritingOptions) then begin
dir := IncludeTrailingPathDelimiter(GetTempDir);
FSContentTypes := TFileStream.Create(GetTempFileName(dir, 'fpsCT'), fmCreate);
FSRelsRels := TFileStream.Create(GetTempFileName(dir, 'fpsRR'), fmCreate);
FSWorkbookRels := TFileStream.Create(GetTempFileName(dir, 'fpsWBR'), fmCreate);
FSWorkbook := TFileStream.Create(GetTempFileName(dir, 'fpsWB'), fmCreate);
FSStyles := TFileStream.Create(GetTempFileName(dir, 'fpsSTY'), fmCreate);
FSSharedStrings := TFileStream.Create(GetTempFileName(dir, 'fpsSST'), fmCreate);
FSSharedStrings_complete := TFileStream.Create(GetTempFileName(dir, 'fpsSSTc'), fmCreate);
end else begin;
FSContentTypes := TMemoryStream.Create;
FSRelsRels := TMemoryStream.Create;
FSWorkbookRels := TMemoryStream.Create;
FSWorkbook := TMemoryStream.Create;
FSStyles := TMemoryStream.Create;
FSSharedStrings := TMemoryStream.Create;
FSSharedStrings_complete := TMemoryStream.Create;
end;
// FSSheets will be created when needed.
end;
@ -449,20 +509,62 @@ end;
procedure TsSpreadOOXMLWriter.DestroyStreams;
var
i: Integer;
procedure DestroyStream(AStream: TStream);
var
fn: String;
begin
if AStream is TFileStream then begin
fn := TFileStream(AStream).Filename;
DeleteFile(fn);
end;
AStream.Free;
end;
begin
FSContentTypes.Free;
FSRelsRels.Free;
FSWorkbookRels.Free;
FSWorkbook.Free;
FSStyles.Free;
FSSharedStrings.Free;
FSSharedStrings_complete.Free;
DestroyStream(FSContentTypes);
DestroyStream(FSRelsRels);
DestroyStream(FSWorkbookRels);
DestroyStream(FSWorkbook);
DestroyStream(FSStyles);
DestroyStream(FSSharedStrings);
DestroyStream(FSSharedStrings_complete);
for i := 0 to Length(FSSheets) - 1 do
FSSheets[i].Free;
DestroyStream(FSSheets[i]);
SetLength(FSSheets, 0);
end;
{ Is called before zipping the individual file parts. Rewinds the memory streams,
or, if the stream are file streams, the streams are closed and re-opened for
reading. }
procedure TsSpreadOOXMLWriter.ResetStreams;
var
i: Integer;
procedure ResetStream(AStream: TStream);
var
fn: String;
begin
if AStream is TFileStream then begin
fn := TFileStream(AStream).FileName;
AStream.Free;
AStream := TFileStream.Create(fn, fmOpenRead);
end else
AStream.Position := 0;
end;
begin
ResetStream(FSContentTypes);
ResetStream(FSRelsRels);
ResetStream(FSWorkbookRels);
ResetStream(FSWorkbook);
ResetStream(FSStyles);
ResetStream(FSSharedStrings_complete);
for i:=0 to Length(FSSheets) - 1 do
ResetStream(FSSheets[i]);
end;
{
Writes a string to a file. Helper convenience method.
}
@ -526,12 +628,7 @@ begin
end;
// Stream position must be at beginning, it was moved to end during adding of xml strings.
FSContentTypes.Position := 0;
FSRelsRels.Position := 0;
FSWorkbookRels.Position := 0;
FSWorkbook.Position := 0;
FSStyles.Position := 0;
FSSharedStrings_complete.Position := 0;
ResetStreams;
FZip.SaveToStream(AStream);