LCL, grids, fix loading CSV data with multiline fields, issue #27443

git-svn-id: trunk@48780 -
This commit is contained in:
jesus 2015-04-20 17:50:27 +00:00
parent 0406b9f4ce
commit a95002e0bf
5 changed files with 333 additions and 121 deletions

1
.gitattributes vendored
View File

@ -2784,6 +2784,7 @@ components/lazutils/lazutils.lpk svneol=native#text/plain
components/lazutils/lazutils.pas svneol=native#text/pascal components/lazutils/lazutils.pas svneol=native#text/pascal
components/lazutils/lazutilsstrconsts.pas svneol=native#text/pascal components/lazutils/lazutilsstrconsts.pas svneol=native#text/pascal
components/lazutils/lconvencoding.pas svneol=native#text/pascal components/lazutils/lconvencoding.pas svneol=native#text/pascal
components/lazutils/lcsvutils.pas svneol=native#text/pascal
components/lazutils/masks.pas svneol=native#text/pascal components/lazutils/masks.pas svneol=native#text/pascal
components/lazutils/paswstring.pas svneol=native#text/pascal components/lazutils/paswstring.pas svneol=native#text/pascal
components/lazutils/ttcache.pas svneol=native#text/pascal components/lazutils/ttcache.pas svneol=native#text/pascal

View File

@ -16,7 +16,7 @@
<Description Value="Useful units for Lazarus packages."/> <Description Value="Useful units for Lazarus packages."/>
<License Value="Modified LGPL-2"/> <License Value="Modified LGPL-2"/>
<Version Major="1"/> <Version Major="1"/>
<Files Count="75"> <Files Count="76">
<Item1> <Item1>
<Filename Value="laz2_dom.pas"/> <Filename Value="laz2_dom.pas"/>
<UnitName Value="Laz2_DOM"/> <UnitName Value="Laz2_DOM"/>
@ -318,6 +318,10 @@
<Filename Value="lazutilities.pas"/> <Filename Value="lazutilities.pas"/>
<UnitName Value="lazutilities"/> <UnitName Value="lazutilities"/>
</Item75> </Item75>
<Item76>
<Filename Value="lcsvutils.pas"/>
<UnitName Value="lcsvutils"/>
</Item76>
</Files> </Files>
<LazDoc Paths="../../docs/xml/lazutils"/> <LazDoc Paths="../../docs/xml/lazutils"/>
<i18n> <i18n>

View File

@ -16,7 +16,7 @@ uses
TTProfile, TTRASTER, TTTables, TTTypes, EasyLazFreeType, LazLoggerBase, TTProfile, TTRASTER, TTTables, TTTypes, EasyLazFreeType, LazLoggerBase,
LazLoggerDummy, LazClasses, LazFreeTypeFontCollection, LazConfigStorage, LazLoggerDummy, LazClasses, LazFreeTypeFontCollection, LazConfigStorage,
UTF8Process, laz2_xpath, DictionaryStringList, LazLoggerProfiling, FPCAdds, UTF8Process, laz2_xpath, DictionaryStringList, LazLoggerProfiling, FPCAdds,
LazUtilities, LazarusPackageIntf; LazUtilities, lcsvutils, LazarusPackageIntf;
implementation implementation

View File

@ -0,0 +1,266 @@
unit lcsvutils;
{$mode objfpc}{$H+}
{$modeswitch nestedprocvars}
interface
uses
Classes, SysUtils;
type
TCSVRecordProc = procedure(Fields: TStringList) is nested;
TCSVEncoding = (ceAuto, ceUTF8, ceUTF16, ceUTF16be);
procedure LoadFromCSVStream(AStream:TStream; AProc: TCSVRecordProc;
ADelimiter:Char=','; CSVEncoding:TCSVEncoding=ceAuto);
implementation
const
BUFSIZE=1024;
MAXGROW = 1 shl 29;
type
TSoc = set of char;
procedure LoadFromCSVStream(AStream: TStream; AProc: TCSVRecordProc;
ADelimiter:Char; CSVEncoding: TCSVEncoding);
var
Buffer, curWord: ansistring;
BytesRead, BufLen, I, BufDelta: Longint;
leadPtr, tailPtr, wordPtr, X:Pchar;
Line: TStringList = nil;
function SkipSet(aSet: TSoc): boolean;
begin
while (leadPtr<tailPtr) and (leadPtr^ in aSet) do Inc(leadPtr);
result := leadPtr<tailPtr;
end;
function FindSet(aSet: TSoc): boolean;
begin
while (leadPtr<tailPtr) and (not (leadPtr^ in ASet)) do Inc(leadPtr);
result := leadPtr<tailPtr;
end;
procedure NotifyLine;
begin
if (Line<>nil) and (Line.Count>0) then begin
AProc(Line);
Line.Clear;
end;
end;
procedure StorePart;
var
Len: Integer;
begin
Len := Length(curWord);
SetLength(curWord, Len+(leadPtr-wordPtr));
Move(wordPtr^, curWord[Len+1], (leadPtr-wordPtr));
Inc(leadPtr);
wordPtr := leadPtr;
end;
procedure StoreWord;
begin
StorePart;
if Line=nil then
Line := TStringList.Create;
Line.Add(curWord);
curWord := '';
end;
procedure StoreLine;
begin
StoreWord;
NotifyLine;
end;
procedure ProcessEndline;
var
le: PChar;
begin
le := leadPtr;
StoreLine;
if leadPtr>=tailPtr then
exit;
if (le^=#13) and (leadPtr^=#10) then
Inc(leadPtr);
wordPtr := leadPtr;
end;
procedure ProcessQuote;
var
endQuote,endField: pchar;
isDelimiter: boolean;
begin
// got a valid opening quote
Inc(leadPtr);
wordPtr := leadPtr;
// look for valid ending quote
while leadPtr<tailPtr do begin
if FindSet(['"']) then begin
// is this an encoded quote?
if (leadPtr+1)^='"' then begin
// yes, store part and keep looking
inc(leadPtr); // points to second quote
StorePart; // add to current word including the first "
end else begin
// try to match: "\s*(,|$|EOF)
endQuote := leadPtr; // points to valid closing quote (if found later)
Inc(leadPtr); // points to \s if exists
SkipSet([' ']); // skip \s if exists
endField := leadPtr; // points to field terminator
if (leadPtr>=tailPtr) or (leadPtr^ in [ADelimiter, #10, #13]) then begin
isDelimiter := (leadPtr<tailPtr) and (leadPtr^=ADelimiter);
if leadPtr<tailPtr then begin
if (leadPtr^=#13) and ((leadPtr+1)<tailPtr) and ((leadPtr+1)^=#10) then
Inc(endField); // point to second byte of line ending
Inc(endField); // skip last byte of line ending or delimiter
end;
leadPtr := endQuote; // leadPtr points to closing quote
if isDelimiter then
StoreWord
else
StoreLine;
leadPtr := endField; // restore next position
wordPtr := leadPtr;
break;
end;
end;
end;
end;
if leadPtr<>wordPtr then begin
StoreLine;
wordPtr := leadPtr;
end;
end;
procedure ConvertToUTF16;
var
n: Integer;
u: pchar;
ch: char;
begin
n := (tailPtr-leadPtr) div 2;
u := leadPtr;
while n>0 do begin
ch := u^;
u^ := (u+1)^;
(u+1)^ := ch;
inc(u, 2);
dec(n);
end;
end;
procedure ConvertEncoding;
var
Pw: PWidechar;
begin
if (CSVEncoding=ceAuto) and (BufLen>1) then begin
if (leadPtr[0]=#$FF) and (leadPtr[1]=#$FE) then begin
inc(leadPtr,2); // skip little endian UTF-16 BOM
CSVEncoding := ceUTF16;
end else
if (leadPtr[0]=#$FE) and (leadPtr[1]=#$FF) then begin
inc(leadPtr,2); // skip big endian UTF-16 BOM
CSVEncoding := ceUTF16be;
end else
if (leadPtr[0]<>#$00) and (leadPtr[1]=#$00) then // quick guess
CSVEncoding := ceUTF16
else
if (leadPtr[0]=#$00) and (leadPtr[1]<>#$00) then // quick guess
CSVEncoding := ceUTF16be
end;
if (CSVEncoding=ceAuto) and (BufLen>2) then begin
if (leadPtr[0]=#$EF) and (leadPtr[1]=#$BB) and (leadPtr[2]=#$BF) then
inc(leadPtr,3); // skip UTF-8 BOM
end;
if CSVEncoding=ceAuto then
CSVEncoding := ceUTF8;
case CSVEncoding of
ceUTF16, ceUTF16be:
begin
if CSVEncoding=ceUTF16be then
ConvertToUTF16;
Pw := pointer(leadPtr);
Buffer := UTF8Encode(widestring(Pw));
leadPtr := @Buffer[1];
end;
end;
end;
begin
if AProc=nil then
exit;
// read buffer ala fpc tstrings
Buffer:='';
BufLen:=0;
I:=1;
repeat
BufDelta:=BUFSIZE*I;
SetLength(Buffer,BufLen+BufDelta);
BytesRead:=AStream.Read(Buffer[BufLen+1],BufDelta);
inc(BufLen,BufDelta);
If I<MAXGROW then
I:=I shl 1;
until BytesRead<>BufDelta;
BufLen := BufLen-BufDelta+BytesRead;
SetLength(Buffer, BufLen);
if BufLen=0 then
exit;
curWord := '';
leadPtr := @Buffer[1];
tailPtr := leadPtr + BufLen;
ConvertEncoding;
try
wordPtr := leadPtr; // wordPtr always points to starting word or part
tailPtr := leadPtr + Length(Buffer); // tailPtr is an end of buffer marker
while leadPtr<tailPtr do begin
// skip initial spaces
SkipSet([' ']);
X := leadPtr;
// find next marker
if not FindSet([ADelimiter, '"', #10, #13]) then
break;
case leadPtr^ of
'"':
begin
// is the first char?
if leadPtr=X then
ProcessQuote
else begin
// got an invalid open quote, sync until next delimiter, $ or EOB
FindSet([ADelimiter, #10, #13]);
if leadPtr^=ADelimiter then
StoreWord
else
ProcessEndline;
end;
end;
#10, #13:
ProcessEndline;
else
if leadPtr^=ADelimiter then
StoreWord
end;
end;
if wordPtr<>leadPtr then
StoreWord;
NotifyLine;
finally
Line.Free;
SetLength(Buffer,0);
end;
end;
end.

View File

@ -27,6 +27,7 @@ email: jesusrmx@yahoo.com.mx
unit Grids; unit Grids;
{$mode objfpc}{$H+} {$mode objfpc}{$H+}
{$modeswitch nestedprocvars}
{$define NewCols} {$define NewCols}
interface interface
@ -35,7 +36,7 @@ uses
Types, Classes, SysUtils, Math, Maps, LCLStrConsts, LCLProc, LCLType, LCLIntf, Types, Classes, SysUtils, Math, Maps, LCLStrConsts, LCLProc, LCLType, LCLIntf,
FileUtil, FPCanvas, Controls, GraphType, Graphics, Forms, DynamicArray, FileUtil, FPCanvas, Controls, GraphType, Graphics, Forms, DynamicArray,
LMessages, StdCtrls, LResources, MaskEdit, Buttons, Clipbrd, Themes, LMessages, StdCtrls, LResources, MaskEdit, Buttons, Clipbrd, Themes,
LazUTF8, LazUtf8Classes, Laz2_XMLCfg; // <-- replaces XMLConf (part of FPC libs) LazUTF8, LazUtf8Classes, Laz2_XMLCfg, LCSVUtils; // <-- replaces XMLConf (part of FPC libs)
const const
//GRIDFILEVERSION = 1; // Original //GRIDFILEVERSION = 1; // Original
@ -10721,129 +10722,69 @@ end;
procedure TCustomStringGrid.LoadFromCSVStream(AStream: TStream; procedure TCustomStringGrid.LoadFromCSVStream(AStream: TStream;
ADelimiter: Char=','; WithHeader: boolean=true); ADelimiter: Char=','; WithHeader: boolean=true);
Procedure ParseDelimitedText(const AValue: string; const ADelimiter, AQuoteChar: Char; TS: TStrings);
{ Helper function for LoadFromCSVFile
Adapted from TStrings.SetDelimitedText
- Only ADelimiter is used for separating the fields and not other whitespace
- If a field is quoted and it contains AQuoteChar, this occurrence is treated as a literal part of the field
- As per RFC4180 whitespace is considered to be part of the field, even if the field is not quoted
- Trailing spaces of a quoted field are trimmed
Example with ADelimiter = ',' and AQuoteChar = '"'
AValue = '111,2,22,333' -> 111|2|22|333
AValue = '111,"2,22",333' -> 111|2,22|333
AValue = '111, 222 ,333' -> 111| 222 |333
}
var i,j:integer;
aNotFirst:boolean;
begin
TS.BeginUpdate;
i:=1;
j:=1;
aNotFirst:=false;
try
TS.Clear;
while i<=length(AValue) do
begin
// skip delimiter
if aNotFirst and (i<=length(AValue)) and (AValue[i]=ADelimiter) then inc(i);
// read next string
if i<=length(AValue) then
begin
if AValue[i]=AQuoteChar then
begin
// next string is quoted
j:=i+1;
while (j<=length(AValue)) and
( (AValue[j]<>AQuoteChar) or
( (j+1<=length(AValue)) and (AValue[j+1]=AQuoteChar) ) ) do
begin
if (j<=length(AValue)) and (AValue[j]=AQuoteChar) then
inc(j,2)
else
inc(j);
end;
// j is position of closing quote
TS.Add( StringReplace (Copy(AValue,i+1,j-i-1),
AQuoteChar+AQuoteChar,AQuoteChar, [rfReplaceAll]));
i:=j+1;
end
else
begin
// next string is not quoted
j:=i;
while (j<=length(AValue)) and
//basically any other character means some invalid text
((Ord(AValue[j])>=Ord(' ')) or (AValue[j] in [#10,#13,#9,#0])) and
(AValue[j]<>ADelimiter) do inc(j);
TS.Add(Copy(AValue,i,j-i));
i:=j;
end;
end
else
begin
if aNotFirst then TS.Add('');
end;
// skip trailing spaces of a quoted field
// not really sure if that is RFC4180 compliant
while (i<=length(AValue)) and (Ord(AValue[i])<=Ord(' ')) and (AValue[i] <> ADelimiter) do inc(i);
aNotFirst:=true;
end; //end of string
finally
TS.EndUpdate;
end;
end;//ParseDelimitedText
var var
Lines, HeaderL: TStringList; MaxCols: Integer = 0;
i, j, StartRow: Integer; MaxRows: Integer = 0;
procedure NewRecord(Fields:TStringlist);
var
i, aRow: Integer;
begin
if Fields.Count=0 then
exit;
Inc(MaxRows);
if (MaxRows=1) then
// first record
if not WithHeader then
exit; // ... no header wanted
// make sure we have enough columns
if MaxCols<Fields.Count then
MaxCols := Fields.Count;
if Columns.Enabled then begin
while Columns.VisibleCount<MaxCols do
Columns.Add;
end
else begin
if ColCount<MaxCols then
ColCount := MaxCols;
end;
// and rows ...
if RowCount<MaxRows then
RowCount := RowCount + 20;
// setup columns captions of custom columns if they are enabled
if (MaxRows=1) and withHeader and Columns.Enabled then begin
for i:=0 to Fields.Count-1 do
Columns[i].Title.Caption:=Fields[i];
end;
if not WithHeader then
aRow := FixedRows + (MaxRows-2) // MaxRows is 1 based, 2nd one is our first row
else begin
aRow := FixedRows-1;
if aRow<0 then
aRow := 0;
aRow := aRow + (MaxRows-1);
end;
for i:=0 to Fields.Count-1 do
Cells[i, aRow] := Fields[i];
end;
begin begin
Lines := TStringList.Create;
HeaderL := TStringList.Create;
BeginUpdate; BeginUpdate;
try try
Lines.LoadFromStream(AStream); LCSVUtils.LoadFromCSVStream(AStream, @NewRecord, ADelimiter);
// check for empty lines RowCount := MaxRows;
for i:=Lines.Count-1 downto 0 do if not Columns.Enabled then
if Trim(Lines[i])='' then ColCount := MaxCols
Lines.Delete(i); else
if Lines.Count>0 then begin while Columns.Count > MaxCols do
ParseDelimitedText(Lines[0], ADelimiter, '"', HeaderL); Columns.Delete(Columns.Count-1);
// Set Columns count based on loaded data
if Columns.Enabled then begin
while Columns.VisibleCount<>HeaderL.Count do
if Columns.VisibleCount<HeaderL.Count then
Columns.Add
else
Columns.Delete(Columns.Count-1);
end
else
ColCount := HeaderL.Count; // New column count
// Rest of the lines are for rows
if WithHeader and (FixedRows=0) then
RowCount := Lines.Count
else
RowCount := FixedRows + Lines.Count-1;
// Set column captions and set StartRow for the following rows
if WithHeader then begin
if (FixedRows>0) and Columns.Enabled then
for i:=0 to Columns.Count-1 do
Columns[i].Title.Caption:=HeaderL[i];
StartRow := Max(FixedRows-1, 0);
j := 0;
end else begin
StartRow := FixedRows;
j := 1;
end;
// Store the row data
for i:=StartRow to RowCount-1 do begin
Rows[i].Delimiter := ADelimiter;
ParseDelimitedText(Lines[i-StartRow+j], ADelimiter, '"', Rows[i]);
end;
end;
finally finally
HeaderL.Free;
Lines.Free;
EndUpdate; EndUpdate;
end; end;
end; end;