LCL, grids, fix loading CSV data with multiline fields, issue #27443

git-svn-id: trunk@48780 -
This commit is contained in:
jesus 2015-04-20 17:50:27 +00:00
parent 0406b9f4ce
commit a95002e0bf
5 changed files with 333 additions and 121 deletions

1
.gitattributes vendored
View File

@ -2784,6 +2784,7 @@ components/lazutils/lazutils.lpk svneol=native#text/plain
components/lazutils/lazutils.pas svneol=native#text/pascal
components/lazutils/lazutilsstrconsts.pas svneol=native#text/pascal
components/lazutils/lconvencoding.pas svneol=native#text/pascal
components/lazutils/lcsvutils.pas svneol=native#text/pascal
components/lazutils/masks.pas svneol=native#text/pascal
components/lazutils/paswstring.pas svneol=native#text/pascal
components/lazutils/ttcache.pas svneol=native#text/pascal

View File

@ -16,7 +16,7 @@
<Description Value="Useful units for Lazarus packages."/>
<License Value="Modified LGPL-2"/>
<Version Major="1"/>
<Files Count="75">
<Files Count="76">
<Item1>
<Filename Value="laz2_dom.pas"/>
<UnitName Value="Laz2_DOM"/>
@ -318,6 +318,10 @@
<Filename Value="lazutilities.pas"/>
<UnitName Value="lazutilities"/>
</Item75>
<Item76>
<Filename Value="lcsvutils.pas"/>
<UnitName Value="lcsvutils"/>
</Item76>
</Files>
<LazDoc Paths="../../docs/xml/lazutils"/>
<i18n>

View File

@ -16,7 +16,7 @@ uses
TTProfile, TTRASTER, TTTables, TTTypes, EasyLazFreeType, LazLoggerBase,
LazLoggerDummy, LazClasses, LazFreeTypeFontCollection, LazConfigStorage,
UTF8Process, laz2_xpath, DictionaryStringList, LazLoggerProfiling, FPCAdds,
LazUtilities, LazarusPackageIntf;
LazUtilities, lcsvutils, LazarusPackageIntf;
implementation

View File

@ -0,0 +1,266 @@
unit lcsvutils;
{$mode objfpc}{$H+}
{$modeswitch nestedprocvars}
interface
uses
Classes, SysUtils;
type
TCSVRecordProc = procedure(Fields: TStringList) is nested;
TCSVEncoding = (ceAuto, ceUTF8, ceUTF16, ceUTF16be);
procedure LoadFromCSVStream(AStream:TStream; AProc: TCSVRecordProc;
ADelimiter:Char=','; CSVEncoding:TCSVEncoding=ceAuto);
implementation
const
BUFSIZE=1024;
MAXGROW = 1 shl 29;
type
TSoc = set of char;
procedure LoadFromCSVStream(AStream: TStream; AProc: TCSVRecordProc;
ADelimiter:Char; CSVEncoding: TCSVEncoding);
var
Buffer, curWord: ansistring;
BytesRead, BufLen, I, BufDelta: Longint;
leadPtr, tailPtr, wordPtr, X:Pchar;
Line: TStringList = nil;
function SkipSet(aSet: TSoc): boolean;
begin
while (leadPtr<tailPtr) and (leadPtr^ in aSet) do Inc(leadPtr);
result := leadPtr<tailPtr;
end;
function FindSet(aSet: TSoc): boolean;
begin
while (leadPtr<tailPtr) and (not (leadPtr^ in ASet)) do Inc(leadPtr);
result := leadPtr<tailPtr;
end;
procedure NotifyLine;
begin
if (Line<>nil) and (Line.Count>0) then begin
AProc(Line);
Line.Clear;
end;
end;
procedure StorePart;
var
Len: Integer;
begin
Len := Length(curWord);
SetLength(curWord, Len+(leadPtr-wordPtr));
Move(wordPtr^, curWord[Len+1], (leadPtr-wordPtr));
Inc(leadPtr);
wordPtr := leadPtr;
end;
procedure StoreWord;
begin
StorePart;
if Line=nil then
Line := TStringList.Create;
Line.Add(curWord);
curWord := '';
end;
procedure StoreLine;
begin
StoreWord;
NotifyLine;
end;
procedure ProcessEndline;
var
le: PChar;
begin
le := leadPtr;
StoreLine;
if leadPtr>=tailPtr then
exit;
if (le^=#13) and (leadPtr^=#10) then
Inc(leadPtr);
wordPtr := leadPtr;
end;
procedure ProcessQuote;
var
endQuote,endField: pchar;
isDelimiter: boolean;
begin
// got a valid opening quote
Inc(leadPtr);
wordPtr := leadPtr;
// look for valid ending quote
while leadPtr<tailPtr do begin
if FindSet(['"']) then begin
// is this an encoded quote?
if (leadPtr+1)^='"' then begin
// yes, store part and keep looking
inc(leadPtr); // points to second quote
StorePart; // add to current word including the first "
end else begin
// try to match: "\s*(,|$|EOF)
endQuote := leadPtr; // points to valid closing quote (if found later)
Inc(leadPtr); // points to \s if exists
SkipSet([' ']); // skip \s if exists
endField := leadPtr; // points to field terminator
if (leadPtr>=tailPtr) or (leadPtr^ in [ADelimiter, #10, #13]) then begin
isDelimiter := (leadPtr<tailPtr) and (leadPtr^=ADelimiter);
if leadPtr<tailPtr then begin
if (leadPtr^=#13) and ((leadPtr+1)<tailPtr) and ((leadPtr+1)^=#10) then
Inc(endField); // point to second byte of line ending
Inc(endField); // skip last byte of line ending or delimiter
end;
leadPtr := endQuote; // leadPtr points to closing quote
if isDelimiter then
StoreWord
else
StoreLine;
leadPtr := endField; // restore next position
wordPtr := leadPtr;
break;
end;
end;
end;
end;
if leadPtr<>wordPtr then begin
StoreLine;
wordPtr := leadPtr;
end;
end;
procedure ConvertToUTF16;
var
n: Integer;
u: pchar;
ch: char;
begin
n := (tailPtr-leadPtr) div 2;
u := leadPtr;
while n>0 do begin
ch := u^;
u^ := (u+1)^;
(u+1)^ := ch;
inc(u, 2);
dec(n);
end;
end;
procedure ConvertEncoding;
var
Pw: PWidechar;
begin
if (CSVEncoding=ceAuto) and (BufLen>1) then begin
if (leadPtr[0]=#$FF) and (leadPtr[1]=#$FE) then begin
inc(leadPtr,2); // skip little endian UTF-16 BOM
CSVEncoding := ceUTF16;
end else
if (leadPtr[0]=#$FE) and (leadPtr[1]=#$FF) then begin
inc(leadPtr,2); // skip big endian UTF-16 BOM
CSVEncoding := ceUTF16be;
end else
if (leadPtr[0]<>#$00) and (leadPtr[1]=#$00) then // quick guess
CSVEncoding := ceUTF16
else
if (leadPtr[0]=#$00) and (leadPtr[1]<>#$00) then // quick guess
CSVEncoding := ceUTF16be
end;
if (CSVEncoding=ceAuto) and (BufLen>2) then begin
if (leadPtr[0]=#$EF) and (leadPtr[1]=#$BB) and (leadPtr[2]=#$BF) then
inc(leadPtr,3); // skip UTF-8 BOM
end;
if CSVEncoding=ceAuto then
CSVEncoding := ceUTF8;
case CSVEncoding of
ceUTF16, ceUTF16be:
begin
if CSVEncoding=ceUTF16be then
ConvertToUTF16;
Pw := pointer(leadPtr);
Buffer := UTF8Encode(widestring(Pw));
leadPtr := @Buffer[1];
end;
end;
end;
begin
if AProc=nil then
exit;
// read buffer ala fpc tstrings
Buffer:='';
BufLen:=0;
I:=1;
repeat
BufDelta:=BUFSIZE*I;
SetLength(Buffer,BufLen+BufDelta);
BytesRead:=AStream.Read(Buffer[BufLen+1],BufDelta);
inc(BufLen,BufDelta);
If I<MAXGROW then
I:=I shl 1;
until BytesRead<>BufDelta;
BufLen := BufLen-BufDelta+BytesRead;
SetLength(Buffer, BufLen);
if BufLen=0 then
exit;
curWord := '';
leadPtr := @Buffer[1];
tailPtr := leadPtr + BufLen;
ConvertEncoding;
try
wordPtr := leadPtr; // wordPtr always points to starting word or part
tailPtr := leadPtr + Length(Buffer); // tailPtr is an end of buffer marker
while leadPtr<tailPtr do begin
// skip initial spaces
SkipSet([' ']);
X := leadPtr;
// find next marker
if not FindSet([ADelimiter, '"', #10, #13]) then
break;
case leadPtr^ of
'"':
begin
// is the first char?
if leadPtr=X then
ProcessQuote
else begin
// got an invalid open quote, sync until next delimiter, $ or EOB
FindSet([ADelimiter, #10, #13]);
if leadPtr^=ADelimiter then
StoreWord
else
ProcessEndline;
end;
end;
#10, #13:
ProcessEndline;
else
if leadPtr^=ADelimiter then
StoreWord
end;
end;
if wordPtr<>leadPtr then
StoreWord;
NotifyLine;
finally
Line.Free;
SetLength(Buffer,0);
end;
end;
end.

View File

@ -27,6 +27,7 @@ email: jesusrmx@yahoo.com.mx
unit Grids;
{$mode objfpc}{$H+}
{$modeswitch nestedprocvars}
{$define NewCols}
interface
@ -35,7 +36,7 @@ uses
Types, Classes, SysUtils, Math, Maps, LCLStrConsts, LCLProc, LCLType, LCLIntf,
FileUtil, FPCanvas, Controls, GraphType, Graphics, Forms, DynamicArray,
LMessages, StdCtrls, LResources, MaskEdit, Buttons, Clipbrd, Themes,
LazUTF8, LazUtf8Classes, Laz2_XMLCfg; // <-- replaces XMLConf (part of FPC libs)
LazUTF8, LazUtf8Classes, Laz2_XMLCfg, LCSVUtils; // <-- replaces XMLConf (part of FPC libs)
const
//GRIDFILEVERSION = 1; // Original
@ -10721,129 +10722,69 @@ end;
procedure TCustomStringGrid.LoadFromCSVStream(AStream: TStream;
ADelimiter: Char=','; WithHeader: boolean=true);
Procedure ParseDelimitedText(const AValue: string; const ADelimiter, AQuoteChar: Char; TS: TStrings);
{ Helper function for LoadFromCSVFile
Adapted from TStrings.SetDelimitedText
- Only ADelimiter is used for separating the fields and not other whitespace
- If a field is quoted and it contains AQuoteChar, this occurrence is treated as a literal part of the field
- As per RFC4180 whitespace is considered to be part of the field, even if the field is not quoted
- Trailing spaces of a quoted field are trimmed
Example with ADelimiter = ',' and AQuoteChar = '"'
AValue = '111,2,22,333' -> 111|2|22|333
AValue = '111,"2,22",333' -> 111|2,22|333
AValue = '111, 222 ,333' -> 111| 222 |333
}
var i,j:integer;
aNotFirst:boolean;
begin
TS.BeginUpdate;
i:=1;
j:=1;
aNotFirst:=false;
try
TS.Clear;
while i<=length(AValue) do
begin
// skip delimiter
if aNotFirst and (i<=length(AValue)) and (AValue[i]=ADelimiter) then inc(i);
// read next string
if i<=length(AValue) then
begin
if AValue[i]=AQuoteChar then
begin
// next string is quoted
j:=i+1;
while (j<=length(AValue)) and
( (AValue[j]<>AQuoteChar) or
( (j+1<=length(AValue)) and (AValue[j+1]=AQuoteChar) ) ) do
begin
if (j<=length(AValue)) and (AValue[j]=AQuoteChar) then
inc(j,2)
else
inc(j);
end;
// j is position of closing quote
TS.Add( StringReplace (Copy(AValue,i+1,j-i-1),
AQuoteChar+AQuoteChar,AQuoteChar, [rfReplaceAll]));
i:=j+1;
end
else
begin
// next string is not quoted
j:=i;
while (j<=length(AValue)) and
//basically any other character means some invalid text
((Ord(AValue[j])>=Ord(' ')) or (AValue[j] in [#10,#13,#9,#0])) and
(AValue[j]<>ADelimiter) do inc(j);
TS.Add(Copy(AValue,i,j-i));
i:=j;
end;
end
else
begin
if aNotFirst then TS.Add('');
end;
// skip trailing spaces of a quoted field
// not really sure if that is RFC4180 compliant
while (i<=length(AValue)) and (Ord(AValue[i])<=Ord(' ')) and (AValue[i] <> ADelimiter) do inc(i);
aNotFirst:=true;
end; //end of string
finally
TS.EndUpdate;
end;
end;//ParseDelimitedText
var
Lines, HeaderL: TStringList;
i, j, StartRow: Integer;
MaxCols: Integer = 0;
MaxRows: Integer = 0;
procedure NewRecord(Fields:TStringlist);
var
i, aRow: Integer;
begin
if Fields.Count=0 then
exit;
Inc(MaxRows);
if (MaxRows=1) then
// first record
if not WithHeader then
exit; // ... no header wanted
// make sure we have enough columns
if MaxCols<Fields.Count then
MaxCols := Fields.Count;
if Columns.Enabled then begin
while Columns.VisibleCount<MaxCols do
Columns.Add;
end
else begin
if ColCount<MaxCols then
ColCount := MaxCols;
end;
// and rows ...
if RowCount<MaxRows then
RowCount := RowCount + 20;
// setup columns captions of custom columns if they are enabled
if (MaxRows=1) and withHeader and Columns.Enabled then begin
for i:=0 to Fields.Count-1 do
Columns[i].Title.Caption:=Fields[i];
end;
if not WithHeader then
aRow := FixedRows + (MaxRows-2) // MaxRows is 1 based, 2nd one is our first row
else begin
aRow := FixedRows-1;
if aRow<0 then
aRow := 0;
aRow := aRow + (MaxRows-1);
end;
for i:=0 to Fields.Count-1 do
Cells[i, aRow] := Fields[i];
end;
begin
Lines := TStringList.Create;
HeaderL := TStringList.Create;
BeginUpdate;
try
Lines.LoadFromStream(AStream);
// check for empty lines
for i:=Lines.Count-1 downto 0 do
if Trim(Lines[i])='' then
Lines.Delete(i);
if Lines.Count>0 then begin
ParseDelimitedText(Lines[0], ADelimiter, '"', HeaderL);
// Set Columns count based on loaded data
if Columns.Enabled then begin
while Columns.VisibleCount<>HeaderL.Count do
if Columns.VisibleCount<HeaderL.Count then
Columns.Add
else
Columns.Delete(Columns.Count-1);
end
else
ColCount := HeaderL.Count; // New column count
// Rest of the lines are for rows
if WithHeader and (FixedRows=0) then
RowCount := Lines.Count
else
RowCount := FixedRows + Lines.Count-1;
// Set column captions and set StartRow for the following rows
if WithHeader then begin
if (FixedRows>0) and Columns.Enabled then
for i:=0 to Columns.Count-1 do
Columns[i].Title.Caption:=HeaderL[i];
StartRow := Max(FixedRows-1, 0);
j := 0;
end else begin
StartRow := FixedRows;
j := 1;
end;
// Store the row data
for i:=StartRow to RowCount-1 do begin
Rows[i].Delimiter := ADelimiter;
ParseDelimitedText(Lines[i-StartRow+j], ADelimiter, '"', Rows[i]);
end;
end;
LCSVUtils.LoadFromCSVStream(AStream, @NewRecord, ADelimiter);
RowCount := MaxRows;
if not Columns.Enabled then
ColCount := MaxCols
else
while Columns.Count > MaxCols do
Columns.Delete(Columns.Count-1);
finally
HeaderL.Free;
Lines.Free;
EndUpdate;
end;
end;