* Patch for sdfdata multiline support and assoiated test case from Reinier Olislagers (bug 22237 and bug #22213)

git-svn-id: trunk@22145 -
This commit is contained in:
michael 2012-08-20 16:41:15 +00:00
parent 649bbae1c3
commit 2939c41263
4 changed files with 291 additions and 29 deletions

1
.gitattributes vendored
View File

@ -2100,6 +2100,7 @@ packages/fcl-db/tests/sdfdstoolsunit.pas svneol=native#text/plain
packages/fcl-db/tests/sqldbtoolsunit.pas svneol=native#text/plain packages/fcl-db/tests/sqldbtoolsunit.pas svneol=native#text/plain
packages/fcl-db/tests/tcgensql.pas svneol=native#text/plain packages/fcl-db/tests/tcgensql.pas svneol=native#text/plain
packages/fcl-db/tests/tcparser.pas svneol=native#text/plain packages/fcl-db/tests/tcparser.pas svneol=native#text/plain
packages/fcl-db/tests/tcsdfdata.pp svneol=native#text/plain
packages/fcl-db/tests/tcsqlscanner.pas svneol=native#text/plain packages/fcl-db/tests/tcsqlscanner.pas svneol=native#text/plain
packages/fcl-db/tests/test.json svneol=native#text/plain packages/fcl-db/tests/test.json svneol=native#text/plain
packages/fcl-db/tests/testbasics.pas svneol=native#text/plain packages/fcl-db/tests/testbasics.pas svneol=native#text/plain

View File

@ -13,11 +13,13 @@ unit SdfData;
--------------- ---------------
Modifications Modifications
--------------- ---------------
7/Jun/12 BigChimp:
Quote fields with delimiters or quotes to match Delphi SDF definition
(see e.g. help on TStrings.CommaText)
14/Jul/11 BigChimp: 14/Jul/11 BigChimp:
Added AllowMultiLine property so user can use fields that have line endings Added AllowMultiLine property so user can use fields that have line endings
(Carriage Return and/or Line Feed) embedded in their fields (fields need to be (Carriage Return and/or Line Feed) embedded in their fields (fields need to be
quoted). Enabled by default; will break compatibility with earlier versions of quoted). For now: output only (reading these fields does not work yet)
SdfData, but using multilines would have resulted in corrupted import anyway.
12/Mar/04 Lazarus version (Sergey Smirnov AKA SSY) 12/Mar/04 Lazarus version (Sergey Smirnov AKA SSY)
Locate and CheckString functions are removed because of Variant data type. Locate and CheckString functions are removed because of Variant data type.
Many things are changed for FPC/Lazarus compatibility. Many things are changed for FPC/Lazarus compatibility.
@ -939,25 +941,33 @@ end;
function TSdfDataSet.StoreToBuf(Source: String): String; function TSdfDataSet.StoreToBuf(Source: String): String;
const const
CR :char = #13; CR :char = #13;
LF :char = #10; LF :char = #10;
Quote :char = #34; // Character that encloses field if quoted. Hard-coded to "
var var
i, IsQuoted // Whether or not field starts with a quote
p :Integer; :Boolean;
pRet, FieldMaxSize, // Maximum fields size as defined in FieldDefs
pStr, i, // Field counter (0..)
pStrEnd :PChar; p // Length of string in field
:Integer;
pDeQuoted, // Temporary buffer for dedoubling quotes
pRet, // Pointer to insertion point in return value
pStr, // Beginning of field
pStrEnd // End of field
:PChar;
Ret :String; Ret :String;
begin begin
SetLength(Ret, FRecordSize); SetLength(Ret, FRecordSize);
FillChar(PChar(Ret)^, FRecordSize, Ord(' ')); FillChar(PChar(Ret)^, FRecordSize, Ord(' '));
PStrEnd := PChar(Source);
PStrEnd := PChar(Source);
pRet := PChar(Ret); pRet := PChar(Ret);
for i := 0 to FieldDefs.Count - 1 do for i := 0 to FieldDefs.Count - 1 do
begin begin
FieldMaxSize := FieldDefs[i].Size;
IsQuoted := false;
while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] in [#1..' ']) do while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] in [#1..' ']) do
begin begin
if FFMultiLine then if FFMultiLine then
@ -980,14 +990,15 @@ begin
pStr := pStrEnd; pStr := pStrEnd;
if (pStr[0] = '"') then if (pStr[0] = Quote) then
begin begin
IsQuoted := true; // See below: accept end of string without explicit quote
if FFMultiLine then if FFMultiLine then
begin begin
repeat repeat
Inc(pStrEnd); Inc(pStrEnd);
until not Boolean(Byte(pStrEnd[0])) or until not Boolean(Byte(pStrEnd[0])) or
((pStrEnd[0] = '"') and ((pStrEnd + 1)[0] in [Delimiter,#0])); ((pStrEnd[0] = Quote) and ((pStrEnd + 1)[0] in [Delimiter,#0]));
end end
else else
begin begin
@ -995,33 +1006,52 @@ begin
repeat repeat
Inc(pStrEnd); Inc(pStrEnd);
until not Boolean(Byte(pStrEnd[0])) or until not Boolean(Byte(pStrEnd[0])) or
((pStrEnd[0] = '"') and ((pStrEnd + 1)[0] in [Delimiter,CR,LF, #0])); ((pStrEnd[0] = Quote) and ((pStrEnd + 1)[0] in [Delimiter,CR,LF,#0]));
end; end;
if (pStrEnd[0] = Quote) then
if (pStrEnd[0] = '"') then Inc(pStr); //Skip final quote
Inc(pStr);
end end
else else
while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] <> Delimiter) do while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] <> Delimiter) do
Inc(pStrEnd); Inc(pStrEnd);
// Copy over entire field (or at least up to field length):
p := pStrEnd - pStr; p := pStrEnd - pStr;
if (p > FieldDefs[i].Size) then if IsQuoted then
p := FieldDefs[i].Size; begin
pDeQuoted := pRet; //Needed to avoid changing insertion point
// Copy entire field but not more than maximum field length:
// (We can mess with pStr now; the next loop will reset it after
// pStrEnd):
while (pstr < pStrEnd) and (pDeQuoted-pRet <= FieldMaxSize) do
begin
if pStr^ = Quote then inc(pStr);// skip first quote
pDeQuoted^ := pStr^;
inc(pStr);
inc(pDeQuoted);
end;
end
else
begin
if (p > FieldMaxSize) then
p := FieldMaxSize;
Move(pStr[0], pRet[0], p);
end;
Move(pStr[0], pRet[0], p); Inc(pRet, FieldMaxSize);
Inc(pRet, FieldDefs[i].Size); // Move the end of field position past quotes and delimiters
// ready for processing the next field
if (pStrEnd[0] = '"') then if (pStrEnd[0] = Quote) then
while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] <> Delimiter) do while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] <> Delimiter) do
Inc(pStrEnd); Inc(pStrEnd);
if (pStrEnd[0] = Delimiter) then if (pStrEnd[0] = Delimiter) then
Inc(pStrEnd); Inc(pStrEnd);
end; end;
Result := Ret;
Result := ret;
end; end;
function TSdfDataSet.BufToStore(Buffer: TRecordBuffer): String; function TSdfDataSet.BufToStore(Buffer: TRecordBuffer): String;
@ -1034,9 +1064,9 @@ var
begin begin
Result := ''; Result := '';
p := 1; p := 1;
QuoteMe:=false;
for i := 0 to FieldDefs.Count - 1 do for i := 0 to FieldDefs.Count - 1 do
begin begin
QuoteMe:=false;
Str := Trim(Copy(pansichar(Buffer), p, FieldDefs[i].Size)); Str := Trim(Copy(pansichar(Buffer), p, FieldDefs[i].Size));
Inc(p, FieldDefs[i].Size); Inc(p, FieldDefs[i].Size);
if FFMultiLine then if FFMultiLine then
@ -1051,11 +1081,13 @@ begin
Str := StringReplace(Str, #10, '', [rfReplaceAll]); Str := StringReplace(Str, #10, '', [rfReplaceAll]);
Str := StringReplace(Str, #13, '', [rfReplaceAll]); Str := StringReplace(Str, #13, '', [rfReplaceAll]);
end; end;
// Check for any delimiters occurring in field text // Check for any delimiters or quotes occurring in field text
if ((not QuoteMe) and (StrScan(PChar(Str), FDelimiter) <> nil)) then QuoteMe:=true; if (not QuoteMe) then
if (StrScan(PChar(Str), FDelimiter) <> nil) or
(StrScan(PChar(Str), QuoteDelimiter) <> nil) then QuoteMe:=true;
if (QuoteMe) then if (QuoteMe) then
begin begin
Str:=Stringreplace(Str,QuoteDelimiter,QuoteDelimiter+QuoteDelimiter,[rfReplaceAll]); Str := Stringreplace(Str, QuoteDelimiter, QuoteDelimiter+QuoteDelimiter, [rfReplaceAll]);
Str := QuoteDelimiter + Str + QuoteDelimiter; Str := QuoteDelimiter + Str + QuoteDelimiter;
end; end;
Result := Result + Str + FDelimiter; Result := Result + Str + FDelimiter;

View File

@ -17,6 +17,7 @@ uses
bufdatasettoolsunit, bufdatasettoolsunit,
memdstoolsunit, memdstoolsunit,
SdfDSToolsUnit, SdfDSToolsUnit,
tcsdfdata,
// Units wich contains the tests // Units wich contains the tests
TestBasics, TestBasics,
TestFieldTypes, TestFieldTypes,

View File

@ -0,0 +1,228 @@
unit tcsdfdata;
// Tests multiline functionality of sdfdataset
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, Fpcunit, Testutils, Testregistry,
dateutils, sdfdata;
type
{ Ttestexport1 }
Ttestexport1 = class(Ttestcase)
protected
TestDataset: TSDFDataset;
procedure Setup; override;
procedure Teardown; override;
published
procedure TestOutput;
procedure TestInputOurFormat;
procedure TestDelimitedTextOutput;
end;
implementation
procedure Ttestexport1.TestOutput;
const
OutputFilename='output.csv';
begin
TestDataSet.Close;
if FileExists(OutputFilename) then DeleteFile(OutputFileName);
TestDataset.FileName:=OutputFileName;
TestDataset.Open;
// Fill test data
TestDataset.Append;
TestDataset.FieldByName('ID').AsInteger := 1;
// Data with quotes
TestDataset.FieldByName('NAME').AsString := 'J"T"';
TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
TestDataset.Post;
TestDataset.Append;
TestDataset.FieldByName('ID').AsInteger := 2;
// Data with delimiter
TestDataset.FieldByName('NAME').AsString := 'Hello'+TestDataset.Delimiter+' goodbye';
TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
TestDataset.Post;
TestDataset.Append;
TestDataset.FieldByName('ID').AsInteger := 3;
//Data with delimiter and quote (to test 19376)
TestDataset.FieldByName('NAME').AsString := 'Delimiter,"and";quote';
TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
TestDataset.Post;
TestDataset.Append;
TestDataset.FieldByName('ID').AsInteger := 4;
// Regular data
TestDataset.FieldByName('NAME').AsString := 'Just a long line of text without anything special';
TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
TestDataset.Post;
TestDataset.Last;
TestDataset.First;
// This fails - seems it sees the header as a record, too?
AssertEquals('Number of records in test dataset', 4, TestDataset.RecordCount);
TestDataset.Close;
end;
procedure Ttestexport1.TestInputOurFormat;
// Test if input works with our format
// Mainly check if reading quotes is according to Delphi sdf specs and works.
// See test results from bug 19610 for evidence that the strings below should work.
// If this works, we can switch to this and be RFC 4180 compliant and Delphi compliant.
const
OutputFileName='input.csv';
//Value1 is the on disk format; it should translate to Expected1
Value1='"Delimiter,""and"";quote"';
Expected1='Delimiter,"and";quote';
Value2='"J""T"""';
Expected2='J"T"';
Value3='Just a long line';
Expected3='Just a long line';
//Note: Delphi can read this, see evidence in bug 19610 (the "quoted and space" value)
Value4='"Just a quoted long line"';
Expected4='Just a quoted long line';
// Delphi can read multiline, see evidence in bug 19610 (the multiline entry)
Value5='"quoted_multi'+#13+#10+'line"';
Expected5='quoted_multi'+#13+#10+'line';
Value6='"Delimiter,and;quoted"';
Expected6='Delimiter,and;quoted';
Value7='"A random""quote"';
Expected7='A random"quote';
var
FileStrings: TStringList;
begin
TestDataset.Close;
TestDataset.AllowMultiLine:=true;
if FileExists(OutputFilename) then DeleteFile(OutputFileName);
FileStrings:=TStringList.Create;
try
FileStrings.Add('ID,NAME,BIRTHDAY');
FileStrings.Add('1,'+Value1+',31-12-1976');
FileStrings.Add('2,'+Value2+',31-12-1976');
FileStrings.Add('3,'+Value3+',31-12-1976');
FileStrings.Add('4,'+Value4+',31-12-1976');
FileStrings.Add('5,'+Value5+',31-12-1976');
FileStrings.Add('6,'+Value6+',31-12-1976');
FileStrings.Add('7,'+Value7+',31-12-1976');
FileStrings.SaveToFile(OutputFileName);
finally
FileStrings.Free;
end;
// Load our dataset
TestDataset.FileName:=OutputFileName;
TestDataset.Open;
TestDataset.First;
AssertEquals(Expected1, TestDataSet.FieldByName('NAME').AsString);
TestDataSet.Next;
AssertEquals(Expected2, TestDataSet.FieldByName('NAME').AsString);
TestDataSet.Next;
AssertEquals(Expected3, TestDataSet.FieldByName('NAME').AsString);
TestDataSet.Next;
AssertEquals(Expected4, TestDataSet.FieldByName('NAME').AsString);
TestDataSet.Next;
AssertEquals(Expected5, TestDataSet.FieldByName('NAME').AsString);
TestDataSet.Next;
AssertEquals(Expected6, TestDataSet.FieldByName('NAME').AsString);
TestDataSet.Next;
AssertEquals(Expected7, TestDataSet.FieldByName('NAME').AsString);
end;
procedure Ttestexport1.TestDelimitedTextOutput;
// Test if input works with our format
// Mainly check if reading quotes is according to Delphi sdf specs and works.
// See test results from bug 19610 for evidence that the strings below should work.
// If this works, we can switch to this and be RFC 4180 compliant and Delphi compliant.
const
OutputFileName='delim.csv';
//Value1 is the on disk format; it should translate to Expected1
Value1='Delimiter,"and";quote';
Value2='J"T"';
Value3='Just a long line';
Value4='Just a quoted long line';
Value5='multi'+#13+#10+'line';
Value6='Delimiter,and;done';
Value7='Some "random" quotes';
var
FileStrings: TStringList;
OneRecord: TStringList;
begin
TestDataset.Close;
TestDataset.AllowMultiLine:=true;
if FileExists(OutputFileName) then DeleteFile(OutputFileName);
FileStrings:=TStringList.Create;
OneRecord:=TStringList.Create;
try
FileStrings.Add('Field1,Field2,Field3,Field4,Field5,Field6,Field7');
OneRecord.Add(Value1);
OneRecord.Add(Value2);
OneRecord.Add(Value3);
OneRecord.Add(Value4);
OneRecord.Add(Value5);
OneRecord.Add(Value6);
OneRecord.Add(Value7);
OneRecord.Delimiter:=',';
OneRecord.QuoteChar:='"';
OneRecord.StrictDelimiter:=true;
FileStrings.Add(OneRecord.DelimitedText);
FileStrings.SaveToFile(OutputFileName);
finally
FileStrings.Free;
OneRecord.Free;
end;
// Load our dataset
TestDataset.FileName:=OutputFileName;
TestDataset.Open;
TestDataset.First;
AssertEquals(Value1, TestDataSet.Fields[0].AsString);
AssertEquals(Value2, TestDataSet.Fields[1].AsString);
AssertEquals(Value3, TestDataSet.Fields[2].AsString);
AssertEquals(Value4, TestDataSet.Fields[3].AsString);
AssertEquals(Value5, TestDataSet.Fields[4].AsString);
AssertEquals(Value6, TestDataSet.Fields[5].AsString);
AssertEquals(Value7, TestDataSet.Fields[6].AsString);
end;
procedure Ttestexport1.Setup;
begin
TestDataset := TSDFDataset.Create(nil);
TestDataset.Delimiter := ',';
TestDataset.FileMustExist:=false;
TestDataset.FirstLineAsSchema := True;
TestDataset.Schema.Add('ID');
TestDataset.Schema.Add('NAME');
TestDataset.Schema.Add('BIRTHDAY');
end;
procedure Ttestexport1.Teardown;
begin
try
TestDataset.Close;
except
//swallow
end;
TestDataset.Free;
try
//DeleteFile(FCSVFileName);
except
//swallow
end;
end;
initialization
Registertest(Ttestexport1);
end.