* Patch for sdfdata multiline support and assoiated test case from Reinier Olislagers (bug 22237 and bug #22213)

git-svn-id: trunk@22145 -
This commit is contained in:
michael 2012-08-20 16:41:15 +00:00
parent 649bbae1c3
commit 2939c41263
4 changed files with 291 additions and 29 deletions

1
.gitattributes vendored
View File

@ -2100,6 +2100,7 @@ packages/fcl-db/tests/sdfdstoolsunit.pas svneol=native#text/plain
packages/fcl-db/tests/sqldbtoolsunit.pas svneol=native#text/plain
packages/fcl-db/tests/tcgensql.pas svneol=native#text/plain
packages/fcl-db/tests/tcparser.pas svneol=native#text/plain
packages/fcl-db/tests/tcsdfdata.pp svneol=native#text/plain
packages/fcl-db/tests/tcsqlscanner.pas svneol=native#text/plain
packages/fcl-db/tests/test.json svneol=native#text/plain
packages/fcl-db/tests/testbasics.pas svneol=native#text/plain

View File

@ -13,11 +13,13 @@ unit SdfData;
---------------
Modifications
---------------
7/Jun/12 BigChimp:
Quote fields with delimiters or quotes to match Delphi SDF definition
(see e.g. help on TStrings.CommaText)
14/Jul/11 BigChimp:
Added AllowMultiLine property so user can use fields that have line endings
(Carriage Return and/or Line Feed) embedded in their fields (fields need to be
quoted). Enabled by default; will break compatibility with earlier versions of
SdfData, but using multilines would have resulted in corrupted import anyway.
quoted). For now: output only (reading these fields does not work yet)
12/Mar/04 Lazarus version (Sergey Smirnov AKA SSY)
Locate and CheckString functions are removed because of Variant data type.
Many things are changed for FPC/Lazarus compatibility.
@ -939,25 +941,33 @@ end;
function TSdfDataSet.StoreToBuf(Source: String): String;
const
CR :char = #13;
LF :char = #10;
CR :char = #13;
LF :char = #10;
Quote :char = #34; // Character that encloses field if quoted. Hard-coded to "
var
i,
p :Integer;
pRet,
pStr,
pStrEnd :PChar;
IsQuoted // Whether or not field starts with a quote
:Boolean;
FieldMaxSize, // Maximum fields size as defined in FieldDefs
i, // Field counter (0..)
p // Length of string in field
:Integer;
pDeQuoted, // Temporary buffer for dedoubling quotes
pRet, // Pointer to insertion point in return value
pStr, // Beginning of field
pStrEnd // End of field
:PChar;
Ret :String;
begin
SetLength(Ret, FRecordSize);
FillChar(PChar(Ret)^, FRecordSize, Ord(' '));
PStrEnd := PChar(Source);
PStrEnd := PChar(Source);
pRet := PChar(Ret);
for i := 0 to FieldDefs.Count - 1 do
begin
FieldMaxSize := FieldDefs[i].Size;
IsQuoted := false;
while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] in [#1..' ']) do
begin
if FFMultiLine then
@ -980,14 +990,15 @@ begin
pStr := pStrEnd;
if (pStr[0] = '"') then
if (pStr[0] = Quote) then
begin
IsQuoted := true; // See below: accept end of string without explicit quote
if FFMultiLine then
begin
repeat
Inc(pStrEnd);
until not Boolean(Byte(pStrEnd[0])) or
((pStrEnd[0] = '"') and ((pStrEnd + 1)[0] in [Delimiter,#0]));
((pStrEnd[0] = Quote) and ((pStrEnd + 1)[0] in [Delimiter,#0]));
end
else
begin
@ -995,33 +1006,52 @@ begin
repeat
Inc(pStrEnd);
until not Boolean(Byte(pStrEnd[0])) or
((pStrEnd[0] = '"') and ((pStrEnd + 1)[0] in [Delimiter,CR,LF, #0]));
((pStrEnd[0] = Quote) and ((pStrEnd + 1)[0] in [Delimiter,CR,LF,#0]));
end;
if (pStrEnd[0] = '"') then
Inc(pStr);
if (pStrEnd[0] = Quote) then
Inc(pStr); //Skip final quote
end
else
while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] <> Delimiter) do
Inc(pStrEnd);
// Copy over entire field (or at least up to field length):
p := pStrEnd - pStr;
if (p > FieldDefs[i].Size) then
p := FieldDefs[i].Size;
if IsQuoted then
begin
pDeQuoted := pRet; //Needed to avoid changing insertion point
// Copy entire field but not more than maximum field length:
// (We can mess with pStr now; the next loop will reset it after
// pStrEnd):
while (pstr < pStrEnd) and (pDeQuoted-pRet <= FieldMaxSize) do
begin
if pStr^ = Quote then inc(pStr);// skip first quote
pDeQuoted^ := pStr^;
inc(pStr);
inc(pDeQuoted);
end;
end
else
begin
if (p > FieldMaxSize) then
p := FieldMaxSize;
Move(pStr[0], pRet[0], p);
end;
Move(pStr[0], pRet[0], p);
Inc(pRet, FieldMaxSize);
Inc(pRet, FieldDefs[i].Size);
if (pStrEnd[0] = '"') then
// Move the end of field position past quotes and delimiters
// ready for processing the next field
if (pStrEnd[0] = Quote) then
while Boolean(Byte(pStrEnd[0])) and (pStrEnd[0] <> Delimiter) do
Inc(pStrEnd);
if (pStrEnd[0] = Delimiter) then
Inc(pStrEnd);
end;
Result := Ret;
Result := ret;
end;
function TSdfDataSet.BufToStore(Buffer: TRecordBuffer): String;
@ -1034,9 +1064,9 @@ var
begin
Result := '';
p := 1;
QuoteMe:=false;
for i := 0 to FieldDefs.Count - 1 do
begin
QuoteMe:=false;
Str := Trim(Copy(pansichar(Buffer), p, FieldDefs[i].Size));
Inc(p, FieldDefs[i].Size);
if FFMultiLine then
@ -1051,11 +1081,13 @@ begin
Str := StringReplace(Str, #10, '', [rfReplaceAll]);
Str := StringReplace(Str, #13, '', [rfReplaceAll]);
end;
// Check for any delimiters occurring in field text
if ((not QuoteMe) and (StrScan(PChar(Str), FDelimiter) <> nil)) then QuoteMe:=true;
// Check for any delimiters or quotes occurring in field text
if (not QuoteMe) then
if (StrScan(PChar(Str), FDelimiter) <> nil) or
(StrScan(PChar(Str), QuoteDelimiter) <> nil) then QuoteMe:=true;
if (QuoteMe) then
begin
Str:=Stringreplace(Str,QuoteDelimiter,QuoteDelimiter+QuoteDelimiter,[rfReplaceAll]);
Str := Stringreplace(Str, QuoteDelimiter, QuoteDelimiter+QuoteDelimiter, [rfReplaceAll]);
Str := QuoteDelimiter + Str + QuoteDelimiter;
end;
Result := Result + Str + FDelimiter;

View File

@ -17,6 +17,7 @@ uses
bufdatasettoolsunit,
memdstoolsunit,
SdfDSToolsUnit,
tcsdfdata,
// Units wich contains the tests
TestBasics,
TestFieldTypes,

View File

@ -0,0 +1,228 @@
unit tcsdfdata;
// Tests multiline functionality of sdfdataset
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, Fpcunit, Testutils, Testregistry,
dateutils, sdfdata;
type
{ Ttestexport1 }
Ttestexport1 = class(Ttestcase)
protected
TestDataset: TSDFDataset;
procedure Setup; override;
procedure Teardown; override;
published
procedure TestOutput;
procedure TestInputOurFormat;
procedure TestDelimitedTextOutput;
end;
implementation
procedure Ttestexport1.TestOutput;
const
OutputFilename='output.csv';
begin
TestDataSet.Close;
if FileExists(OutputFilename) then DeleteFile(OutputFileName);
TestDataset.FileName:=OutputFileName;
TestDataset.Open;
// Fill test data
TestDataset.Append;
TestDataset.FieldByName('ID').AsInteger := 1;
// Data with quotes
TestDataset.FieldByName('NAME').AsString := 'J"T"';
TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
TestDataset.Post;
TestDataset.Append;
TestDataset.FieldByName('ID').AsInteger := 2;
// Data with delimiter
TestDataset.FieldByName('NAME').AsString := 'Hello'+TestDataset.Delimiter+' goodbye';
TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
TestDataset.Post;
TestDataset.Append;
TestDataset.FieldByName('ID').AsInteger := 3;
//Data with delimiter and quote (to test 19376)
TestDataset.FieldByName('NAME').AsString := 'Delimiter,"and";quote';
TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
TestDataset.Post;
TestDataset.Append;
TestDataset.FieldByName('ID').AsInteger := 4;
// Regular data
TestDataset.FieldByName('NAME').AsString := 'Just a long line of text without anything special';
TestDataset.FieldByName('BIRTHDAY').AsDateTime := ScanDateTime('yyyymmdd', '19761231', 1);
TestDataset.Post;
TestDataset.Last;
TestDataset.First;
// This fails - seems it sees the header as a record, too?
AssertEquals('Number of records in test dataset', 4, TestDataset.RecordCount);
TestDataset.Close;
end;
procedure Ttestexport1.TestInputOurFormat;
// Test if input works with our format
// Mainly check if reading quotes is according to Delphi sdf specs and works.
// See test results from bug 19610 for evidence that the strings below should work.
// If this works, we can switch to this and be RFC 4180 compliant and Delphi compliant.
const
OutputFileName='input.csv';
//Value1 is the on disk format; it should translate to Expected1
Value1='"Delimiter,""and"";quote"';
Expected1='Delimiter,"and";quote';
Value2='"J""T"""';
Expected2='J"T"';
Value3='Just a long line';
Expected3='Just a long line';
//Note: Delphi can read this, see evidence in bug 19610 (the "quoted and space" value)
Value4='"Just a quoted long line"';
Expected4='Just a quoted long line';
// Delphi can read multiline, see evidence in bug 19610 (the multiline entry)
Value5='"quoted_multi'+#13+#10+'line"';
Expected5='quoted_multi'+#13+#10+'line';
Value6='"Delimiter,and;quoted"';
Expected6='Delimiter,and;quoted';
Value7='"A random""quote"';
Expected7='A random"quote';
var
FileStrings: TStringList;
begin
TestDataset.Close;
TestDataset.AllowMultiLine:=true;
if FileExists(OutputFilename) then DeleteFile(OutputFileName);
FileStrings:=TStringList.Create;
try
FileStrings.Add('ID,NAME,BIRTHDAY');
FileStrings.Add('1,'+Value1+',31-12-1976');
FileStrings.Add('2,'+Value2+',31-12-1976');
FileStrings.Add('3,'+Value3+',31-12-1976');
FileStrings.Add('4,'+Value4+',31-12-1976');
FileStrings.Add('5,'+Value5+',31-12-1976');
FileStrings.Add('6,'+Value6+',31-12-1976');
FileStrings.Add('7,'+Value7+',31-12-1976');
FileStrings.SaveToFile(OutputFileName);
finally
FileStrings.Free;
end;
// Load our dataset
TestDataset.FileName:=OutputFileName;
TestDataset.Open;
TestDataset.First;
AssertEquals(Expected1, TestDataSet.FieldByName('NAME').AsString);
TestDataSet.Next;
AssertEquals(Expected2, TestDataSet.FieldByName('NAME').AsString);
TestDataSet.Next;
AssertEquals(Expected3, TestDataSet.FieldByName('NAME').AsString);
TestDataSet.Next;
AssertEquals(Expected4, TestDataSet.FieldByName('NAME').AsString);
TestDataSet.Next;
AssertEquals(Expected5, TestDataSet.FieldByName('NAME').AsString);
TestDataSet.Next;
AssertEquals(Expected6, TestDataSet.FieldByName('NAME').AsString);
TestDataSet.Next;
AssertEquals(Expected7, TestDataSet.FieldByName('NAME').AsString);
end;
procedure Ttestexport1.TestDelimitedTextOutput;
// Test if input works with our format
// Mainly check if reading quotes is according to Delphi sdf specs and works.
// See test results from bug 19610 for evidence that the strings below should work.
// If this works, we can switch to this and be RFC 4180 compliant and Delphi compliant.
const
OutputFileName='delim.csv';
//Value1 is the on disk format; it should translate to Expected1
Value1='Delimiter,"and";quote';
Value2='J"T"';
Value3='Just a long line';
Value4='Just a quoted long line';
Value5='multi'+#13+#10+'line';
Value6='Delimiter,and;done';
Value7='Some "random" quotes';
var
FileStrings: TStringList;
OneRecord: TStringList;
begin
TestDataset.Close;
TestDataset.AllowMultiLine:=true;
if FileExists(OutputFileName) then DeleteFile(OutputFileName);
FileStrings:=TStringList.Create;
OneRecord:=TStringList.Create;
try
FileStrings.Add('Field1,Field2,Field3,Field4,Field5,Field6,Field7');
OneRecord.Add(Value1);
OneRecord.Add(Value2);
OneRecord.Add(Value3);
OneRecord.Add(Value4);
OneRecord.Add(Value5);
OneRecord.Add(Value6);
OneRecord.Add(Value7);
OneRecord.Delimiter:=',';
OneRecord.QuoteChar:='"';
OneRecord.StrictDelimiter:=true;
FileStrings.Add(OneRecord.DelimitedText);
FileStrings.SaveToFile(OutputFileName);
finally
FileStrings.Free;
OneRecord.Free;
end;
// Load our dataset
TestDataset.FileName:=OutputFileName;
TestDataset.Open;
TestDataset.First;
AssertEquals(Value1, TestDataSet.Fields[0].AsString);
AssertEquals(Value2, TestDataSet.Fields[1].AsString);
AssertEquals(Value3, TestDataSet.Fields[2].AsString);
AssertEquals(Value4, TestDataSet.Fields[3].AsString);
AssertEquals(Value5, TestDataSet.Fields[4].AsString);
AssertEquals(Value6, TestDataSet.Fields[5].AsString);
AssertEquals(Value7, TestDataSet.Fields[6].AsString);
end;
procedure Ttestexport1.Setup;
begin
TestDataset := TSDFDataset.Create(nil);
TestDataset.Delimiter := ',';
TestDataset.FileMustExist:=false;
TestDataset.FirstLineAsSchema := True;
TestDataset.Schema.Add('ID');
TestDataset.Schema.Add('NAME');
TestDataset.Schema.Add('BIRTHDAY');
end;
procedure Ttestexport1.Teardown;
begin
try
TestDataset.Close;
except
//swallow
end;
TestDataset.Free;
try
//DeleteFile(FCSVFileName);
except
//swallow
end;
end;
initialization
Registertest(Ttestexport1);
end.