* Patch from wp to add BOM detection to CSV reader (bug ID 30897)

git-svn-id: trunk@34871 -
This commit is contained in:
michael 2016-11-11 10:03:14 +00:00
parent 66b07b045b
commit 5af24e94ae
5 changed files with 89 additions and 0 deletions

2
.gitattributes vendored
View File

@ -1933,6 +1933,8 @@ packages/fcl-base/examples/cachetest.pp svneol=native#text/plain
packages/fcl-base/examples/cfgtest.pp svneol=native#text/plain
packages/fcl-base/examples/contit.pp svneol=native#text/plain
packages/fcl-base/examples/crittest.pp svneol=native#text/plain
packages/fcl-base/examples/csvbom.pp svneol=native#text/plain
packages/fcl-base/examples/databom.txt svneol=native#text/plain
packages/fcl-base/examples/dbugsrv.pp svneol=native#text/plain
packages/fcl-base/examples/debugtest.pp svneol=native#text/plain
packages/fcl-base/examples/decodeascii85.pp svneol=native#text/plain

View File

@ -75,3 +75,4 @@ daemon.pp Test for daemonapp (MVC)
testtimer.pp Test for TFPTimer (MVC)
testini.pp Test/Demo for inifiles, ReadSectionValues.
contit.pp Test/Demo for iterators in contnr.pp
csvbom.pp Test/Demo for BOM detection in CSV document. (needs databom.txt)

View File

@ -0,0 +1,53 @@
program csvbom;

{ Test/demo for byte-order-mark detection in TCSVParser.

  Reads the comma-separated file 'databom.txt' (date, number, text per row),
  stores every row in a dynamic array and prints the rows to the console. }

{$APPTYPE Console}
{$mode objfpc}{$H+}

uses
  sysutils, classes, dateutils, csvreadwrite;

type
  // One parsed line of the input file.
  TDataRec = record
    FDate: TDate;
    FNumber: Integer;
    FText: String;
  end;

const
  FILENAME = 'databom.txt';

var
  csv: TCSVParser;
  src: TFileStream;
  rows: array of TDataRec;
  cell: String;
  last: Integer;
  idx: Integer;

begin
  csv := TCSVParser.Create;
  try
    csv.Delimiter := ',';
    // Ask the parser to look for (and skip) a byte order mark.
    // NOTE(review): set before SetSource, which presumably resets the parser.
    csv.DetectBOM := true;
    src := TFileStream.Create(FILENAME, fmOpenRead);
    csv.SetSource(src);

    SetLength(rows, 0);
    while csv.ParseNextCell do
    begin
      // Grow the array as soon as the parser moves on to a new row.
      if csv.CurrentRow > High(rows) then
        SetLength(rows, csv.CurrentRow + 1);
      last := High(rows);
      cell := csv.CurrentCellText;

      // Columns beyond the third are ignored.
      if csv.CurrentCol = 0 then
        rows[last].FDate := ScanDateTime('yyyy-mm-dd', cell)
      else if csv.CurrentCol = 1 then
        rows[last].FNumber := StrToInt(cell)
      else if csv.CurrentCol = 2 then
        rows[last].FText := cell;
    end;

    for idx := 0 to High(rows) do
      with rows[idx] do
        WriteLn(DateToStr(FDate), '; ', FNumber, '; ', FText);

    Writeln('Press enter to quit program');
    Readln;
  finally
    // The stream is released here by the program, before the parser.
    src.Free;
    csv.Free;
  end;
end.

View File

@ -0,0 +1,2 @@
2016-01-01,100,ABC
2016-01-02,110,DEF

View File

@ -92,12 +92,16 @@ Type
{ TCSVParser }
// Byte order mark recognised at the start of a source stream:
//   bomNone    - no mark found
//   bomUTF8    - bytes EF BB BF
//   bomUTF16LE - bytes FF FE
//   bomUTF16BE - bytes FE FF
TCSVByteOrderMark = (bomNone, bomUTF8, bomUTF16LE, bomUTF16BE);
TCSVParser = class(TCSVHandler)
private
FFreeStream: Boolean;
// fields
FSourceStream: TStream;
FStrStreamWrapper: TStringStream;
FBOM: TCSVByteOrderMark;
FDetectBOM: Boolean;
// parser state
EndOfFile: Boolean;
EndOfLine: Boolean;
@ -140,6 +144,10 @@ Type
property MaxColCount: Integer read FMaxColCount;
// Does the parser own the stream ? If true, a previous stream is freed when set or when parser is destroyed.
Property FreeStream : Boolean Read FFreeStream Write FFreeStream;
// Byte order mark found at the start of the source (only detected when DetectBOM is True)
property BOM: TCSVByteOrderMark read FBOM;
// If True, the parser checks the start of the source for a byte order mark when it is reset and skips it; the BOM property then reports which mark was found.
property DetectBOM: Boolean read FDetectBOM write FDetectBOM default false;
end;
// Sequential output to CSV stream
@ -443,9 +451,32 @@ begin
end;
procedure TCSVParser.ResetParser;
{ Rewinds the source stream and puts the parser back into its initial state.

  When DetectBOM is set, the first bytes of the stream are compared with the
  UTF-8 (EF BB BF), UTF-16 BE (FE FF) and UTF-16 LE (FF FE) byte order marks.
  The result is stored in FBOM and the stream is positioned right behind the
  mark, so that the mark is not parsed as cell data. }
var
  b: packed array[0..2] of byte;
  n: Integer;
  count: Integer;
begin
  ClearOutput;
  FSourceStream.Seek(0, soFromBeginning);
  // Do not report a mark that belonged to a previously parsed source.
  FBOM := bomNone;
  if FDetectBOM then
  begin
    // Use Read instead of ReadBuffer: a source shorter than three bytes
    // (e.g. an empty file) must not raise EReadError. Only the bytes that
    // were actually read take part in the comparison.
    FillChar(b, SizeOf(b), 0);
    count := FSourceStream.Read(b[0], SizeOf(b));
    n := 0;
    if (count >= 3) and (b[0] = $EF) and (b[1] = $BB) and (b[2] = $BF) then begin
      FBOM := bomUTF8;
      n := 3;
    end else
    if (count >= 2) and (b[0] = $FE) and (b[1] = $FF) then begin
      FBOM := bomUTF16BE;
      n := 2;
    end else
    if (count >= 2) and (b[0] = $FF) and (b[1] = $FE) then begin
      FBOM := bomUTF16LE;
      n := 2;
    end;
    // Continue behind the mark, or at the very beginning if there is none.
    FSourceStream.Seek(n, soFromBeginning);
  end;
  EndOfFile := False;
  // Prime the parser with the first character of the payload.
  NextChar;
end;