{ ***************************************************************************** This file is part of LazUtils. See the file COPYING.modifiedLGPL.txt, included in this distribution, for details about the license. ***************************************************************************** } unit lcsvutils; {$mode objfpc}{$H+} {$modeswitch nestedprocvars} interface uses Classes, SysUtils; type TCSVRecordProc = procedure(Fields: TStringList) is nested; TCSVEncoding = (ceAuto, ceUTF8, ceUTF16, ceUTF16be); procedure LoadFromCSVStream(AStream:TStream; AProc: TCSVRecordProc; ADelimiter:Char=','; CSVEncoding:TCSVEncoding=ceAuto); procedure LoadFromCSVFile(aFilename: string; AProc: TCSVRecordProc; ADelimiter:Char=','; CSVEncoding:TCSVEncoding=ceAuto); implementation const BUFSIZE=1024; MAXGROW = 1 shl 29; type TSoc = set of char; procedure LoadFromCSVStream(AStream: TStream; AProc: TCSVRecordProc; ADelimiter:Char; CSVEncoding: TCSVEncoding); var Buffer, curWord: ansistring; BytesRead, BufLen, I, BufDelta: Longint; leadPtr, tailPtr, wordPtr, X:Pchar; Line: TStringList = nil; function SkipSet(const aSet: TSoc): boolean; begin while (leadPtrnil) and (Line.Count>0) then begin AProc(Line); Line.Clear; end; end; procedure StorePart; var Len, AddLen: SizeInt; begin Len := Length(curWord); AddLen := leadPtr-wordPtr; if AddLen > 0 then begin SetLength(curWord, Len+AddLen); Move(wordPtr^, curWord[Len+1], AddLen); end; if leadPtr=tailPtr then exit; if (le^=#13) and (leadPtr^=#10) then Inc(leadPtr); wordPtr := leadPtr; end; procedure ProcessQuote; var endQuote,endField: pchar; isDelimiter: boolean; begin // got a valid opening quote Inc(leadPtr); wordPtr := leadPtr; // look for valid ending quote while leadPtr=tailPtr) or (leadPtr^ in [ADelimiter, #10, #13]) then begin isDelimiter := (leadPtrwordPtr then begin StoreLine; wordPtr := leadPtr; end; end; procedure ConvertToUTF16; var n: Integer; u: pchar; ch: char; begin n := (tailPtr-leadPtr) div 2; u := leadPtr; while n>0 do begin ch := u^; u^ := (u+1)^; (u+1)^ := ch; inc(u, 2); dec(n); end; end; procedure ConvertEncoding; var W: WideString; begin if (CSVEncoding=ceAuto) and (BufLen>1) then begin if (leadPtr[0]=#$FF) and (leadPtr[1]=#$FE) then begin inc(leadPtr,2); // skip little endian UTF-16 BOM CSVEncoding := ceUTF16; end else if (leadPtr[0]=#$FE) and (leadPtr[1]=#$FF) then begin inc(leadPtr,2); // skip big endian UTF-16 BOM CSVEncoding := ceUTF16be; end else if (leadPtr[0]<>#$00) and (leadPtr[1]=#$00) then // quick guess CSVEncoding := ceUTF16 else if (leadPtr[0]=#$00) and (leadPtr[1]<>#$00) then // quick guess CSVEncoding := ceUTF16be end; if (CSVEncoding=ceAuto) and (BufLen>2) then begin if (leadPtr[0]=#$EF) and (leadPtr[1]=#$BB) and (leadPtr[2]=#$BF) then inc(leadPtr,3); // skip UTF-8 BOM end; if CSVEncoding=ceAuto then CSVEncoding := ceUTF8; case CSVEncoding of ceUTF16, ceUTF16be: begin if CSVEncoding=ceUTF16be then ConvertToUTF16; SetLength(W,(tailPtr-leadPtr) div 2); System.Move(leadPtr^,W[1],length(W)*2); Buffer := UTF8Encode(W); leadPtr := @Buffer[1]; tailPtr := leadPtr+length(Buffer); end; end; end; begin if AProc=nil then exit; // read buffer ala fpc tstrings Buffer:=''; BufLen:=0; I:=1; repeat BufDelta:=BUFSIZE*I; SetLength(Buffer,BufLen+BufDelta); BytesRead:=AStream.Read(Buffer[BufLen+1],BufDelta); inc(BufLen,BufDelta); If IBufDelta; BufLen := BufLen-BufDelta+BytesRead; SetLength(Buffer, BufLen); if BufLen=0 then exit; curWord := ''; leadPtr := @Buffer[1]; tailPtr := leadPtr + BufLen; ConvertEncoding; // Note: BufLen now invalid and leadPtr points into Buffer, not neccesarily at Buffer[1] try wordPtr := leadPtr; // wordPtr always points to starting word or part while leadPtrleadPtr then StoreWord; NotifyLine; finally Line.Free; SetLength(Buffer,0); end; end; procedure LoadFromCSVFile(aFilename: string; AProc: TCSVRecordProc; ADelimiter: Char; CSVEncoding: TCSVEncoding); var Stream: TFileStream; begin Stream := TFileStream.Create(aFilename, fmOpenRead); try LoadFromCSVStream(Stream, AProc, ADelimiter, CSVEncoding); finally Stream.Free; end; end; end.