From 28e7b354cc7d0828305577cb10e92e46d51274a1 Mon Sep 17 00:00:00 2001 From: michael Date: Sat, 24 Mar 2007 22:46:33 +0000 Subject: [PATCH] * Patch from Bram Kuijvenhoven to fix stream, and add MIME mode git-svn-id: trunk@6987 - --- packages/fcl-base/src/inc/base64.pp | 321 ++++++++++++++++++++-------- 1 file changed, 226 insertions(+), 95 deletions(-) diff --git a/packages/fcl-base/src/inc/base64.pp b/packages/fcl-base/src/inc/base64.pp index 7adb4ab2e1..c2ffe2436b 100644 --- a/packages/fcl-base/src/inc/base64.pp +++ b/packages/fcl-base/src/inc/base64.pp @@ -12,7 +12,12 @@ **********************************************************************} -// Encoding and decoding streams for base64 data as described in RFC2045 +// Encoding and decoding streams for base64 data as described in +// RFC2045 (Mode = bdmMIME) and +// RFC3548 (Mode = bdmStrict) + +// Addition of TBase64DecodingMode supporting both Strict and MIME mode is +// (C) 2007 Hexis BV, by Bram Kuijvenhoven (bkuijvenhoven@hexis.nl) {$MODE objfpc} {$H+} @@ -21,7 +26,7 @@ unit base64; interface -uses classes; +uses classes, sysutils; type @@ -40,51 +45,88 @@ type function Seek(Offset: Longint; Origin: Word): Longint; override; end; + (* The TBase64DecodingStream supports two modes: + * - 'strict mode': + * - follows RFC3548 + * - rejects any characters outside of base64 alphabet, + * - only accepts up to two '=' characters at the end and + * - requires the input to have a Size being a multiple of 4; otherwise raises an EBase64DecodingException + * - 'MIME mode': + * - follows RFC2045 + * - ignores any characters outside of base64 alphabet + * - takes any '=' as end of string + * - handles apparently truncated input streams gracefully + *) + TBase64DecodingMode = (bdmStrict, bdmMIME); + + { TBase64DecodingStream } TBase64DecodingStream = class(TStream) + private + FMode: TBase64DecodingMode; + procedure SetMode(const AValue: TBase64DecodingMode); + function GetSize: Int64; override; + function GetPosition: Int64; 
override; protected InputStream: TStream; - CurPos, InputStreamSize: LongInt; - Buf: array[0..2] of Byte; - BufPos: Integer; // Offset of byte which is to be read next - fEOF: Boolean; + CurPos, // 0-based (decoded) position of this stream (nr. of decoded & Read bytes since last reset) + DecodedSize: Int64; // length of decoded stream ((expected) decoded bytes since last Reset until Mode-dependent end of stream) + ReadBase64ByteCount: Int64; // number of valid base64 bytes read from input stream since last Reset + Buf: array[0..2] of Byte; // last 3 decoded bytes + BufPos: Integer; // offset in Buf of byte which is to be read next; if >2, next block must be read from InputStream & decoded + FEOF: Boolean; // if true, all decoded bytes have been read public constructor Create(AInputStream: TStream); + constructor Create(AInputStream: TStream; AMode: TBase64DecodingMode); procedure Reset; function Read(var Buffer; Count: Longint): Longint; override; function Write(const Buffer; Count: Longint): Longint; override; function Seek(Offset: Longint; Origin: Word): Longint; override; + property EOF: Boolean read fEOF; + property Mode: TBase64DecodingMode read FMode write SetMode; + end; + + EBase64DecodingException = class(Exception) end; - - implementation +uses + Math; + const + SStrictNonBase64Char = 'Non-valid Base64 Encoding character in input'; + SStrictInputTruncated = 'Input stream was truncated at non-4 byte boundary'; + SStrictMisplacedPadChar = 'Unexpected padding character ''='' before end of input stream'; EncodingTable: PChar = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'; - DecTable: array[Byte] of Byte = - (99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, // 0-15 - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, // 16-31 - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 62, 99, 99, 99, 63, // 32-47 - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 99, 99, 99, 00, 99, 99, // 48-63 - 99, 00, 01, 02, 03, 04, 05, 06, 07, 
08, 09, 10, 11, 12, 13, 14, // 64-79 - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 99, 99, 99, 99, 99, // 80-95 - 99, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96-111 - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 99, 99, 99, 99, 99, // 112-127 - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, - 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99); +const + NA = 85; // not in base64 alphabet at all; binary: 01010101 + PC = 255; // padding character 11111111 + DecTable: array[Byte] of Byte = + (NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, // 0-15 + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, // 16-31 + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 62, NA, NA, NA, 63, // 32-47 + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, NA, NA, NA, PC, NA, NA, // 48-63 + NA, 00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, 14, // 64-79 + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, NA, NA, NA, NA, NA, // 80-95 + NA, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96-111 + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, NA, NA, NA, NA, NA, // 112-127 + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, + NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA); + + Alphabet = ['a'..'z','A'..'Z','0'..'9','+','/','=']; // all 65 chars that are in the base64 encoding alphabet constructor TBase64EncodingStream.Create(AOutputStream: TStream); begin @@ -164,96 +206,211 @@ begin raise EStreamError.Create('Invalid stream operation'); end; +procedure TBase64DecodingStream.SetMode(const AValue: TBase64DecodingMode); +begin + if FMode = AValue then exit; + FMode := AValue; + DecodedSize := -1; // forget any calculations on this +end; +function TBase64DecodingStream.GetSize: Int64; // decoded size in bytes; scans (bdmMIME) or seeks (bdmStrict) InputStream, restores its position and caches the result in DecodedSize +var + endBytes: array[0..1] of Char; + ipos, isize: Int64; + scanBuf: array[0..1023] of Char; + count: LongInt; + i: Integer; + c: Char; +begin + // Note: this method only works on Seekable InputStreams (for bdmStrict we also get the Size property) + if DecodedSize<>-1 then Exit(DecodedSize); + ipos := InputStream.Position; // save position in input stream + case Mode of + bdmMIME: begin + // read until end of input stream or first occurrence of a '=' + Result := ReadBase64ByteCount; // keep number of valid base64 bytes since last Reset in Result + c := #0; // nothing scanned yet; keeps the test on c below defined when the input is empty + repeat + count := InputStream.Read(scanBuf, SizeOf(scanBuf)); + for i := 0 to count-1 do begin + c := scanBuf[i]; + if c in Alphabet-['='] then // base64 encoding characters except '=' + Inc(Result) + else if c = '=' then // end marker '=' + Break; + end; + until (count = 0) or (c = '='); // Break only leaves the for loop, so also stop scanning once '=' was found + // we are now either at the end of the stream, or encountered our first '=', stored in c + if c = '=' then begin // '=' found + if Result mod 4 <= 1 then // badly placed '=', disregard last block + Result := (Result div 4) * 3 + else // 4 byte block ended with '=' or '==' + Result := (Result div 4) * 3 + Result mod 4 - 1; + end else // end of stream + Result := (Result div 4) * 3; // number of valid 4 byte blocks times 3 + end; + bdmStrict:begin + // seek to end of input stream, read last two bytes and determine size + // from InputStream size and the 
number of trailing '=' bytes + // NB we don't raise an exception here if the input does not contain an integer multiple of 4 bytes + isize := InputStream.Size; // ipos still holds the position saved above + Result := ((ReadBase64ByteCount + (isize - ipos) + 3) div 4) * 3; + if isize >= 2 then begin // a shorter stream has no two bytes to seek back over + InputStream.Seek(-2, soFromEnd); + if (InputStream.Read(endBytes, 2) = 2) and (endBytes[1] = '=') then begin // last byte + Dec(Result); + if endBytes[0] = '=' then // second to last byte + Dec(Result); + end; + end; + end; + end; + InputStream.Position := ipos; // restore position in input stream + // store calculated DecodedSize + DecodedSize := Result; +end; +function TBase64DecodingStream.GetPosition: Int64; +begin + Result := CurPos; +end; constructor TBase64DecodingStream.Create(AInputStream: TStream); +begin + Create(AInputStream, bdmMIME); // MIME mode is default +end; + +constructor TBase64DecodingStream.Create(AInputStream: TStream; AMode: TBase64DecodingMode); begin inherited Create; InputStream := AInputStream; + Mode := AMode; Reset; end; procedure TBase64DecodingStream.Reset; begin - InputStreamSize := -1; - BufPos := 3; - fEOF := False; + ReadBase64ByteCount := 0; // number of bytes read from InputStream since last call to Reset + CurPos := 0; // position in decoded byte sequence since last Reset + DecodedSize := -1; // indicates unknown; will be set after first call to GetSize or when reaching end of stream + BufPos := 3; // signals we need to read & decode a new block of 4 bytes + FEOF := False; end; function TBase64DecodingStream.Read(var Buffer; Count: Longint): Longint; var - p: PChar; - b: Char; - ReadBuf: array[0..3] of Byte; - ToRead, OrgToRead, HaveRead, ReadOK, i, j: Integer; + p: PByte; + b: byte; + ReadBuf: array[0..3] of Byte; // buffer to store last read 4 input bytes + ToRead, OrgToRead, HaveRead, ReadOK, i: Integer; + + procedure DetectedEnd(ASize:Int64); + begin + DecodedSize := ASize; + // Correct Count if at end of base64 input + if CurPos + Count > DecodedSize then + Count := DecodedSize - 
CurPos; + end; + begin - if Count <= 0 then exit(0); - if InputStreamSize <> -1 then begin - if CurPos + Count > InputStreamSize then - Count := InputStreamSize - CurPos; + if Count <= 0 then exit(0); // nothing to read, quit + if DecodedSize <> -1 then begin // try using calculated size info if possible + if CurPos + Count > DecodedSize then + Count := DecodedSize - CurPos; if Count <= 0 then exit(0); end; Result := 0; - p := PChar(@Buffer); - while (Count > 0) and not fEOF do begin + p := @Buffer; + while true do begin + // get new 4-byte block if at end of Buf if BufPos > 2 then begin BufPos := 0; // Read the next 4 valid bytes - ToRead := 4; - ReadOK := 0; + ToRead := 4; // number of base64 bytes left to read into ReadBuf + ReadOK := 0; // number of base64 bytes already read into ReadBuf while ToRead > 0 do begin OrgToRead := ToRead; HaveRead := InputStream.Read(ReadBuf[ReadOK], ToRead); //WriteLn('ToRead = ', ToRead, ', HaveRead = ', HaveRead, ', ReadOK=', ReadOk); - if HaveRead > 0 then begin - i := ReadOk; - while i 0 then begin // if any new bytes; in ReadBuf[ReadOK .. ReadOK + HaveRead-1] + for i := ReadOK to ReadOK + HaveRead - 1 do begin + b := DecTable[ReadBuf[i]]; + if b <> NA then begin // valid base64 alphabet character ('=' inclusive) + ReadBuf[ReadOK] := b; Inc(ReadOK); Dec(ToRead); + end else if Mode=bdmStrict then begin // non-valid character + raise EBase64DecodingException.CreateFmt(SStrictNonBase64Char,[]); end; end; end; - if HaveRead <> OrgToRead then begin - //WriteLn('Ende? 
ReadOK=', ReadOK, ', count=', Count); + + if HaveRead <> OrgToRead then begin // less than 4 base64 bytes could be read; end of input stream + //WriteLn('End: ReadOK=', ReadOK, ', count=', Count); for i := ReadOK to 3 do - ReadBuf[i] := Ord('='); - fEOF := True; - if ReadOK < 2 then exit; // Not enough data available in input stream - break; + ReadBuf[i] := 0; // pad buffer with zeros so decoding of 4-bytes will be correct + if (Mode = bdmStrict) and (ReadOK > 0) then + raise EBase64DecodingException.CreateFmt(SStrictInputTruncated,[]); + Break; end; end; - // Check for fill bytes - if (Count >= 2) and (ReadBuf[3] = Ord('=')) then begin - //WriteLn('Endemarkierung!'); - fEOF := True; - if ReadBuf[2] = Ord('=') then - Count := 1 - else - Count := 2; + Inc(ReadBase64ByteCount, ReadOK); + + // Check for pad characters + case Mode of + bdmStrict:begin + if ReadOK = 0 then // end of input stream was reached at 4-byte boundary + DetectedEnd(CurPos) + else if (ReadBuf[0] = PC) or (ReadBuf[1] = PC) then + raise EBase64DecodingException.CreateFmt(SStrictMisplacedPadChar,[]) // =BBB or B=BB + else if (ReadBuf[2] = PC) then begin + if (ReadBuf[3] <> PC) or (InputStream.Position < InputStream.Size) then + raise EBase64DecodingException.CreateFmt(SStrictMisplacedPadChar,[]); // BB=B or BB==, but not at end of input stream + DetectedEnd(CurPos + 1) // only one byte left to read; BB==, at end of input stream + end else if (ReadBuf[3] = PC) then begin + if (InputStream.Position < InputStream.Size) then + raise EBase64DecodingException.CreateFmt(SStrictMisplacedPadChar,[]); // BBB=, but not at end of input stream + DetectedEnd(CurPos + 2); // only two bytes left to read; BBB=, at end of input stream + end; + end; + bdmMIME:begin + if ReadOK = 0 then // end of input stream was reached at 4-byte boundary + DetectedEnd(CurPos) + else if (ReadBuf[0] = PC) or (ReadBuf[1] = PC) then + DetectedEnd(CurPos) // =BBB or B=BB: end here + else if (ReadBuf[2] = PC) then begin + 
DetectedEnd(CurPos + 1) // only one byte left to read; BB=B or BB== + end else if (ReadBuf[3] = PC) then begin + DetectedEnd(CurPos + 2); // only two bytes left to read; BBB= + end; + end; end; - + // Decode the 4 bytes in the buffer to 3 undecoded bytes - Buf[0] := ReadBuf[0] shl 2 or ReadBuf[1] shr 4; + Buf[0] := ReadBuf[0] shl 2 or ReadBuf[1] shr 4; Buf[1] := (ReadBuf[1] and 15) shl 4 or ReadBuf[2] shr 2; - Buf[2] := (ReadBuf[2] and 3) shl 6 or ReadBuf[3]; + Buf[2] := (ReadBuf[2] and 3) shl 6 or ReadBuf[3]; + end; + + if Count <= 0 then begin // nothing (more) to deliver in this call + Break; end; - p[0] := Chr(Buf[BufPos]); + // write one byte to Buffer + p^ := Buf[BufPos]; Inc(p); Inc(BufPos); Inc(CurPos); Dec(Count); Inc(Result); end; + + // check for EOF + if (DecodedSize <> -1) and (CurPos >= DecodedSize) then begin // decoded size is known and fully consumed + FEOF := true; + end; end; function TBase64DecodingStream.Write(const Buffer; Count: Longint): Longint; @@ -262,35 +419,9 @@ begin end; function TBase64DecodingStream.Seek(Offset: Longint; Origin: Word): Longint; -var - ipos: LongInt; - endbytes: array[0..1] of Char; begin - {This will work only if the input stream supports seeking / Size. 
If not, the - input stream will raise an exception; we don't handle them here but pass them - to the caller.} - if InputStreamSize = -1 then begin - ipos := InputStream.Position; - InputStreamSize := ((InputStream.Size - ipos + 3) div 4) * 3; - InputStream.Seek(-2, soFromEnd); - InputStream.Read(endbytes, 2); - InputStream.Position := ipos; - if endbytes[1] = '=' then begin - Dec(InputStreamSize); - if endbytes[0] = '=' then - Dec(InputStreamSize); - end; - end; - - // This stream only supports the Seek modes needed for determining its size - if (Origin = soFromCurrent) and (Offset = 0) then - Result := CurPos - else if (Origin = soFromEnd) and (Offset = 0) then - Result := InputStreamSize - else if (Origin = soFromBeginning) and (Offset = CurPos) then - Result := CurPos - else - raise EStreamError.Create('Invalid stream operation'); + // TODO: implement Seeking in TBase64DecodingStream + raise EStreamError.Create('Invalid stream operation'); end;