lazarus-ccr/components/systools/source/general/run/stregex.pas
wp_xxyyzz 543cdf06d9 systools: Rearrange units and packages
git-svn-id: https://svn.code.sf.net/p/lazarus-ccr/svn@6159 8e941d3f-bd1b-0410-a28a-d453659cc2b4
2018-01-30 16:17:37 +00:00

2501 lines
72 KiB
ObjectPascal

// TODO-UNICODE
(* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Original Code is TurboPower SysTools
*
* The Initial Developer of the Original Code is
* TurboPower Software
*
* Portions created by the Initial Developer are Copyright (C) 1996-2002
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
*
* ***** END LICENSE BLOCK ***** *)
{*********************************************************}
{* SysTools: StRegEx.pas 4.04 *}
{*********************************************************}
{* SysTools: SysTools Regular Expression Engine *}
{*********************************************************}
{$IFDEF FPC}
{$mode DELPHI}
{$ENDIF}
{$I StDefine.inc}
unit StRegEx;
interface
uses
{$IFNDEF FPC}
Windows, Messages,
{$ENDIF}
SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
StConst,
StBase,
StStrms;
const
StWordDelimString : string[31] = #9#32'!"&()*+,-./:;<=>?@[\]^`{|}~';
StHexDigitString : string[19] = '0123456789ABCDEF';
type
TMatchPosition = packed record
StartPos : Cardinal;
EndPos : Cardinal;
Length : Cardinal;
LineNum : Cardinal;
end;
TStOutputOption = (ooUnselected, ooModified, ooCountOnly);
TStOutputOptions = set of TStOutputOption;
TStTokens = (tknNil, tknLitChar, tknCharClass, tknNegCharClass,
tknClosure, tknMaybeOne, tknAnyChar, tknBegOfLine,
tknEndOfLine, tknGroup, tknBegTag, tknEndTag, tknDitto);
PStPatRecord = ^TStPatRecord;
TStPatRecord = packed record
StrPtr : ^ShortString;
NestedPattern : PStPatRecord;
NextPattern : PStPatRecord;
Token : TStTokens;
OneChar : AnsiChar;
NextOK : Boolean;
end;
TStTagLevel = -1..9;
TStFlag = array[0..1023] of TStTagLevel;
TStOnRegExProgEvent = procedure(Sender : TObject; Percent : Word) of object;
TStOnMatchEvent = procedure(Sender : TObject;
REPosition : TMatchPosition) of object;
TStNodeHeap = class
private
FFreeList : PStPatRecord;
protected
procedure nhClearHeap;
function nhDeepCloneNode(aNode : PStPatRecord) : PStPatRecord;
public
constructor Create;
destructor Destroy; override;
function AllocNode : PStPatRecord;
procedure FreeNode(aNode : PStPatRecord);
function CloneNode(aNode : PStPatRecord) : PStPatRecord;
end;
TStStreamRegEx = class(TObject)
protected {private}
{ Private declarations }
FAvoid : Boolean;
FIgnoreCase : Boolean;
FInTextStream : TStAnsiTextStream;
FInFileSize : Cardinal;
FInputStream : TStream;
FInLineBuf : PAnsiChar;
FInLineCount : Cardinal;
FInLineNum : Cardinal;
FInLineTermChar : AnsiChar;
FInLineTerminator : TStLineTerminator;
FInLineLength : integer;
FLineNumbers : Boolean;
FLinesPerSec : Cardinal;
FMatchCount : Cardinal;
FMatchPatSL : TStringList;
FMatchPatStr : PAnsiChar;
FMatchPatPtr : PStPatRecord;
FMaxLineLength : Cardinal;
FNodes : TStNodeHeap;
FOnMatch : TStOnMatchEvent;
FOutLineLength : integer;
FOutLineTermChar : AnsiChar;
FOutLineTerminator: TStLineTerminator;
FReplaceCount : Cardinal;
FReplacePatSL : TStringList;
FReplacePatStr : PAnsiChar;
FReplacePatPtr : PStPatRecord;
FOnProgress : TStOnRegExProgEvent;
FOutputStream : TStream;
FOutTextStream : TStAnsiTextStream;
FOutLineBuf : PAnsiChar;
FOutputOptions : TStOutputOptions;
FSelAvoidPatSL : TStringList;
FSelAvoidPatStr : PAnsiChar;
FSelAvoidPatPtr : PStPatRecord;
FSelectCount : Cardinal;
protected
{ Protected declarations }
procedure AddTokenToPattern(var PatRec : PStPatRecord;
LastPatRec : PStPatRecord;
Token : TStTokens;
S : ShortString);
procedure AddTokenToReplace(var PatRec : PStPatRecord;
LastPatRec : PStPatRecord;
Token : TStTokens;
const S : ShortString); {!!.02}
function AppendS(Dest, S1, S2 : PAnsiChar; Count : Cardinal) : PAnsiChar;
function BuildAllPatterns : boolean;
function BuildPatternStr(var PStr : PAnsiChar;
var Len : Integer;
SL : TStringList) : Boolean;
function ConvertMaskToRegEx(const S : AnsiString) : AnsiString;
procedure DisposeItems(var Data : PStPatRecord);
procedure InsertLineNumber(Dest : PAnsiChar;
const S : PAnsiChar; LineNum : Integer);
function GetPattern(var Pattern : PAnsiChar;
var PatList : PStPatRecord) : Boolean;
function GetReplace(Pattern : PAnsiChar;
var PatList : PStPatRecord) : Boolean;
function MakePattern(var Pattern : PAnsiChar;
Start : Integer;
Delim : AnsiChar;
var TagOn : Boolean;
var PatList : PStPatRecord) : Integer;
function MakeReplacePattern(Pattern : PAnsiChar;
Start : Integer;
Delim : AnsiChar;
var PatList : PStPatRecord) : Integer;
function FindMatch(var Buf : PAnsiChar;
PatPtr : PStPatRecord;
var REPosition : TMatchPosition) : Boolean;
function MatchOnePatternElement(var Buf : PAnsiChar;
var I : Integer;
var TagOn : Boolean;
var TagNum : Integer;
PatPtr : PStPatRecord) : Boolean;
function ProcessLine(Buf : PAnsiChar;
Len : integer;
LineNum : integer;
CheckOnly : Boolean;
var REPosition: TMatchPosition) : Boolean;
function SearchMatchPattern(var Buf : PAnsiChar;
OffSet : Integer;
var TagOn : Boolean;
var TagNum : Integer;
PatPtr : PStPatRecord) : Integer;
procedure SetMatchPatSL(Value : TStringList);
procedure SetOptions(Value : TStOutputOptions);
procedure SetReplacePatSL(Value : TStringList);
procedure SetSelAvoidPatSL(Value : TStringList);
procedure SubLine(Buf : PAnsiChar);
function SubLineFindTag(Buf : PAnsiChar;
I : Integer;
IEnd : Integer;
TagNum : Integer;
var Flags : TStFlag;
var IStart : Integer;
var IStop : Integer) : Boolean;
function SubLineMatchOne(Buf : PAnsiChar;
var Flags : TStFlag;
var TagOn : Boolean;
var I : Integer;
var TagNum : Integer;
PatPtr : PStPatRecord) : Boolean;
function SubLineMatchPattern(Buf : PAnsiChar;
var Flags : TStFlag;
var TagOn : Boolean;
var TagNum : Integer;
OffSet : Integer;
PatPtr : PStPatRecord) : Integer;
procedure SubLineWrite(Buf : PAnsiChar;
S : PAnsiChar;
RepRec : PStPatRecord;
I,
IEnd : Integer;
var Flags : TStFlag);
public
{ Public declarations }
property InputStream : TStream
read FInputStream
write FInputStream;
property OutputStream : TStream
read FOutputStream
write FOutputStream;
constructor Create;
destructor Destroy; override;
function CheckString(const S : AnsiString;
var REPosition : TMatchPosition) : Boolean;
function FileMasksToRegEx(Masks : AnsiString) : Boolean;
function Execute : Boolean;
function ReplaceString(var S : AnsiString;
var REPosition : TMatchPosition) : Boolean;
property Avoid : Boolean
read FAvoid
write FAvoid;
property IgnoreCase : Boolean
read FIgnoreCase
write FIgnoreCase;
property InFixedLineLength : integer
read FInLineLength
write FInLineLength;
property InLineTermChar : AnsiChar
read FInLineTermChar
write FInLineTermChar;
property InLineTerminator : TStLineTerminator
read FInLineTerminator
write FInLineTerminator;
property LineCount : Cardinal
read FInLineCount;
property LineNumbers : Boolean
read FLineNumbers
write FLineNumbers;
property LinesMatched : Cardinal
read FMatchCount;
property LinesPerSecond : Cardinal
read FLinesPerSec;
property LinesReplaced : Cardinal
read FReplaceCount;
property LinesSelected : Cardinal
read FSelectCount;
property MatchPattern : TStringList
read FMatchPatSL
write SetMatchPatSL;
property MaxLineLength : Cardinal
read FMaxLineLength
write FMaxLineLength;
property OnMatch : TStOnMatchEvent
read FOnMatch
write FOnMatch;
property OnProgress : TStOnRegExProgEvent
read FOnProgress
write FOnProgress;
property OutFixedLineLength : integer
read FOutLineLength
write FOutLineLength;
property OutLineTermChar : AnsiChar
read FOutLineTermChar
write FOutLineTermChar;
property OutLineTerminator : TStLineTerminator
read FOutLineTerminator
write FOutLineTerminator;
property OutputOptions : TStOutputOptions
read FOutputOptions
write SetOptions;
property ReplacePattern : TStringList
read FReplacePatSL
write SetReplacePatSL;
property SelAvoidPattern : TStringList
read FSelAvoidPatSL
write SetSelAvoidPatSL;
end;
TStRegEx = class(TStComponent)
protected {private}
FAvoid : Boolean;
FIgnoreCase : Boolean;
FInFileSize : Cardinal;
FInFileStream : TFileStream;
FInLineCount : Cardinal;
FInLineTermChar : AnsiChar;
FInLineTerminator : TStLineTerminator;
FInFixedLineLength: integer;
FInputFile : AnsiString;
FLineNumbers : Boolean;
FLinesPerSec : Cardinal;
FMatchCount : Cardinal;
FMatchPatSL : TStringList;
FMatchPatStr : PAnsiChar;
FMatchPatPtr : PStPatRecord;
FMaxLineLength : Cardinal;
FNodes : TStNodeHeap;
FOnProgress : TStOnRegExProgEvent;
FOnMatch : TStOnMatchEvent;
FOutFileStream : TFileStream;
FOutTextStream : TStAnsiTextStream;
FOutLineBuf : PAnsiChar;
FOutFixedLineLength : integer;
FOutLineTermChar : AnsiChar;
FOutLineTerminator: TStLineTerminator;
FOutputFile : AnsiString;
FOutputOptions : TStOutputOptions;
FReplaceCount : Cardinal;
FReplacePatSL : TStringList;
FReplacePatStr : PAnsiChar;
FReplacePatPtr : PStPatRecord;
FSelAvoidPatSL : TStringList;
FSelAvoidPatStr : PAnsiChar;
FSelAvoidPatPtr : PStPatRecord;
FSelectCount : Cardinal;
FStream : TStStreamRegEx;
protected
procedure SetMatchPatSL(Value : TStringList);
procedure SetOptions(Value : TStOutputOptions);
procedure SetReplacePatSL(Value : TStringList);
procedure SetSelAvoidPatSL(Value : TStringList);
procedure SetStreamProperties;
public
constructor Create(AOwner : TComponent); override;
destructor Destroy; override;
function CheckString(const S : AnsiString;
var REPosition : TMatchPosition) : Boolean;
function FileMasksToRegEx(const Masks : AnsiString) : Boolean; {!!.02}
function Execute : Boolean;
function ReplaceString(var S : AnsiString;
var REPosition : TMatchPosition) : Boolean;
property LineCount : Cardinal
read FInLineCount;
property LinesMatched : Cardinal
read FMatchCount;
property LinesPerSecond : Cardinal
read FLinesPerSec;
property LinesReplaced : Cardinal
read FReplaceCount;
property LinesSelected : Cardinal
read FSelectCount;
property MaxLineLength : Cardinal
read FMaxLineLength
write FMaxLineLength;
published
property Avoid : Boolean
read FAvoid
write FAvoid default False;
property IgnoreCase : Boolean
read FIgnoreCase
write FIgnoreCase default False;
property InFixedLineLength : Integer
read FInFixedLineLength
write FInFixedLineLength default 80;
property InLineTermChar : AnsiChar
read FInLineTermChar
write FInLineTermChar default #10;
property InLineTerminator : TStLineTerminator
read FInLineTerminator
write FInLineTerminator default ltCRLF;
property InputFile : AnsiString
read FInputFile
write FInputFile;
property LineNumbers : Boolean
read FLineNumbers
write FLineNumbers default False;
property MatchPattern : TStringList
read FMatchPatSL
write SetMatchPatSL;
property OnMatch : TStOnMatchEvent
read FOnMatch
write FOnMatch;
property OnProgress : TStOnRegExProgEvent
read FOnProgress
write FOnProgress;
property OutFixedLineLength : Integer
read FOutFixedLineLength
write FOutFixedLineLength default 80;
property OutLineTermChar : AnsiChar
read FOutLineTermChar
write FOutLineTermChar default #10;
property OutLineTerminator : TStLineTerminator
read FOutLineTerminator
write FOutLineTerminator default ltCRLF;
property OutputFile : AnsiString
read FOutputFile
write FOutputFile;
property OutputOptions : TStOutputOptions
read FOutputOptions
write SetOptions;
property ReplacePattern : TStringList
read FReplacePatSL
write SetReplacePatSL;
property SelAvoidPattern : TStringList
read FSelAvoidPatSL
write SetSelAvoidPatSL;
end;
implementation
uses
StStrL,
StStrS;
const
Null = #0;
EndStr = #0;
NewLine = #13#10;
Dash = '-';
Esc = '\';
Any = '.'; {was '?'}
Closure = '*';
ClosurePlus = '+';
MaybeOne = '?'; {was '!'}
Bol = '^';
Eol = '$';
Ccl = '[';
Negate = '^';
CclEnd = ']';
BTag = '{';
ETag = '}';
BGroup = '(';
EGroup = ')';
Alter = '|'; {was #}
Ditto = '&';
lSpace = 's';
lNewline = 'n';
lTab = 't';
lBackSpace = 'b';
lReturn = 'r';
lFeed = 'l';
lHex = 'h';
lWordDelim = 'w';
lNil = 'z';
function CleanUpCase(S : String) : String;
{-convert string to uppercase and remove duplicates}
var
I : Integer;
K : Cardinal;
C : Char;
begin
Result := '';
S := AnsiUpperCase(S);
for I := 1 to Length(S) do begin
C := S[I];
if not StrChPosL(Result, C, K) then
Result := Result + C;
end;
end;
procedure AppendChar(C : AnsiChar; var S : ShortString);
{-append a character C onto string S}
begin
S := S + C;
end;
function IsAlphaNum(C : AnsiChar) : Boolean;
begin
{$IFDEF FPC}
Result := C in ['a'..'z', 'A'..'Z', '0'..'9']; // wp: ???
{$ELSE}
Result := IsCharAlphaNumericA(C); //Ansi!
{$ENDIF}
end;
procedure ExpandDash(Delim : AnsiChar;
var Pattern : PAnsiChar ;
var I : Integer;
var S : ShortString);
{-expand the innards of the character class, including dashes}
{stop when endc is found}
{return a string S with the expansion}
var
C,
CLeft,
CNext : AnsiChar;
K : Integer;
begin
while (Pattern[I] <> Delim) and (Pattern[I] <> EndStr) do begin
C := Pattern[I];
if (C = Esc) then begin
if (Pattern[Succ(I)] <> EndStr) then begin
I := Succ(I);
C := Pattern[I];
case C of
lSpace : AppendChar(#32, S);
lTab : AppendChar(#9, S);
lBackSpace : AppendChar(#8, S);
lReturn : AppendChar(#13, S);
lFeed : AppendChar(#10, S);
else
AppendChar(C, S);
end;
end else
{escape must be the character}
AppendChar(Esc, S);
end else if (C <> Dash) then
{literal character}
AppendChar(C, S)
else if ((Length(S) = 0) or (Pattern[Succ(I)] = Delim)) then
{literal dash at begin or end of class}
AppendChar(Dash, S)
else begin
{dash in middle of class}
CLeft := Pattern[Pred(I)];
CNext := Pattern[Succ(I)];
if IsAlphaNum(CLeft) and IsAlphaNum(CNext) and (CLeft <= CNext) then begin
{legal dash to be expanded}
for K := (Ord(CLeft)+1) to Ord(CNext) do
AppendChar(AnsiChar(K), S);
{move over the end of dash character}
I := Succ(I);
end else
{dash must be a literal}
AppendChar(Dash, S);
end;
I := Succ(I);
end;
end;
function GetCharacterClass(var Pattern : PAnsiChar;
var I : Integer;
var S : ShortString;
var AToken : TStTokens) : Boolean;
{-expand a character class starting at position I of Pattern into a string S}
{return a token type (tknCharClass or tknNegCharClass)}
{return I pointing at the end of class character}
{return true if successful}
begin
{skip over start of class character}
I := Succ(I);
if (Pattern[I] = Negate) then begin
AToken := tknNegCharClass;
I := Succ(I);
end else
AToken := tknCharClass;
{expand the character class}
S := '';
ExpandDash(CclEnd, Pattern, I, S);
Result := (Pattern[I] = CclEnd);
end;
{******************************************************************************}
{ TStNodeHeap Implementation }
{******************************************************************************}
constructor TStNodeHeap.Create;
begin
inherited Create;
New(FFreeList);
FillChar(FFreeList^, sizeof(TStPatRecord), 0);
end;
destructor TStNodeHeap.Destroy;
begin
nhClearHeap;
Dispose(FFreeList);
inherited Destroy;
end;
function TStNodeHeap.AllocNode : PStPatRecord;
begin
if (FFreeList^.NextPattern = nil) then
New(Result)
else begin
Result := FFreeList^.NextPattern;
FFreeList^.NextPattern := Result^.NextPattern;
end;
FillChar(Result^, sizeof(TStPatRecord), 0);
end;
function TStNodeHeap.CloneNode(aNode : PStPatRecord) : PStPatRecord;
begin
{allocate a new node}
Result := AllocNode;
{copy fields}
Result^.Token := aNode^.Token;
Result^.OneChar := aNode^.OneChar;
Result^.NextOK := aNode^.NextOK;
if (aNode^.StrPtr <> nil) then begin
New(Result^.StrPtr);
Result^.StrPtr^ := aNode^.StrPtr^;
end else
Result^.StrPtr := nil;
{deep clone the nested node}
if (aNode^.NestedPattern <> nil) then
Result^.NestedPattern := nhDeepCloneNode(aNode^.NestedPattern);
end;
procedure TStNodeHeap.FreeNode(aNode : PStPatRecord);
begin
if (aNode <> nil) then begin
aNode^.NextPattern := FFreeList^.NextPattern;
FFreeList^.NextPattern := aNode;
end;
end;
procedure TStNodeHeap.nhClearHeap;
var
Walker,
Temp : PStPatRecord;
begin
Walker := FFreeList^.NextPattern;
FFreeList^.NextPattern := nil;
while (Walker <> nil) do begin
Temp := Walker;
Walker := Walker^.NextPattern;
Dispose(Temp);
end;
end;
function TStNodeHeap.nhDeepCloneNode(aNode : PStPatRecord) : PStPatRecord;
begin
{allocate a new node}
Result := AllocNode;
{copy fields}
Result^.Token := aNode^.Token;
Result^.OneChar := aNode^.OneChar;
Result^.NextOK := aNode^.NextOK;
if (aNode^.StrPtr <> nil) then begin
New(Result^.StrPtr);
Result^.StrPtr^ := aNode^.StrPtr^;
end else
Result^.StrPtr := nil;
{recursively deepclone the next and nested nodes}
if (aNode^.NextPattern <> nil) then
Result^.NextPattern := nhDeepCloneNode(aNode^.NextPattern);
if (aNode^.NestedPattern <> nil) then
Result^.NestedPattern := nhDeepCloneNode(aNode^.NestedPattern);
end;
{******************************************************************************}
{ TStStreamRegEx Implementation }
{******************************************************************************}
constructor TStStreamRegEx.Create;
begin
inherited Create;
FAvoid := False;
FIgnoreCase := False;
FLineNumbers := False;
FOutputOptions := [];
FInLineTerminator := ltCRLF;
FInLineTermChar := #10;
FInLineLength := 80;
FOutLineTerminator := ltCRLF;
FOutLineTermChar := #10;
FOutLineLength := 80;
FMaxLineLength := 1024;
FMatchPatSL := TStringList.Create;
FMatchPatPtr := nil;
FSelAvoidPatSL := TStringList.Create;
FSelAvoidPatPtr:= nil;
FReplacePatSL := TStringList.Create;
FReplacePatPtr := nil;
FInputStream := nil;
FInTextStream := nil;
FOutputStream := nil;
FOutTextStream := nil;
FNodes := TStNodeHeap.Create;
end;
procedure TStStreamRegEx.DisposeItems(var Data : PStPatRecord);
var
Walker, Temp : PStPatRecord;
begin
if (Data <> nil) then begin
Walker := Data;
while (Walker <> nil) do begin
Temp := Walker;
if (Assigned(Walker^.StrPtr)) then
Dispose(Walker^.StrPtr);
if (Assigned(Walker^.NestedPattern)) then
DisposeItems(Walker^.NestedPattern);
Walker := Walker^.NextPattern;
FNodes.FreeNode(Temp);
end;
Data := nil;
end;
end;
destructor TStStreamRegEx.Destroy;
begin
DisposeItems(FMatchPatPtr);
DisposeItems(FSelAvoidPatPtr);
DisposeItems(FReplacePatPtr);
FNodes.Free;
FNodes := nil;
if (Assigned(FMatchPatStr)) then begin
FreeMem(FMatchPatStr, StrLen(FMatchPatStr) + 1);
FMatchPatStr := nil;
end;
if (Assigned(FReplacePatStr)) then
FreeMem(FReplacePatStr, StrLen(FReplacePatStr) + 1);
FReplacePatStr := nil;
if (Assigned(FSelAvoidPatStr)) then
FreeMem(FSelAvoidPatStr, StrLen(FSelAvoidPatStr) + 1);
FSelAvoidPatStr := nil;
FMatchPatSL.Free;
FMatchPatSL := nil;
FReplacePatSL.Free;
FReplacePatSL := nil;
FSelAvoidPatSL.Free;
FSelAvoidPatSL := nil;
inherited Destroy;
end;
function TStStreamRegEx.AppendS(Dest, S1, S2 : PAnsiChar;
Count : Cardinal) : PAnsiChar;
var
Remaining : Cardinal;
I : Cardinal;
begin
Result := Dest;
I := StrLen(S1);
Remaining := MaxLineLength - I;
if (Remaining < StrLen(S2)) then
Count := Remaining;
Move(S1[0], Dest[0], I);
Move(S2[0], Dest[I], Count);
I := I + Count;
Dest[I] := #0;
end;
function TStStreamRegEx.BuildAllPatterns : Boolean;
var
Len : Integer;
begin
if (FMatchPatSL.Count > 0) then begin
DisposeItems(FMatchPatPtr);
if (BuildPatternStr(FMatchPatStr, Len, FMatchPatSL)) then begin
if (Len > 0) then
GetPattern(FMatchPatStr, FMatchPatPtr)
else
DisposeItems(FMatchPatPtr);
Result := True;
end else begin
DisposeItems(FMatchPatPtr);
Result := False;
end;
end else begin
DisposeItems(FMatchPatPtr);
Result := True;
end;
if Result then begin
if (FSelAvoidPatSL.Count > 0) then begin
DisposeItems(FSelAvoidPatPtr);
if (BuildPatternStr(FSelAvoidPatStr, Len, FSelAvoidPatSL)) then begin
if (Len > 0) then
GetPattern(FSelAvoidPatStr, FSelAvoidPatPtr)
else
DisposeItems(FSelAvoidPatPtr);
Result := True;
end else begin
DisposeItems(FSelAvoidPatPtr);
Result := False;
end;
end else begin
DisposeItems(FSelAvoidPatPtr);
Result := True;
end;
end;
if Result then begin
if (FReplacePatSL.Count > 0) then begin
DisposeItems(FReplacePatPtr);
if (BuildPatternStr(FReplacePatStr, Len, FReplacePatSL)) then begin
if (Len > 0) then
GetReplace(FReplacePatStr, FReplacePatPtr)
else
DisposeItems(FReplacePatPtr);
Result := True;
end else begin
DisposeItems(FReplacePatPtr);
Result := False;
end;
end else begin
DisposeItems(FReplacePatPtr);
Result := True;
end;
end;
end;
function TStStreamRegEx.BuildPatternStr(var PStr : PAnsiChar;
var Len : Integer;
SL : TStringList) : Boolean;
var
I,
J : integer;
CurLen : Integer; {!!.01}
begin
Len := 0;
for I := 0 to pred(SL.Count) do
Len := Len + Length(TrimL(SL[I]));
if (Len = 0) then
Result := True
else begin
if Assigned(PStr) then
FreeMem(PStr, StrLen(PStr)+1);
GetMem(PStr, Len+1);
PStr[Len] := EndStr;
J := 0;
for I := 0 to pred(SL.Count) do begin
CurLen := Length(TrimL(SL[I])); {!!.01}
if CurLen > 0 then begin {!!.01}
Move(SL[I][1], PStr[J], CurLen); {!!.01}
Inc(J, CurLen); {!!.01}
end; {!!.01}
end;
Result := True;
end;
end;
function TStStreamRegEx.CheckString(const S : AnsiString;
var REPosition : TMatchPosition) : Boolean;
var
Tmp : PAnsiChar;
I : integer;
Len : integer;
OK : Boolean;
begin
I := Length(S);
GetMem(Tmp, I+3);
try
if I > 0 then {!!.01}
Move(S[1], Tmp[0], I);
Tmp[I] := #13;
Tmp[I+1] := #10;
Tmp[I+2] := EndStr;
if (FMatchPatSL.Count > 0) then begin
OK := BuildPatternStr(FMatchPatStr, Len, FMatchPatSL);
if (OK) then begin
if (Len > 0) then
GetPattern(FMatchPatStr, FMatchPatPtr)
else
DisposeItems(FMatchPatPtr);
end else
DisposeItems(FMatchPatPtr);
end else
DisposeItems(FMatchPatPtr);
if (FSelAvoidPatSL.Count > 0) then begin
OK := BuildPatternStr(FSelAvoidPatStr, Len, FSelAvoidPatSL);
if (OK) then begin
if (Len > 0) then
GetPattern(FSelAvoidPatStr, FSelAvoidPatPtr)
else
DisposeItems(FSelAvoidPatPtr);
end;
end else
DisposeItems(FSelAvoidPatPtr);
FMatchCount := 0;
FSelectCount := 0;
FReplaceCount := 0;
FInLineCount := 0;
FLinesPerSec := 0;
REPosition.LineNum := 1;
if ((FSelAvoidPatPtr <> nil) or (FMatchPatPtr <> nil)) then
Result := ProcessLine(Tmp, I, 1, True, REPosition)
else begin
Result := False;
RaiseStError(EStRegExError, stscNoPatterns);
end;
finally
FreeMem(Tmp, I+3);
end;
end;
function TStStreamRegEx.ReplaceString(var S : AnsiString;
var REPosition : TMatchPosition) : Boolean;
var
Tmp : PAnsiChar;
I : integer;
Len : integer;
OK : Boolean;
function ProcessString(var S : AnsiString;
Len : integer;
LineNum : integer;
var REPosition : TMatchPosition) : Boolean;
var
TmpBuf : PAnsiChar;
ABuf : PAnsiChar;
L : Integer;
begin
L := Length(S)+1;
GetMem(TmpBuf, MaxLineLength+1);
GetMem(ABuf, L);
try
StrPCopy(ABuf, S);
if (FSelAvoidPatPtr <> nil) then begin
Result := False;
if (not Avoid) then
Result := FindMatch(ABuf, FSelAvoidPatPtr, REPosition)
else
Result := not(FindMatch(ABuf, FSelAvoidPatPtr, REPosition));
end else
Result := True;
if Result then begin
{met select criterion, perhaps by default}
FSelectCount := Succ(FSelectCount);
if (FReplacePatPtr <> nil) then begin
Result := FindMatch(ABuf, FMatchPatPtr, REPosition);
if Result then begin
TmpBuf[0] := #0;
SubLine(ABuf);
S := StrPas(FOutLineBuf);
end;
end;
end;
finally
FreeMem(TmpBuf, MaxLineLength+1);
FreeMem(ABuf, L);
end;
end;
begin
I := Length(S);
GetMem(Tmp, I+3);
try
if I > 0 then {!!.01}
Move(S[1], Tmp[0], I);
Tmp[I] := #13;
Tmp[I+1] := #10;
Tmp[I+2] := EndStr;
if (FMatchPatSL.Count > 0) then begin
OK := BuildPatternStr(FMatchPatStr, Len, FMatchPatSL);
if (OK) then begin
if (Len > 0) then
GetPattern(FMatchPatStr, FMatchPatPtr)
else
DisposeItems(FMatchPatPtr);
end else
DisposeItems(FMatchPatPtr);
end else
DisposeItems(FMatchPatPtr);
if (FSelAvoidPatSL.Count > 0) then begin
OK := BuildPatternStr(FSelAvoidPatStr, Len, FSelAvoidPatSL);
if (OK) then begin
if (Len > 0) then
GetPattern(FSelAvoidPatStr, FSelAvoidPatPtr)
else
DisposeItems(FSelAvoidPatPtr);
end;
end else
DisposeItems(FSelAvoidPatPtr);
if (FReplacePatSL.Count > 0) then begin
OK := BuildPatternStr(FReplacePatStr, Len, FReplacePatSL);
if (OK) then begin
if (Len > 0) then
GetPattern(FReplacePatStr, FReplacePatPtr)
else
DisposeItems(FReplacePatPtr);
end else
DisposeItems(FReplacePatPtr);
end else
DisposeItems(FReplacePatPtr);
FMatchCount := 0;
FSelectCount := 0;
FReplaceCount := 0;
FInLineCount := 0;
FLinesPerSec := 0;
GetMem(FInLineBuf, MaxLineLength+3);
GetMem(FOutLineBuf, MaxLineLength+3);
try
REPosition.LineNum := 1;
if ((FSelAvoidPatPtr <> nil) or (FMatchPatPtr <> nil)) and
(Assigned(FReplacePatPtr))then begin
Result := ProcessString(S, I, 1, REPosition);
end else begin
Result := False;
RaiseStError(EStRegExError, stscNoPatterns);
end;
finally
FreeMem(FInLineBuf, MaxLineLength+3);
FreeMem(FOutLineBuf, MaxLineLength+3);
end;
finally
FreeMem(Tmp, I+3);
end;
end;
function TStStreamRegEx.ConvertMaskToRegEx(const S : AnsiString) : AnsiString;
var
I : integer;
TS : AnsiString;
begin
I := 1;
while (I <= Length(S)) do begin
if (I = 1) then begin
if not (S[1] in ['*', '?']) then begin
TS := '((^[' ;
TS := TS + S[1] + '])';
Inc(I);
end else
TS := '(';
end;
if not (S[I] in ['*', '?', '.', '\']) then
TS := TS + S[I]
else begin
if (S[I] = '*') then
TS := TS + '.*'
else if (S[I] = '?') then begin
if (I = 1) then
TS := TS + '(^.)'
else
TS := TS + '.?';
end else begin
TS := TS + '\' + S[I];
end;
end;
Inc(I);
end;
Result := TS + '\n)';
end;
function TStStreamRegEx.FileMasksToRegEx(Masks : AnsiString) : Boolean;
var
SL : TStringList;
S : AnsiString;
K : Cardinal;
Len: Integer;
begin
SL := TStringList.Create;
try
if StrChPosL(Masks, ';', K) then begin
while (K > 0) do begin
S := Copy(Masks, 1, K-1);
if (Length(S) > 0) then begin
if (SL.Count = 0) then
SL.Add(ConvertMaskToRegEx(S))
else
SL.Add('|' + ConvertMaskToRegEx(S));
end;
Delete(Masks, 1, K);
if not (StrChPosL(Masks, ';', K)) then
break;
end;
if (Length(Masks) > 0) then
SL.Add('|' + ConvertMaskToRegEx(Masks));
end else begin
if (Length(Masks) > 0) then
SL.Add(ConvertMaskToRegEx(Masks));
end;
if (SL.Count > 0) then begin
FMatchPatSL.Clear;
FMatchPatSL.Assign(SL);
DisposeItems(FMatchPatPtr);
FMatchPatPtr := nil;
if (BuildPatternStr(FMatchPatStr, Len, FMatchPatSL)) then begin
if (Len > 0) then
GetPattern(FMatchPatStr, FMatchPatPtr)
else begin
DisposeItems(FMatchPatPtr);
FMatchPatPtr := nil;
end;
Result := True;
end else begin
DisposeItems(FMatchPatPtr);
FMatchPatPtr := nil;
Result := False;
end;
Result := True;
end else
Result := False;
finally
SL.Free;
end;
end;
function TStStreamRegEx.Execute : Boolean;
var
Len : TStMemSize;
LineNum : Integer;
ATime : TDateTime;
PC : Cardinal;
LPC : Cardinal;
BytesRead : Cardinal;
REPosition: TMatchPosition;
Found : Boolean;
Src : PAnsiChar; {!!!}
FFoundText : AnsiString; {!!!}
begin
if (FMatchPatSL.Count = 0) and
(FReplacePatSL.Count = 0) and (FSelAvoidPatSL.Count = 0) then
RaiseStError(EStRegExError, stscNoPatterns);
if (not (BuildAllPatterns)) then
RaiseStError(EStRegExError, stscPatternError);
if (FMatchPatPtr = nil) and (FSelAvoidPatPtr = nil) and (FReplacePatPtr = nil) then
RaiseStError(EStRegExError, stscNoPatterns);
if (not (Assigned(FInputStream))) or
((not (Assigned(FOutputStream)) and (not (ooCountOnly in OutputOptions)))) then
RaiseStError(EStRegExError, stscStreamsNil);
FInTextStream := nil;
FOutTextStream := nil;
try
FInTextStream := TStAnsiTextStream.Create(FInputStream);
FInTextStream.LineTermChar := FInLineTermChar;
FInTextStream.LineTerminator := FInLineTerminator;
FInTextStream.FixedLineLength := FInLineLength;
FInFileSize := FInTextStream.Size;
if not (ooCountOnly in OutputOptions) then begin
FOutTextStream := TStAnsiTextStream.Create(FOutputStream);
FOutTextStream.LineTermChar := FOutLineTermChar;
FOutTextStream.LineTerminator := FOutLineTerminator;
FOutTextStream.FixedLineLength := FInLineLength;
end;
FMatchCount := 0;
FSelectCount := 0;
FReplaceCount := 0;
FInLineCount := 0;
FLinesPerSec := 0;
BytesRead := 0;
LPC := 0;
FInTextStream.Position := 0;
FInLineBuf := nil;
FOutLineBuf := nil;
try
GetMem(FInLineBuf, MaxLineLength+3);
GetMem(FOutLineBuf, MaxLineLength+3);
LineNum := 1;
ATime := Now;
while not FInTextStream.AtEndOfStream do begin
Len := FInTextStream.ReadLineArray(FInLineBuf, MaxLineLength);
Inc(BytesRead, Len);
FInLineBuf[Len] := #13;
FInLineBuf[Len+1] := #10;
FInLineBuf[Len+2] := EndStr;
{!!.02 - added }
REPosition.StartPos := 0;
REPosition.EndPos := 0;
REPosition.Length := 0;
{!!.02 - added end }
REPosition.LineNum := LineNum;
Found := ProcessLine(FInLineBuf, Len, LineNum, False, REPosition);
{!!!}
SetLength(FFoundText, REPosition.Length);
Src := FInLineBuf;
Inc(Src, REPosition.StartPos);
StrMove(PAnsiChar(FFoundText), Src, REPosition.Length);
{!!!}
if (FInFileSize > 0) then begin
PC := Round(BytesRead / FInFileSize * 100);
{avoid calling with every line - when OnProgress is assigned}
{performance is considerably reduced anyway, don't add to it}
if (PC > LPC) then begin
LPC := PC;
if (Assigned(FOnProgress)) then
FOnProgress(Self, PC);
end;
end;
if (Assigned(FOnMatch)) and (Found) then
FOnMatch(Self, REPosition);
Inc(LineNum);
end;
ATime := (Now - ATime) * 86400;
FInLineCount := LineNum-1;
if (ATime > 0) then
FLinesPerSec := Trunc(FInLineCount / ATime)
else
FLinesPerSec := 0;
if (Assigned(FOnProgress)) then
FOnProgress(Self, 100);
Result := (FMatchCount > 0) or (FSelectCount > 0);
finally
FreeMem(FInLineBuf, MaxLineLength+3);
FreeMem(FOutLineBuf, MaxLineLength+3);
end;
finally
FInTextStream.Free;
FInTextStream := nil;
FOutTextStream.Free;
FOutTextStream := nil;
end;
end;
procedure TStStreamRegEx.AddTokenToPattern(var PatRec : PStPatRecord;
LastPatRec : PStPatRecord;
Token : TStTokens;
S : ShortString);
{-add a token record to the pattern list}
{-S contains a literal character or an expanded character class}
begin
PatRec := FNodes.AllocNode;
PatRec^.Token := Token; {save token type}
PatRec^.NextOK := False; {default to non-alternation}
LastPatRec^.NextPattern := PatRec; {hook up the previous token}
case Token of
tknNil, tknAnyChar, tknBegOfLine, tknEndOfLine, tknGroup, tknBegTag, tknEndTag :
begin
PatRec^.OneChar := Null;
PatRec^.StrPtr := nil;
end;
tknLitChar :
begin
if IgnoreCase then
PatRec^.OneChar := AnsiChar(AnsiUpperCase(S[1])[1])
else
PatRec^.OneChar := S[1];
PatRec^.StrPtr := nil;
end;
tknCharClass, tknNegCharClass :
begin
PatRec^.OneChar := Null;
if FIgnoreCase then
S := CleanUpCase(S);
New(PatRec^.StrPtr);
PatRec^.StrPtr^ := S;
end;
else
RaiseStError(EStRegExError, stscUnknownError);
end;
end;
function TStStreamRegEx.MakePattern(var Pattern : PAnsiChar;
Start : Integer;
Delim : AnsiChar;
var TagOn : Boolean;
var PatList : PStPatRecord) : Integer;
var
I : Integer;
NextLastPatRec,
LastPatRec,
TempPatRec,
PatRec : PStPatRecord;
Done : Boolean;
AChar : AnsiChar;
TmpStr : ShortString;
AToken : TStTokens;
GroupStartPos,
GroupEndPos : integer;
begin
PatList := FNodes.AllocNode;
PatList^.Token := tknNil; {put a nil token at the beginning}
PatList^.NextOK := False;
LastPatRec := PatList;
NextLastPatRec := nil;
I := Start; {start point of pattern string}
Done := False;
while not(Done) and (Pattern[I] <> Delim) and (Pattern[I] <> EndStr) do begin
AChar := Pattern[I];
if (AChar = Any) then
AddTokenToPattern(PatRec, LastPatRec, tknAnyChar, AChar)
else if (AChar = Bol) then
AddTokenToPattern(PatRec, LastPatRec, tknBegOfLine, '')
else if (AChar = Eol) then
AddTokenToPattern(PatRec, LastPatRec, tknEndOfLine, '')
else if (AChar = Ccl) then begin
Done := (GetCharacterClass(Pattern, I, TmpStr, AToken) = False);
if Done then
RaiseStError(EStRegExError, stscExpandingClass);
AddTokenToPattern(PatRec, LastPatRec, AToken, TmpStr);
end else if (AChar = Alter) then begin
if (NextLastPatRec = nil) or
((NextLastPatRec^.Token <> tknClosure) and
(NextLastPatRec^.Token <> tknMaybeOne)) then begin
{flag the current token as non-critical, i.e., "next is OK"}
LastPatRec^.NextOK := True;
end else begin
{alternation immediately after a closure is probably not desired}
{e.g., [a-z]*|[0-9] would internally produce ([a-z]|[0-9])*}
Done := True;
RaiseStError(EStRegExError, stscAlternationFollowsClosure);
end;
end else if (AChar = BGroup) then begin
GroupStartPos := I+1;
AddTokenToPattern(PatRec, LastPatRec, tknGroup, '');
{recursive branch off the list}
I := MakePattern(Pattern, Succ(I), EGroup, TagOn, TempPatRec);
if (I > 0) then begin
GroupEndPos := I-1;
if (Pattern[I+1] <> EndStr) then begin
if (Pattern[I+1] in [Closure, ClosurePlus]) then begin
if ((((GroupEndPos - GroupStartPos) = 1) or
(((GroupEndPos - GroupStartPos) = 2) and (Pattern[GroupStartPos] = Esc))) and
(Pattern[GroupEndPos] in [Closure, MaybeOne])) then begin
Done := True;
RaiseStError(EStRegExError, stscClosureMaybeEmpty);
end else
PatRec^.NestedPattern := TempPatRec;
end else
PatRec^.NestedPattern := TempPatRec;
end else
PatRec^.NestedPattern := TempPatRec;
end else begin
{didn't find egroup}
Done := True;
RaiseStError(EStRegExError, stscUnbalancedParens);
end;
end else if ((AChar = BTag) and (not(TagOn))) then begin
AddTokenToPattern(PatRec, LastPatRec, tknBegTag, '');
TagOn := True;
end else if ((AChar = ETag) and (TagOn)) then begin
AddTokenToPattern(PatRec, LastPatRec, tknEndTag, '');
TagOn := False;
end else if (((AChar = Closure) or (AChar = ClosurePlus) or
(AChar = MaybeOne)) and (I > Start)) then begin
if ((LastPatRec^.Token in [tknBegOfLine, tknEndOfLine, tknMaybeOne, tknClosure]) or
(NextLastPatRec^.Token = tknClosure)) then begin
{error, can't have closure after any of these}
Done := True;
RaiseStError(EStRegExError, stscFollowingClosure);
end else begin
if (AChar = ClosurePlus) then begin
{insert an extra copy of the last token before the closure}
TempPatRec := FNodes.CloneNode(LastPatRec);
NextLastPatRec^.NextPattern := TempPatRec;
TempPatRec^.NextPattern := LastPatRec;
NextLastPatRec := TempPatRec;
end;
{insert the closure between next to last and last token}
TempPatRec := FNodes.AllocNode;
NextLastPatRec^.NextPattern := TempPatRec;
if (AChar = MaybeOne) then
TempPatRec^.Token := tknMaybeOne
else
TempPatRec^.Token := tknClosure;
TempPatRec^.OneChar := Null;
TempPatRec^.NextPattern := LastPatRec;
TempPatRec^.NextOK := False;
{set j and lastj back into sequence}
PatRec := LastPatRec;
LastPatRec := TempPatRec;
end;
end else begin
if (AChar = Esc) then begin
{skip over escape character}
I := Succ(I);
AChar := Pattern[I];
case AChar of
lSpace : AddTokenToPattern(PatRec, LastPatRec, tknLitChar, #32);
lNewline :
begin
AddTokenToPattern(PatRec, LastPatRec, tknLitChar, #13);
LastPatRec := PatRec;
AddTokenToPattern(PatRec, LastPatRec, tknLitChar, #10);
end;
lTab : AddTokenToPattern(PatRec, LastPatRec, tknLitChar, #9);
lBackSpace : AddTokenToPattern(PatRec, LastPatRec, tknLitChar, #8);
lReturn : AddTokenToPattern(PatRec, LastPatRec, tknLitChar, #13);
lFeed : AddTokenToPattern(PatRec, LastPatRec, tknLitChar, #10);
lWordDelim : AddTokenToPattern(PatRec, LastPatRec, tknCharClass, StWordDelimString);
lHex : AddTokenToPattern(PatRec, LastPatRec, tknCharClass, StHexDigitString);
else
AddTokenToPattern(PatRec, LastPatRec, tknLitChar,AChar);
end;
end else
AddTokenToPattern(PatRec, LastPatRec, tknLitChar, AChar);
end;
NextLastPatRec := LastPatRec;
LastPatRec := PatRec;
if not(Done) then
I := Succ(I);
end; {of looking through pattern string}
if ((Done) or (Pattern[I] <> Delim)) then begin
Result := 0;
RaiseStError(EStRegExError, stscPatternError);
end else
Result := I;
end;
function TStStreamRegEx.GetPattern(var Pattern : PAnsiChar;
var PatList : PStPatRecord) : Boolean;
{-convert a Pattern PAnsiChar into a pattern list, pointed to by patlist}
{-return true if successful}
var
TagOn : Boolean;
begin
TagOn := False;
Result := (MakePattern(Pattern, 0, EndStr, TagOn, PatList) > 0);
if TagOn then begin
GetPattern := False;
RaiseStError(EStRegExError, stscUnbalancedTag);
end;
end;
procedure TStStreamRegEx.AddTokenToReplace(var PatRec : PStPatRecord;
LastPatRec : PStPatRecord;
Token : TStTokens;
const S : ShortString); {!!.02}
{-add a token record to the pattern list}
{S contains a literal character or an expanded character class}
begin
PatRec := FNodes.AllocNode;
PatRec^.Token := Token; {save token type}
PatRec^.NextOK := False; {default to non-alternation}
LastPatRec^.NextPattern := PatRec; {hook up the previous token}
if (Token = tknLitChar) or (Token = tknDitto) then begin
PatRec^.OneChar := S[1];
PatRec^.StrPtr := nil;
end else
RaiseStError(EStRegExError, stscUnknownError);
end;
function TStStreamRegEx.MakeReplacePattern(Pattern : PAnsiChar;
Start : Integer;
Delim : AnsiChar;
var PatList : PStPatRecord) : Integer;
{-make a pattern list from arg[i], starting at start, ending at delim}
{return 0 is error, last char position in arg if OK}
var
I : Integer;
PatRec,
LastPatRec : PStPatRecord;
Done : Boolean;
AChar : AnsiChar;
begin
PatList := FNodes.AllocNode;
PatList^.Token := tknNil; {put a nil token at the beginning}
PatList^.NextOK := False;
LastPatRec := PatList;
I := Start; {start point of pattern string}
Done := False;
while not(Done) and (Pattern[I] <> Delim) and (Pattern[I] <> EndStr) do begin
AChar := Pattern[I];
if (AChar = Ditto) then
AddTokenToReplace(PatRec, LastPatRec, tknDitto, '0')
else begin
if (AChar = Esc) then begin
{skip over escape character}
I := Succ(I);
AChar := Pattern[I];
if (AChar >= '1') and (AChar <= '9') then
{a tagged ditto}
AddTokenToReplace(PatRec, LastPatRec, tknDitto, AChar)
else case AChar of
lSpace : AddTokenToReplace(PatRec, LastPatRec, tknLitChar, #32);
lNewline :
begin
AddTokenToReplace(PatRec, LastPatRec, tknLitChar, #13);
LastPatRec := PatRec;
AddTokenToReplace(PatRec, LastPatRec, tknLitChar, #10);
end;
lTab : AddTokenToReplace(PatRec, LastPatRec, tknLitChar, #9);
lBackSpace : AddTokenToReplace(PatRec, LastPatRec, tknLitChar, #8);
lReturn : AddTokenToReplace(PatRec, LastPatRec, tknLitChar, #13);
lFeed : AddTokenToReplace(PatRec, LastPatRec, tknLitChar, #10);
lNil : ;
else
AddTokenToReplace(PatRec, LastPatRec, tknLitChar, AChar);
end;
end else
AddTokenToReplace(PatRec, LastPatRec, tknLitChar, AChar);
end;
LastPatRec := PatRec;
if not(Done) then
Inc(I);
end; {of looking through pattern string}
if Done or (Pattern[I] <> Delim) then begin
Result := 0;
RaiseStError(EStRegExError, stscPatternError);
end else
Result := I;
end;
function TStStreamRegEx.GetReplace(Pattern : PAnsiChar;
var PatList : PStPatRecord) : Boolean;
begin
Result := (MakeReplacePattern(Pattern, 0, EndStr, PatList) > 0);
end;
function TStStreamRegEx.MatchOnePatternElement(var Buf : PAnsiChar;
var I : Integer;
var TagOn : Boolean;
var TagNum : Integer;
PatPtr : PStPatRecord) : Boolean;
{-match one pattern element at pattern pointed to by PatPtr, Buf[I]}
var
Advance : -1..255;
AToken : TStTokens;
PatPos : Integer;
K : Cardinal;
C : AnsiChar;
begin
Advance := -1;
AToken := PatPtr^.Token;
if FIgnoreCase then
C := AnsiChar(AnsiUpperCase(Buf[I])[1])
else
C := Buf[I];
if (C <> EndStr) then begin
if (AToken = tknLitChar) then begin
if (C = PatPtr^.OneChar) then
Advance := 1;
end else if (AToken = tknCharClass) then begin
if (StrChPosS(PatPtr^.StrPtr^, C, K)) then
Advance := 1;
end else if (AToken = tknNegCharClass) then begin
if (not (C in [#13, #10])) then begin
if not (StrChPosS(PatPtr^.StrPtr^, C, K)) then
Advance := 1;
end;
end else if (AToken = tknAnyChar) then begin
if not (C in [#13, #10]) then
Advance := 1;
end else if (AToken = tknBegOfLine) then begin
if (I = 0) then
Advance := 0;
end else if (AToken = tknEndOfLine) then begin
if (C = #13) and (Buf[Succ(I)] = #10) then
Advance := 0;
end else if (AToken = tknNil) then begin
Advance := 0;
end else if (AToken = tknBegTag) then begin
Advance := 0;
if not(TagOn) then begin
TagNum := Succ(TagNum);
TagOn := True;
end;
end else if (AToken = tknEndTag) then begin
Advance := 0;
TagOn := False;
end else if (AToken = tknGroup) then begin
{we treat a group as a "character", but allow advance of multiple chars}
{recursive call to SearchMatchPattern}
PatPos := SearchMatchPattern(Buf, I, TagOn, TagNum, PatPtr^.NestedPattern);
if (PatPos >= I) then begin
I := PatPos;
Advance := 0;
end;
end;
end else begin
{at end of line}
{end tag marks match}
if (AToken = tknEndTag) then
Advance := 0;
end;
if (Advance >= 0) then begin
{ignore tag words here, since they are not used}
Result := True;
Inc(I, Advance);
end else
Result := False;
end;
function TStStreamRegEx.SearchMatchPattern(var Buf : PAnsiChar;
OffSet : Integer;
var TagOn : Boolean;
var TagNum : Integer;
PatPtr : PStPatRecord) : Integer;
{-look for match of pattern list starting at PatPtr with Buf[offset...]}
{-return the last position that matched}
var
I : Integer;
K : Integer;
PatRec : PStPatRecord;
Done : Boolean;
AToken : TStTokens;
begin
Done := False;
PatRec := PatPtr;
while not(Done) and (PatRec <> nil) do begin
AToken := PatRec^.Token;
if (AToken = tknClosure) then begin
{a closure}
PatRec := PatRec^.NextPattern; {step past the closure in the pattern list}
I := OffSet; {leave the current line position unchanged}
{match as many as possible}
while not(Done) and (Buf[I] <> EndStr) do begin
if not(MatchOnePatternElement(Buf, I, TagOn, TagNum, PatRec)) then
Done := True;
end;
{I points to the location that caused a non-match}
{match rest of pattern against rest of input}
{shrink closure by one after each failure}
Done := False;
K := -1;
while not(Done) and (I >= OffSet) do begin
K := SearchMatchPattern(Buf, I, TagOn, TagNum, PatRec^.NextPattern);
if (K > -1) then
Done := True
else
Dec(I);
end;
OffSet := K; {if k=-1 then failure else success}
Done := True;
end else if (AToken = tknMaybeOne) then begin
{a 0 or 1 closure}
PatRec := PatRec^.NextPattern; {step past the closure marker}
{match or no match is ok, but advance lin cursor if matched}
MatchOnePatternElement(Buf, OffSet, TagOn, TagNum, PatRec);
{advance to the next pattern token}
PatRec := PatRec^.NextPattern;
end else if not(MatchOnePatternElement(Buf, OffSet,
TagOn, TagNum, PatRec)) then begin
if PatRec^.NextOK then begin
{we get another chance because of alternation}
PatRec := PatRec^.NextPattern;
end else begin
OffSet := -1;
Done := True;
end;
end else begin
{skip over alternates if we matched already}
while (PatRec^.NextOK) and (PatRec^.NextPattern <> nil) do
PatRec := PatRec^.NextPattern;
{move to the next non-alternate}
PatRec := PatRec^.NextPattern;
end;
end;
Result := OffSet;
end;
function TStStreamRegEx.FindMatch(var Buf : PAnsiChar;
PatPtr : PStPatRecord;
var REPosition : TMatchPosition) : Boolean;
var
I,
LPos,
TagNum : Integer;
TagOn : Boolean;
begin
LPos := -1;
I := 0;
TagNum := 0;
TagOn := False;
Result := False;
REPosition.Length := 0;
while (Buf[I] <> EndStr) and (LPos = -1) do begin
LPos := SearchMatchPattern(Buf, I, TagOn, TagNum, PatPtr);
Result := (LPos > -1);
if (Result) then begin
REPosition.StartPos := I+1;
RePosition.EndPos := LPos;
RePosition.Length := REPosition.EndPos - REPosition.StartPos + 1;
end;
Inc(I);
end;
end;
procedure TStStreamRegEx.InsertLineNumber(Dest : PAnsiChar;
const S : PAnsiChar;
LineNum : Integer);
var
Count : Cardinal;
SI : string[8];
begin
Dest[0] := #0;
Count := StrLen(S);
if (Count > MaxLineLength - 8) then
Count := MaxLineLength - 8;
SI := LeftPadS(IntToStr(LineNum), 6) + ' ';
Move(SI[1], Dest[0], 8);
Move(S^, Dest[8], Count);
Dest[Count+8] := #0;
end;
function TStStreamRegEx.ProcessLine( Buf : PAnsiChar;
Len : integer;
LineNum : integer;
CheckOnly : Boolean;
var REPosition: TMatchPosition) : Boolean;
var
Tmp : PAnsiChar;
begin
GetMem(Tmp, MaxLineLength+1);
try
if (FSelAvoidPatPtr <> nil) then begin
if (not Avoid) then
Result := FindMatch(Buf, FSelAvoidPatPtr, REPosition)
else if (Avoid) then
Result := not(FindMatch(Buf, FSelAvoidPatPtr, REPosition))
else
Result := True;
end else
Result := True;
if Result then begin
{met select criterion, perhaps by default}
FSelectCount := Succ(FSelectCount);
if ((FReplacePatPtr <> nil) and (not CheckOnly)) then begin
if (ooModified in FOutputOptions) then begin
{we only want to replace and output lines that have a match}
Result := FindMatch(Buf, FMatchPatPtr, REPosition);
end;
if Result then begin
Tmp[0] := #0;
SubLine(Buf);
if (not (ooCountOnly in FOutputOptions)) then begin
if (LineNumbers) then
InsertLineNumber(Tmp, FOutlineBuf, LineNum)
else
StrCopy(Tmp, FOutlineBuf);
Tmp[StrLen(Tmp)-2] := #0;
FOutTextStream.WriteLineZ(Tmp);
end;
{subline keeps a count of matched lines and replaced patterns}
end;
end else if (FMatchPatPtr <> nil) then begin
Result := FindMatch(Buf, FMatchPatPtr, REPosition);
{met match criterion}
if Result then begin
FMatchCount := Succ(FMatchCount);
if (not CheckOnly) then begin
if (not (ooCountOnly in FOutputOptions)) then begin
Buf[Len] := #0;
if (LineNumbers) then
InsertLineNumber(Tmp, Buf, LineNum)
else
StrCopy(Tmp, Buf);
Tmp[StrLen(Tmp)] := #0;
FOutTextStream.WriteLineZ(Tmp);
end;
end;
end;
end else begin
{we are neither matching nor replacing, just selecting}
{output the selected line}
if (not CheckOnly) then begin
if (not (ooCountOnly in FOutputOptions)) then begin
Buf[Len] := #0;
if (LineNumbers) then
InsertLineNumber(Tmp, Buf, LineNum)
else
StrCopy(Tmp, Buf);
Tmp[StrLen(Tmp)] := #0;
FOutTextStream.WriteLineZ(Tmp);
end;
end;
end;
end else begin
{non-selected line, do we write it?}
if (ooUnselected in FOutputOptions) and
(not (ooCountOnly in FOutputOptions)) then begin
Buf[Len] := #0;
if (LineNumbers) then
InsertLineNumber(Tmp, Buf, LineNum)
else
StrCopy(Tmp, Buf);
Tmp[StrLen(Tmp)] := #0;
FOutTextStream.WriteLineZ(Tmp);
end;
end;
finally
FreeMem(Tmp, MaxLineLength+1);
end;
end;
procedure TStStreamRegEx.SetMatchPatSL(Value : TStringList);
begin
FMatchPatSL.Assign(Value);
DisposeItems(FMatchPatPtr);
end;
procedure TStStreamRegEx.SetOptions(Value : TStOutputOptions);
begin
if (Value <> FOutputOptions) then begin
FOutputOptions := Value;
if (ooCountOnly in FOutputOptions) then
FOutputOptions := [ooCountOnly];
end;
end;
procedure TStStreamRegEx.SetReplacePatSL(Value : TStringList);
begin
FReplacePatSL.Assign(Value);
DisposeItems(FReplacePatPtr);
end;
procedure TStStreamRegEx.SetSelAvoidPatSL(Value : TStringList);
begin
FSelAvoidPatSL.Assign(Value);
DisposeItems(FSelAvoidPatPtr);
end;
function TStStreamRegEx.SubLineMatchOne(Buf : PAnsiChar;
var Flags : TStFlag;
var TagOn : Boolean;
var I : Integer;
var TagNum : Integer;
PatPtr : PStPatRecord) : Boolean;
var
Advance : -1..255;
lToken : TStTokens;
PatPos : Integer;
K : Cardinal;
C : AnsiChar;
begin
Advance := -1;
lToken := PatPtr^.Token;
if FIgnoreCase then
C := AnsiChar(AnsiUpperCase(Buf[I])[1])
else
C := Buf[I];
if (C <> EndStr) then begin
if (lToken = tknLitChar) then begin
if (C = PatPtr^.OneChar) then
Advance := 1;
end else if (lToken = tknCharClass) then begin
if (StrChPosS(PatPtr^.StrPtr^, C, K)) then
Advance := 1;
end else if (lToken = tknNegCharClass) then begin
if (pos(C, NewLine) = 0) then begin
if not (StrChPosS(PatPtr^.StrPtr^, C, K)) then
Advance := 1;
end;
end else if (lToken = tknAnyChar) then begin
if (not (C in [#13, #10])) then
Advance := 1;
end else if (lToken = tknBegOfLine) then begin
if (I = 0) then
Advance := 0;
end else if (lToken = tknEndOfLine) then begin
if (C = #13) and (Buf[Succ(I)] = #10) then begin
Advance := 0;
end;
end else if (lToken = tknNil) then begin
Advance := 0;
end else if (lToken = tknBegTag) then begin
Advance := 0;
if not(TagOn) then begin
Inc(TagNum);
TagOn := True;
end;
end else if (lToken = tknEndTag) then begin
Advance := 0;
TagOn := False;
end else if (lToken = tknGroup) then begin
{we treat a group as a "character", but allow advance of multiple chars}
PatPos := SubLineMatchPattern(Buf, Flags, TagOn, TagNum,
I, PatPtr^.NestedPattern);
if (PatPos >= I) then begin
I := PatPos;
Advance := 0;
end;
end;
end else begin
{at end of line}
{end tag marks match}
if (lToken = tknEndTag) then
Advance := 0;
end;
if (Advance > 0) then begin
{we had a match at this (these) character position(s)}
{set the match flags}
if (TagOn) then
Flags[I] := TagNum
else
Flags[I] := 0;
Inc(I, Advance);
Result := True;
end else if (Advance = 0) then begin
Result := True;
end else begin
{this character didn't match}
Result := False;
Flags[I] := -1;
end;
end;
function TStStreamRegEx.SubLineMatchPattern(Buf : PAnsiChar;
var Flags : TStFlag;
var TagOn : Boolean;
var TagNum : Integer;
OffSet : Integer;
PatPtr : PStPatRecord) : Integer;
{-look for match of pattern list starting at PatPtr with Buf[offset...]}
{return the last position that matched}
var
I,
LocTag : Integer;
PatPos : Integer;
PatRec : PStPatRecord;
Done : Boolean;
AToken : TStTokens;
OldTagOn : boolean;
OldTagNum: integer;
begin
Done := False;
PatRec := PatPtr;
while not(Done) and (PatRec <> nil) do begin
AToken := PatRec^.Token;
if (AToken = tknClosure) then begin
{a closure}
PatRec := PatRec^.NextPattern; {step past the closure in the pattern list}
I := OffSet; {leave the current line position unchanged}
LocTag := TagNum;
{match as many as possible}
while not(Done) and (Buf[I] <> EndStr) do begin
if not(SubLineMatchOne(Buf, Flags, TagOn,
I, LocTag, PatRec)) then
Done := True;
end;
{i points to the location that caused a non-match}
{match rest of pattern against rest of input}
{shrink closure by one after each failure}
Done := False;
PatPos := -1;
while not(Done) and (I >= OffSet) do begin
OldTagOn := TagOn;
OldTagNum := LocTag;
PatPos := SubLineMatchPattern(Buf, Flags, TagOn,
LocTag, I, PatRec^.NextPattern);
if (PatPos > -1) then
Done := True
else begin
I := Pred(I);
TagOn := OldTagOn;
LocTag := OldTagNum;
end;
end;
OffSet := PatPos; {if k=-1 then failure else success}
TagNum := LocTag;
Done := True;
end else if (AToken = tknMaybeOne) then begin
{a 0 or 1 closure}
PatRec := PatRec^.NextPattern; {step past the closure marker}
{match or no match is ok, but advance lin cursor if matched}
SubLineMatchOne(Buf, Flags, TagOn, OffSet, TagNum, PatRec);
{advance to the next pattern token}
PatRec := PatRec^.NextPattern;
end else if not(SubLineMatchOne(Buf, Flags, TagOn,
OffSet, TagNum, PatRec)) then begin
if PatRec^.NextOK then begin
{we get another chance because of alternation}
PatRec := PatRec^.NextPattern;
end else begin
OffSet := -1;
Done := True;
end;
end else begin
{skip over alternates if we matched already}
while PatRec^.NextOK and (PatRec^.NextPattern <> nil) do
PatRec := PatRec^.NextPattern;
{move to the next non-alternate}
PatRec := PatRec^.NextPattern;
end;
end;
Result := OffSet;
end;
function TStStreamRegEx.SubLineFindTag(Buf : PAnsiChar;
I : Integer;
IEnd : Integer;
TagNum : Integer;
var Flags : TStFlag;
var IStart : Integer;
var IStop : Integer) : Boolean;
{-find the tagged match region}
{return true if it is found}
begin
IStart := I;
while (Buf[IStart] <> EndStr) and (Flags[IStart] <> TagNum) do
Inc(IStart);
if (Flags[IStart] = TagNum) then begin
Result := True;
IStop := IStart;
while (Flags[IStop] = TagNum) and (IStop < IEnd) do
Inc(IStop);
end else
Result := False;
end; {findtag}
procedure TStStreamRegEx.SubLineWrite(Buf : PAnsiChar;
S : PAnsiChar;
RepRec : PStPatRecord;
I,
IEnd : Integer;
var Flags : TStFlag);
{-Write the output line with replacements}
var
TagNum,
IStart,
IStop : Integer;
PatRec : PStPatRecord;
Token : TStTokens;
begin {writesub}
{scan the replacement list}
S[0] := #0;
PatRec := RepRec;
while (PatRec <> nil) do begin
Token := PatRec^.Token;
if (Token = tknDitto) then begin
TagNum := Ord(PatRec^.OneChar)-Ord('0');
if (TagNum = 0) then begin
{untagged ditto}
{add the entire matched region}
AppendS(S, S, @Buf[I], IEnd-I);
end else begin
{tagged ditto}
{find the tagged region}
if SubLineFindTag(Buf, I, IEnd, TagNum, Flags, IStart, IStop) then begin
{add the tagged region}
AppendS(S, S, @Buf[IStart], IStop-IStart);
end else begin
{else couldn't find tagged word, don't append anything}
end;
end;
end else if (Token = tknLitChar) then
AppendS(S, S, @PatRec^.OneChar, 1);
PatRec := PatRec^.NextPattern;
end;
end;
procedure TStStreamRegEx.SubLine(Buf : PAnsiChar);
var
I,
M,
NumToAdd,
TagNum,
Lastm : Integer;
Flags : TStFlag;
TagOn,
DidReplace : Boolean;
ALine : PAnsiChar;
begin
DidReplace := False;
LastM := -1;
I := 0;
GetMem(ALine, MaxLineLength+1);
try
FOutLineBuf[0] := #0;
FillChar(ALine^, MaxLineLength+1, #0);
while (Buf[I] <> EndStr) do begin
TagNum := 0;
TagOn := False;
M := SubLineMatchPattern(Buf, Flags, TagOn, TagNum, I, FMatchPatPtr);
if (M > -1) and (M <> I) and (LastM <> M) then begin
{keep track of count}
DidReplace := True;
Inc(FReplaceCount);
{replace matched text}
SubLineWrite(Buf, ALine, FReplacePatPtr, I, M, Flags);
LastM := M;
AppendS(FOutLineBuf, FOutLineBuf, ALine, StrLen(ALine));
end;
if (M = -1) or (M = I) then begin
{no match or null match, append the character}
if (Buf[I] = #13) then
NumToAdd := 2
else
NumToAdd := 1;
AppendS(FOutLineBuf, FOutLineBuf, @Buf[I], NumToAdd);
I := I + NumToAdd;
end else {skip matched text}
I := M;
end;
if DidReplace then
Inc(FMatchCount);
finally
FreeMem(ALine, MaxLineLength+1)
end;
end;
{******************************************************************************}
{ TStRegEx Implementation }
{******************************************************************************}
constructor TStRegEx.Create(AOwner : TComponent);
begin
inherited Create(AOwner);
FAvoid := False;
FIgnoreCase := False;
FLineNumbers := False;
FOutputOptions := [];
FInLineTerminator := ltCRLF;
FInLineTermChar := #10;
FInFixedLineLength:= 80;
FOutLineTerminator := ltCRLF;
FOutLineTermChar := #10;
FOutFixedLineLength := 80; {not used straight away}
FMaxLineLength := 1024;
FMatchPatSL := TStringList.Create;
FMatchPatPtr := nil;
FSelAvoidPatSL := TStringList.Create;
FSelAvoidPatPtr:= nil;
FReplacePatSL := TStringList.Create;
FReplacePatPtr := nil;
FInFileStream := nil;
FOutFileStream := nil;
FStream := TStStreamRegEx.Create;
end;
destructor TStRegEx.Destroy;
begin
FMatchPatSL.Free;
FMatchPatSL := nil;
FReplacePatSL.Free;
FReplacePatSL := nil;
FSelAvoidPatSL.Free;
FSelAvoidPatSL := nil;
FStream.Free;
FStream := nil;
inherited Destroy;
end;
function TStRegEx.CheckString(const S : AnsiString;
var REPosition : TMatchPosition) : Boolean;
begin
if (Assigned(FStream)) then begin
SetStreamProperties;
Result := FStream.CheckString(S, REPosition);
end else
Result := False;
end;
function TStRegEx.ReplaceString(var S : AnsiString;
var REPosition : TMatchPosition) : Boolean;
begin
if (Assigned(FStream)) then begin
SetStreamProperties;
Result := FStream.ReplaceString(S, REPosition);
end else
Result := False;
end;
function TStRegEx.FileMasksToRegEx(const Masks : AnsiString) : Boolean;{!!.02}
begin
if (Assigned(FStream)) then begin
SetStreamProperties;
Result := FStream.FileMasksToRegEx(Masks);
if (Result) then
FMatchPatSL.Assign(FStream.FMatchPatSL);
end else
Result := False;
end;
function TStRegEx.Execute : Boolean;
begin
Result := False;
try
if (not FileExists(FInputFile)) then
RaiseStError(EStRegExError, stscInFileNotFound);
try
FInFileStream := TFileStream.Create(FInputFile,
fmOpenRead or fmShareDenyWrite);
FStream.InputStream := FInFileStream
except
RaiseStError(EStRegExError, stscREInFileError);
Exit;
end;
if not (ooCountOnly in OutputOptions) then begin
if (FileExists(FOutputFile)) then
try
SysUtils.DeleteFile(FOutputFile);
except
RaiseStError(EStRegExError, stscOutFileDelete);
Exit;
end;
FOutFileStream := nil;
FStream.OutputStream := nil;
try
FOutFileStream := TFileStream.Create(FOutputFile, fmCreate);
FStream.OutputStream := FOutFileStream
except
RaiseStError(EStRegExError, stscOutFileCreate);
Exit;
end;
end;
SetStreamProperties;
Result := FStream.Execute;
FMatchCount := FStream.FMatchCount;
FSelectCount := FStream.FSelectCount;
FReplaceCount := FStream.FReplaceCount;
FInLineCount := FStream.FInLineCount;
FLinesPerSec := FStream.FLinesPerSec;
finally
FInFileStream.Free;
FInFileStream := nil;
FOutFileStream.Free;
FOutFileStream := nil;
end;
end;
procedure TStRegEx.SetMatchPatSL(Value : TStringList);
begin
FMatchPatSL.Assign(Value);
end;
procedure TStRegEx.SetOptions(Value : TStOutputOptions);
begin
if (Value <> FOutputOptions) then begin
FOutputOptions := Value;
if (ooCountOnly in FOutputOptions) then
FOutputOptions := [ooCountOnly];
end;
end;
procedure TStRegEx.SetReplacePatSL(Value : TStringList);
begin
FReplacePatSL.Assign(Value);
end;
procedure TStRegEx.SetSelAvoidPatSL(Value : TStringList);
begin
FSelAvoidPatSL.Assign(Value);
end;
procedure TStRegEx.SetStreamProperties;
begin
if (not Assigned(FStream)) then Exit;
FStream.InLineTermChar := FInLineTermChar;
FStream.InLineTerminator := FInLineTerminator;
FStream.InFixedLineLength := FInFixedLineLength;
{!!.02 - Changed }
// FStream.InLineTermChar := FOutLineTermChar;
// FStream.InLineTerminator := FOutLineTerminator;
// FStream.InFixedLineLength := FOutFixedLineLength;
FStream.OutLineTermChar := FOutLineTermChar;
FStream.OutLineTerminator := FOutLineTerminator;
FStream.OutFixedLineLength := FOutFixedLineLength;
{!!.02 - Changed end }
FStream.Avoid := FAvoid;
FStream.IgnoreCase := FIgnoreCase;
FStream.LineNumbers := FLineNumbers;
FStream.MatchPattern := FMatchPatSL;
FStream.OnMatch := FOnMatch;
FStream.OnProgress := FOnProgress;
FStream.OutputOptions := FOutputOptions;
FStream.ReplacePattern := FReplacePatSL;
FStream.SelAvoidPattern:= FSelAvoidPatSL;
FStream.FMatchCount := 0;
FStream.FSelectCount := 0;
FStream.FReplaceCount := 0;
FStream.FInLineCount := 0;
FStream.FLinesPerSec := 0;
end;
end.