Merges the fixes from SynRegExpr, specially fixes 64-bits, while removing the synedit defines

git-svn-id: trunk@18899 -
This commit is contained in:
sekelsenmat 2011-08-29 14:04:14 +00:00
parent ed03858129
commit dbc08b5c1f

View File

@ -45,8 +45,15 @@ unit RegExpr;
The same modified LGPL with static linking exception as the Free Pascal RTL
}
interface
{off $DEFINE DebugSynRegExpr}
{$IFDEF FPC}
{$MODE DELPHI} // Delphi-compatible mode in FreePascal
{$ENDIF}
// ======== Determine compiler
{$IFDEF VER80} Sorry, TRegExpr is for 32-bits Delphi only. Delphi 1 is not supported (and whos really care today?!). {$ENDIF}
{$IFDEF VER90} {$DEFINE D2} {$ENDIF} // D2
@ -62,7 +69,9 @@ interface
{$BOOLEVAL OFF}
{$EXTENDEDSYNTAX ON}
{$LONGSTRINGS ON}
{$IFNDEF FPC}
{$OPTIMIZATION ON}
{$ENDIF}
{$IFDEF D6}
{$WARN SYMBOL_PLATFORM OFF} // Suppress .Net warnings
{$ENDIF}
@ -71,9 +80,6 @@ interface
{$WARN UNSAFE_TYPE OFF} // Suppress .Net warnings
{$WARN UNSAFE_CODE OFF} // Suppress .Net warnings
{$ENDIF}
{$IFDEF FPC}
{$MODE DELPHI} // Delphi-compatible mode in FreePascal
{$ENDIF}
// ======== Define options for TRegExpr engine
{.$DEFINE UniCode} // Unicode support
@ -119,7 +125,7 @@ type
{$ENDIF}
TREOp = REChar; // internal p-code type //###0.933
PREOp = ^TREOp;
TRENextOff = integer; // internal Next "pointer" (offset to current p-code) //###0.933
TRENextOff = PtrInt; // internal Next "pointer" (offset to current p-code) //###0.933
PRENextOff = ^TRENextOff; // used for extracting Next "pointers" from compiled r.e. //###0.933
TREBracesArg = integer; // type of {m,n} arguments
PREBracesArg = ^TREBracesArg;
@ -207,7 +213,7 @@ type
regstart : REChar; // char that must begin a match; '\0' if none obvious
reganch : REChar; // is the match anchored (at beginning-of-line only)?
regmust : PRegExprChar; // string (pointer into program) that match must include, or nil
regmlen : integer; // length of regmust string
regmlen : PtrInt; // length of regmust string
// Regstart and reganch permit very fast decisions on suitable starting points
// for a match, cutting down the work a lot. Regmust permits fast rejection
// of lines that cannot possibly match. The regmust tests are costly enough
@ -227,10 +233,10 @@ type
// work variables for compiler's routines
regparse : PRegExprChar; // Input-scan pointer.
regnpar : integer; // count.
regnpar : PtrInt; // count.
regdummy : char;
regcode : PRegExprChar; // Code-emit pointer; @regdummy = don't.
regsize : integer; // Code size.
regsize : PtrInt; // Code size.
regexpbeg : PRegExprChar; // only for error handling. Contains
// pointer to beginning of r.e. while compiling
@ -330,15 +336,15 @@ type
function ParseAtom (var flagp : integer) : PRegExprChar;
// the lowest level
function GetCompilerErrorPos : integer;
function GetCompilerErrorPos : PtrInt;
// current pos in r.e. - for error hanling
{$IFDEF UseFirstCharSet} //###0.929
procedure FillFirstCharSet (prog : PRegExprChar);
{$ENDIF}
{===================== Mathing section ===================}
function regrepeat (p : PRegExprChar; AMax : integer) : integer;
{===================== Matching section ===================}
function regrepeat (p : PRegExprChar; AMax : PtrInt) : PtrInt;
// repeatedly match something simple, report how many
function regnext (p : PRegExprChar) : PRegExprChar;
@ -347,7 +353,7 @@ type
function MatchPrim (prog : PRegExprChar) : boolean;
// recursively matching routine
function ExecPrim (AOffset: integer) : boolean;
function ExecPrim (AOffset: PtrInt) : boolean;
// Exec for stored InputString
{$IFDEF RegExpPCodeDump}
@ -355,8 +361,8 @@ type
{$ENDIF}
function GetSubExprMatchCount : integer;
function GetMatchPos (Idx : integer) : integer;
function GetMatchLen (Idx : integer) : integer;
function GetMatchPos (Idx : integer) : PtrInt;
function GetMatchLen (Idx : integer) : PtrInt;
function GetMatch (Idx : integer) : RegExprString;
function GetInputString : RegExprString;
@ -400,8 +406,8 @@ type
property ModifierR : boolean index 2 read GetModifier write SetModifier;
// Modifier /r - use r.e.syntax extended for russian,
// (was property ExtSyntaxEnabled in previous versions)
// If true, then à-ÿ additional include russian letter '¸',
// À-ß additional include '¨', and à-ß include all russian symbols.
// If true, then а-я additional include russian letter ',
// А-Я additional include 'Ё', and а-Я include all russian symbols.
// You have to turn it off if it may interfere with you national alphabet.
// , initialized from RegExprModifierR
@ -429,13 +435,13 @@ type
{$IFNDEF FPC} // I do not know why FreePascal cannot overload methods with empty param list
function Exec : boolean; overload; //###0.949
{$ENDIF}
function Exec (AOffset: integer) : boolean; overload; //###0.949
function Exec (AOffset: PtrInt) : boolean; overload; //###0.949
{$ENDIF}
// match a programm against a string AInputString
// !!! Exec store AInputString into InputString property
// For Delphi 5 and higher available overloaded versions - first without
// parameter (uses already assigned to InputString property value)
// and second that has integer parameter and is same as ExecPos
// and second that has PtrInt parameter and is same as ExecPos
function ExecNext : boolean;
// find next match:
@ -448,7 +454,7 @@ type
// Exec* (Exec, ExecPos, ExecNext). So You always must use something like
// if Exec (InputString) then repeat { proceed results} until not ExecNext;
function ExecPos (AOffset: integer {$IFDEF DefParam}= 1{$ENDIF}) : boolean;
function ExecPos (AOffset: PtrInt {$IFDEF DefParam}= 1{$ENDIF}) : boolean;
// find match for InputString starting from AOffset position
// (AOffset=1 - first char of InputString)
@ -511,14 +517,14 @@ type
// Exec ('2'): SubExprMatchCount=0, Match[0]='2'
// Exec ('7') - return False: SubExprMatchCount=-1
property MatchPos [Idx : integer] : integer read GetMatchPos;
property MatchPos [Idx : integer] : PtrInt read GetMatchPos;
// pos of entrance subexpr. #Idx into tested in last Exec*
// string. First subexpr. have Idx=1, last - MatchCount,
// whole r.e. have Idx=0.
// Returns -1 if in r.e. no such subexpr. or this subexpr.
// not found in input string.
property MatchLen [Idx : integer] : integer read GetMatchLen;
property MatchLen [Idx : integer] : PtrInt read GetMatchLen;
// len of entrance subexpr. #Idx r.e. into tested in last Exec*
// string. First subexpr. have Idx=1, last - MatchCount,
// whole r.e. have Idx=0.
@ -539,9 +545,9 @@ type
function ErrorMsg (AErrorID : integer) : RegExprString; virtual;
// Returns Error message for error with ID = AErrorID.
property CompilerErrorPos : integer read GetCompilerErrorPos;
property CompilerErrorPos : PtrInt read GetCompilerErrorPos;
// Returns pos in r.e. there compiler stopped.
// Usefull for error diagnostics
// Useful for error diagnostics
property SpaceChars : RegExprString read fSpaceChars write fSpaceChars; //###0.927
// Contains chars, treated as /s (initially filled with RegExprSpaceChars
@ -567,7 +573,7 @@ type
// Create set it to RegExprInvertCaseFunction (InvertCaseFunction by default)
procedure Compile; //###0.941
// [Re]compile r.e. Usefull for example for GUI r.e. editors (to check
// [Re]compile r.e. Useful for example for GUI r.e. editors (to check
// all properties validity).
{$IFDEF RegExpPCodeDump}
@ -579,7 +585,7 @@ type
ERegExpr = class (Exception)
public
ErrorCode : integer;
CompilerErrorPos : integer;
CompilerErrorPos : PtrInt;
end;
const
@ -609,11 +615,11 @@ function ReplaceRegExpr (const ARegExpr, AInputStr, AReplaceStr : RegExprString;
function QuoteRegExprMetaChars (const AStr : RegExprString) : RegExprString;
// Replace all metachars with its safe representation,
// for example 'abc$cd.(' converts into 'abc\$cd\.\('
// This function usefull for r.e. autogeneration from
// This function useful for r.e. autogeneration from
// user input
function RegExprSubExpressions (const ARegExpr : string;
ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : integer;
ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : PtrInt;
// Makes list of subexpressions found in ARegExpr r.e.
// In ASubExps every item represent subexpression,
// from first to last, in format:
@ -624,7 +630,7 @@ function RegExprSubExpressions (const ARegExpr : string;
// if exist!
// AExtendedSyntax - must be True if modifier /m will be On while
// using the r.e.
// Usefull for GUI editors of r.e. etc (You can find example of using
// Useful for GUI editors of r.e. etc (You can find example of using
// in TestRExp.dpr project)
// Returns
// 0 Success. No unbalanced brackets was found;
@ -638,6 +644,16 @@ function RegExprSubExpressions (const ARegExpr : string;
implementation
{$IFDEF FPC}
{$ELSE}
uses
{$IFDEF SYN_WIN32}
Windows; // CharUpper/Lower
{$ELSE}
Libc; //Qt.pas from Borland does not expose char handling functions
{$ENDIF}
{$ENDIF}
const
TRegExprVersionMajor : integer = 0;
TRegExprVersionMinor : integer = 952;
@ -664,7 +680,7 @@ const
function StrPCopy (Dest: PRegExprChar; const Source: RegExprString): PRegExprChar;
var
i, Len : Integer;
i, Len : PtrInt;
begin
Len := length (Source); //###0.932
for i := 1 to Len do
@ -674,8 +690,8 @@ function StrPCopy (Dest: PRegExprChar; const Source: RegExprString): PRegExprCha
end; { of function StrPCopy
--------------------------------------------------------------}
function StrLCopy (Dest, Source: PRegExprChar; MaxLen: Cardinal): PRegExprChar;
var i: Integer;
function StrLCopy (Dest, Source: PRegExprChar; MaxLen: PtrUInt): PRegExprChar;
var i: PtrInt;
begin
for i := 0 to MaxLen - 1 do
Dest [i] := Source [i];
@ -683,7 +699,7 @@ function StrLCopy (Dest, Source: PRegExprChar; MaxLen: Cardinal): PRegExprChar;
end; { of function StrLCopy
--------------------------------------------------------------}
function StrLen (Str: PRegExprChar): Cardinal;
function StrLen (Str: PRegExprChar): PtrUInt;
begin
Result:=0;
while Str [result] <> #0
@ -692,7 +708,7 @@ function StrLen (Str: PRegExprChar): Cardinal;
--------------------------------------------------------------}
function StrPos (Str1, Str2: PRegExprChar): PRegExprChar;
var n: Integer;
var n: PtrInt;
begin
Result := nil;
n := Pos (RegExprString (Str2), RegExprString (Str1));
@ -702,7 +718,7 @@ function StrPos (Str1, Str2: PRegExprChar): PRegExprChar;
end; { of function StrPos
--------------------------------------------------------------}
function StrLComp (Str1, Str2: PRegExprChar; MaxLen: Cardinal): Integer;
function StrLComp (Str1, Str2: PRegExprChar; MaxLen: PtrUInt): PtrInt;
var S1, S2: RegExprString;
begin
S1 := Str1;
@ -776,7 +792,7 @@ function QuoteRegExprMetaChars (const AStr : RegExprString) : RegExprString;
// Very similar to META array, but slighly changed.
// !Any changes in META array must be synchronized with this set.
var
i, i0, Len : integer;
i, i0, Len : PtrInt;
begin
Result := '';
Len := length (AStr);
@ -795,24 +811,24 @@ function QuoteRegExprMetaChars (const AStr : RegExprString) : RegExprString;
--------------------------------------------------------------}
function RegExprSubExpressions (const ARegExpr : string;
ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : integer;
ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : PtrInt;
type
TStackItemRec = record //###0.945
SubExprIdx : integer;
StartPos : integer;
StartPos : PtrInt;
end;
TStackArray = packed array [0 .. NSUBEXPMAX - 1] of TStackItemRec;
var
Len, SubExprLen : integer;
i, i0 : integer;
Len, SubExprLen : PtrInt;
i, i0 : PtrInt;
Modif : integer;
Stack : ^TStackArray; //###0.945
StackIdx, StackSz : integer;
StackIdx, StackSz : PtrInt;
begin
Result := 0; // no unbalanced brackets found at this very moment
ASubExprs.Clear; // I don't think that adding to non empty list
// can be usefull, so I simplified algorithm to work only with empty list
// can be useful, so I simplified algorithm to work only with empty list
Len := length (ARegExpr); // some optimization tricks
@ -906,8 +922,8 @@ function RegExprSubExpressions (const ARegExpr : string;
// check if entire r.e. added
if (ASubExprs.Count = 0)
or ((integer (ASubExprs.Objects [0]) and $FFFF) <> 1)
or (((integer (ASubExprs.Objects [0]) ShR 16) and $FFFF) <> Len)
or ((PtrInt (ASubExprs.Objects [0]) and $FFFF) <> 1)
or (((PtrInt (ASubExprs.Objects [0]) ShR 16) and $FFFF) <> Len)
// whole r.e. wasn't added because it isn't bracketed
// well, we add it now:
then ASubExprs.InsertObject (0, ARegExpr, TObject ((Len ShL 16) or 1));
@ -990,7 +1006,7 @@ const
// The Next is a offset from the opcode of the node containing it.
// An operand, if any, simply follows the node. (Note that much of
// the code generation knows about this implicit relationship!)
// Using TRENextOff=integer speed up p-code processing.
// Using TRENextOff=PtrInt speed up p-code processing.
// Opcodes description:
//
@ -1049,7 +1065,7 @@ const
reeMatchPrimCorruptedPointers = 1002;
reeNoExpression = 1003;
reeCorruptedProgram = 1004;
reeNoInpitStringSpecified = 1005;
reeNoInputStringSpecified = 1005;
reeOffsetMustBeGreaterThen0 = 1006;
reeExecNextWithoutExec = 1007;
reeGetInputStringWithoutInputString = 1008;
@ -1093,7 +1109,7 @@ function TRegExpr.ErrorMsg (AErrorID : integer) : RegExprString;
reeMatchPrimCorruptedPointers: Result := 'TRegExpr(exec): MatchPrim Corrupted Pointers';
reeNoExpression: Result := 'TRegExpr(exec): Not Assigned Expression Property';
reeCorruptedProgram: Result := 'TRegExpr(exec): Corrupted Program';
reeNoInpitStringSpecified: Result := 'TRegExpr(exec): No Input String Specified';
reeNoInputStringSpecified: Result := 'TRegExpr(exec): No Input String Specified';
reeOffsetMustBeGreaterThen0: Result := 'TRegExpr(exec): Offset Must Be Greater Then 0';
reeExecNextWithoutExec: Result := 'TRegExpr(exec): ExecNext Without Exec[Pos]';
reeGetInputStringWithoutInputString: Result := 'TRegExpr(exec): GetInputString Without InputString';
@ -1158,12 +1174,21 @@ constructor TRegExpr.Create;
destructor TRegExpr.Destroy;
begin
if programm <> nil
then FreeMem (programm);
if fExpression <> nil
then FreeMem (fExpression);
if fInputString <> nil
then FreeMem (fInputString);
if programm <> nil then
begin
FreeMem (programm);
programm:=nil;
end;
if fExpression <> nil then
begin
FreeMem (fExpression);
fExpression:=nil;
end;
if fInputString <> nil then
begin
FreeMem (fInputString);
fInputString:=nil;
end;
end; { of destructor TRegExpr.Destroy
--------------------------------------------------------------}
@ -1175,9 +1200,9 @@ class function TRegExpr.InvertCaseFunction (const Ch : REChar) : REChar;
else
{$ENDIF}
begin
Result := {$IFDEF FPC}AnsiUpperCase (Ch) [1]{$ELSE} REChar (CharUpper (PChar (Ch))){$ENDIF};
Result := {$IFDEF FPC}AnsiUpperCase (Ch) [1]{$ELSE} {$IFDEF SYN_WIN32}REChar (CharUpper (PChar (Ch))){$ELSE}REChar (toupper (integer (Ch))){$ENDIF} {$ENDIF};
if Result = Ch
then Result := {$IFDEF FPC}AnsiLowerCase (Ch) [1]{$ELSE} REChar (CharLower (PChar (Ch))){$ENDIF};
then Result := {$IFDEF FPC}AnsiLowerCase (Ch) [1]{$ELSE} {$IFDEF SYN_WIN32}REChar (CharLower (PChar (Ch))){$ELSE}REChar(tolower (integer (Ch))){$ENDIF} {$ENDIF};
end;
end; { of function TRegExpr.InvertCaseFunction
--------------------------------------------------------------}
@ -1192,7 +1217,7 @@ function TRegExpr.GetExpression : RegExprString;
procedure TRegExpr.SetExpression (const s : RegExprString);
var
Len : integer; //###0.950
Len : PtrInt; //###0.950
begin
if (s <> fExpression) or not fExprIsCompiled then begin
fExprIsCompiled := false;
@ -1203,12 +1228,7 @@ procedure TRegExpr.SetExpression (const s : RegExprString);
if s <> '' then begin
Len := length (s); //###0.950
GetMem (fExpression, (Len + 1) * SizeOf (REChar));
// StrPCopy (fExpression, s); //###0.950 replaced due to StrPCopy limitation of 255 chars
{$IFDEF UniCode}
StrPCopy (fExpression, Copy (s, 1, Len)); //###0.950
{$ELSE}
StrLCopy (fExpression, PRegExprChar (s), Len); //###0.950
{$ENDIF UniCode}
System.Move(s[1],fExpression^,(Len + 1) * SizeOf (REChar));
InvalidateProgramm; //###0.941
end;
@ -1228,7 +1248,7 @@ function TRegExpr.GetSubExprMatchCount : integer;
end; { of function TRegExpr.GetSubExprMatchCount
--------------------------------------------------------------}
function TRegExpr.GetMatchPos (Idx : integer) : integer;
function TRegExpr.GetMatchPos (Idx : integer) : PtrInt;
begin
if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString)
and Assigned (startp [Idx]) and Assigned (endp [Idx]) then begin
@ -1238,7 +1258,7 @@ function TRegExpr.GetMatchPos (Idx : integer) : integer;
end; { of function TRegExpr.GetMatchPos
--------------------------------------------------------------}
function TRegExpr.GetMatchLen (Idx : integer) : integer;
function TRegExpr.GetMatchLen (Idx : integer) : PtrInt;
begin
if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString)
and Assigned (startp [Idx]) and Assigned (endp [Idx]) then begin
@ -1252,8 +1272,13 @@ function TRegExpr.GetMatch (Idx : integer) : RegExprString;
begin
if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString)
and Assigned (startp [Idx]) and Assigned (endp [Idx])
and (endp [Idx] > startp[Idx])
//then Result := copy (fInputString, MatchPos [Idx], MatchLen [Idx]) //###0.929
then SetString (Result, startp [idx], endp [idx] - startp [idx])
then begin
//SetString (Result, startp [idx], endp [idx] - startp [idx])
SetLength(Result,endp [idx] - startp [idx]);
System.Move(startp [idx]^,Result[1],length(Result));
end
else Result := '';
end; { of function TRegExpr.GetMatch
--------------------------------------------------------------}
@ -1454,7 +1479,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
// shr after subtraction to calculate widechar distance %-( )
// so, if difference is negative we have .. the "feature" :(
// I could wrap it in $IFDEF UniCode, but I didn't because
// "P Q computes the difference between the address given
// "P Q computes the difference between the address given
// by P (the higher address) and the address given by Q (the
// lower address)" - Delphi help quotation.
else PRENextOff (scan + REOpSz)^ := val - scan; //###0.933
@ -1480,6 +1505,10 @@ function TRegExpr.EmitNode (op : TREOp) : PRegExprChar; //###0.933
inc (regcode, REOpSz);
PRENextOff (regcode)^ := 0; // Next "pointer" := nil
inc (regcode, RENextOffSz);
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.EmitNode buffer overrun');
{$ENDIF}
end
else inc (regsize, REOpSz + RENextOffSz); // compute code size without code generation
end; { of function TRegExpr.EmitNode
@ -1491,8 +1520,12 @@ procedure TRegExpr.EmitC (b : REChar);
if regcode <> @regdummy then begin
regcode^ := b;
inc (regcode);
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.EmitC buffer overrun');
{$ENDIF}
end
else inc (regsize); // Type of p-code pointer always is ^REChar
else inc (regsize, REOpSz); // Type of p-code pointer always is ^REChar
end; { of procedure TRegExpr.EmitC
--------------------------------------------------------------}
@ -1507,8 +1540,15 @@ procedure TRegExpr.InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer
inc (regsize, sz);
EXIT;
end;
// move code behind insert position
src := regcode;
inc (regcode, sz);
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.InsertOperator buffer overrun');
if (opnd<regcode) or (opnd-regcode>regsize) then
raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
{$ENDIF}
dst := regcode;
while src > opnd do begin
dec (dst);
@ -1525,7 +1565,7 @@ procedure TRegExpr.InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer
end; { of procedure TRegExpr.InsertOperator
--------------------------------------------------------------}
function strcspn (s1 : PRegExprChar; s2 : PRegExprChar) : integer;
function strcspn (s1 : PRegExprChar; s2 : PRegExprChar) : PtrInt;
// find length of initial segment of s1 consisting
// entirely of characters not from s2
var scan1, scan2 : PRegExprChar;
@ -1565,17 +1605,17 @@ const
#$418,#$419,#$41A,#$41B,#$41C,#$41D,#$41E,#$41F,
#$420,#$421,#$422,#$423,#$424,#$425,#$426,#$427,
#$428,#$429,#$42A,#$42B,#$42C,#$42D,#$42E,#$42F,#0);
RusRangeLoLow = #$430{'à'};
RusRangeLoHigh = #$44F{'ÿ'};
RusRangeHiLow = #$410{'À'};
RusRangeHiHigh = #$42F{'ß'};
RusRangeLoLow = #$430{'а'};
RusRangeLoHigh = #$44F{'я'};
RusRangeHiLow = #$410{'А'};
RusRangeHiHigh = #$42F{'Я'};
{$ELSE}
RusRangeLo = 'àáâãä叿çèéêëìíîïðñòóôõö÷øùúûüýþÿ';
RusRangeHi = 'ÀÁÂÃÄŨÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞß';
RusRangeLoLow = 'à';
RusRangeLoHigh = 'ÿ';
RusRangeHiLow = 'À';
RusRangeHiHigh = 'ß';
RusRangeLo = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя';
RusRangeHi = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ';
RusRangeLoLow = 'а';
RusRangeLoHigh = 'я';
RusRangeHiLow = 'А';
RusRangeHiHigh = 'Я';
{$ENDIF}
function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
@ -1592,7 +1632,7 @@ function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
// of the structure of the compiled regexp.
var
scan, longest : PRegExprChar;
len : cardinal;
len : PtrUInt;
flags : integer;
begin
Result := false; // life too dark
@ -1624,13 +1664,6 @@ function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
if ParseReg (0, flags) = nil
then EXIT;
// Small enough for 2-bytes programm pointers ?
// ###0.933 no real p-code length limits now :)))
// if regsize >= 64 * 1024 then begin
// Error (reeCompRegexpTooBig);
// EXIT;
// end;
// Allocate space.
GetMem (programm, regsize * SizeOf (REChar));
@ -1643,7 +1676,6 @@ function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
if ParseReg (0, flags) = nil
then EXIT;
// Dig out information for optimizations.
{$IFDEF UseFirstCharSet} //###0.929
FirstCharSet := [];
@ -1674,7 +1706,7 @@ function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
len := 0;
while scan <> nil do begin
if (PREOp (scan)^ = EXACTLY)
and (strlen (scan + REOpSz + RENextOffSz) >= len) then begin
and (strlen (scan + REOpSz + RENextOffSz) >= PtrInt(len)) then begin
longest := scan + REOpSz + RENextOffSz;
len := strlen (longest);
end;
@ -1850,7 +1882,7 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
ANonGreedyOp : boolean); //###0.940
{$IFDEF ComplexBraces}
var
off : integer;
off : TRENextOff;
{$ENDIF}
begin
{$IFNDEF ComplexBraces}
@ -1870,6 +1902,10 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
inc (regcode, REBracesArgSz);
PRENextOff (regcode)^ := off;
inc (regcode, RENextOffSz);
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.ParsePiece.EmitComplexBraces buffer overrun');
{$ENDIF}
end
else inc (regsize, REBracesArgSz * 2 + RENextOffSz);
Tail (Result, NextNode); // LOOPENTRY -> LOOP
@ -2018,7 +2054,7 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
else EmitComplexBraces (BracesMin, BracesMax, NonGreedyOp);
if NonGreedyCh //###0.940
then inc (regparse); // Skip extra char '?'
end; { of case '{'}
end; // of case '{'
// else // here we can't be
end; { of case op}
@ -2041,16 +2077,16 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
flags : integer;
RangeBeg, RangeEnd : REChar;
CanBeRange : boolean;
len : integer;
len : PtrInt;
ender : REChar;
begmodfs : PRegExprChar;
{$IFDEF UseSetOfChar} //###0.930
RangePCodeBeg : PRegExprChar;
RangePCodeIdx : integer;
RangePCodeIdx : PtrInt;
RangeIsCI : boolean;
RangeSet : TSetOfREChar;
RangeLen : integer;
RangeLen : PtrInt;
RangeChMin, RangeChMax : REChar;
{$ENDIF}
@ -2065,13 +2101,13 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
end;
procedure EmitStr (const s : RegExprString);
var i : integer;
var i : PtrInt;
begin
for i := 1 to length (s)
do EmitC (s [i]);
end;
function HexDig (ch : REChar) : integer;
function HexDig (ch : REChar) : PtrInt;
begin
Result := 0;
if (ch >= 'a') and (ch <= 'f')
@ -2161,6 +2197,10 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
regcode^ := RangeChMax;
inc (regcode);
end;
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.ParseAtom.EmitRangeC TinySetLen buffer overrun');
{$ENDIF}
end
else begin
if regcode = @regdummy then begin
@ -2173,6 +2213,10 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
regcode := RangePCodeBeg;
Move (RangeSet, regcode^, SizeOf (TSetOfREChar));
inc (regcode, SizeOf (TSetOfREChar));
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.ParseAtom.EmitRangeC non TinySetLen buffer overrun');
{$ENDIF}
end;
end;
{$ELSE}
@ -2188,7 +2232,7 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
end;
procedure EmitRangeStr (const s : RegExprString);
var i : integer;
var i : PtrInt;
begin
for i := 1 to length (s)
do EmitRangeC (s [i]);
@ -2539,7 +2583,7 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
end; { of function TRegExpr.ParseAtom
--------------------------------------------------------------}
function TRegExpr.GetCompilerErrorPos : integer;
function TRegExpr.GetCompilerErrorPos : PtrInt;
begin
Result := 0;
if (regexpbeg = nil) or (regparse = nil)
@ -2565,7 +2609,7 @@ function TRegExpr.StrScanCI (s : PRegExprChar; ch : REChar) : PRegExprChar; //##
--------------------------------------------------------------}
{$ENDIF}
function TRegExpr.regrepeat (p : PRegExprChar; AMax : integer) : integer;
function TRegExpr.regrepeat (p : PRegExprChar; AMax : PtrInt) : PtrInt;
// repeatedly match something simple, report how many
var
scan : PRegExprChar;
@ -2775,12 +2819,12 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
var
scan : PRegExprChar; // Current node.
next : PRegExprChar; // Next node.
len : integer;
len : PtrInt;
opnd : PRegExprChar;
no : integer;
no : PtrInt;
save : PRegExprChar;
nextch : REChar;
BracesMin, BracesMax : integer; // we use integer instead of TREBracesArg for better support */+
BracesMin, BracesMax : PtrInt; // we use integer instead of TREBracesArg for better support */+
{$IFDEF ComplexBraces}
SavedLoopStack : array [1 .. LoopStackMax] of integer; // :(( very bad for recursion
SavedLoopStackIdx : integer; //###0.925
@ -3377,20 +3421,20 @@ function TRegExpr.Exec : boolean;
end; { of function TRegExpr.Exec
--------------------------------------------------------------}
{$ENDIF}
function TRegExpr.Exec (AOffset: integer) : boolean;
function TRegExpr.Exec (AOffset: PtrInt) : boolean;
begin
Result := ExecPrim (AOffset);
end; { of function TRegExpr.Exec
--------------------------------------------------------------}
{$ENDIF}
function TRegExpr.ExecPos (AOffset: integer {$IFDEF DefParam}= 1{$ENDIF}) : boolean;
function TRegExpr.ExecPos (AOffset: PtrInt {$IFDEF DefParam}= 1{$ENDIF}) : boolean;
begin
Result := ExecPrim (AOffset);
end; { of function TRegExpr.ExecPos
--------------------------------------------------------------}
function TRegExpr.ExecPrim (AOffset: integer) : boolean;
function TRegExpr.ExecPrim (AOffset: PtrInt) : boolean;
procedure ClearMatchs;
// Clears matchs array
var i : integer;
@ -3416,7 +3460,7 @@ function TRegExpr.ExecPrim (AOffset: integer) : boolean;
var
s : PRegExprChar;
StartPtr: PRegExprChar;
InputLen : integer;
InputLen : PtrInt;
begin
Result := false; // Be paranoid...
@ -3430,7 +3474,7 @@ function TRegExpr.ExecPrim (AOffset: integer) : boolean;
// Check InputString presence
if not Assigned (fInputString) then begin
Error (reeNoInpitStringSpecified);
Error (reeNoInputStringSpecified);
EXIT;
end;
@ -3532,7 +3576,7 @@ function TRegExpr.ExecPrim (AOffset: integer) : boolean;
--------------------------------------------------------------}
function TRegExpr.ExecNext : boolean;
var offset : integer;
var offset : PtrInt;
begin
Result := false;
if not Assigned (startp[0]) or not Assigned (endp[0]) then begin
@ -3560,8 +3604,8 @@ function TRegExpr.GetInputString : RegExprString;
procedure TRegExpr.SetInputString (const AInputString : RegExprString);
var
Len : integer;
i : integer;
Len : PtrInt;
i : PtrInt;
begin
// clear Match* - before next Exec* call it's undefined
for i := 0 to NSUBEXP - 1 do begin
@ -3571,20 +3615,12 @@ procedure TRegExpr.SetInputString (const AInputString : RegExprString);
// need reallocation of input string buffer ?
Len := length (AInputString);
if Assigned (fInputString) and (Length (fInputString) <> Len) then begin
FreeMem (fInputString);
fInputString := nil;
end;
// buffer [re]allocation
if not Assigned (fInputString)
then GetMem (fInputString, (Len + 1) * SizeOf (REChar));
ReAllocMem(fInputString,(Len + 1) * SizeOf (REChar));
// copy input string into buffer
{$IFDEF UniCode}
StrPCopy (fInputString, Copy (AInputString, 1, Len)); //###0.927
{$ELSE}
StrLCopy (fInputString, PRegExprChar (AInputString), Len);
{$ENDIF}
if Len>0 then
System.Move(AInputString[1],fInputString^,(Len+1)* SizeOf (REChar)) // with #0
else
fInputString[0]:=#0;
{
fInputString : string;
@ -3596,7 +3632,7 @@ procedure TRegExpr.SetInputString (const AInputString : RegExprString);
fInputStart := PChar (fInputString);
Len := length (fInputString);
fInputEnd := PRegExprChar (integer (fInputStart) + Len); ??
!! startp/endp âñå ðàâíî áóäåò îïàñíî èñïîëüçîâàòü ?
!! startp/endp все равно будет опасно использовать ?
}
end; { of procedure TRegExpr.SetInputString
--------------------------------------------------------------}
@ -3658,14 +3694,20 @@ function TRegExpr.GetLinePairedSeparator : RegExprString;
function TRegExpr.Substitute (const ATemplate : RegExprString) : RegExprString;
// perform substitutions after a regexp match
// completely rewritten in 0.929
var
TemplateLen : integer;
type
TSubstMode = (smodeNormal, smodeOneUpper, smodeOneLower, smodeAllUpper,
smodeAllLower);
var
TemplateLen : PtrInt;
TemplateBeg, TemplateEnd : PRegExprChar;
p, p0, ResultPtr : PRegExprChar;
ResultLen : integer;
n : integer;
p, p0, p1, ResultPtr : PRegExprChar;
ResultLen : PtrInt;
n : PtrInt;
Ch : REChar;
function ParseVarName (var APtr : PRegExprChar) : integer;
Mode: TSubstMode;
LineEnd: String = LineEnding;
function ParseVarName (var APtr : PRegExprChar) : PtrInt;
// extract name of variable (digits, may be enclosed with
// curly braces) from APtr^, uses TemplateEnd !!!
const
@ -3700,12 +3742,13 @@ function TRegExpr.Substitute (const ATemplate : RegExprString) : RegExprString;
then Result := -1; // no valid digits found or no right curly brace
APtr := p;
end;
begin
begin
// Check programm and input string
if not IsProgrammOk
then EXIT;
if not Assigned (fInputString) then begin
Error (reeNoInpitStringSpecified);
Error (reeNoInputStringSpecified);
EXIT;
end;
// Prepare for working
@ -3726,53 +3769,116 @@ function TRegExpr.Substitute (const ATemplate : RegExprString) : RegExprString;
then n := ParseVarName (p)
else n := -1;
if n >= 0 then begin
if (n < NSUBEXP) and Assigned (startp [n]) and Assigned (endp [n])
if (n < NSUBEXP) and Assigned (startp [n]) and Assigned (endp [n])
then inc (ResultLen, endp [n] - startp [n]);
end
else begin
if (Ch = EscChar) and (p < TemplateEnd) then begin // quoted or special char followed
Ch := p^;
inc (p);
case Ch of
'n' : inc(ResultLen, Length(LineEnding));
'u', 'l', 'U', 'L': {nothing};
else inc(ResultLen);
end;
end
else begin
if (Ch = EscChar) and (p < TemplateEnd)
then inc (p); // quoted or special char followed
inc (ResultLen);
end;
end;
else
inc(ResultLen);
end;
end;
// Get memory. We do it once and it significant speed up work !
if ResultLen = 0 then begin
Result := '';
EXIT;
end;
SetString (Result, nil, ResultLen);
//SetString (Result, nil, ResultLen);
SetLength(Result,ResultLen);
// Fill Result
ResultPtr := pointer (Result);
p := TemplateBeg;
Mode := smodeNormal;
while p < TemplateEnd do begin
Ch := p^;
p0 := p;
inc (p);
p1 := p;
if Ch = '$'
then n := ParseVarName (p)
else n := -1;
if n >= 0 then begin
p0 := startp [n];
if (n < NSUBEXP) and Assigned (p0) and Assigned (endp [n]) then
while p0 < endp [n] do begin
ResultPtr^ := p0^;
inc (ResultPtr);
inc (p0);
end;
end
else begin
if (Ch = EscChar) and (p < TemplateEnd) then begin // quoted or special char followed
Ch := p^;
inc (p);
if (n >= 0) then begin
p0 := startp[n];
p1 := endp[n];
if (n >= NSUBEXP) or not Assigned (p0) or not Assigned (endp [n]) then
p1 := p0; // empty
end
else begin
if (Ch = EscChar) and (p < TemplateEnd) then begin // quoted or special char followed
Ch := p^;
inc (p);
case Ch of
'n' : begin
p0 := @LineEnd[1];
p1 := p0 + Length(LineEnding);
end;
'l' : begin
Mode := smodeOneLower;
p1 := p0;
end;
'L' : begin
Mode := smodeAllLower;
p1 := p0;
end;
'u' : begin
Mode := smodeOneUpper;
p1 := p0;
end;
'U' : begin
Mode := smodeAllUpper;
p1 := p0;
end;
else
begin
inc(p0);
inc(p1);
end;
end;
ResultPtr^ := Ch;
inc (ResultPtr);
end
end;
if p0 < p1 then begin
while p0 < p1 do begin
case Mode of
smodeOneLower, smodeAllLower:
begin
Ch := p0^;
if Ch < #128 then
Ch := AnsiLowerCase(Ch)[1];
ResultPtr^ := Ch;
if Mode = smodeOneLower then
Mode := smodeNormal;
end;
smodeOneUpper, smodeAllUpper:
begin
Ch := p0^;
if Ch < #128 then
Ch := AnsiUpperCase(Ch)[1];
ResultPtr^ := Ch;
if Mode = smodeOneUpper then
Mode := smodeNormal;
end;
else
ResultPtr^ := p0^;
end;
inc (ResultPtr);
inc (p0);
end;
end;
end; { of function TRegExpr.Substitute
Mode := smodeNormal;
end;
end;
end; { of function TRegExpr.Substitute
--------------------------------------------------------------}
procedure TRegExpr.Split (AInputStr : RegExprString; APieces : TStrings);
var PrevPos : integer;
var PrevPos : PtrInt;
begin
PrevPos := 1;
if Exec (AInputStr) then
@ -3787,7 +3893,7 @@ procedure TRegExpr.Split (AInputStr : RegExprString; APieces : TStrings);
function TRegExpr.Replace (AInputStr : RegExprString; const AReplaceStr : RegExprString;
AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) : RegExprString;
var
PrevPos : integer;
PrevPos : PtrInt;
begin
Result := '';
PrevPos := 1;
@ -3808,7 +3914,7 @@ function TRegExpr.ReplaceEx (AInputStr : RegExprString;
AReplaceFunc : TRegExprReplaceFunction)
: RegExprString;
var
PrevPos : integer;
PrevPos : PtrInt;
begin
Result := '';
PrevPos := 1;
@ -3829,7 +3935,7 @@ function TRegExpr.Replace (AInputStr : RegExprString;
AReplaceFunc : TRegExprReplaceFunction)
: RegExprString;
begin
ReplaceEx (AInputStr, AReplaceFunc);
{$IFDEF FPC}Result:={$ENDIF}ReplaceEx (AInputStr, AReplaceFunc);
end; { of function TRegExpr.Replace
--------------------------------------------------------------}
{$ENDIF}
@ -3902,8 +4008,8 @@ function TRegExpr.Dump : RegExprString;
s : PRegExprChar;
op : TREOp; // Arbitrary non-END op.
next : PRegExprChar;
i : integer;
Diff : integer;
i : PtrInt;
Diff : PtrInt;
{$IFDEF UseSetOfChar} //###0.929
Ch : REChar;
{$ENDIF}