Merges the fixes from SynRegExpr, specially fixes 64-bits, while removing the synedit defines

git-svn-id: trunk@18899 -
This commit is contained in:
sekelsenmat 2011-08-29 14:04:14 +00:00
parent ed03858129
commit dbc08b5c1f

View File

@ -45,8 +45,15 @@ unit RegExpr;
The same modified LGPL with static linking exception as the Free Pascal RTL The same modified LGPL with static linking exception as the Free Pascal RTL
} }
interface interface
{off $DEFINE DebugSynRegExpr}
{$IFDEF FPC}
{$MODE DELPHI} // Delphi-compatible mode in FreePascal
{$ENDIF}
// ======== Determine compiler // ======== Determine compiler
{$IFDEF VER80} Sorry, TRegExpr is for 32-bits Delphi only. Delphi 1 is not supported (and whos really care today?!). {$ENDIF} {$IFDEF VER80} Sorry, TRegExpr is for 32-bits Delphi only. Delphi 1 is not supported (and whos really care today?!). {$ENDIF}
{$IFDEF VER90} {$DEFINE D2} {$ENDIF} // D2 {$IFDEF VER90} {$DEFINE D2} {$ENDIF} // D2
@ -62,7 +69,9 @@ interface
{$BOOLEVAL OFF} {$BOOLEVAL OFF}
{$EXTENDEDSYNTAX ON} {$EXTENDEDSYNTAX ON}
{$LONGSTRINGS ON} {$LONGSTRINGS ON}
{$IFNDEF FPC}
{$OPTIMIZATION ON} {$OPTIMIZATION ON}
{$ENDIF}
{$IFDEF D6} {$IFDEF D6}
{$WARN SYMBOL_PLATFORM OFF} // Suppress .Net warnings {$WARN SYMBOL_PLATFORM OFF} // Suppress .Net warnings
{$ENDIF} {$ENDIF}
@ -71,9 +80,6 @@ interface
{$WARN UNSAFE_TYPE OFF} // Suppress .Net warnings {$WARN UNSAFE_TYPE OFF} // Suppress .Net warnings
{$WARN UNSAFE_CODE OFF} // Suppress .Net warnings {$WARN UNSAFE_CODE OFF} // Suppress .Net warnings
{$ENDIF} {$ENDIF}
{$IFDEF FPC}
{$MODE DELPHI} // Delphi-compatible mode in FreePascal
{$ENDIF}
// ======== Define options for TRegExpr engine // ======== Define options for TRegExpr engine
{.$DEFINE UniCode} // Unicode support {.$DEFINE UniCode} // Unicode support
@ -119,7 +125,7 @@ type
{$ENDIF} {$ENDIF}
TREOp = REChar; // internal p-code type //###0.933 TREOp = REChar; // internal p-code type //###0.933
PREOp = ^TREOp; PREOp = ^TREOp;
TRENextOff = integer; // internal Next "pointer" (offset to current p-code) //###0.933 TRENextOff = PtrInt; // internal Next "pointer" (offset to current p-code) //###0.933
PRENextOff = ^TRENextOff; // used for extracting Next "pointers" from compiled r.e. //###0.933 PRENextOff = ^TRENextOff; // used for extracting Next "pointers" from compiled r.e. //###0.933
TREBracesArg = integer; // type of {m,n} arguments TREBracesArg = integer; // type of {m,n} arguments
PREBracesArg = ^TREBracesArg; PREBracesArg = ^TREBracesArg;
@ -207,7 +213,7 @@ type
regstart : REChar; // char that must begin a match; '\0' if none obvious regstart : REChar; // char that must begin a match; '\0' if none obvious
reganch : REChar; // is the match anchored (at beginning-of-line only)? reganch : REChar; // is the match anchored (at beginning-of-line only)?
regmust : PRegExprChar; // string (pointer into program) that match must include, or nil regmust : PRegExprChar; // string (pointer into program) that match must include, or nil
regmlen : integer; // length of regmust string regmlen : PtrInt; // length of regmust string
// Regstart and reganch permit very fast decisions on suitable starting points // Regstart and reganch permit very fast decisions on suitable starting points
// for a match, cutting down the work a lot. Regmust permits fast rejection // for a match, cutting down the work a lot. Regmust permits fast rejection
// of lines that cannot possibly match. The regmust tests are costly enough // of lines that cannot possibly match. The regmust tests are costly enough
@ -227,10 +233,10 @@ type
// work variables for compiler's routines // work variables for compiler's routines
regparse : PRegExprChar; // Input-scan pointer. regparse : PRegExprChar; // Input-scan pointer.
regnpar : integer; // count. regnpar : PtrInt; // count.
regdummy : char; regdummy : char;
regcode : PRegExprChar; // Code-emit pointer; @regdummy = don't. regcode : PRegExprChar; // Code-emit pointer; @regdummy = don't.
regsize : integer; // Code size. regsize : PtrInt; // Code size.
regexpbeg : PRegExprChar; // only for error handling. Contains regexpbeg : PRegExprChar; // only for error handling. Contains
// pointer to beginning of r.e. while compiling // pointer to beginning of r.e. while compiling
@ -330,15 +336,15 @@ type
function ParseAtom (var flagp : integer) : PRegExprChar; function ParseAtom (var flagp : integer) : PRegExprChar;
// the lowest level // the lowest level
function GetCompilerErrorPos : integer; function GetCompilerErrorPos : PtrInt;
// current pos in r.e. - for error hanling // current pos in r.e. - for error hanling
{$IFDEF UseFirstCharSet} //###0.929 {$IFDEF UseFirstCharSet} //###0.929
procedure FillFirstCharSet (prog : PRegExprChar); procedure FillFirstCharSet (prog : PRegExprChar);
{$ENDIF} {$ENDIF}
{===================== Mathing section ===================} {===================== Matching section ===================}
function regrepeat (p : PRegExprChar; AMax : integer) : integer; function regrepeat (p : PRegExprChar; AMax : PtrInt) : PtrInt;
// repeatedly match something simple, report how many // repeatedly match something simple, report how many
function regnext (p : PRegExprChar) : PRegExprChar; function regnext (p : PRegExprChar) : PRegExprChar;
@ -347,7 +353,7 @@ type
function MatchPrim (prog : PRegExprChar) : boolean; function MatchPrim (prog : PRegExprChar) : boolean;
// recursively matching routine // recursively matching routine
function ExecPrim (AOffset: integer) : boolean; function ExecPrim (AOffset: PtrInt) : boolean;
// Exec for stored InputString // Exec for stored InputString
{$IFDEF RegExpPCodeDump} {$IFDEF RegExpPCodeDump}
@ -355,8 +361,8 @@ type
{$ENDIF} {$ENDIF}
function GetSubExprMatchCount : integer; function GetSubExprMatchCount : integer;
function GetMatchPos (Idx : integer) : integer; function GetMatchPos (Idx : integer) : PtrInt;
function GetMatchLen (Idx : integer) : integer; function GetMatchLen (Idx : integer) : PtrInt;
function GetMatch (Idx : integer) : RegExprString; function GetMatch (Idx : integer) : RegExprString;
function GetInputString : RegExprString; function GetInputString : RegExprString;
@ -400,8 +406,8 @@ type
property ModifierR : boolean index 2 read GetModifier write SetModifier; property ModifierR : boolean index 2 read GetModifier write SetModifier;
// Modifier /r - use r.e.syntax extended for russian, // Modifier /r - use r.e.syntax extended for russian,
// (was property ExtSyntaxEnabled in previous versions) // (was property ExtSyntaxEnabled in previous versions)
// If true, then à-ÿ additional include russian letter '¸', // If true, then а-я additional include russian letter ',
// À-ß additional include '¨', and à-ß include all russian symbols. // А-Я additional include 'Ё', and а-Я include all russian symbols.
// You have to turn it off if it may interfere with you national alphabet. // You have to turn it off if it may interfere with you national alphabet.
// , initialized from RegExprModifierR // , initialized from RegExprModifierR
@ -429,13 +435,13 @@ type
{$IFNDEF FPC} // I do not know why FreePascal cannot overload methods with empty param list {$IFNDEF FPC} // I do not know why FreePascal cannot overload methods with empty param list
function Exec : boolean; overload; //###0.949 function Exec : boolean; overload; //###0.949
{$ENDIF} {$ENDIF}
function Exec (AOffset: integer) : boolean; overload; //###0.949 function Exec (AOffset: PtrInt) : boolean; overload; //###0.949
{$ENDIF} {$ENDIF}
// match a programm against a string AInputString // match a programm against a string AInputString
// !!! Exec store AInputString into InputString property // !!! Exec store AInputString into InputString property
// For Delphi 5 and higher available overloaded versions - first without // For Delphi 5 and higher available overloaded versions - first without
// parameter (uses already assigned to InputString property value) // parameter (uses already assigned to InputString property value)
// and second that has integer parameter and is same as ExecPos // and second that has PtrInt parameter and is same as ExecPos
function ExecNext : boolean; function ExecNext : boolean;
// find next match: // find next match:
@ -448,7 +454,7 @@ type
// Exec* (Exec, ExecPos, ExecNext). So You always must use something like // Exec* (Exec, ExecPos, ExecNext). So You always must use something like
// if Exec (InputString) then repeat { proceed results} until not ExecNext; // if Exec (InputString) then repeat { proceed results} until not ExecNext;
function ExecPos (AOffset: integer {$IFDEF DefParam}= 1{$ENDIF}) : boolean; function ExecPos (AOffset: PtrInt {$IFDEF DefParam}= 1{$ENDIF}) : boolean;
// find match for InputString starting from AOffset position // find match for InputString starting from AOffset position
// (AOffset=1 - first char of InputString) // (AOffset=1 - first char of InputString)
@ -511,14 +517,14 @@ type
// Exec ('2'): SubExprMatchCount=0, Match[0]='2' // Exec ('2'): SubExprMatchCount=0, Match[0]='2'
// Exec ('7') - return False: SubExprMatchCount=-1 // Exec ('7') - return False: SubExprMatchCount=-1
property MatchPos [Idx : integer] : integer read GetMatchPos; property MatchPos [Idx : integer] : PtrInt read GetMatchPos;
// pos of entrance subexpr. #Idx into tested in last Exec* // pos of entrance subexpr. #Idx into tested in last Exec*
// string. First subexpr. have Idx=1, last - MatchCount, // string. First subexpr. have Idx=1, last - MatchCount,
// whole r.e. have Idx=0. // whole r.e. have Idx=0.
// Returns -1 if in r.e. no such subexpr. or this subexpr. // Returns -1 if in r.e. no such subexpr. or this subexpr.
// not found in input string. // not found in input string.
property MatchLen [Idx : integer] : integer read GetMatchLen; property MatchLen [Idx : integer] : PtrInt read GetMatchLen;
// len of entrance subexpr. #Idx r.e. into tested in last Exec* // len of entrance subexpr. #Idx r.e. into tested in last Exec*
// string. First subexpr. have Idx=1, last - MatchCount, // string. First subexpr. have Idx=1, last - MatchCount,
// whole r.e. have Idx=0. // whole r.e. have Idx=0.
@ -539,9 +545,9 @@ type
function ErrorMsg (AErrorID : integer) : RegExprString; virtual; function ErrorMsg (AErrorID : integer) : RegExprString; virtual;
// Returns Error message for error with ID = AErrorID. // Returns Error message for error with ID = AErrorID.
property CompilerErrorPos : integer read GetCompilerErrorPos; property CompilerErrorPos : PtrInt read GetCompilerErrorPos;
// Returns pos in r.e. there compiler stopped. // Returns pos in r.e. there compiler stopped.
// Usefull for error diagnostics // Useful for error diagnostics
property SpaceChars : RegExprString read fSpaceChars write fSpaceChars; //###0.927 property SpaceChars : RegExprString read fSpaceChars write fSpaceChars; //###0.927
// Contains chars, treated as /s (initially filled with RegExprSpaceChars // Contains chars, treated as /s (initially filled with RegExprSpaceChars
@ -567,7 +573,7 @@ type
// Create set it to RegExprInvertCaseFunction (InvertCaseFunction by default) // Create set it to RegExprInvertCaseFunction (InvertCaseFunction by default)
procedure Compile; //###0.941 procedure Compile; //###0.941
// [Re]compile r.e. Usefull for example for GUI r.e. editors (to check // [Re]compile r.e. Useful for example for GUI r.e. editors (to check
// all properties validity). // all properties validity).
{$IFDEF RegExpPCodeDump} {$IFDEF RegExpPCodeDump}
@ -579,7 +585,7 @@ type
ERegExpr = class (Exception) ERegExpr = class (Exception)
public public
ErrorCode : integer; ErrorCode : integer;
CompilerErrorPos : integer; CompilerErrorPos : PtrInt;
end; end;
const const
@ -609,11 +615,11 @@ function ReplaceRegExpr (const ARegExpr, AInputStr, AReplaceStr : RegExprString;
function QuoteRegExprMetaChars (const AStr : RegExprString) : RegExprString; function QuoteRegExprMetaChars (const AStr : RegExprString) : RegExprString;
// Replace all metachars with its safe representation, // Replace all metachars with its safe representation,
// for example 'abc$cd.(' converts into 'abc\$cd\.\(' // for example 'abc$cd.(' converts into 'abc\$cd\.\('
// This function usefull for r.e. autogeneration from // This function useful for r.e. autogeneration from
// user input // user input
function RegExprSubExpressions (const ARegExpr : string; function RegExprSubExpressions (const ARegExpr : string;
ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : integer; ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : PtrInt;
// Makes list of subexpressions found in ARegExpr r.e. // Makes list of subexpressions found in ARegExpr r.e.
// In ASubExps every item represent subexpression, // In ASubExps every item represent subexpression,
// from first to last, in format: // from first to last, in format:
@ -624,7 +630,7 @@ function RegExprSubExpressions (const ARegExpr : string;
// if exist! // if exist!
// AExtendedSyntax - must be True if modifier /m will be On while // AExtendedSyntax - must be True if modifier /m will be On while
// using the r.e. // using the r.e.
// Usefull for GUI editors of r.e. etc (You can find example of using // Useful for GUI editors of r.e. etc (You can find example of using
// in TestRExp.dpr project) // in TestRExp.dpr project)
// Returns // Returns
// 0 Success. No unbalanced brackets was found; // 0 Success. No unbalanced brackets was found;
@ -638,6 +644,16 @@ function RegExprSubExpressions (const ARegExpr : string;
implementation implementation
{$IFDEF FPC}
{$ELSE}
uses
{$IFDEF SYN_WIN32}
Windows; // CharUpper/Lower
{$ELSE}
Libc; //Qt.pas from Borland does not expose char handling functions
{$ENDIF}
{$ENDIF}
const const
TRegExprVersionMajor : integer = 0; TRegExprVersionMajor : integer = 0;
TRegExprVersionMinor : integer = 952; TRegExprVersionMinor : integer = 952;
@ -664,7 +680,7 @@ const
function StrPCopy (Dest: PRegExprChar; const Source: RegExprString): PRegExprChar; function StrPCopy (Dest: PRegExprChar; const Source: RegExprString): PRegExprChar;
var var
i, Len : Integer; i, Len : PtrInt;
begin begin
Len := length (Source); //###0.932 Len := length (Source); //###0.932
for i := 1 to Len do for i := 1 to Len do
@ -674,8 +690,8 @@ function StrPCopy (Dest: PRegExprChar; const Source: RegExprString): PRegExprCha
end; { of function StrPCopy end; { of function StrPCopy
--------------------------------------------------------------} --------------------------------------------------------------}
function StrLCopy (Dest, Source: PRegExprChar; MaxLen: Cardinal): PRegExprChar; function StrLCopy (Dest, Source: PRegExprChar; MaxLen: PtrUInt): PRegExprChar;
var i: Integer; var i: PtrInt;
begin begin
for i := 0 to MaxLen - 1 do for i := 0 to MaxLen - 1 do
Dest [i] := Source [i]; Dest [i] := Source [i];
@ -683,7 +699,7 @@ function StrLCopy (Dest, Source: PRegExprChar; MaxLen: Cardinal): PRegExprChar;
end; { of function StrLCopy end; { of function StrLCopy
--------------------------------------------------------------} --------------------------------------------------------------}
function StrLen (Str: PRegExprChar): Cardinal; function StrLen (Str: PRegExprChar): PtrUInt;
begin begin
Result:=0; Result:=0;
while Str [result] <> #0 while Str [result] <> #0
@ -692,7 +708,7 @@ function StrLen (Str: PRegExprChar): Cardinal;
--------------------------------------------------------------} --------------------------------------------------------------}
function StrPos (Str1, Str2: PRegExprChar): PRegExprChar; function StrPos (Str1, Str2: PRegExprChar): PRegExprChar;
var n: Integer; var n: PtrInt;
begin begin
Result := nil; Result := nil;
n := Pos (RegExprString (Str2), RegExprString (Str1)); n := Pos (RegExprString (Str2), RegExprString (Str1));
@ -702,7 +718,7 @@ function StrPos (Str1, Str2: PRegExprChar): PRegExprChar;
end; { of function StrPos end; { of function StrPos
--------------------------------------------------------------} --------------------------------------------------------------}
function StrLComp (Str1, Str2: PRegExprChar; MaxLen: Cardinal): Integer; function StrLComp (Str1, Str2: PRegExprChar; MaxLen: PtrUInt): PtrInt;
var S1, S2: RegExprString; var S1, S2: RegExprString;
begin begin
S1 := Str1; S1 := Str1;
@ -776,7 +792,7 @@ function QuoteRegExprMetaChars (const AStr : RegExprString) : RegExprString;
// Very similar to META array, but slighly changed. // Very similar to META array, but slighly changed.
// !Any changes in META array must be synchronized with this set. // !Any changes in META array must be synchronized with this set.
var var
i, i0, Len : integer; i, i0, Len : PtrInt;
begin begin
Result := ''; Result := '';
Len := length (AStr); Len := length (AStr);
@ -795,24 +811,24 @@ function QuoteRegExprMetaChars (const AStr : RegExprString) : RegExprString;
--------------------------------------------------------------} --------------------------------------------------------------}
function RegExprSubExpressions (const ARegExpr : string; function RegExprSubExpressions (const ARegExpr : string;
ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : integer; ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : PtrInt;
type type
TStackItemRec = record //###0.945 TStackItemRec = record //###0.945
SubExprIdx : integer; SubExprIdx : integer;
StartPos : integer; StartPos : PtrInt;
end; end;
TStackArray = packed array [0 .. NSUBEXPMAX - 1] of TStackItemRec; TStackArray = packed array [0 .. NSUBEXPMAX - 1] of TStackItemRec;
var var
Len, SubExprLen : integer; Len, SubExprLen : PtrInt;
i, i0 : integer; i, i0 : PtrInt;
Modif : integer; Modif : integer;
Stack : ^TStackArray; //###0.945 Stack : ^TStackArray; //###0.945
StackIdx, StackSz : integer; StackIdx, StackSz : PtrInt;
begin begin
Result := 0; // no unbalanced brackets found at this very moment Result := 0; // no unbalanced brackets found at this very moment
ASubExprs.Clear; // I don't think that adding to non empty list ASubExprs.Clear; // I don't think that adding to non empty list
// can be usefull, so I simplified algorithm to work only with empty list // can be useful, so I simplified algorithm to work only with empty list
Len := length (ARegExpr); // some optimization tricks Len := length (ARegExpr); // some optimization tricks
@ -906,8 +922,8 @@ function RegExprSubExpressions (const ARegExpr : string;
// check if entire r.e. added // check if entire r.e. added
if (ASubExprs.Count = 0) if (ASubExprs.Count = 0)
or ((integer (ASubExprs.Objects [0]) and $FFFF) <> 1) or ((PtrInt (ASubExprs.Objects [0]) and $FFFF) <> 1)
or (((integer (ASubExprs.Objects [0]) ShR 16) and $FFFF) <> Len) or (((PtrInt (ASubExprs.Objects [0]) ShR 16) and $FFFF) <> Len)
// whole r.e. wasn't added because it isn't bracketed // whole r.e. wasn't added because it isn't bracketed
// well, we add it now: // well, we add it now:
then ASubExprs.InsertObject (0, ARegExpr, TObject ((Len ShL 16) or 1)); then ASubExprs.InsertObject (0, ARegExpr, TObject ((Len ShL 16) or 1));
@ -990,7 +1006,7 @@ const
// The Next is a offset from the opcode of the node containing it. // The Next is a offset from the opcode of the node containing it.
// An operand, if any, simply follows the node. (Note that much of // An operand, if any, simply follows the node. (Note that much of
// the code generation knows about this implicit relationship!) // the code generation knows about this implicit relationship!)
// Using TRENextOff=integer speed up p-code processing. // Using TRENextOff=PtrInt speed up p-code processing.
// Opcodes description: // Opcodes description:
// //
@ -1049,7 +1065,7 @@ const
reeMatchPrimCorruptedPointers = 1002; reeMatchPrimCorruptedPointers = 1002;
reeNoExpression = 1003; reeNoExpression = 1003;
reeCorruptedProgram = 1004; reeCorruptedProgram = 1004;
reeNoInpitStringSpecified = 1005; reeNoInputStringSpecified = 1005;
reeOffsetMustBeGreaterThen0 = 1006; reeOffsetMustBeGreaterThen0 = 1006;
reeExecNextWithoutExec = 1007; reeExecNextWithoutExec = 1007;
reeGetInputStringWithoutInputString = 1008; reeGetInputStringWithoutInputString = 1008;
@ -1093,7 +1109,7 @@ function TRegExpr.ErrorMsg (AErrorID : integer) : RegExprString;
reeMatchPrimCorruptedPointers: Result := 'TRegExpr(exec): MatchPrim Corrupted Pointers'; reeMatchPrimCorruptedPointers: Result := 'TRegExpr(exec): MatchPrim Corrupted Pointers';
reeNoExpression: Result := 'TRegExpr(exec): Not Assigned Expression Property'; reeNoExpression: Result := 'TRegExpr(exec): Not Assigned Expression Property';
reeCorruptedProgram: Result := 'TRegExpr(exec): Corrupted Program'; reeCorruptedProgram: Result := 'TRegExpr(exec): Corrupted Program';
reeNoInpitStringSpecified: Result := 'TRegExpr(exec): No Input String Specified'; reeNoInputStringSpecified: Result := 'TRegExpr(exec): No Input String Specified';
reeOffsetMustBeGreaterThen0: Result := 'TRegExpr(exec): Offset Must Be Greater Then 0'; reeOffsetMustBeGreaterThen0: Result := 'TRegExpr(exec): Offset Must Be Greater Then 0';
reeExecNextWithoutExec: Result := 'TRegExpr(exec): ExecNext Without Exec[Pos]'; reeExecNextWithoutExec: Result := 'TRegExpr(exec): ExecNext Without Exec[Pos]';
reeGetInputStringWithoutInputString: Result := 'TRegExpr(exec): GetInputString Without InputString'; reeGetInputStringWithoutInputString: Result := 'TRegExpr(exec): GetInputString Without InputString';
@ -1158,12 +1174,21 @@ constructor TRegExpr.Create;
destructor TRegExpr.Destroy; destructor TRegExpr.Destroy;
begin begin
if programm <> nil if programm <> nil then
then FreeMem (programm); begin
if fExpression <> nil FreeMem (programm);
then FreeMem (fExpression); programm:=nil;
if fInputString <> nil end;
then FreeMem (fInputString); if fExpression <> nil then
begin
FreeMem (fExpression);
fExpression:=nil;
end;
if fInputString <> nil then
begin
FreeMem (fInputString);
fInputString:=nil;
end;
end; { of destructor TRegExpr.Destroy end; { of destructor TRegExpr.Destroy
--------------------------------------------------------------} --------------------------------------------------------------}
@ -1175,9 +1200,9 @@ class function TRegExpr.InvertCaseFunction (const Ch : REChar) : REChar;
else else
{$ENDIF} {$ENDIF}
begin begin
Result := {$IFDEF FPC}AnsiUpperCase (Ch) [1]{$ELSE} REChar (CharUpper (PChar (Ch))){$ENDIF}; Result := {$IFDEF FPC}AnsiUpperCase (Ch) [1]{$ELSE} {$IFDEF SYN_WIN32}REChar (CharUpper (PChar (Ch))){$ELSE}REChar (toupper (integer (Ch))){$ENDIF} {$ENDIF};
if Result = Ch if Result = Ch
then Result := {$IFDEF FPC}AnsiLowerCase (Ch) [1]{$ELSE} REChar (CharLower (PChar (Ch))){$ENDIF}; then Result := {$IFDEF FPC}AnsiLowerCase (Ch) [1]{$ELSE} {$IFDEF SYN_WIN32}REChar (CharLower (PChar (Ch))){$ELSE}REChar(tolower (integer (Ch))){$ENDIF} {$ENDIF};
end; end;
end; { of function TRegExpr.InvertCaseFunction end; { of function TRegExpr.InvertCaseFunction
--------------------------------------------------------------} --------------------------------------------------------------}
@ -1192,7 +1217,7 @@ function TRegExpr.GetExpression : RegExprString;
procedure TRegExpr.SetExpression (const s : RegExprString); procedure TRegExpr.SetExpression (const s : RegExprString);
var var
Len : integer; //###0.950 Len : PtrInt; //###0.950
begin begin
if (s <> fExpression) or not fExprIsCompiled then begin if (s <> fExpression) or not fExprIsCompiled then begin
fExprIsCompiled := false; fExprIsCompiled := false;
@ -1203,12 +1228,7 @@ procedure TRegExpr.SetExpression (const s : RegExprString);
if s <> '' then begin if s <> '' then begin
Len := length (s); //###0.950 Len := length (s); //###0.950
GetMem (fExpression, (Len + 1) * SizeOf (REChar)); GetMem (fExpression, (Len + 1) * SizeOf (REChar));
// StrPCopy (fExpression, s); //###0.950 replaced due to StrPCopy limitation of 255 chars System.Move(s[1],fExpression^,(Len + 1) * SizeOf (REChar));
{$IFDEF UniCode}
StrPCopy (fExpression, Copy (s, 1, Len)); //###0.950
{$ELSE}
StrLCopy (fExpression, PRegExprChar (s), Len); //###0.950
{$ENDIF UniCode}
InvalidateProgramm; //###0.941 InvalidateProgramm; //###0.941
end; end;
@ -1228,7 +1248,7 @@ function TRegExpr.GetSubExprMatchCount : integer;
end; { of function TRegExpr.GetSubExprMatchCount end; { of function TRegExpr.GetSubExprMatchCount
--------------------------------------------------------------} --------------------------------------------------------------}
function TRegExpr.GetMatchPos (Idx : integer) : integer; function TRegExpr.GetMatchPos (Idx : integer) : PtrInt;
begin begin
if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString) if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString)
and Assigned (startp [Idx]) and Assigned (endp [Idx]) then begin and Assigned (startp [Idx]) and Assigned (endp [Idx]) then begin
@ -1238,7 +1258,7 @@ function TRegExpr.GetMatchPos (Idx : integer) : integer;
end; { of function TRegExpr.GetMatchPos end; { of function TRegExpr.GetMatchPos
--------------------------------------------------------------} --------------------------------------------------------------}
function TRegExpr.GetMatchLen (Idx : integer) : integer; function TRegExpr.GetMatchLen (Idx : integer) : PtrInt;
begin begin
if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString) if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString)
and Assigned (startp [Idx]) and Assigned (endp [Idx]) then begin and Assigned (startp [Idx]) and Assigned (endp [Idx]) then begin
@ -1252,8 +1272,13 @@ function TRegExpr.GetMatch (Idx : integer) : RegExprString;
begin begin
if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString) if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString)
and Assigned (startp [Idx]) and Assigned (endp [Idx]) and Assigned (startp [Idx]) and Assigned (endp [Idx])
and (endp [Idx] > startp[Idx])
//then Result := copy (fInputString, MatchPos [Idx], MatchLen [Idx]) //###0.929 //then Result := copy (fInputString, MatchPos [Idx], MatchLen [Idx]) //###0.929
then SetString (Result, startp [idx], endp [idx] - startp [idx]) then begin
//SetString (Result, startp [idx], endp [idx] - startp [idx])
SetLength(Result,endp [idx] - startp [idx]);
System.Move(startp [idx]^,Result[1],length(Result));
end
else Result := ''; else Result := '';
end; { of function TRegExpr.GetMatch end; { of function TRegExpr.GetMatch
--------------------------------------------------------------} --------------------------------------------------------------}
@ -1454,7 +1479,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
// shr after subtraction to calculate widechar distance %-( ) // shr after subtraction to calculate widechar distance %-( )
// so, if difference is negative we have .. the "feature" :( // so, if difference is negative we have .. the "feature" :(
// I could wrap it in $IFDEF UniCode, but I didn't because // I could wrap it in $IFDEF UniCode, but I didn't because
// "P Q computes the difference between the address given // "P Q computes the difference between the address given
// by P (the higher address) and the address given by Q (the // by P (the higher address) and the address given by Q (the
// lower address)" - Delphi help quotation. // lower address)" - Delphi help quotation.
else PRENextOff (scan + REOpSz)^ := val - scan; //###0.933 else PRENextOff (scan + REOpSz)^ := val - scan; //###0.933
@ -1480,6 +1505,10 @@ function TRegExpr.EmitNode (op : TREOp) : PRegExprChar; //###0.933
inc (regcode, REOpSz); inc (regcode, REOpSz);
PRENextOff (regcode)^ := 0; // Next "pointer" := nil PRENextOff (regcode)^ := 0; // Next "pointer" := nil
inc (regcode, RENextOffSz); inc (regcode, RENextOffSz);
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.EmitNode buffer overrun');
{$ENDIF}
end end
else inc (regsize, REOpSz + RENextOffSz); // compute code size without code generation else inc (regsize, REOpSz + RENextOffSz); // compute code size without code generation
end; { of function TRegExpr.EmitNode end; { of function TRegExpr.EmitNode
@ -1491,8 +1520,12 @@ procedure TRegExpr.EmitC (b : REChar);
if regcode <> @regdummy then begin if regcode <> @regdummy then begin
regcode^ := b; regcode^ := b;
inc (regcode); inc (regcode);
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.EmitC buffer overrun');
{$ENDIF}
end end
else inc (regsize); // Type of p-code pointer always is ^REChar else inc (regsize, REOpSz); // Type of p-code pointer always is ^REChar
end; { of procedure TRegExpr.EmitC end; { of procedure TRegExpr.EmitC
--------------------------------------------------------------} --------------------------------------------------------------}
@ -1507,8 +1540,15 @@ procedure TRegExpr.InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer
inc (regsize, sz); inc (regsize, sz);
EXIT; EXIT;
end; end;
// move code behind insert position
src := regcode; src := regcode;
inc (regcode, sz); inc (regcode, sz);
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.InsertOperator buffer overrun');
if (opnd<regcode) or (opnd-regcode>regsize) then
raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
{$ENDIF}
dst := regcode; dst := regcode;
while src > opnd do begin while src > opnd do begin
dec (dst); dec (dst);
@ -1525,7 +1565,7 @@ procedure TRegExpr.InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer
end; { of procedure TRegExpr.InsertOperator end; { of procedure TRegExpr.InsertOperator
--------------------------------------------------------------} --------------------------------------------------------------}
function strcspn (s1 : PRegExprChar; s2 : PRegExprChar) : integer; function strcspn (s1 : PRegExprChar; s2 : PRegExprChar) : PtrInt;
// find length of initial segment of s1 consisting // find length of initial segment of s1 consisting
// entirely of characters not from s2 // entirely of characters not from s2
var scan1, scan2 : PRegExprChar; var scan1, scan2 : PRegExprChar;
@ -1565,17 +1605,17 @@ const
#$418,#$419,#$41A,#$41B,#$41C,#$41D,#$41E,#$41F, #$418,#$419,#$41A,#$41B,#$41C,#$41D,#$41E,#$41F,
#$420,#$421,#$422,#$423,#$424,#$425,#$426,#$427, #$420,#$421,#$422,#$423,#$424,#$425,#$426,#$427,
#$428,#$429,#$42A,#$42B,#$42C,#$42D,#$42E,#$42F,#0); #$428,#$429,#$42A,#$42B,#$42C,#$42D,#$42E,#$42F,#0);
RusRangeLoLow = #$430{'à'}; RusRangeLoLow = #$430{'а'};
RusRangeLoHigh = #$44F{'ÿ'}; RusRangeLoHigh = #$44F{'я'};
RusRangeHiLow = #$410{'À'}; RusRangeHiLow = #$410{'А'};
RusRangeHiHigh = #$42F{'ß'}; RusRangeHiHigh = #$42F{'Я'};
{$ELSE} {$ELSE}
RusRangeLo = 'àáâãä叿çèéêëìíîïðñòóôõö÷øùúûüýþÿ'; RusRangeLo = 'абвгдеёжзийклмнопрстуфхцчшщъыьэюя';
RusRangeHi = 'ÀÁÂÃÄŨÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞß'; RusRangeHi = 'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ';
RusRangeLoLow = 'à'; RusRangeLoLow = 'а';
RusRangeLoHigh = 'ÿ'; RusRangeLoHigh = 'я';
RusRangeHiLow = 'À'; RusRangeHiLow = 'А';
RusRangeHiHigh = 'ß'; RusRangeHiHigh = 'Я';
{$ENDIF} {$ENDIF}
function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean; function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
@ -1592,7 +1632,7 @@ function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
// of the structure of the compiled regexp. // of the structure of the compiled regexp.
var var
scan, longest : PRegExprChar; scan, longest : PRegExprChar;
len : cardinal; len : PtrUInt;
flags : integer; flags : integer;
begin begin
Result := false; // life too dark Result := false; // life too dark
@ -1624,13 +1664,6 @@ function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
if ParseReg (0, flags) = nil if ParseReg (0, flags) = nil
then EXIT; then EXIT;
// Small enough for 2-bytes programm pointers ?
// ###0.933 no real p-code length limits now :)))
// if regsize >= 64 * 1024 then begin
// Error (reeCompRegexpTooBig);
// EXIT;
// end;
// Allocate space. // Allocate space.
GetMem (programm, regsize * SizeOf (REChar)); GetMem (programm, regsize * SizeOf (REChar));
@ -1643,7 +1676,6 @@ function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
if ParseReg (0, flags) = nil if ParseReg (0, flags) = nil
then EXIT; then EXIT;
// Dig out information for optimizations. // Dig out information for optimizations.
{$IFDEF UseFirstCharSet} //###0.929 {$IFDEF UseFirstCharSet} //###0.929
FirstCharSet := []; FirstCharSet := [];
@ -1674,7 +1706,7 @@ function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
len := 0; len := 0;
while scan <> nil do begin while scan <> nil do begin
if (PREOp (scan)^ = EXACTLY) if (PREOp (scan)^ = EXACTLY)
and (strlen (scan + REOpSz + RENextOffSz) >= len) then begin and (strlen (scan + REOpSz + RENextOffSz) >= PtrInt(len)) then begin
longest := scan + REOpSz + RENextOffSz; longest := scan + REOpSz + RENextOffSz;
len := strlen (longest); len := strlen (longest);
end; end;
@ -1850,7 +1882,7 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
ANonGreedyOp : boolean); //###0.940 ANonGreedyOp : boolean); //###0.940
{$IFDEF ComplexBraces} {$IFDEF ComplexBraces}
var var
off : integer; off : TRENextOff;
{$ENDIF} {$ENDIF}
begin begin
{$IFNDEF ComplexBraces} {$IFNDEF ComplexBraces}
@ -1870,6 +1902,10 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
inc (regcode, REBracesArgSz); inc (regcode, REBracesArgSz);
PRENextOff (regcode)^ := off; PRENextOff (regcode)^ := off;
inc (regcode, RENextOffSz); inc (regcode, RENextOffSz);
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.ParsePiece.EmitComplexBraces buffer overrun');
{$ENDIF}
end end
else inc (regsize, REBracesArgSz * 2 + RENextOffSz); else inc (regsize, REBracesArgSz * 2 + RENextOffSz);
Tail (Result, NextNode); // LOOPENTRY -> LOOP Tail (Result, NextNode); // LOOPENTRY -> LOOP
@ -2018,7 +2054,7 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
else EmitComplexBraces (BracesMin, BracesMax, NonGreedyOp); else EmitComplexBraces (BracesMin, BracesMax, NonGreedyOp);
if NonGreedyCh //###0.940 if NonGreedyCh //###0.940
then inc (regparse); // Skip extra char '?' then inc (regparse); // Skip extra char '?'
end; { of case '{'} end; // of case '{'
// else // here we can't be // else // here we can't be
end; { of case op} end; { of case op}
@ -2041,16 +2077,16 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
flags : integer; flags : integer;
RangeBeg, RangeEnd : REChar; RangeBeg, RangeEnd : REChar;
CanBeRange : boolean; CanBeRange : boolean;
len : integer; len : PtrInt;
ender : REChar; ender : REChar;
begmodfs : PRegExprChar; begmodfs : PRegExprChar;
{$IFDEF UseSetOfChar} //###0.930 {$IFDEF UseSetOfChar} //###0.930
RangePCodeBeg : PRegExprChar; RangePCodeBeg : PRegExprChar;
RangePCodeIdx : integer; RangePCodeIdx : PtrInt;
RangeIsCI : boolean; RangeIsCI : boolean;
RangeSet : TSetOfREChar; RangeSet : TSetOfREChar;
RangeLen : integer; RangeLen : PtrInt;
RangeChMin, RangeChMax : REChar; RangeChMin, RangeChMax : REChar;
{$ENDIF} {$ENDIF}
@ -2065,13 +2101,13 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
end; end;
procedure EmitStr (const s : RegExprString); procedure EmitStr (const s : RegExprString);
var i : integer; var i : PtrInt;
begin begin
for i := 1 to length (s) for i := 1 to length (s)
do EmitC (s [i]); do EmitC (s [i]);
end; end;
function HexDig (ch : REChar) : integer; function HexDig (ch : REChar) : PtrInt;
begin begin
Result := 0; Result := 0;
if (ch >= 'a') and (ch <= 'f') if (ch >= 'a') and (ch <= 'f')
@ -2161,6 +2197,10 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
regcode^ := RangeChMax; regcode^ := RangeChMax;
inc (regcode); inc (regcode);
end; end;
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.ParseAtom.EmitRangeC TinySetLen buffer overrun');
{$ENDIF}
end end
else begin else begin
if regcode = @regdummy then begin if regcode = @regdummy then begin
@ -2173,6 +2213,10 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
regcode := RangePCodeBeg; regcode := RangePCodeBeg;
Move (RangeSet, regcode^, SizeOf (TSetOfREChar)); Move (RangeSet, regcode^, SizeOf (TSetOfREChar));
inc (regcode, SizeOf (TSetOfREChar)); inc (regcode, SizeOf (TSetOfREChar));
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.ParseAtom.EmitRangeC non TinySetLen buffer overrun');
{$ENDIF}
end; end;
end; end;
{$ELSE} {$ELSE}
@ -2188,7 +2232,7 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
end; end;
procedure EmitRangeStr (const s : RegExprString); procedure EmitRangeStr (const s : RegExprString);
var i : integer; var i : PtrInt;
begin begin
for i := 1 to length (s) for i := 1 to length (s)
do EmitRangeC (s [i]); do EmitRangeC (s [i]);
@ -2539,7 +2583,7 @@ function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
end; { of function TRegExpr.ParseAtom end; { of function TRegExpr.ParseAtom
--------------------------------------------------------------} --------------------------------------------------------------}
function TRegExpr.GetCompilerErrorPos : integer; function TRegExpr.GetCompilerErrorPos : PtrInt;
begin begin
Result := 0; Result := 0;
if (regexpbeg = nil) or (regparse = nil) if (regexpbeg = nil) or (regparse = nil)
@ -2565,7 +2609,7 @@ function TRegExpr.StrScanCI (s : PRegExprChar; ch : REChar) : PRegExprChar; //##
--------------------------------------------------------------} --------------------------------------------------------------}
{$ENDIF} {$ENDIF}
function TRegExpr.regrepeat (p : PRegExprChar; AMax : integer) : integer; function TRegExpr.regrepeat (p : PRegExprChar; AMax : PtrInt) : PtrInt;
// repeatedly match something simple, report how many // repeatedly match something simple, report how many
var var
scan : PRegExprChar; scan : PRegExprChar;
@ -2775,12 +2819,12 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
var var
scan : PRegExprChar; // Current node. scan : PRegExprChar; // Current node.
next : PRegExprChar; // Next node. next : PRegExprChar; // Next node.
len : integer; len : PtrInt;
opnd : PRegExprChar; opnd : PRegExprChar;
no : integer; no : PtrInt;
save : PRegExprChar; save : PRegExprChar;
nextch : REChar; nextch : REChar;
BracesMin, BracesMax : integer; // we use integer instead of TREBracesArg for better support */+ BracesMin, BracesMax : PtrInt; // we use integer instead of TREBracesArg for better support */+
{$IFDEF ComplexBraces} {$IFDEF ComplexBraces}
SavedLoopStack : array [1 .. LoopStackMax] of integer; // :(( very bad for recursion SavedLoopStack : array [1 .. LoopStackMax] of integer; // :(( very bad for recursion
SavedLoopStackIdx : integer; //###0.925 SavedLoopStackIdx : integer; //###0.925
@ -3377,20 +3421,20 @@ function TRegExpr.Exec : boolean;
end; { of function TRegExpr.Exec end; { of function TRegExpr.Exec
--------------------------------------------------------------} --------------------------------------------------------------}
{$ENDIF} {$ENDIF}
function TRegExpr.Exec (AOffset: integer) : boolean; function TRegExpr.Exec (AOffset: PtrInt) : boolean;
begin begin
Result := ExecPrim (AOffset); Result := ExecPrim (AOffset);
end; { of function TRegExpr.Exec end; { of function TRegExpr.Exec
--------------------------------------------------------------} --------------------------------------------------------------}
{$ENDIF} {$ENDIF}
function TRegExpr.ExecPos (AOffset: integer {$IFDEF DefParam}= 1{$ENDIF}) : boolean; function TRegExpr.ExecPos (AOffset: PtrInt {$IFDEF DefParam}= 1{$ENDIF}) : boolean;
begin begin
Result := ExecPrim (AOffset); Result := ExecPrim (AOffset);
end; { of function TRegExpr.ExecPos end; { of function TRegExpr.ExecPos
--------------------------------------------------------------} --------------------------------------------------------------}
function TRegExpr.ExecPrim (AOffset: integer) : boolean; function TRegExpr.ExecPrim (AOffset: PtrInt) : boolean;
procedure ClearMatchs; procedure ClearMatchs;
// Clears matchs array // Clears matchs array
var i : integer; var i : integer;
@ -3416,7 +3460,7 @@ function TRegExpr.ExecPrim (AOffset: integer) : boolean;
var var
s : PRegExprChar; s : PRegExprChar;
StartPtr: PRegExprChar; StartPtr: PRegExprChar;
InputLen : integer; InputLen : PtrInt;
begin begin
Result := false; // Be paranoid... Result := false; // Be paranoid...
@ -3430,7 +3474,7 @@ function TRegExpr.ExecPrim (AOffset: integer) : boolean;
// Check InputString presence // Check InputString presence
if not Assigned (fInputString) then begin if not Assigned (fInputString) then begin
Error (reeNoInpitStringSpecified); Error (reeNoInputStringSpecified);
EXIT; EXIT;
end; end;
@ -3532,7 +3576,7 @@ function TRegExpr.ExecPrim (AOffset: integer) : boolean;
--------------------------------------------------------------} --------------------------------------------------------------}
function TRegExpr.ExecNext : boolean; function TRegExpr.ExecNext : boolean;
var offset : integer; var offset : PtrInt;
begin begin
Result := false; Result := false;
if not Assigned (startp[0]) or not Assigned (endp[0]) then begin if not Assigned (startp[0]) or not Assigned (endp[0]) then begin
@ -3560,8 +3604,8 @@ function TRegExpr.GetInputString : RegExprString;
procedure TRegExpr.SetInputString (const AInputString : RegExprString); procedure TRegExpr.SetInputString (const AInputString : RegExprString);
var var
Len : integer; Len : PtrInt;
i : integer; i : PtrInt;
begin begin
// clear Match* - before next Exec* call it's undefined // clear Match* - before next Exec* call it's undefined
for i := 0 to NSUBEXP - 1 do begin for i := 0 to NSUBEXP - 1 do begin
@ -3571,20 +3615,12 @@ procedure TRegExpr.SetInputString (const AInputString : RegExprString);
// need reallocation of input string buffer ? // need reallocation of input string buffer ?
Len := length (AInputString); Len := length (AInputString);
if Assigned (fInputString) and (Length (fInputString) <> Len) then begin ReAllocMem(fInputString,(Len + 1) * SizeOf (REChar));
FreeMem (fInputString);
fInputString := nil;
end;
// buffer [re]allocation
if not Assigned (fInputString)
then GetMem (fInputString, (Len + 1) * SizeOf (REChar));
// copy input string into buffer // copy input string into buffer
{$IFDEF UniCode} if Len>0 then
StrPCopy (fInputString, Copy (AInputString, 1, Len)); //###0.927 System.Move(AInputString[1],fInputString^,(Len+1)* SizeOf (REChar)) // with #0
{$ELSE} else
StrLCopy (fInputString, PRegExprChar (AInputString), Len); fInputString[0]:=#0;
{$ENDIF}
{ {
fInputString : string; fInputString : string;
@ -3596,7 +3632,7 @@ procedure TRegExpr.SetInputString (const AInputString : RegExprString);
fInputStart := PChar (fInputString); fInputStart := PChar (fInputString);
Len := length (fInputString); Len := length (fInputString);
fInputEnd := PRegExprChar (integer (fInputStart) + Len); ?? fInputEnd := PRegExprChar (integer (fInputStart) + Len); ??
!! startp/endp âñå ðàâíî áóäåò îïàñíî èñïîëüçîâàòü ? !! startp/endp все равно будет опасно использовать ?
} }
end; { of procedure TRegExpr.SetInputString end; { of procedure TRegExpr.SetInputString
--------------------------------------------------------------} --------------------------------------------------------------}
@ -3658,14 +3694,20 @@ function TRegExpr.GetLinePairedSeparator : RegExprString;
function TRegExpr.Substitute (const ATemplate : RegExprString) : RegExprString; function TRegExpr.Substitute (const ATemplate : RegExprString) : RegExprString;
// perform substitutions after a regexp match // perform substitutions after a regexp match
// completely rewritten in 0.929 // completely rewritten in 0.929
var type
TemplateLen : integer; TSubstMode = (smodeNormal, smodeOneUpper, smodeOneLower, smodeAllUpper,
smodeAllLower);
var
TemplateLen : PtrInt;
TemplateBeg, TemplateEnd : PRegExprChar; TemplateBeg, TemplateEnd : PRegExprChar;
p, p0, ResultPtr : PRegExprChar; p, p0, p1, ResultPtr : PRegExprChar;
ResultLen : integer; ResultLen : PtrInt;
n : integer; n : PtrInt;
Ch : REChar; Ch : REChar;
function ParseVarName (var APtr : PRegExprChar) : integer; Mode: TSubstMode;
LineEnd: String = LineEnding;
function ParseVarName (var APtr : PRegExprChar) : PtrInt;
// extract name of variable (digits, may be enclosed with // extract name of variable (digits, may be enclosed with
// curly braces) from APtr^, uses TemplateEnd !!! // curly braces) from APtr^, uses TemplateEnd !!!
const const
@ -3700,12 +3742,13 @@ function TRegExpr.Substitute (const ATemplate : RegExprString) : RegExprString;
then Result := -1; // no valid digits found or no right curly brace then Result := -1; // no valid digits found or no right curly brace
APtr := p; APtr := p;
end; end;
begin
begin
// Check programm and input string // Check programm and input string
if not IsProgrammOk if not IsProgrammOk
then EXIT; then EXIT;
if not Assigned (fInputString) then begin if not Assigned (fInputString) then begin
Error (reeNoInpitStringSpecified); Error (reeNoInputStringSpecified);
EXIT; EXIT;
end; end;
// Prepare for working // Prepare for working
@ -3726,53 +3769,116 @@ function TRegExpr.Substitute (const ATemplate : RegExprString) : RegExprString;
then n := ParseVarName (p) then n := ParseVarName (p)
else n := -1; else n := -1;
if n >= 0 then begin if n >= 0 then begin
if (n < NSUBEXP) and Assigned (startp [n]) and Assigned (endp [n]) if (n < NSUBEXP) and Assigned (startp [n]) and Assigned (endp [n])
then inc (ResultLen, endp [n] - startp [n]); then inc (ResultLen, endp [n] - startp [n]);
end
else begin
if (Ch = EscChar) and (p < TemplateEnd) then begin // quoted or special char followed
Ch := p^;
inc (p);
case Ch of
'n' : inc(ResultLen, Length(LineEnding));
'u', 'l', 'U', 'L': {nothing};
else inc(ResultLen);
end;
end end
else begin else
if (Ch = EscChar) and (p < TemplateEnd) inc(ResultLen);
then inc (p); // quoted or special char followed end;
inc (ResultLen); end;
end;
end;
// Get memory. We do it once and it significant speed up work ! // Get memory. We do it once and it significant speed up work !
if ResultLen = 0 then begin if ResultLen = 0 then begin
Result := ''; Result := '';
EXIT; EXIT;
end; end;
SetString (Result, nil, ResultLen); //SetString (Result, nil, ResultLen);
SetLength(Result,ResultLen);
// Fill Result // Fill Result
ResultPtr := pointer (Result); ResultPtr := pointer (Result);
p := TemplateBeg; p := TemplateBeg;
Mode := smodeNormal;
while p < TemplateEnd do begin while p < TemplateEnd do begin
Ch := p^; Ch := p^;
p0 := p;
inc (p); inc (p);
p1 := p;
if Ch = '$' if Ch = '$'
then n := ParseVarName (p) then n := ParseVarName (p)
else n := -1; else n := -1;
if n >= 0 then begin if (n >= 0) then begin
p0 := startp [n]; p0 := startp[n];
if (n < NSUBEXP) and Assigned (p0) and Assigned (endp [n]) then p1 := endp[n];
while p0 < endp [n] do begin if (n >= NSUBEXP) or not Assigned (p0) or not Assigned (endp [n]) then
ResultPtr^ := p0^; p1 := p0; // empty
inc (ResultPtr); end
inc (p0); else begin
end; if (Ch = EscChar) and (p < TemplateEnd) then begin // quoted or special char followed
end Ch := p^;
else begin inc (p);
if (Ch = EscChar) and (p < TemplateEnd) then begin // quoted or special char followed case Ch of
Ch := p^; 'n' : begin
inc (p); p0 := @LineEnd[1];
p1 := p0 + Length(LineEnding);
end;
'l' : begin
Mode := smodeOneLower;
p1 := p0;
end;
'L' : begin
Mode := smodeAllLower;
p1 := p0;
end;
'u' : begin
Mode := smodeOneUpper;
p1 := p0;
end;
'U' : begin
Mode := smodeAllUpper;
p1 := p0;
end;
else
begin
inc(p0);
inc(p1);
end;
end; end;
ResultPtr^ := Ch; end
inc (ResultPtr); end;
if p0 < p1 then begin
while p0 < p1 do begin
case Mode of
smodeOneLower, smodeAllLower:
begin
Ch := p0^;
if Ch < #128 then
Ch := AnsiLowerCase(Ch)[1];
ResultPtr^ := Ch;
if Mode = smodeOneLower then
Mode := smodeNormal;
end;
smodeOneUpper, smodeAllUpper:
begin
Ch := p0^;
if Ch < #128 then
Ch := AnsiUpperCase(Ch)[1];
ResultPtr^ := Ch;
if Mode = smodeOneUpper then
Mode := smodeNormal;
end;
else
ResultPtr^ := p0^;
end;
inc (ResultPtr);
inc (p0);
end; end;
end; Mode := smodeNormal;
end; { of function TRegExpr.Substitute end;
end;
end; { of function TRegExpr.Substitute
--------------------------------------------------------------} --------------------------------------------------------------}
procedure TRegExpr.Split (AInputStr : RegExprString; APieces : TStrings); procedure TRegExpr.Split (AInputStr : RegExprString; APieces : TStrings);
var PrevPos : integer; var PrevPos : PtrInt;
begin begin
PrevPos := 1; PrevPos := 1;
if Exec (AInputStr) then if Exec (AInputStr) then
@ -3787,7 +3893,7 @@ procedure TRegExpr.Split (AInputStr : RegExprString; APieces : TStrings);
function TRegExpr.Replace (AInputStr : RegExprString; const AReplaceStr : RegExprString; function TRegExpr.Replace (AInputStr : RegExprString; const AReplaceStr : RegExprString;
AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) : RegExprString; AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) : RegExprString;
var var
PrevPos : integer; PrevPos : PtrInt;
begin begin
Result := ''; Result := '';
PrevPos := 1; PrevPos := 1;
@ -3808,7 +3914,7 @@ function TRegExpr.ReplaceEx (AInputStr : RegExprString;
AReplaceFunc : TRegExprReplaceFunction) AReplaceFunc : TRegExprReplaceFunction)
: RegExprString; : RegExprString;
var var
PrevPos : integer; PrevPos : PtrInt;
begin begin
Result := ''; Result := '';
PrevPos := 1; PrevPos := 1;
@ -3829,7 +3935,7 @@ function TRegExpr.Replace (AInputStr : RegExprString;
AReplaceFunc : TRegExprReplaceFunction) AReplaceFunc : TRegExprReplaceFunction)
: RegExprString; : RegExprString;
begin begin
ReplaceEx (AInputStr, AReplaceFunc); {$IFDEF FPC}Result:={$ENDIF}ReplaceEx (AInputStr, AReplaceFunc);
end; { of function TRegExpr.Replace end; { of function TRegExpr.Replace
--------------------------------------------------------------} --------------------------------------------------------------}
{$ENDIF} {$ENDIF}
@ -3902,8 +4008,8 @@ function TRegExpr.Dump : RegExprString;
s : PRegExprChar; s : PRegExprChar;
op : TREOp; // Arbitrary non-END op. op : TREOp; // Arbitrary non-END op.
next : PRegExprChar; next : PRegExprChar;
i : integer; i : PtrInt;
Diff : integer; Diff : PtrInt;
{$IFDEF UseSetOfChar} //###0.929 {$IFDEF UseSetOfChar} //###0.929
Ch : REChar; Ch : REChar;
{$ENDIF} {$ENDIF}