* fixes from Ido Kanner

+ RegExprEscapeStr from Ido Kanner
* fixed memory leak

git-svn-id: trunk@953 -
This commit is contained in:
florian 2005-08-28 08:27:18 +00:00
parent 57e373a4d3
commit 51d0c0ef2a
2 changed files with 100 additions and 12 deletions

View File

@ -13,7 +13,7 @@
**********************************************************************}
{ $define DEBUG}
{
(*
TODO:
- correct backtracking, for example in (...)*
- | support
@ -22,7 +22,12 @@
- newline handling in DOS?
- locals dependend upper/lowercase routines
- extend the interface
}
- support for number of matches:
{n} Match exactly n times
{n,} Match at least n times
{n,m} Match at least n but not more than m times
*)
{$mode objfpc}
@ -78,6 +83,9 @@ unit regexpr;
function RegExprPos(regexprengine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
{ This function Escape known regex chars and place the result on Return. If something went wrong the function will return false. }
function RegExprEscapeStr (const S : AnsiString) : AnsiString;
implementation
{$ifdef DEBUG}
@ -102,9 +110,8 @@ unit regexpr;
procedure doregister(p : pregexprentry);
begin
p^.nextdestroy:=first;
if not(assigned(first)) then
first:=p;
p^.nextdestroy:=first;
first:=p;
end;
var
@ -180,6 +187,16 @@ unit regexpr;
inc(currentpos);
readchars:=cs_nonwordchars;
end;
'f' :
begin
inc(currentpos);
readchars:= [#12];
end;
'a' :
begin
inc(currentpos);
readchars:= [#7];
end;
else
begin //Some basic escaping...
readchars := [currentpos^];
@ -300,12 +317,15 @@ unit regexpr;
end;
inc(currentpos);
end;
{
'|':
(* '|':
begin
{$ifdef DEBUG}
writeln('Creating backtrace entry');
{$endif DEBUG}
if (not assigned (hp2)) then
new (hp2);
while currentpos^='|' do
begin
inc(currentpos);
@ -323,7 +343,7 @@ unit regexpr;
new(hp);
doregister(hp);
hp^.typ:=ret_backtrace;
hp^.elsepath:=parseregexpr();
hp^.elsepath:= parseregexpr (next, elsepath);
hp^.next:=next;
if assigned(chaining) then
chaining^:=hp
@ -333,7 +353,7 @@ unit regexpr;
end;
exit;
end;
}
*)
')':
exit;
'^':
@ -603,6 +623,32 @@ unit regexpr;
index:=-1;
end;
function RegExprEscapeStr (const S : AnsiString) : AnsiString;
var
i, len : SizeUInt;
begin
Result := '';
if (S = '') then
exit;
SetLength(Result,Length(S)*2);
len := Length (S);
for i := 1 to len do
begin
if (S [i] in ['(','|', '.', '*', '?', '^', '$', '-', '[', '{', '}', ']', ')', '\']) then
begin
Result := Result + '\';
end;
Result := Result + S[i];
end;
SetLength(Result,Length(Result));
end;
begin
cs_nonwordchars:=cs_allchars-cs_wordchars;
cs_nondigits:=cs_allchars-cs_digits;

View File

@ -1,10 +1,12 @@
{$MODE OBJFPC}
program testreg1;
uses
regexpr;
var
r : tregexprengine;
r : tregexprengine;
index,len : longint;
S : String;
procedure do_error(i : longint);
@ -111,11 +113,51 @@ begin
{ test real backtracking }
r:=GenerateRegExprEngine('nofoo|foo',[]);
(* r:=GenerateRegExprEngine('nofoo|foo',[]);
if not(RegExprPos(r,'1234 foo1234XXXX',index,len)) or
(index<>7) or (len<>3) then
do_error(1300);
DestroyregExprEngine(r);
DestroyregExprEngine(r);*)
r := GenerateRegExprEngine('abc\(123\)$',[]);
if not (RegExprPos(r,'1234 abc(123)', index, len)) or
(index <> 5) or (len <> 8) then
do_error (1400);
DestroyregExprEngine(r);
r := GenerateRegExprEngine('^\t$',[ref_singleline]);
if not (RegExprPos(r,#9, index, len)) or
(index <> 0) or (len <> 1) then
do_error (1401);
DestroyregExprEngine(r);
r := GenerateRegExprEngine('^\n$',[ref_singleline]);
if not (RegExprPos(r,#10, index, len)) or
(index <> 0) or (len <> 1) then
do_error (1402);
DestroyregExprEngine(r);
r := GenerateRegExprEngine('^\f$',[ref_singleline]);
if not (RegExprPos(r,#12, index, len)) or
(index <> 0) or (len <> 1) then
do_error (1403);
DestroyregExprEngine(r);
r := GenerateRegExprEngine('^\r$',[ref_singleline]);
if not (RegExprPos(r,#13, index, len)) or
(index <> 0) or (len <> 1) then
do_error (1404);
DestroyregExprEngine(r);
r := GenerateRegExprEngine('^\a$',[ref_singleline]);
if not (RegExprPos(r,#7, index, len)) or
(index <> 0) or (len <> 1) then
do_error (1405);
DestroyregExprEngine(r);
s := '^Hello World \. [a-z] \D { } |() ?a*.*\\ 1 $';
writeln ('Before Escaping: ', s);
writeln ('Afther Escaping: ', RegExprEscapeStr(s));
{
r:=GenerateRegExprEngine('(nofoo|foo)1234',[]);