* Added Regex unit with an NFA regexpression parser, based on code from Julian Bucknall which is used with his permission

* Added RegExprCompat unit which contains an interface to the Regex unit which is compatible with the existing RegExpr unit
 * Added some more tests and added a define UseRegexCompat to make it possible to choose what regex-implementation to test. Test which fail on the existing regex-parser are placed between ifdefs

git-svn-id: trunk@10611 -
This commit is contained in:
joost 2008-04-06 21:23:13 +00:00
parent 85250cca06
commit 44fe103c46
6 changed files with 1439 additions and 65 deletions

2
.gitattributes vendored
View File

@ -3980,7 +3980,9 @@ packages/pxlib/src/pxlib.pp svneol=native#text/plain
packages/regexpr/Makefile svneol=native#text/plain
packages/regexpr/Makefile.fpc svneol=native#text/plain
packages/regexpr/fpmake.pp svneol=native#text/plain
packages/regexpr/src/regex.pp svneol=native#text/plain
packages/regexpr/src/regexpr.pp svneol=native#text/plain
packages/regexpr/src/regexprcompat.pp svneol=native#text/plain
packages/regexpr/tests/testreg1.pp svneol=native#text/plain
packages/rexx/Makefile svneol=native#text/plain
packages/rexx/Makefile.fpc svneol=native#text/plain

View File

@ -1,5 +1,5 @@
#
# Don't edit, this file is generated by FPCMake Version 2.0.0 [2008/03/24]
# Don't edit, this file is generated by FPCMake Version 2.0.0 [2008/03/27]
#
default: all
MAKEFILETARGETS=i386-linux i386-go32v2 i386-win32 i386-os2 i386-freebsd i386-beos i386-netbsd i386-solaris i386-qnx i386-netware i386-openbsd i386-wdosx i386-darwin i386-emx i386-watcom i386-netwlibc i386-wince i386-embedded i386-symbian m68k-linux m68k-freebsd m68k-netbsd m68k-amiga m68k-atari m68k-openbsd m68k-palmos m68k-embedded powerpc-linux powerpc-netbsd powerpc-amiga powerpc-macos powerpc-darwin powerpc-morphos powerpc-embedded sparc-linux sparc-netbsd sparc-solaris sparc-embedded x86_64-linux x86_64-freebsd x86_64-darwin x86_64-win64 x86_64-embedded arm-linux arm-palmos arm-wince arm-gba arm-nds arm-embedded arm-symbian powerpc64-linux powerpc64-darwin powerpc64-embedded avr-embedded armeb-linux armeb-embedded
@ -254,172 +254,172 @@ PACKAGESDIR:=$(wildcard $(FPCDIR) $(FPCDIR)/packages $(FPCDIR)/packages/base $(F
override PACKAGE_NAME=regexpr
override PACKAGE_VERSION=2.0.0
ifeq ($(FULL_TARGET),i386-linux)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-go32v2)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-win32)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-os2)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-freebsd)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-beos)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-netbsd)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-solaris)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-qnx)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-netware)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-openbsd)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-wdosx)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-darwin)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-emx)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-watcom)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-netwlibc)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-wince)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-embedded)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-symbian)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),m68k-linux)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),m68k-freebsd)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),m68k-netbsd)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),m68k-amiga)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),m68k-atari)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),m68k-openbsd)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),m68k-palmos)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),m68k-embedded)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),powerpc-linux)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),powerpc-netbsd)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),powerpc-amiga)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),powerpc-macos)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),powerpc-darwin)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),powerpc-morphos)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),powerpc-embedded)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),sparc-linux)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),sparc-netbsd)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),sparc-solaris)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),sparc-embedded)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),x86_64-linux)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),x86_64-freebsd)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),x86_64-darwin)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),x86_64-win64)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),x86_64-embedded)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),arm-linux)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),arm-palmos)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),arm-wince)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),arm-gba)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),arm-nds)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),arm-embedded)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),arm-symbian)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),powerpc64-linux)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),powerpc64-darwin)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),powerpc64-embedded)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),avr-embedded)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),armeb-linux)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),armeb-embedded)
override TARGET_UNITS+=regexpr
override TARGET_UNITS+=regexpr regex regexprcompat
endif
ifeq ($(FULL_TARGET),i386-linux)
override TARGET_EXAMPLES+=testreg1

View File

@ -7,7 +7,7 @@ name=regexpr
version=2.0.0
[target]
units=regexpr
units=regexpr regex regexprcompat
examples=testreg1
[compiler]

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,117 @@
{
This file is part of the Free Pascal packages library.
Copyright (c) 2008 by Joost van der Sluis, member of the
Free Pascal development team
Compatibility unit for the old regexpr unit.
See the file COPYING.FPC, included in this distribution,
for details about the copyright.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
**********************************************************************}
unit RegExprCompat;
{$mode objfpc}{$H+}
interface
uses
Regex;
type
tregexprflag = (
ref_singleline,
{** This indicates that a start of line is either the
start of the pattern or a linebreak. }
ref_multiline,
{** The match will be done in a case-insensitive way
according to US-ASCII character set. }
ref_caseinsensitive);
tregexprflags = set of tregexprflag;
TRegExprEngine = TRegexEngine;
function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags;var RegExprEngine: TRegExprEngine): boolean;
function GenerateRegExprEngine(regexpr : pchar;flags : tregexprflags): TREGExprEngine;
procedure DestroyRegExprEngine(var regexpr : TRegExprEngine);
function RegExprPos(RegExprEngine : TRegExprEngine;p : pchar;var index,len : longint) : boolean;
function RegExprReplaceAll(RegExprEngine : TRegExprEngine;const src,newstr : ansistring;var dest : ansistring) : sizeint;
function RegExprEscapeStr (const S : string) : string;
implementation
function GenerateRegExprEngine(regexpr: pchar; flags: tregexprflags;
var RegExprEngine: TRegExprEngine): boolean;
var ErrorPos : Integer;
ErrorCode : TRegexError;
begin
RegExprEngine := TRegExprEngine.Create(regexpr);
if ref_multiline in flags then RegExprEngine.MultiLine:=True;
if ref_caseinsensitive in flags then RegExprEngine.IgnoreCase:=True;
Result := RegExprEngine.Parse(ErrorPos,ErrorCode);
end;
function GenerateRegExprEngine(regexpr: pchar; flags: tregexprflags
): TREGExprEngine;
var r: TRegExprEngine;
begin
GenerateRegExprEngine(regexpr,flags,r);
GenerateRegExprEngine:=r;
end;
procedure DestroyRegExprEngine(var regexpr: TRegExprEngine);
begin
regexpr.Free;
end;
function RegExprPos(RegExprEngine: TRegExprEngine; p: pchar; var index,
len: longint): boolean;
begin
Len := 1;
Result := RegExprEngine.MatchString(p,index,len);
Len := Len - index;
Dec(Index);
end;
function RegExprReplaceAll(RegExprEngine: TRegExprEngine; const src,
newstr: ansistring; var dest: ansistring): sizeint;
begin
result := RegExprEngine.ReplaceAllString(src,newstr,Dest);
end;
function RegExprEscapeStr(const S: string): string;
var
i, len : integer;
s1: string;
begin
RegExprEscapeStr:= '';
s1:='';
if (S = '') then
exit;
len := Length (S);
for i := 1 to len do
begin
if (S [i] in ['(','|', '.', '*', '?', '^', '$', '-', '[', '{', '}', ']', ')', '\']) then
begin
s1 := s1 + '\';
end;
s1 := s1 + S[i];
end;
RegExprEscapeStr:=s1;
end;
end.

View File

@ -1,9 +1,16 @@
{$IFDEF FPC}
{$MODE OBJFPC}
{$ENDIF}
{$DEFINE UseRegexCompat}
program testreg1;
uses
{$IFDEF UseRegexCompat}
regexprcompat;
{$ELSE}
regexpr;
{$ENDIF}
var
r : tregexprengine;
@ -318,6 +325,16 @@ begin
do_error(705);
DestroyregExprEngine(r);
{$IFDEF UseRegexCompat}
initok:=GenerateRegExprEngine('Cat(AZ){2,}Q',[],r);
if not initok then
do_error(705);
if not(RegExprPos(r,'BCatAZAZAZAZQDABCD',index,len)) or
(index<>1) or (len<>12) then
do_error(705);
DestroyregExprEngine(r);
{$ENDIF}
initok:=GenerateRegExprEngine('CatAZ{0,}',[],r);
if not initok then
do_error(706);
@ -364,6 +381,14 @@ begin
do_error(725);
DestroyregExprEngine(r);
initok:=GenerateRegExprEngine('Cat(AZ){1,3}',[],r);
if not initok then
do_error(725);
if not(RegExprPos(r,'BCatAZAZDABCD',index,len)) or
(index<>1) or (len<>7) then
do_error(725);
DestroyregExprEngine(r);
initok:=GenerateRegExprEngine('CatAz{1,5}',[],r);
if not initok then
do_error(726);
@ -650,11 +675,13 @@ begin
{ test real backtracking }
(* r:=GenerateRegExprEngine('nofoo|foo',[]);
{$IFDEF UseRegexCompat}
r:=GenerateRegExprEngine('nofoo|foo',[]);
if not(RegExprPos(r,'1234 foo1234XXXX',index,len)) or
(index<>7) or (len<>3) then
do_error(1300);
DestroyregExprEngine(r);*)
DestroyregExprEngine(r);
{$ENDIF}
GenerateRegExprEngine('abc\(123\)$',[],r);
if not (RegExprPos(r,'1234 abc(123)', index, len)) or
@ -762,7 +789,6 @@ begin
do_error(1505);
DestroyregExprEngine(r);
{
initok:=GenerateRegExprEngine('\.localhost$',[],r);
if not initok then
do_error(1506);
@ -778,6 +804,7 @@ begin
do_error(1507);
DestroyregExprEngine(r);
{$IFDEF UseRegexCompat}
initok:=GenerateRegExprEngine('.*[^e]\.localhost$',[],r);
if not initok then
do_error(1508);
@ -796,22 +823,22 @@ begin
if not initok then
do_error(1500);
if not(RegExprPos(r,'1234 nofoo1234XXXX',index,len)) or
(index<>8) or (len<>9) then
(index<>7) or (len<>9) then
do_error(1500);
DestroyregExprEngine(r);
r:=GenerateRegExprEngine('(nofoo|foo|anotherfoo)1234',[]);
if not(RegExprPos(r,'1234 nofoo1234XXXX',index,len)) or
(index<>8) or (len<>9) then
(index<>7) or (len<>9) then
do_error(1009);
DestroyregExprEngine(r);
r:=GenerateRegExprEngine('nofoo1234|foo1234',[]);
if (r.data=nil) or not(RegExprPos(r,'1234 foo1234XXXX',index,len)) or
if {(r.data=nil) or} not(RegExprPos(r,'1234 foo1234XXXX',index,len)) or
(index<>7) or (len<>7) then
do_error(1010);
DestroyregExprEngine(r);
}
{$ENDIF}
{ *************************************************************************
replacement tests