* Add Delphi-compatible regular expressions based on libpcre

This commit is contained in:
Michaël Van Canneyt 2023-12-02 13:32:17 +01:00
parent 33917ee19b
commit 8d2e2c6c21
6 changed files with 1729 additions and 1 deletions

View File

@ -36,6 +36,7 @@ begin
P.Dependencies.Add('fcl-json');
P.Dependencies.Add('fcl-hash');
P.Dependencies.Add('hash');
P.Dependencies.Add('libpcre');
P.SourcePath.Add('src');
P.IncludePath.Add('src');
@ -61,6 +62,10 @@ begin
T.Dependencies.AddUnit('system.json');
T:=P.Targets.AddUnit('system.hash.pp');
T.ResourceStrings := True;
T:=P.Targets.AddUnit('system.regularexpressionsconsts.pp');
T.ResourceStrings := True;
T:=P.Targets.AddUnit('system.regularexpressionscore.pp');
T.Dependencies.AddUnit('system.regularexpressionsconsts');
{$ifndef ALLPACKAGES}

View File

@ -0,0 +1,22 @@
unit System.RegularExpressionsConsts;
interface
resourcestring
SRegExExpressionError = 'Error in regular expression at offset %d: %s';
SRegExIndexOutOfBounds = 'Index out of bounds (%d)';
SRegExInvalidGroupName = 'Invalid group name (%s)';
SRegExInvalidIndexType = 'Invalid index type';
SRegExMatchRequired = 'Successful match required';
SRegExMatchError = 'Error matching the regex: %s';
SRegExMissingExpression = 'A regular expression specified in RegEx is required';
SRegExStringsRequired = 'Strings parameter cannot be nil';
SRegExStudyError = 'Error studying the regex: %s';
SErrRegexOvectorTooSmall = 'output vector was not big enough for all the captured substrings';
SRegExMatcStartAfterEnd = '\K was used in an assertion to set the match start after its end.'+sLineBreak+
'From end to start the match was: %s';
implementation
end.

File diff suppressed because it is too large Load Diff

View File

@ -68,6 +68,10 @@
<Filename Value="utchash.pp"/>
<IsPartOfProject Value="True"/>
</Unit>
<Unit>
<Filename Value="utregex.pas"/>
<IsPartOfProject Value="True"/>
</Unit>
</Units>
</ProjectOptions>
<CompilerOptions>

View File

@ -6,7 +6,7 @@ uses
{$IFDEF UNIX}cwstring,{$ENDIF}
Classes, consoletestrunner, tcnetencoding, tciotuils,
utmessagemanager, utcdevices, utcanalytics, utcimagelist,
utcnotifications, utcjson, utcpush, utchash;
utcnotifications, utcjson, utcpush, utchash, utcregex;
type

View File

@ -0,0 +1,315 @@
unit utcregex;
{$mode objfpc}{$H+}
{ $DEFINE USEWIDESTRING}
interface
uses
Classes, SysUtils, fpcunit, testutils, testregistry, system.regularexpressionscore;
type
{ TTestRegExpCore }
TTestRegExpCore = class(TTestCase)
private
FRegex: TPerlRegEx;
FMatchEventCount : Integer;
FSplitSubject: TStrings;
procedure AssertMatch(const Msg, aMatch: TREString; aPos, aLength: Integer; Groups: array of TREString);
procedure DoMatch(Sender: TObject);
protected
procedure SetUp; override;
procedure TearDown; override;
Property RegEx : TPerlRegEx Read FRegex;
Property SplitSubject : TStrings Read FSplitSubject;
published
Procedure TestHookup;
procedure TestMatch;
procedure TestNamedGroups;
procedure TestReplace;
procedure TestReplaceAll;
procedure TestSplitAll;
procedure TestSplitLimit;
procedure TestInitialCaps;
procedure TestReplaceGroupBackslash;
procedure TestReplaceGroupDollar;
procedure TestReplaceGroupQuoted;
procedure TestReplaceGroupNamed;
procedure TestReplaceGroupNamed2;
procedure TestReplaceGroupNamedInvalidName;
procedure TestReplaceWholeSubject;
procedure TestReplaceLeftOfMatch;
procedure TestReplaceRightOfMatch;
procedure TestReplaceWholeMatch;
procedure TestReplaceLastMatch;
end;
implementation
procedure TTestRegExpCore.AssertMatch(Const Msg,aMatch : TREString; aPos,aLength : Integer; Groups : Array of TREString);
var
I : Integer;
begin
AssertEquals(Msg+': matched text',aMatch,Regex.MatchedText);
AssertEquals(Msg+': offset',aPos,Regex.MatchedOffset);
AssertEquals(Msg+': length',aLength,Regex.MatchedLength);
AssertEquals(Msg+': group count',Length(Groups),Regex.GroupCount);
For I:=1 to Regex.GroupCount do
AssertEquals(Msg+' group['+IntToStr(I)+']',Groups[I-1],Regex.Groups[I]);
end;
procedure TTestRegExpCore.DoMatch(Sender: TObject);
begin
Inc(FMatchEventCount);
end;
procedure TTestRegExpCore.TestMatch;
begin
Regex.subject:='xyz abba abbba abbbba zyx';
Regex.RegEx:='a(b*)a';
AssertTrue('First match found',Regex.Match);
AssertEquals('Match event called',1,FMatchEventCount);
AssertMatch('Match 1','abba',5,4,['bb']);
AssertEquals('Left of match','xyz ',Regex.SubjectLeft);
AssertEquals('Right of match',' abbba abbbba zyx',Regex.SubjectRight);
AssertTrue('Second match found',Regex.MatchAgain);
AssertMatch('Match 2','abbba',10,5,['bbb']);
AssertTrue('Third match found',Regex.MatchAgain);
AssertMatch('Match 3','abbbba',16,6,['bbbb']);
AssertFalse('No more matches',Regex.MatchAgain);
AssertEquals('Match event called',3,FMatchEventCount);
end;
procedure TTestRegExpCore.TestNamedGroups;
Const
Rec1 = 'Name:"John" Surname:"Doe" Email:"john@example.com"';
Rec2 = 'Name:"Jane" Surname:"Dolina" Email:"jane@doe.com"';
begin
Regex.Subject:=Rec1+#10+Rec2;
Regex.RegEx:='Name:"(?<Name>[\w]+?)".*?Surname:"(?<Surname>[\w]+?)".*?Email:"(?<Email>\b[\w.%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b)"';
AssertTrue('First match found',Regex.Match);
AssertMatch('Match 1',Rec1,1,Length(Rec1),['John','Doe','john@example.com']);
AssertEquals('Nonexisting group','',Regex.NamedGroups['nonexisting']);
AssertEquals('Name group','John',Regex.NamedGroups['Name']);
AssertEquals('Surname group','Doe',Regex.NamedGroups['Surname']);
AssertEquals('Email group','john@example.com',Regex.NamedGroups['Email']);
AssertTrue('Second match found',Regex.MatchAgain);
AssertMatch('Match 2',Rec2,Length(Rec1)+2,Length(Rec2),['Jane','Dolina','jane@doe.com']);
AssertFalse('No more matches',Regex.MatchAgain);
end;
procedure TTestRegExpCore.TestReplace;
begin
Regex.subject:='xyz abba abbba abbbba zyx';
Regex.RegEx:='a(b*)a';
Regex.Replacement:='c';
AssertTrue('First match found',Regex.Match);
AssertEquals('Replace','c',Regex.Replace);
AssertEquals('Replace result','xyz c abbba abbbba zyx',Regex.Subject);
AssertTrue('Second match found',Regex.MatchAgain);
AssertEquals('Replace 2','c',Regex.Replace);
AssertEquals('Replace 2 result','xyz c c abbbba zyx',Regex.Subject);
AssertTrue('Third match found',Regex.MatchAgain);
AssertEquals('Replace 3','c',Regex.Replace);
AssertEquals('Replace 3 result','xyz c c c zyx',Regex.Subject);
AssertFalse('No more matches',Regex.MatchAgain);
end;
procedure TTestRegExpCore.TestReplaceAll;
begin
Regex.subject:='xyz abba abbba abbbba zyx';
Regex.RegEx:='a(b*)a';
Regex.Replacement:='c';
AssertTrue('Replacements done',Regex.ReplaceAll);
AssertEquals('ReplaceAll result','xyz c c c zyx',Regex.Subject);
end;
procedure TTestRegExpCore.TestReplaceGroupBackslash;
// \n
begin
Regex.subject:='*abba*';
Regex.RegEx:='a(b*)a';
Regex.Replacement:='\1';
AssertTrue('Match',Regex.Match);
AssertEquals('ReplaceText','bb',Regex.Replace);
AssertEquals('Result','*bb*',Regex.Subject);
end;
procedure TTestRegExpCore.TestReplaceGroupDollar;
// $N
begin
Regex.subject:='*abba*';
Regex.RegEx:='a(b*)a';
Regex.Replacement:='$1';
AssertTrue('Match',Regex.Match);
AssertEquals('ReplaceText','bb',Regex.Replace);
AssertEquals('Result','*bb*',Regex.Subject);
end;
procedure TTestRegExpCore.TestReplaceGroupQuoted;
// \{N}
begin
Regex.subject:='*abba*';
Regex.RegEx:='a(b*)a';
Regex.Replacement:='\{1}';
AssertTrue('Match',Regex.Match);
AssertEquals('ReplaceText','bb',Regex.Replace);
AssertEquals('Result','*bb*',Regex.Subject);
end;
procedure TTestRegExpCore.TestReplaceGroupNamed;
// \{name}
begin
Regex.subject:='*abba*';
Regex.RegEx:='a(?<Name>b*?)a';
Regex.Replacement:='\{Name}';
AssertTrue('Match',Regex.Match);
AssertEquals('ReplaceText','bb',Regex.Replace);
AssertEquals('Result','*bb*',Regex.Subject);
end;
procedure TTestRegExpCore.TestReplaceGroupNamed2;
// \{name}
begin
Regex.subject:='*abba*';
Regex.RegEx:='a(?<Name>b*?)a';
Regex.Replacement:='<\{Name}>';
AssertTrue('Match',Regex.Match);
AssertEquals('ReplaceText','<bb>',Regex.Replace);
AssertEquals('Result','*<bb>*',Regex.Subject);
end;
procedure TTestRegExpCore.TestReplaceGroupNamedInvalidName;
// \{name} with invalid name
begin
Regex.subject:='*abba*';
Regex.RegEx:='a(?<Name>b*?)a';
Regex.Replacement:='<\{NameX}>';
AssertTrue('Match',Regex.Match);
AssertEquals('ReplaceText','<>',Regex.Replace);
AssertEquals('Result','*<>*',Regex.Subject);
end;
procedure TTestRegExpCore.TestReplaceWholeSubject;
begin
Regex.subject:='*abba*';
Regex.RegEx:='a(b*)a';
Regex.Replacement:='<\_>';
AssertTrue('Match',Regex.Match);
AssertEquals('ReplaceText','<*abba*>',Regex.Replace);
AssertEquals('Result','*<*abba*>*',Regex.Subject);
end;
procedure TTestRegExpCore.TestReplaceLeftOfMatch;
// \`
begin
Regex.subject:='x*abba*';
Regex.RegEx:='a(b*)a';
Regex.Replacement:='<\`>';
AssertTrue('Match',Regex.Match);
AssertEquals('ReplaceText','<x*>',Regex.Replace);
AssertEquals('Result','x*<x*>*',Regex.Subject);
end;
procedure TTestRegExpCore.TestReplaceRightOfMatch;
// \'
begin
Regex.subject:='*abba*x';
Regex.RegEx:='a(b*)a';
Regex.Replacement:='<\''>';
AssertTrue('Match',Regex.Match);
AssertEquals('ReplaceText','<*x>',Regex.Replace);
AssertEquals('Result','*<*x>*x',Regex.Subject);
end;
procedure TTestRegExpCore.TestReplaceWholeMatch;
// \&
begin
Regex.subject:='*abba*';
Regex.RegEx:='a(b*)a';
Regex.Replacement:='<\&>';
AssertTrue('Match',Regex.Match);
AssertEquals('ReplaceText','<abba>',Regex.Replace);
AssertEquals('Result','*<abba>*',Regex.Subject);
end;
procedure TTestRegExpCore.TestReplaceLastMatch;
// \&
begin
Regex.subject:='*abbcca*';
Regex.RegEx:='a(b*)(c*)a';
Regex.Replacement:='<\+>';
AssertTrue('Match',Regex.Match);
AssertEquals('ReplaceText','<cc>',Regex.Replace);
AssertEquals('Result','*<cc>*',Regex.Subject);
end;
procedure TTestRegExpCore.TestSplitAll;
begin
Regex.subject:='xyz abba abbba abbbba zyx';
Regex.RegEx:='\s';
Regex.Split(SplitSubject,0);
AssertEquals('Count',5,SplitSubject.Count);
AssertEquals('Item 0','xyz',SplitSubject[0]);
AssertEquals('Item 1','abba',SplitSubject[1]);
AssertEquals('Item 2','abbba',SplitSubject[2]);
AssertEquals('Item 3','abbbba',SplitSubject[3]);
AssertEquals('Item 4','zyx',SplitSubject[4]);
end;
procedure TTestRegExpCore.TestSplitLimit;
begin
Regex.subject:='xyz abba abbba abbbba zyx';
Regex.RegEx:='\s';
Regex.Split(SplitSubject,2);
AssertEquals('Count',2,SplitSubject.Count);
AssertEquals('Item 0','xyz',SplitSubject[0]);
AssertEquals('Item 1','abba abbba abbbba zyx',SplitSubject[1]);
end;
procedure TTestRegExpCore.TestInitialCaps;
begin
AssertEquals('Initialcaps 1','Abc',InitialCaps('aBc'));
AssertEquals('Initialcaps 2',' Abc',InitialCaps(' aBc'));
AssertEquals('Initialcaps 3','Dad Abc',InitialCaps('dAd aBc'));
AssertEquals('Initialcaps 4','Dad Abc ',InitialCaps('dAd aBc '));
end;
procedure TTestRegExpCore.SetUp;
begin
FRegex:=TPerlRegEx.Create;
FRegEx.OnMatch:=@DoMatch;
FMatchEventCount:=0;
FSplitSubject:=TStringList.Create;
end;
procedure TTestRegExpCore.TearDown;
begin
FreeAndNil(FSplitSubject);
FreeAndNil(FRegex);
end;
procedure TTestRegExpCore.TestHookup;
begin
AssertNotNull('Regex',Regex);
AssertTrue('Assigned OnMatch event',Assigned(Regex.OnMatch));
AssertEquals('Match event count',0,FMatchEventCount);
end;
initialization
RegisterTest(TTestRegExpCore);
end.