{ %norun }
unit tests;

{ Unit tests for the TRegExpr regular-expression class.
  Builds with Free Pascal (fpcunit) or Delphi (DUnit TestFramework),
  selected by the conditional defines below. }

{$IFDEF FPC} {$mode objfpc}{$H+} {$ENDIF}

{ $DEFINE DUMPTESTS} //define this to dump results to console

{$IFDEF VER130} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D5
{$IFDEF VER140} {$DEFINE D6} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D6
{$IFDEF VER150} {$DEFINE D7} {$DEFINE D6} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D7
{$IFDEF D5} {$DEFINE OverMeth} {$ENDIF}
{$IFDEF FPC} {$DEFINE OverMeth} {$ENDIF}

{$DEFINE Unicode}

interface

uses
  {$IFDEF FPC}
  fpcunit, testregistry,
  {$IFDEF VER3}
  fpwidestring, //required in FPC to use WideChar uppercase/lowercase
  {$ENDIF}
  {$ELSE}
  TestFramework,
  {$ENDIF}
  {$IFDEF UNICODE}
  uregexpr,
  {$ELSE}
  regexpr,
  {$ENDIF}
  Classes, SysUtils;

type

  { TTestRegexpr }

  // Test case class. The table-driven RunTestNN methods are published so the
  // test framework can discover them; RunRETest(aIndex) executes one entry
  // of the testCases table declared in the implementation section.
  TTestRegexpr= class(TTestCase)
  private
    // Regex engine instance used by RunRETest.
    // NOTE(review): creation/destruction is not visible in this part of the
    // file - presumably handled by CompileRE / TearDown; confirm.
    RE: TRegExpr;
  protected
    // Runs the table entry testCases[aIndex].
    procedure RunRETest(aIndex: Integer);
    procedure CompileRE(const AExpression: RegExprString);
    // Assertion helpers; each takes the failure message first.
    procedure IsNotNull(AErrorMessage: AnsiString; AObjectToCheck: TObject);
    procedure IsTrue(AErrorMessage: AnsiString; AConditionToCheck: boolean);
    procedure IsFalse(AErrorMessage: AnsiString; AConditionToCheck: boolean);
    procedure AreEqual(AErrorMessage: AnsiString; s1, s2: AnsiString); overload;
    procedure AreEqual(AErrorMessage: AnsiString; i1, i2: integer); overload;
    procedure TestBadRegex(const AErrorMessage: AnsiString; const AExpression: RegExprString);
  published
    procedure TestEmpty;
    procedure TestNotFound;
    procedure TestBads;
    {$IFDEF OverMeth}
    procedure TestReplaceOverload;
    {$ENDIF}
    procedure RunTest1;
    procedure RunTest2;
    procedure RunTest3;
    procedure RunTest4;
    procedure RunTest5;
    procedure RunTest6;
    procedure RunTest7;
    procedure RunTest8;
    procedure RunTest9;
    procedure RunTest10;
    procedure RunTest11;
    procedure RunTest12;
    procedure RunTest13;
    procedure RunTest14;
    procedure RunTest15;
    procedure RunTest16;
    procedure RunTest17;
    procedure RunTest18;
    procedure RunTest19;
    procedure RunTest20;
    procedure RunTest21;
    procedure RunTest22;
    procedure RunTest23;
    procedure RunTest24;
    procedure RunTest25;
    procedure RunTest26;
    procedure RunTest27;
    procedure RunTest28;
    procedure RunTest29;
    procedure RunTest30;
    procedure RunTest31;
    procedure RunTest32;
    procedure RunTest33;
    procedure RunTest34;
    procedure RunTest35;
    procedure RunTest36;
    procedure RunTest37;
    procedure RunTest38;
    procedure RunTest39;
    procedure RunTest40;
    procedure RunTest41;
    procedure RunTest42;
    procedure RunTest43;
    procedure RunTest44;
    procedure RunTest45;
    procedure RunTest46;
    procedure RunTest47;
    procedure RunTest48;
    procedure RunTest49;
    procedure RunTest50;
    procedure TestGroups;
    {$IFDEF Unicode}
    {$IFDEF FastUniCodeData}
    procedure RunTest51unicode;
    procedure RunTest52unicode;
    {$ENDIF}
    procedure RunTest70russian;
    {$ENDIF}
    procedure RunTest53;
    procedure RunTest54;
    procedure RunTest55;
    procedure RunTest56;
    procedure RunTest57;
    procedure RunTest58;
    procedure RunTest59;
    procedure RunTest60;
    procedure RunTest61;
    procedure RunTest62;
    procedure RunTest63;
    procedure RunTest64;
    procedure RunTest65;
    procedure RunTest66;
    procedure RunTest67;
    procedure RunTest68;
    procedure RunTest69;
  end;

implementation

Type
  // One row of the table-driven tests (see RunRETest).
  TRegExTest = record
    Expression: RegExprString;       // pattern to compile
    InputText: RegExprString;        // subject text
    SubstitutionText: RegExprString; // non-empty => Replace is tested, empty => Exec is tested
    ExpectedResult: RegExprString;   // expected Replace output, or expected Match[0]
    MatchStart: integer;             // expected MatchPos[0]; compared only when SubstitutionText is empty
  end;

// Returns S as UTF-8 with every control character (codes 0..31) replaced by
// '#' followed by its decimal code, so that mismatching strings can be shown
// in failure messages and console dumps.
// Note: the escape has no terminator, so the output is meant for display and
// same-function comparison only, not for round-tripping.
function PrintableString(const S: RegExprString): AnsiString;
const
  // First code that is NOT a C0 control character (space). The previous
  // bound of 31 let #31 (unit separator) through unescaped.
  FirstPrintable = 32;
var
  buf: AnsiString;
  ch: AnsiChar;
  i: integer;
begin
  Result := '';
  buf := UTF8Encode(S);
  for i := 1 to Length(buf) do
  begin
    ch := buf[i];
    if Ord(ch) < FirstPrintable then
      Result := Result + '#' + IntToStr(Ord(ch))
    else
      Result := Result + ch;
  end;
end;

const
  testCases: array [1 .. 
69] of TRegExTest = ( // 1 ( expression: '\nd'; inputText: 'abc'#13#10'def'; substitutionText: '\n\x{10}\r\\'; expectedResult: 'abc'#13#10#16#13'\ef'; MatchStart: 0 ), // 2 ( expression: '(\w*)'; inputText: 'name.ext'; substitutionText: '$1.new'; expectedResult: 'name.new.new.ext.new.new'; MatchStart: 0 ), // 3 ( expression: #$d'('#$a')'; inputText: 'word'#$d#$a; substitutionText: '${1}'; expectedResult: 'word'#$a; MatchStart: 0 ), // 4 ( expression: '(word)'; inputText: 'word'; substitutionText: '\U$1\\r'; expectedResult: 'WORD\r'; MatchStart: 0 ), // 5 ( expression: '(word)'; inputText: 'word'; substitutionText: '$1\n'; expectedResult: 'word'#$a; MatchStart: 0 ), // 6 ( expression: '[A-Z]'; inputText: '234578923457823659GHJK38'; substitutionText: ''; expectedResult: 'G'; matchStart: 19; ), // 7 ( expression: '[A-Z]*?'; inputText: '234578923457823659ARTZU38'; substitutionText: ''; expectedResult: ''; matchStart: 1 ), // 8 ( expression: '[A-Z]+'; inputText: '234578923457823659ARTZU38'; substitutionText: ''; expectedResult: 'ARTZU'; matchStart: 19 ), // 9 ( expression: '[A-Z][A-Z]*'; inputText: '234578923457823659ARTZU38'; substitutionText: ''; expectedResult: 'ARTZU'; matchStart: 19 ), // 10 ( expression: '[A-Z][A-Z]?'; inputText: '234578923457823659ARTZU38'; substitutionText: ''; expectedResult: 'AR'; matchStart: 19 ), // 11 ( expression: '[^\d]+'; inputText: '234578923457823659ARTZU38'; substitutionText: ''; expectedResult: 'ARTZU'; matchStart: 19 ), // 12 ( expression: '[A-Z][A-Z]?[A-Z]'; inputText: '234578923457823659ARTZU38'; substitutionText: ''; expectedResult: 'ART'; matchStart: 19 ), // 13 ( expression: '[A-Z][A-Z]*[0-9]'; inputText: '234578923457823659ARTZU38'; substitutionText: ''; expectedResult: 'ARTZU3'; matchStart: 19 ), // 14 ( expression: '[A-Z]+[0-9]'; inputText: '234578923457823659ARTZU38'; substitutionText: ''; expectedResult: 'ARTZU3'; matchStart: 19 ), // 15 ( expression: '(?i)[A-Z]'; inputText: '234578923457823659a38'; substitutionText: ''; 
expectedResult: 'a'; matchStart: 19 ), // 16 ( expression: '(?i)[a-z]'; inputText: '234578923457823659A38'; substitutionText: ''; expectedResult: 'A'; matchStart: 19 ), // 17 ( expression: '(foo)1234'; inputText: '1234 foo1234XXXX'; substitutionText: ''; expectedResult: 'foo1234'; matchStart: 8 ), // 18 ( expression: '(((foo)))1234'; inputText: '1234 foo1234XXXX'; substitutionText: ''; expectedResult: 'foo1234'; matchStart: 8 ), // 19 ( expression: '(foo)(1234)'; inputText: '1234 foo1234XXXX'; substitutionText: ''; expectedResult: 'foo1234'; matchStart: 8 ), // 20 ( expression: 'nofoo|foo'; inputText: '1234 foo1234XXXX'; substitutionText: ''; expectedResult: 'foo'; matchStart: 8 ), // 21 ( expression: '(nofoo|foo)1234'; inputText: '1234 nofoo1234XXXX'; substitutionText: ''; expectedResult: 'nofoo1234'; matchStart: 8 ), // 22 ( expression: '(nofoo|foo|anotherfoo)1234'; inputText: '1234 nofoo1234XXXX'; substitutionText: ''; expectedResult: 'nofoo1234'; matchStart: 8 ), // 23 ( expression: 'nofoo1234|foo1234'; inputText: '1234 foo1234XXXX'; substitutionText: ''; expectedResult: 'foo1234'; matchStart: 8 ), // 24 ( expression: '(\w*)'; inputText: 'name.ext'; substitutionText: ''; expectedResult: 'name'; matchStart: 1 ), // 25 ( expression: '\r(\n)'; inputText: #$d#$a; substitutionText: ''; expectedResult: #$d#$a; matchStart: 1 ), // 26 ( expression: '\r(\n)'; inputText: #$d#$a; substitutionText: '\n'; expectedResult: #$a; matchStart: 1 ), // 27 ( expression: '(?m)Test:\s*(.*?)\s;'; inputText: 'Test: hel'#$d#$a'lo ;'; substitutionText: ''; expectedResult: 'Test: hel'#$d#$a'lo ;'; matchStart: 1 ), // 28 ( expression: '(?:\w+)=\w+;(\w+)=\w+;(?:\w+)=\w+;(\w+)=\w+;'; inputText: 'skip1=11;needed1=22;skip2=33;needed2=44;'; substitutionText: '$1 $2'; expectedResult: 'needed1 needed2'; matchStart: 0 ), // 29 ( expression: '.*?\b(https?|ftp)\b://(?:\w+)\.(?:\w+)\.(\w\B\w\B\w)'; inputText: '>>ftp://www.name.com'; substitutionText: '$1 $2'; expectedResult: 'ftp com'; matchStart: 0 
), // 30 ( expression: '\v'; inputText: 'aaa'#10'bbb'#13'ccc'#$c'ddd'#$b'eee'; substitutionText: '-'; expectedResult: 'aaa-bbb-ccc-ddd-eee'; matchStart: 0 ), // 31 ( expression: '\h+'; inputText: #9'aaa bbb '#9' ccc '#$A0#9; substitutionText: '-'; expectedResult: '-aaa-bbb-ccc-'; matchStart: 0 ), // 32 ( expression: '\w+'; inputText: 'abc XY 12.,'; substitutionText: '\L$0'; expectedResult: 'abc xy 12.,'; matchStart: 0 ), // 33 ( expression: '\w+'; inputText: 'abc XY 12.,'; substitutionText: '\U$0'; expectedResult: 'ABC XY 12.,'; matchStart: 0 ), // 34 ( // NULL chars in InputString expression: #0+'?[2-5]+(\s+)([xyz\$\#]{3,})\1'+#0+'+.+'; inputText: '.:'+#0+'ab'+#0+'_34 z$x '+#0+'end'; substitutionText: ''; expectedResult: '34 z$x '+#0+'end'; matchStart: 8 ), // 35 ( expression: '\w\cA\cz\cb\w'; inputText: '..abc'#1#26#2'test'; substitutionText: ''; expectedResult: 'c'#1#26#2't'; matchStart: 5 ), // 36 ( expression: '\V+'; inputText: '.,,'#10'aB2'#13'cc()'#$c'$%'#$b'[]'; substitutionText: '-'; expectedResult: '-'#10'-'#13'-'#$c'-'#$b'-'; matchStart: 0 ), // 37 ( expression: '\H+'; inputText: #9'.,; aB2 '#9' ^&() '#$A0#9; substitutionText: '-'; expectedResult: #9'- - '#9' - '#$A0#9; matchStart: 0 ), // 38 ( // brackets just after [ expression: '[[\w]+ []\w]+'; inputText: ' ww[ww w]www'; substitutionText: ''; expectedResult: 'ww[ww w]www'; matchStart: 3 ), // 39 ( // NULL in expression, negative \W \S \D in [] expression: '([\x00\d]+ )+ [\W]+ [\S\x00-\x10]+ [\D]+'; inputText: ' 22'#0'33 '#0'33 .& w#'#5#0' w#'; substitutionText: ''; expectedResult: '22'#0'33 '#0'33 .& w#'#5#0' w#'; matchStart: 3 ), // 40 ( // find 1+ simple chars expression: 'd+'; inputText: ' ddddee '; substitutionText: ''; expectedResult: 'dddd'; matchStart: 3 ), // 41 ( // find {N,M} spaces expression: ' {4,}'; inputText: 'dd dd'; substitutionText: ''; expectedResult: ' '; matchStart: 3 ), // 42 ( // valid regex set [.-] expression: '\w+([.-])\d+([.-])\w+([.-])\w+'; inputText: 
'Pictures-2018-Spain.Madrid'; substitutionText: '$1 $2 $3'; expectedResult: '- - .'; matchStart: 0 ), // 43 ( // valid regex set combinaton if escaping expression: '\w+([.\-])\d+([\.-])\w+([\.\-])\w+'; inputText: 'Pictures-2018.Spain-Madrid'; substitutionText: '$1 $2 $3'; expectedResult: '- . -'; matchStart: 0 ), // 44 ( // valid regex set expression: '.*?([.-]Test[.-])'; inputText: 'This.Is.A_Test_1234.Test.abc'; substitutionText: '$1'; expectedResult: '.Test.abc'; matchStart: 0 ), // 45 ( // comments and modifier-strings expression: '(?#zzz)(?i)aA(?#zz).*(?-i)aA(?#zzz)'; inputText: '_a_aaaAAAaaaAAAaaa__'; substitutionText: ''; expectedResult: 'aaaAAAaaaA'; matchStart: 4 ), // 46 ( // named groups expression: '(?P[''"])\w+(?P=quote).*(?:\w+).*(?P")\w+(?P=q)'; inputText: 'aa "bb? "ok" a ''b "ok" eeee'; substitutionText: ''; expectedResult: '"ok" a ''b "ok"'; matchStart: 9 ), // 47 ( // lookbehind. it also has group refs \1 \2. expression: '(?<=foo)(=)(\w)\w+\2\1'; inputText: '..=tat=..=tat=..foo=tabt=..'; substitutionText: ''; expectedResult: '=tabt='; matchStart: 20 ), // 48 ( // lookahead expression: '(=)\w+\1(?=bar)'; inputText: '..=taat=..=tddt=bar..'; substitutionText: ''; expectedResult: '=tddt='; matchStart: 11 ), // 49 ( // lookahead+lookbehind expression: '(?<=[a-z]+)(\d+)[a-z]+\1(?=[a-z]+)'; inputText: '..2tt2..foo23test23bar..'; substitutionText: ''; expectedResult: '23test23'; matchStart: 12 ), // 50 ( // replace with named groups expression: '\s+(?P[f-h]+)\s+(?P[o-r]+)\s+'; inputText: '< fg oppo >'; substitutionText: '{${bb},${aa}}'; expectedResult: '<{oppo,fg}>'; matchStart: 1 ), // 51, unicode! ( expression: '\pL \p{Lu}{3,} \PL+ \P{Lu}+'; inputText: ',,wew ABDEF 345 weUPend'; substitutionText: ''; expectedResult: 'w ABDEF 345 we'; matchStart: 5 ), // 52, unicode! 
( expression: '[\p{Ll}\p{N}%]{5,} [\P{L}]+'; inputText: ',,NOPE%400 @_ ok%200 @_end'; substitutionText: ''; expectedResult: 'ok%200 @_'; matchStart: 15 ), // 53, lookahead aa(?!bb) ( expression: 'a+(?!\w)'; inputText: 'aabaaddaaazaaa=aau'; substitutionText: ''; expectedResult: 'aaa'; matchStart: 12 ), // 54, lookahead aa(?!bb) ( expression: '(?:\s+)\w{2,}\.(?!com|org|net)'; inputText: ' www.com www.org www.ok www.net'; substitutionText: ''; expectedResult: ' www.'; matchStart: 19 ), // 55, atomic groups ( expression: 'a(?>bc|b)c'; inputText: ' abc abcc abc abcc '; substitutionText: '_'; expectedResult: ' abc _ abc _ '; matchStart: 1 ), // 56, a++ ( expression: '\d++e\d++'; inputText: ' 20ed2 100e20 2e34 '; substitutionText: '_'; expectedResult: ' 20ed2 _ _ '; matchStart: 1 ), // 57, a*+, must fail ( expression: '".*+"'; inputText: 'dd "abc" ee'; substitutionText: ''; expectedResult: ''; matchStart: -1 ), // 58, recursion ( expression: 'a(?R)?b'; inputText: '__aaaabbbbbbbb__'; substitutionText: ''; expectedResult: 'aaaabbbb'; matchStart: 3 ), // 59, recursion, generic regex 1 - https://regular-expressions.mobi/recurse.html?wlr=1 ( expression: 'b(?:m|(?R))*e'; inputText: '_bbfee_bbbmeee__'; substitutionText: ''; expectedResult: 'bbbmeee'; matchStart: 8 ), // 60, recursion, generic regex 2 - https://regular-expressions.mobi/recurse.html?wlr=1 ( expression: 'b(?R)*e|m'; inputText: '__bbbmeee__bme__m__'; substitutionText: '@'; expectedResult: '__@__@__@__'; matchStart: 1 ), // 61, recursion, balanced set of parentheses - https://regular-expressions.mobi/recurse.html?wlr=1 ( expression: '\((?>[^()]|(?0))*\)'; inputText: '__(((dd)dd))__(dd)__(((dd)f)f)__'; substitutionText: '@'; expectedResult: '__@__@__@__'; matchStart: 1 ), // 62, subroutine call (?3) + non-capturing groups + atomic group ( expression: '(rr)(qq)(?:t)(?:t)(\[(?>m|(?3))*\])'; inputText: '__rrqqtt[[[mmm]mm]m]m]m]m]m]__'; substitutionText: ''; expectedResult: 'rrqqtt[[[mmm]mm]m]'; matchStart: 3 ), // 63, 
subroutine call (?P>name) ( expression: '(?P[abc])(?1)(?P>name)'; inputText: '__bcabcadef__'; substitutionText: ''; expectedResult: 'bca'; matchStart: 3 ), // 64 ( // named groups with Perl syntax expression: '(?''quote''[''"])\w+(?"e).*(?:\w+).*(?''q''")\w+(?&q)'; inputText: 'aa "bb? "ok" a ''b "ok" eeee'; substitutionText: ''; expectedResult: '"ok" a ''b "ok"'; matchStart: 9 ), // 65 ( // \A and \z expression: '(?s)\A.+\z'; inputText: 'some'#10'text'#10; substitutionText: '-'; expectedResult: '-'; matchStart: 1 ), // 66 ( // \A and \Z expression: '(?s)\A.+\w\Z'; inputText: 'some'#13#10'text'#13#10; substitutionText: '-'; expectedResult: '-'#13#10; matchStart: 1 ), // 67 ( // (? '') then Writeln(' Substitution text: "', PrintableString(T.substitutionText), '"'); {$ENDIF} end; procedure TTestRegexpr.RunRETest(aIndex: Integer); var T: TRegExTest; S: RegExprString; begin T:= testCases[aIndex]; {$IFDEF DUMPTESTS} Writeln('Test: ',TestName); {$ENDIF} CompileRE(T.Expression); if T.SubstitutionText<>'' then begin S:= RE.Replace(T.InputText, T.SubstitutionText, True); AreEqual('Replace failed', PrintableString(T.ExpectedResult), PrintableString(S)) end else begin RE.Exec(T.inputText); AreEqual('Search position', T.MatchStart, RE.MatchPos[0]); AreEqual('Matched text', PrintableString(T.ExpectedResult), PrintableString(RE.Match[0])); end; end; initialization {$IFDEF FPC} RegisterTest(TTestRegexpr); {$ENDIF} end.