diff --git a/packages/vcl-compat/fpmake.pp b/packages/vcl-compat/fpmake.pp index 006de59198..bc1d5d0ed7 100644 --- a/packages/vcl-compat/fpmake.pp +++ b/packages/vcl-compat/fpmake.pp @@ -36,6 +36,7 @@ begin P.Dependencies.Add('fcl-json'); P.Dependencies.Add('fcl-hash'); P.Dependencies.Add('hash'); + P.Dependencies.Add('libpcre'); P.SourcePath.Add('src'); P.IncludePath.Add('src'); @@ -61,6 +62,10 @@ begin T.Dependencies.AddUnit('system.json'); T:=P.Targets.AddUnit('system.hash.pp'); T.ResourceStrings := True; + T:=P.Targets.AddUnit('system.regularexpressionsconsts.pp'); + T.ResourceStrings := True; + T:=P.Targets.AddUnit('system.regularexpressionscore.pp'); + T.Dependencies.AddUnit('system.regularexpressionsconsts'); {$ifndef ALLPACKAGES} diff --git a/packages/vcl-compat/src/system.regularexpressionsconsts.pp b/packages/vcl-compat/src/system.regularexpressionsconsts.pp new file mode 100755 index 0000000000..4a1a29083d --- /dev/null +++ b/packages/vcl-compat/src/system.regularexpressionsconsts.pp @@ -0,0 +1,22 @@ +unit System.RegularExpressionsConsts; + +interface + +resourcestring + SRegExExpressionError = 'Error in regular expression at offset %d: %s'; + SRegExIndexOutOfBounds = 'Index out of bounds (%d)'; + SRegExInvalidGroupName = 'Invalid group name (%s)'; + SRegExInvalidIndexType = 'Invalid index type'; + SRegExMatchRequired = 'Successful match required'; + SRegExMatchError = 'Error matching the regex: %s'; + SRegExMissingExpression = 'A regular expression specified in RegEx is required'; + SRegExStringsRequired = 'Strings parameter cannot be nil'; + SRegExStudyError = 'Error studying the regex: %s'; + SErrRegexOvectorTooSmall = 'output vector was not big enough for all the captured substrings'; + SRegExMatcStartAfterEnd = '\K was used in an assertion to set the match start after its end.'+sLineBreak+ + 'From end to start the match was: %s'; + +implementation + +end. + diff --git a/packages/vcl-compat/src/system.regularexpressionscore.pp b/packages/vcl-compat/src/system.regularexpressionscore.pp new file mode 100755 index 0000000000..c1fe038fa1 --- /dev/null +++ b/packages/vcl-compat/src/system.regularexpressionscore.pp @@ -0,0 +1,1382 @@ +{ + This file is part of the Free Component Library (FCL) + Copyright (c) 2023 The free pascal team + + Delphi-compatible Regular expressions unit. + + See the file COPYING.FPC, included in this distribution, + for details about the copyright. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + + **********************************************************************} +{ + Note that the original Delphi unit (by Jan Goyvaerts) uses PCRE1, + but this unit uses PCRE2. The string type depends on how the packages were + compiled. +} + +unit System.RegularExpressionsCore; + +{$MODE OBJFPC} +{$H+} + +interface + +{.$DEFINE USEWIDESTRING} // uncomment if you want to force widestring... + +// We cannot detect the char size before the uses clause is parsed, it will return 1, the compiler default. +// So we need a define here, maybe a compiler switch is needed to set the default size (-Sw ?) which would allow to set the default type. +// The detection here is based on the assumption that the dotted units use widestring... +{$IFDEF FPC_DOTTEDUNITS} +{$DEFINE USEWIDESTRING} +{$ENDIF} + + +uses +{$IFDEF FPC_DOTTEDUNITS} + System.SysUtils, System.Classes, System.Contnrs, {$IFNDEF USEWIDESTRING} Api.PCRE2_8 {$ELSE} Api.PCRE2_16 {$ENDIF}, System.CTypes, System.RegularExpressionsConsts; +{$ELSE} + SysUtils, Classes, Contnrs, {$IFNDEF USEWIDESTRING} libpcre2_8 {$ELSE} libpcre2_16 {$ENDIF}, ctypes, System.RegularExpressionsConsts; +{$ENDIF} + +const + MAX_SUBEXPRESSIONS = 99; + +type + {$IFDEF USEWIDESTRING} + TREString = UnicodeString; + {$ElSE} + TREString = AnsiString; + {$ENDIF} + + TPerlRegExOption = (preCaseLess,preMultiLine,preSingleLine,preExtended,preAnchored,preUnGreedy,preNoAutoCapture, + preAllowEmptyClass, preAltBSUX, preAltCircumFlex, preAltVerbNames, + preDollarEndOnly, preDupNames, preEndAnchored, preFirstLine, preLiteral, preMatchInvalidUTF, + preMatchUnsetBackRef, preNeverBackslashC, preNoAutoPossess, preNoDotStarAnchor, preNoStartOptimize, + preNoUTFCheck, preUseOffsetLimit); + + TPerlRegExOptions = set of TPerlRegExOption; + + TPerlRegExStateItem = (preNotBOL,preNotEOL,preNotEmpty); + TPerlRegExState = set of TPerlRegExStateItem; + + TPerlRegExReplaceEvent = procedure(Sender: TObject; var ReplaceWith: TREString) of object; + + { TPerlRegEx } + + TPerlRegEx = class + Private + Type + TTransformation = (tNone,tLowerCase,tUpperCase,tFirstCap,tInitialCap); + + class function TransForm(aTransform: TTransformation; S: TREString): TREString; + private + {$IFDEF USEWIDESTRING} + FCode : Ppcre2_code_16; + {$ELSE} + FCode : Ppcre2_code_8; + {$ENDIF} + FOnMatch: TNotifyEvent; + FOnReplace: TPerlRegExReplaceEvent; + FOptions: TPerlRegExOptions; + FRegEx: TREString; + FState: TPerlRegExState; + FStart, + FStop: Integer; + FStudied: Boolean; + FResultVector : Psize_t; + FResultCount : Cardinal; + FMatchData : ppcre2_match_data; + FModifiedSubject, + FSubject: TREString; + FSubjectLength : cuint32; + FNameCount : cuint32; + FNameTable : PCRE2_SPTR; + FNameEntrySize : cuint32; + FLastModifiedEnd: SizeInt; + FReplacement : TREString; + FStoredGroups: array of TREString; + FCrLFIsNewLine, + FIsUtf : Boolean; + Procedure CheckMatch; inline; + function DoMatch(Opts: CUInt32): Boolean; + function GetBackRefIndex(const Ref: TREString; var I: Integer): Integer; + function GetCompiled: Boolean; + function GetFoundMatch: Boolean; inline; + function GetGroupCount: Integer; + function GetGroupLengths(aIndex: Integer): Integer; + function GetGroupOffsets(aIndex: Integer): Integer; + function GetGroups(aIndex: Integer): TREString; + function GetMatchedLength: Integer; + function GetMatchedOffset: Integer; + function GetMatchedText: TREString; + function GetModifiedSubject: TREString; + function GetNamedGroup(aName : TREString): TREString; + procedure GetNamedGroupInfo; + function GetPCREErrorMsg(ErrorNr: Integer): TREString; + function GetResultString(aIndex: Integer): TREString; + function GetStart: Integer; + function GetStop: Integer; + function GetSubject: TREString; + function GetSubjectLeft: TREString; + function GetSubjectRight: TREString; + function MakeOptions(aOptions: TPerlRegExOptions): Integer; + procedure SetOptions(aValue: TPerlRegExOptions); + procedure SetRegEx(aValue: TREString); + procedure SetReplacement(aValue: TREString); + procedure SetStart(aValue: Integer); + procedure SetStop(aValue: Integer); + procedure SetSubject(aValue: TREString); + protected + procedure FreeCodeData; + procedure FreeMatchData; + procedure CleanUp; virtual; + procedure ClearStoredGroups; + function FirstOffset : Cardinal; + function FirstLength : Cardinal; + public + constructor Create; + destructor Destroy; override; + // Use this to escape special characters. + class function EscapeRegExChars(const aString: TREString): TREString; + // Compile the regex. + procedure Compile; + // Study regex (may result in faster execution); + procedure Study; + // Try to match, starting at beginning. Returns true if a match was found. + function Match: Boolean; + // Try to match again, starting previous match end. Returns true if a new match was found. + function MatchAgain: Boolean; + // Replace current match in Subject with ComputeReplacement. Returns computed replacement + function Replace: TREString; + // Replace all matches in Subject with their ComputeReplacement. Returns true if a match was found. + function ReplaceAll: Boolean; + // Compute replacement text. + function ComputeReplacement: TREString; + // Store groups for faster access. + procedure StoreGroups; + // Index in groups of name. + function NamedGroup(const aName: TREString): Integer; + // Split subject TREString based on regex. aStrings will contain everything outside the matches. + procedure Split(const aStrings: TStrings; aLimit: Integer); + // Split subject TREString based on regex, but include matches in result. + procedure SplitCapture(const aStrings: TStrings; aLimit: Integer); overload; + // Split subject TREString based on regex, but include matches in result. + // if aoffset is > 1 then everything till offset is put in the first TREString. + procedure SplitCapture(const aStrings: TStrings; aLimit: Integer; aOffset : Integer); overload; + // Was the regex compiled ? + property Compiled: Boolean read GetCompiled; + // Match found ? + property FoundMatch: Boolean read GetFoundMatch; + // Did study ? + property Studied: Boolean read FStudied; + // Fast access, group 0. + property MatchedText: TREString read GetMatchedText; + property MatchedLength: Integer read GetMatchedLength; + property MatchedOffset: Integer read GetMatchedOffset; + // Minimum search position, 1-based. + property Start: Integer read GetStart write SetStart; + // Maximum search position, 1-based. + property Stop: Integer read GetStop write SetStop; + property State: TPerlRegExState read FState write FState; + // Group count. + property GroupCount: Integer read GetGroupCount; + // Group Texts. Index 0 - GroupCount. 0 is whole matched text. on original search text. + property Groups[aIndex: Integer]: TREString read GetGroups; + // Group lengths & Offsets. Index 0 - GroupCount. 0 is whole matched text, on original search text. + property GroupLengths[aIndex: Integer]: Integer read GetGroupLengths; + property GroupOffsets[aIndex: Integer]: Integer read GetGroupOffsets; + // Named access to groups. + property NamedGroups[aName : TREString] : TREString Read GetNamedGroup; + // Subject TREString. Will be modified by replace ! + property Subject: TREString read GetModifiedSubject write SetSubject; + // Original subject TREString. Not modified by replace ! + property OriginalSubject: TREString read FSubject write SetSubject; + // Left of original subject. + property SubjectLeft: TREString read GetSubjectLeft; + // Right of original subject. + property SubjectRight: TREString read GetSubjectRight; + public + // Set options. + property Options: TPerlRegExOptions read FOptions write SetOptions; + // The regular expression + property RegEx: TREString read FRegEx write SetRegEx; + // The replacement expression. + property Replacement: TREString read FReplacement write SetReplacement; + // Called on every match. + property OnMatch: TNotifyEvent read FOnMatch write FOnMatch; + // Set this to modify the computed replacement text. + property OnReplace: TPerlRegExReplaceEvent read FOnReplace write FOnReplace; + end; + + + TRegExStudyOption = (preJIT, preJITPartialHard, preJITPartialSoft); + TRegExStudyOptions = set of TRegExStudyOption; + + { TPerlRegExList } + + TPerlRegExList = class + private + FMatch: TPerlRegEx; + FList : TFPObjectList; + FStart : Integer; + FStop : Integer; + FSubject : TREString; + function GetCount: Integer; + function GetOwnsRegex: Boolean; + function GetRegEx(aIndex: Integer): TPerlRegEx; + function GetStart: Integer; + function GetStop: Integer; + function GetSubject: TREString; + procedure SetRegEx(aIndex: Integer; aValue: TPerlRegEx); + procedure SetStart(aValue: Integer); + procedure SetStop(aValue: Integer); + procedure SetSubject(aValue: TREString); + protected + procedure UpdateRegEx(const aRegEx: TPerlRegEx); + public + constructor Create(OwnsRegex : Boolean); + destructor Destroy; override; + public + function Add(const aRegEx: TPerlRegEx): Integer; + procedure Clear; + procedure Delete(aIndex: Integer); + function IndexOf(const aRegEx: TPerlRegEx): Integer; + procedure Insert(aIndex: Integer; const aRegEx: TPerlRegEx); + public + function Match: Boolean; + function MatchAgain: Boolean; + property RegEx[aIndex: Integer]: TPerlRegEx read GetRegEx write SetRegEx; + property Count: Integer read GetCount; + property Subject: TREString read GetSubject write SetSubject; + property Start: Integer read GetStart write SetStart; + property Stop: Integer read GetStop write SetStop; + property MatchedRegEx: TPerlRegEx read FMatch; + Property OwnsRegex : Boolean Read GetOwnsRegex; + end; + + ERegularExpressionError = class(Exception); + +// Todo: move to strutils ? +Function InitialCaps(const S : TREString) : TREString; + +implementation + +{$IFNDEF USEWIDESTRING} +function GetStrLen(p : PAnsiChar; len : Integer) : AnsiString; + +var + L : Integer; + +begin + Result:=''; + L:=StrLen(P); + if L>Len then + L:=Len; + SetLength(Result,L); + if L>0 then + Move(P^,Result[1],L); +end; +{$ELSE} +function GetStrLen(p : PWideChar; len : Integer) : UnicodeString; + +var + L : Integer; + +begin + Result:=''; + L:=StrLen(P); + if L>Len then + L:=Len; + SetLength(Result,L); + if Len>0 then + Move(P^,Result[1],L*2); +end; +{$ENDIF} + +Function InitialCaps(const S : TREString) : TREString; + +const + NonWord = [#0..'&', '(', '*', '+', ',', '-', '.', '?', '<', '[', '{', #$B7]; + +var + L : TREString; + Len,Last,I : Integer; + Upper : Boolean; + +begin + L:=LowerCase(S); + Len:=Length(L); + Last:=1; + I:=1; + Upper:=True; + Result:=''; + While I<=Len do + begin + if L[i] in NonWord then + Upper:=True + else if Upper then + begin + if I>Last then + Result:=Result+Copy(L,Last,I-Last); + Result:=Result+UpperCase(L[i]); + inc(I); + Last:=I; + Upper:=False; + end; + Inc(i); + end; + Result:=Result+Copy(L,Last,I-Last); +end; + + +{ TPerlRegEx } + + +function TPerlRegEx.GetFoundMatch: Boolean; +begin + Result:=FResultCount>0; +end; + +function TPerlRegEx.GetCompiled: Boolean; +begin + Result:=Assigned(FCode); +end; + +procedure TPerlRegEx.CheckMatch; +begin + if not FoundMatch then + raise ERegularExpressionError.Create(SRegExMatchRequired); +end; + +function TPerlRegEx.GetGroupCount: Integer; +begin + CheckMatch; + Result:=FResultCount-1; +end; + +function TPerlRegEx.GetGroupLengths(aIndex: Integer): Integer; + +begin + CheckMatch; + Result:=FResultVector[2*aIndex+1]-FResultVector[2*aIndex]; +end; + +function TPerlRegEx.GetGroupOffsets(aIndex: Integer): Integer; +begin + CheckMatch; + Result:=FResultVector[2*aIndex]+1; +end; + +function TPerlRegEx.GetResultString(aIndex: Integer): TREString; + +var + astart,aLength : Ptrint; + +begin + // Writeln('AIndex ',aIndex,' ',FResultCount); + aStart:=FResultVector[2*aIndex]; + aLength:=FResultVector[2*aIndex+1]-aStart; + inc(aStart); // 1-based + Result:=Copy(FSubject,AStart,aLength); +end; + +function TPerlRegEx.GetGroups(aIndex: Integer): TREString; + +begin + CheckMatch; + if Length(FStoredGroups)>0 then + Result:=FStoredGroups[aIndex] + else + Result:=GetResultString(aIndex); +end; + +function TPerlRegEx.GetMatchedLength: Integer; + +begin + Result:=GetGroupLengths(0) +end; + +function TPerlRegEx.GetMatchedOffset: Integer; +begin + Result:=GetGroupOffsets(0); +end; + +function TPerlRegEx.GetMatchedText: TREString; +begin + Result:=GetResultString(0) +end; + +function TPerlRegEx.GetModifiedSubject: TREString; +begin + Result:=FModifiedSubject; +end; + +function TPerlRegEx.GetNamedGroup(aName : TREString): TREString; + +var + Idx : integer; + +begin + Result:=''; + Idx:=NamedGroup(aName); + if Idx<>-1 then + Result:=Groups[Idx]; +end; + +function TPerlRegEx.GetStart: Integer; +begin + Result:=FStart+1; +end; + +function TPerlRegEx.GetStop: Integer; +begin + Result:=FStop+1; +end; + +function TPerlRegEx.GetSubject: TREString; +begin + Result:=FSubject; +end; + +function TPerlRegEx.GetSubjectLeft: TREString; +begin + // Resultvector is 0 based + Result:=Copy(FSubject,1,FResultVector[0]); +end; + +function TPerlRegEx.GetSubjectRight: TREString; +var + SPos : Integer; +begin + SPos:=FResultVector[1]; // 0-based + Result:=Copy(FSubject,SPos+1,FSubjectLength-SPos); +end; + +procedure TPerlRegEx.SetOptions(aValue: TPerlRegExOptions); +begin + if FOptions=AValue then Exit; + FOptions:=AValue; + CleanUp; // Need to reset... +end; + +procedure TPerlRegEx.SetRegEx(aValue: TREString); +begin + if FRegEx=AValue then Exit; + FRegEx:=aValue; +end; + +procedure TPerlRegEx.SetReplacement(aValue: TREString); +begin + FReplacement:=AValue; +end; + +procedure TPerlRegEx.SetStart(aValue: Integer); +begin + FStart:=aValue-1; +end; + +procedure TPerlRegEx.SetStop(aValue: Integer); +begin + if FStop=aValue-1 then Exit; + FStop:=aValue-1; +end; + +procedure TPerlRegEx.SetSubject(aValue: TREString); +begin + FSubject:=aValue; + FSubjectLength:=Length(FSubject); + FModifiedSubject:=aValue; + CleanUp; +end; + +procedure TPerlRegEx.CleanUp; +begin + FreeMatchData; + FreeCodeData; + ClearStoredGroups; + FResultCount:=0; + FResultVector:=Nil; + FStart:=0; + FStop:=Length(FSubject); + FLastModifiedEnd:=0; +end; + +procedure TPerlRegEx.ClearStoredGroups; +begin + SetLength(FStoredGroups,0); +end; + +function TPerlRegEx.FirstOffset: Cardinal; +begin + Result:=FResultVector[0]; +end; + +function TPerlRegEx.FirstLength: Cardinal; +begin + Result:=FResultVector[1]-FResultVector[0]; +end; + +constructor TPerlRegEx.Create; +begin + if not libpcre28loaded then + Loadlibpcre28; +end; + +destructor TPerlRegEx.Destroy; +begin + inherited Destroy; +end; + +class function TPerlRegEx.EscapeRegExChars(const aString: TREString): TREString; + +Const + NeedEscape = ['\','[',']','^','$','.','|','?','*','+','-','(',')','{','}','&','<','>']; + +var + I : Integer; + PSrc,PDest,PStart : PChar; + +begin + Result:=''; + SetLength(Result,2*Length(aString)); + PSrc:=PChar(aString); + PDest:=PChar(Result); + PStart:=PDest; + for I:=1 to Length(aString) do + begin + if PSrc^=#0 then + begin + PDest^:='\'; + Inc(PDest); + PDest^:='0'; + end + else if CharInSet(PSrc^,NeedEscape) then + begin + PDest^:='\'; + Inc(PDest); + PDest^:=PSrc^; + end + else + PDest^:=PSrc^; + Inc(PSrc); + Inc(PDest); + end; + SetLength(Result,(PDest-PStart)); +end; + +function TPerlRegEx.MakeOptions(aOptions: TPerlRegExOptions): Integer; + + Procedure AddOption(aOpt : TPerlRegExOption; aValue : cuint32); + + begin + if aOpt in AOptions then + Result:=Result or aValue; + end; + +begin + Result:=PCRE2_NEWLINE_ANY or PCRE2_UTF; + AddOption(preCaseLess,PCRE2_CASELESS); + AddOption(preMultiLine,PCRE2_MULTILINE); + AddOption(preSingleLine,PCRE2_DOTALL); + AddOption(preExtended,PCRE2_EXTENDED); + AddOption(preAnchored,PCRE2_ANCHORED); + AddOption(preUnGreedy,PCRE2_UNGREEDY); + AddOption(preNoAutoCapture,PCRE2_NO_AUTO_CAPTURE); + AddOption(preAllowEmptyClass,PCRE2_ALLOW_EMPTY_CLASS); + AddOption(preAltBSUX,PCRE2_ALT_BSUX); + AddOption(preAltCircumFlex,PCRE2_ALT_CIRCUMFLEX); + AddOption(preAltVerbNames,PCRE2_ALT_VERBNAMES); + AddOption(preDollarEndOnly,PCRE2_DOLLAR_ENDONLY); + AddOption(preDupNames,PCRE2_DUPNAMES); + AddOption(preEndAnchored,PCRE2_ENDANCHORED); + AddOption(preFirstLine,PCRE2_FIRSTLINE); + AddOption(preLiteral,PCRE2_LITERAL); + AddOption(preMatchInvalidUTF,PCRE2_MATCH_INVALID_UTF); + AddOption(preMatchUnsetBackRef,PCRE2_MATCH_UNSET_BACKREF); + AddOption(preNeverBackslashC,PCRE2_NEVER_BACKSLASH_C); + AddOption(preNoAutoPossess,PCRE2_NO_AUTO_POSSESS); + AddOption(preNoDotStarAnchor,PCRE2_NO_DOTSTAR_ANCHOR); + AddOption(preNoStartOptimize,PCRE2_NO_START_OPTIMIZE); + // maybe we should enable by default ? + AddOption(preNoUTFCheck,PCRE2_NO_UTF_CHECK); + AddOption(preUseOffsetLimit,PCRE2_USE_OFFSET_LIMIT); +// AddOption(preUTF,PCRE2_UTF); + +end; + +function TPerlRegEx.GetPCREErrorMsg(ErrorNr: Integer): TREString; + +var + Buffer : Array[0..255] of ansichar; + +begin + pcre2_get_error_message(ErrorNr,@Buffer,SizeOf(Buffer)); + Result:=strpas(@Buffer); +end; + +procedure TPerlRegEx.Compile; + +var + ErrorNr: Integer; + ErrorPos: Integer; + +begin + if (FRegEx='') then + raise ERegularExpressionError.CreateRes(@SRegExMissingExpression); + CleanUp; + FCode:=pcre2_compile(TPCRE2_SPTR8(FRegEx),Length(FRegEx),MakeOptions(FOptions),@ErrorNr,@ErrorPos,Nil); + if (FCode=nil) then + raise ERegularExpressionError.CreateFmt(SRegExExpressionError,[ErrorPos+1,GetPCREErrorMsg(ErrorNr)]); + FMatchData:=pcre2_match_data_create_from_pattern(FCode,Nil); + +end; + +procedure TPerlRegEx.Study; +begin + +end; + +procedure TPerlRegEx.FreeMatchData; + +var + Data : ppcre2_match_data; + +begin + if FMatchData=Nil then exit; + Data:=FMatchData; + FMatchData:=Nil; + pcre2_match_data_free(Data); + FResultVector:=Nil; +end; + +procedure TPerlRegEx.FreeCodeData; + +var + {$IFDEF USEWIDESTRING} + Data : Ppcre2_code_16; + {$ELSE} + Data : Ppcre2_code_8; + {$ENDIF} + +begin + if (FCode=Nil) then + exit; + Data:=FCode; + FCode:=Nil; + pcre2_code_free(Data); +end; + +procedure TPerlRegEx.GetNamedGroupInfo; + + +begin + FNameEntrySize:=0; + FNameTable:=Nil; + pcre2_pattern_info( + FCode, (* the compiled pattern *) + PCRE2_INFO_NAMECOUNT, (* get the number of named substrings *) + @FNameCount); (* where to put the answer *) + if (FNameCount = 0) then + Exit; + pcre2_pattern_info( + FCode, (* the compiled pattern *) + PCRE2_INFO_NAMETABLE, (* address of the table *) + @FNameTable); (* where to put the answer *) + pcre2_pattern_info( + FCODE, (* the compiled pattern *) + PCRE2_INFO_NAMEENTRYSIZE, (* size of each entry in the table *) + @FNameEntrySize); +end; + +function TPerlRegEx.Match: Boolean; + +var + newline,option_bits : cuint32; + +begin + Result:=False; + ClearStoredGroups; + if not Compiled then + Compile; + FMatchData:=pcre2_match_data_create_from_pattern(FCode,Nil); + Result:=DoMatch(0); + if Result then + begin + pcre2_pattern_info(FCode,PCRE2_INFO_ALLOPTIONS, @option_bits); + FIsUtf:=((option_bits and PCRE2_UTF) <> 0); + pcre2_pattern_info(FCode,PCRE2_INFO_NEWLINE,@newline); + FCrLFIsNewLine:= (newline=PCRE2_NEWLINE_ANY) or + (newline=PCRE2_NEWLINE_CRLF) or + (newline=PCRE2_NEWLINE_ANYCRLF); + end; +end; + + +function TPerlRegEx.DoMatch(Opts : CUInt32): Boolean; + +var + len,rc : cInt; + S : TREString; + +begin + Result:=False; +{$IF SIZEOF(CHAR)=2} + rc:=pcre2_match_w( +{$ELSE} + rc:=pcre2_match( +{$ENDIF} + FCode, (* the compiled pattern *) + PChar(FSubject), (* the subject TREString *) + FSubjectLength, (* the length of the subject *) + FStart, (* start at offset 0 in the subject *) + Opts, (* default options *) + FMatchData, (* block for storing the result *) + Nil); + if (rc <= 0) then + begin + FreeMatchData; + FreeCodeData; + if (rc=PCRE2_ERROR_NOMATCH) then + Exit(False) + else if (rc = 0) then + raise ERegularExpressionError.CreateFmt(SRegExMatchError,[SErrRegexOvectorTooSmall]) + else + raise ERegularExpressionError.CreateFmt(SRegExMatchError,[GetPCREErrorMsg(rc)]); + end; + Result:=True; + FResultCount:=rc; + FResultVector:=pcre2_get_ovector_pointer(FMatchData); + {For i:=0 to FResultCount-1 do + Writeln(I,': ',FResultVector[2*I],' - ',FResultVector[2*I+1]);} + if (FResultVector[0]>FResultVector[1]) then + begin + Len:=integer(FResultVector[0]-FResultVector[1]); + S:=Copy(FSubject,FResultVector[1],Len); + FreeMatchData; + FreeCodeData; + raise ERegularExpressionError.CreateFmt(SRegExMatcStartAfterEnd,[S]); + end; + // Next should start after current + FStart:=FResultVector[1]; + GetNamedGroupInfo; + if Assigned(OnMatch) then + OnMatch(Self); +end; + +function TPerlRegEx.MatchAgain: Boolean; + +var + StartChar,Opts : cuint32; +begin + Result:=False; + Opts:=0; + // Special case, empty TREString. + if (FResultVector[0]=FResultVector[1]) then + begin + if (FResultVector[0]>=FSubjectLength) then + Exit; + Opts:=PCRE2_NOTEMPTY_ATSTART or PCRE2_ANCHORED; + end + else + begin + // Check whether start empty + Startchar:=pcre2_get_startchar(FMatchData); + if (FStart<=Startchar) then + begin + (* Reached end of subject. *) + if (startchar>=FSubjectLength) then + Exit; + (* Advance by one character. *) + FStart:=StartChar+1; + (* If UTF-8, it may be more than one code unit. *) + if FIsUtf then + begin + While (FStart$80) then + Exit; + Inc(FStart); + end; + end; + end; + end; + Result:=DoMatch(Opts); + (* + This time, a result of NOMATCH isn't an error. If the value in 'options' + is zero, it just means we have found all possible matches, so the loop ends. + Otherwise, it means we have failed to find a non-empty-TREString match at a + point where there was a previous empty-TREString match. In this case, we do what + Perl does: advance the matching position by one character, and continue. We + do this by setting the 'end of previous match' offset, because that is picked + up at the top of the loop as the point at which to start again. + + There are two complications: (a) When CRLF is a valid newline sequence, and + the current position is just before it, advance by an extra byte. (b) + Otherwise we must ensure that we skip an entire UTF character if we are in + UTF mode. + *) + While not Result do + begin + if Opts=0 then + Break; + FResultVector[1]:=FStart+1; (* Advance one code unit *) + if FCrLFIsNewLine and (* If CRLF is a newline & *) + (FStart $80) then + break; + inc(FResultVector[1]); + end; + end; + Result:=DoMatch(Opts); + end; +end; + + +function TPerlRegEx.Replace: TREString; + +var + NewSubject,Tmp : TREString; + +begin + CheckMatch; + Result:=ComputeReplacement; + if Assigned(OnReplace) then + OnReplace(Self, Result); + Tmp:=Result; + if FLastModifiedEnd=0 then + FLastModifiedEnd:=GetMatchedOffset-1; + NewSubject:=Copy(FModifiedSubject,1,FLastModifiedEnd)+Tmp; + FLastModifiedEnd:=Length(NewSubject)+1; + tmp:=GetSubjectRight; + FModifiedSubject:=NewSubject+tmp; + ClearStoredGroups; +end; + +function TPerlRegEx.ReplaceAll: Boolean; +begin + Result:=Match; + if Not Result then + exit; + repeat + Replace; + until not MatchAgain; +end; + +function IsAlphaAndUnderline(const C: Char): Boolean; + +Const + allowed = ['A'..'Z', 'a'..'z', '_']; + +begin + Result:=CharInSet(C,Allowed); +end; + +function IsNumeric(const C: Char): Boolean; + +Const + allowed = ['0'..'9']; + +begin + Result:=CharInSet(C,Allowed); +end; + + +{ Return values: + >=0 : group number. + -1 : whole subject. + -2 : Left of match. + -3 : Right of match. + -99 : invalid. + On return, I is the index of the next character to process. +} + +Function TPerlRegEx.GetBackRefIndex(const Ref : TREString; var I : Integer) : Integer; + +var + Len,P,N,Group : Integer; + +begin + Len:=Length(Ref); + Group:=-99; + Case Ref[I] of + '0'..'9': + begin + Group:=Ord(Ref[i])-Ord('0'); + Inc(I); + // Only consume as much integers as there are groups. + // So if there are 15 groups then $16 -> $1 + literal 6. + While (I<=Len) and (Ref[i] in ['0'..'9']) do + begin + N:=(Group*10)+Ord(Ref[i])-Ord('0'); + if N>GroupCount then + Break; + Group:=N; + Inc(I); + end; + end; + '{': + begin + Inc(I); + if (Ref[I] in ['0'..'9']) then + // \{123} + begin + Group:=0; + while (ILen) or (Ref[I]<>'}') then + Group:=-99 + else + Inc(I); + end + else + // \{named} + begin + P:=I; + while (ILen) or (Ref[I]<>'}') then + Group:=-99 + else + begin + Group:=NamedGroup(Copy(Ref,P,I-P)); + if Group=-1 then + group:=-99; + Inc(I); + end + end; + end; + '_': // Whole subject + begin + Group:=-1; + Inc(I); + end; + '&': // \& or $& (whole regex match) + begin + Group:=0; + Inc(I); + end; + '+': // Last group + begin + Group:=GroupCount; + Inc(I); + end; + '`': // Subject to left of match. + begin + Group:=-2; + inc(I); + end; + #39: // Subject to right of match. + begin + Group:=-3; + inc(I); + end + end; + Result:=Group; +end; + +Class function TPerlRegEx.TransForm(aTransform : TTransformation; S : TREString): TREString; + +begin + Case aTransform of + tFirstCap : Result:=UpperCase(Copy(S,1,1))+LowerCase(Copy(S,2,Length(S)-1)); + tInitialCap : Result:=InitialCaps(S); + tUpperCase : Result:=UpperCase(S); + tLowerCase : Result:=LowerCase(S); + else + Result:=S; + end; +end; + +function TPerlRegEx.ComputeReplacement: TREString; + +var + Res : TREString; + Len : Integer; + + Procedure AddToResult(aStart,aNext : Integer); inline; + + begin + Res:=Res+Copy(FReplacement,aStart,aNext-aStart); + end; + + Procedure AddNamedGroup(const aName : TREString); inline; + + begin + Res:=Res+NamedGroups[aName]; + end; + + Function AddBackRef(aTransform : TTransformation; I : Integer) : Integer; + + var + P,N,Group : Integer; + + begin + Group:=GetBackRefIndex(FReplacement,I); + Case Group of + -99 : ; // invalid + -1 : Res:=Res+TransForm(aTransform,FSubject); + -2 : Res:=Res+TransForm(aTransform,SubjectLeft); + -3 : Res:=Res+TransForm(aTransform,SubjectRight); + else + if Group<=GroupCount then + Res:=Res+TransForm(aTransform,Groups[Group]); + end; + Result:=I; + end; + +var + I, P, Last : Integer; + updatelast : boolean; + +begin + Len:=Length(FReplacement); + if Len=0 then + Exit(''); + I:=1; + Last:=1; + while I<=Len do + begin + case FReplacement[I] of + '\': + begin + if (I=Len) then + raise ERegularExpressionError.CreateFmt(SRegExIndexOutOfBounds,[I]); + AddToResult(Last,I); + Inc(I); + UpdateLast:=True; + case FReplacement[I] of + '$', '\': + begin + Inc(I); + AddToResult(I-1,I); + end; + 'g': + begin + if (I+2 + if (I<=Len) and (FReplacement[I]='>') then + begin + AddNamedGroup(Copy(FReplaceMent,P,I-P)); + Inc(I); + Last:=I; + end + else + begin + I:=I+2; // Skip everything. + UpdateLast:=False + end; + end + else + UpdateLast:=False; + end; + 'l','L' : I:=AddBackRef(tLowerCase,I); + 'u','U' : I:=AddBackRef(tLowerCase,I); + 'f','F' : I:=AddBackRef(tFirstCap,I); + 'i','I' : I:=AddBackRef(tInitialCap,I); + else + I:=AddBackRef(tNone,I); + end; + if UpdateLast then + Last:=I; + end; + '$': + begin + if I=Len then + raise ERegularExpressionError.CreateFmt(SRegExIndexOutOfBounds,[I]); + AddToResult(Last,I); + Inc(I); + if FReplacement[I]='$' then + begin + AddToResult(Last,I); + Inc(I); + end + else + I:=AddBackRef(tNone,I); + Last:=I; + end; + else // Case + Inc(I); + end; + end; + if I>Last then + AddToResult(Last,I); + Result:=Res; +end; + +procedure TPerlRegEx.StoreGroups; + +var + I : Integer; + +begin + CheckMatch; + SetLength(FStoredGroups,GroupCount+1); + For I:=0 to GroupCount do + FStoredGroups[i]:=GetResultString(I); +end; + +function TPerlRegEx.NamedGroup(const aName: TREString): Integer; + +var + Ptr : PCRE2_SPTR; + N,I : Integer; + tblName : TREString; + +begin + Ptr:=FNameTable; + for i:=0 to FNameCount-1 do + begin +{$IFDEF USEWIDESTRING} + n:=ord(ptr[0]); + tblName:=GetStrLen((Ptr+1),FNameEntrySize-2); +{$ELSE} + n:=(ord(ptr[0]) shl 8) or ord(ptr[1]); + tblName:=GetStrLen((Ptr+2),FNameEntrySize-3); +{$ENDIF} + if SameText(TblName,aName) then + Exit(n); + Inc(Ptr,FNameEntrySize); + end ; + Result:=-1; +end; + +procedure TPerlRegEx.Split(const aStrings: TStrings; aLimit: Integer); + +var + NewStart,LastEnd,Matches: Integer; + +begin + if Not Assigned(aStrings) then + raise ERegularExpressionError.Create(SRegExStringsRequired); + if (aLimit=1) or not Match then + begin + aStrings.Add(Subject); + Exit; + end; + LastEnd:=0; // Last match pos + Matches:=1; + repeat + NewStart:=FirstOffset; // Start of current match + aStrings.Add(Copy(Subject,LastEnd+1,NewStart-LastEnd)); // Copy everything since last match. + Inc(Matches); + LastEnd:=NewStart+MatchedLength; // update last match pos. + until ((aLimit>1) and (Matches>=aLimit)) or not MatchAgain; + aStrings.Add(TREString(Copy(FSubject,LastEnd+1,FSubjectLength -LastEnd))); +end; + +procedure TPerlRegEx.SplitCapture(const aStrings: TStrings; aLimit: Integer); + +begin + SplitCapture(aStrings,aLimit,1); +end; + +procedure TPerlRegEx.SplitCapture(const aStrings: TStrings; aLimit: Integer; aOffset: Integer); + +var + NewStart,LastEnd,Matches: Integer; + DoCopy : Boolean; + +begin + if Not Assigned(aStrings) then + raise ERegularExpressionError.Create(SRegExStringsRequired); + if (aLimit=1) or not Match then + begin + aStrings.Add(Subject); + Exit; + end; + Dec(aOffset); + if (aOffset>0) then + Dec(aLimit); + LastEnd:=0; // Last match pos + Matches:=1; + repeat + NewStart:=FirstOffset; // Start of current match + DoCopy:=(NewStart>aOffset); + if DoCopy then + begin + aStrings.Add(Copy(Subject,LastEnd+1,NewStart-LastEnd)); // Copy everything since last match. + if GroupCount > 0 then + aStrings.Add(Groups[GroupCount]); + Inc(Matches); + LastEnd:=NewStart+MatchedLength; // update last match pos. + end; + until ((aLimit>1) and (Matches>=aLimit)) or not MatchAgain; + aStrings.Add(TREString(Copy(FSubject,LastEnd+1,FSubjectLength-LastEnd))); +end; + +{ TPerlRegExList } + +function TPerlRegExList.GetCount: Integer; +begin + Result:=FList.Count; +end; + +function TPerlRegExList.GetOwnsRegex: Boolean; +begin + Result:=FList.OwnsObjects; +end; + +function TPerlRegExList.GetRegEx(aIndex: Integer): TPerlRegEx; +begin + Result:=TPerlRegEx(Flist[aIndex]) +end; + +function TPerlRegExList.GetStart: Integer; +begin + Result:=FStart; +end; + +function TPerlRegExList.GetStop: Integer; +begin + Result:=FStop; +end; + +function TPerlRegExList.GetSubject: TREString; +begin + Result:=FSubject; +end; + +procedure TPerlRegExList.SetRegEx(aIndex: Integer; aValue: TPerlRegEx); +begin + FList[aIndex]:=aValue; +end; + +procedure TPerlRegExList.SetStart(AValue: Integer); +var + I : Integer; +begin + if AValue=FStart then exit; + FStart:=aValue; + For I:=0 to Count-1 do + RegEx[I].Start:=aValue; +end; + +procedure TPerlRegExList.SetStop(AValue: Integer); +var + I : Integer; +begin + if AValue=FStart then exit; + FStop:=aValue; + For I:=0 to Count-1 do + RegEx[I].Stop:=aValue; +end; + +procedure TPerlRegExList.SetSubject(aValue: TREString); +var + I: Integer; +begin + if aValue=FSUbject then exit; + FSubject:=aValue; + for I:=Count-1 downto 0 do + RegEx[I].Subject:=Subject; + FMatch:=nil; +end; + +procedure TPerlRegExList.UpdateRegEx(const aRegEx: TPerlRegEx); +begin + aRegEx.Subject:=FSubject; + ARegEx.Start:=FStart; + ARegEx.Stop:=FStop; +end; + +constructor TPerlRegExList.Create(OwnsRegex: Boolean); +begin + FList:=TFPObjectList.Create(OwnsRegex); +end; + +destructor TPerlRegExList.Destroy; +begin + FreeAndNil(FList); + inherited Destroy; +end; + +function TPerlRegExList.Add(const aRegEx: TPerlRegEx): Integer; +begin + Result:=FList.Add(aRegEx); + UpdateRegEx(aRegEx); +end; + +procedure TPerlRegExList.Clear; +begin + FList.Clear; +end; + +procedure TPerlRegExList.Delete(aIndex: Integer); +begin + FList.Delete(aIndex); +end; + +function TPerlRegExList.IndexOf(const aRegEx: TPerlRegEx): Integer; +begin + Result:=FList.IndexOf(aRegex); +end; + +procedure TPerlRegExList.Insert(aIndex: Integer; const aRegEx: TPerlRegEx); +begin + FList.Insert(aIndex,aRegex); +end; + +function TPerlRegExList.Match: Boolean; +begin + SetStart(1); + FMatch:=nil; + Result:=MatchAgain; +end; + +function TPerlRegExList.MatchAgain: Boolean; + +var + PRE : TPerlRegEx; + I,StartAt,Current: Integer; + +begin + // Determine start position + if not Assigned(FMatch) then + StartAt:=Start + else + With FMatch do + StartAt:=0; // MVC todo {InternalGetMatchedOffset+InternalGetMatchedLength}; + FMatch:=nil; + Current:=-1; + // Check all regexes for new closest match. + I:=0; + While (IStartAt) do + begin + PRE:=RegEx[I]; + // Should we search this regex again ? + if (not PRE.FoundMatch) or (PRE.FirstOffset-1; +end; + +end. diff --git a/packages/vcl-compat/tests/testcompat.lpi b/packages/vcl-compat/tests/testcompat.lpi index 8b191dc145..2702838a09 100644 --- a/packages/vcl-compat/tests/testcompat.lpi +++ b/packages/vcl-compat/tests/testcompat.lpi @@ -68,6 +68,10 @@ + + + + diff --git a/packages/vcl-compat/tests/testcompat.lpr b/packages/vcl-compat/tests/testcompat.lpr index 0f3d393726..7e075a281c 100644 --- a/packages/vcl-compat/tests/testcompat.lpr +++ b/packages/vcl-compat/tests/testcompat.lpr @@ -6,7 +6,7 @@ uses {$IFDEF UNIX}cwstring,{$ENDIF} Classes, consoletestrunner, tcnetencoding, tciotuils, utmessagemanager, utcdevices, utcanalytics, utcimagelist, - utcnotifications, utcjson, utcpush, utchash; + utcnotifications, utcjson, utcpush, utchash, utcregex; type diff --git a/packages/vcl-compat/tests/utcregex.pas b/packages/vcl-compat/tests/utcregex.pas new file mode 100644 index 0000000000..c36028f818 --- /dev/null +++ b/packages/vcl-compat/tests/utcregex.pas @@ -0,0 +1,315 @@ +unit utcregex; + +{$mode objfpc}{$H+} +{ $DEFINE USEWIDESTRING} + +interface + +uses + Classes, SysUtils, fpcunit, testutils, testregistry, system.regularexpressionscore; + +type + + { TTestRegExpCore } + + TTestRegExpCore = class(TTestCase) + private + FRegex: TPerlRegEx; + FMatchEventCount : Integer; + FSplitSubject: TStrings; + procedure AssertMatch(const Msg, aMatch: TREString; aPos, aLength: Integer; Groups: array of TREString); + procedure DoMatch(Sender: TObject); + protected + procedure SetUp; override; + procedure TearDown; override; + Property RegEx : TPerlRegEx Read FRegex; + Property SplitSubject : TStrings Read FSplitSubject; + published + Procedure TestHookup; + procedure TestMatch; + procedure TestNamedGroups; + procedure TestReplace; + procedure TestReplaceAll; + procedure TestSplitAll; + procedure TestSplitLimit; + procedure TestInitialCaps; + procedure TestReplaceGroupBackslash; + procedure TestReplaceGroupDollar; + procedure TestReplaceGroupQuoted; + procedure TestReplaceGroupNamed; + procedure TestReplaceGroupNamed2; + procedure TestReplaceGroupNamedInvalidName; + procedure TestReplaceWholeSubject; + procedure TestReplaceLeftOfMatch; + procedure TestReplaceRightOfMatch; + procedure TestReplaceWholeMatch; + procedure TestReplaceLastMatch; + end; + +implementation + +procedure TTestRegExpCore.AssertMatch(Const Msg,aMatch : TREString; aPos,aLength : Integer; Groups : Array of TREString); + +var + I : Integer; +begin + AssertEquals(Msg+': matched text',aMatch,Regex.MatchedText); + AssertEquals(Msg+': offset',aPos,Regex.MatchedOffset); + AssertEquals(Msg+': length',aLength,Regex.MatchedLength); + AssertEquals(Msg+': group count',Length(Groups),Regex.GroupCount); + For I:=1 to Regex.GroupCount do + AssertEquals(Msg+' group['+IntToStr(I)+']',Groups[I-1],Regex.Groups[I]); +end; + +procedure TTestRegExpCore.DoMatch(Sender: TObject); +begin + Inc(FMatchEventCount); +end; + +procedure TTestRegExpCore.TestMatch; + +begin + Regex.subject:='xyz abba abbba abbbba zyx'; + Regex.RegEx:='a(b*)a'; + AssertTrue('First match found',Regex.Match); + AssertEquals('Match event called',1,FMatchEventCount); + AssertMatch('Match 1','abba',5,4,['bb']); + AssertEquals('Left of match','xyz ',Regex.SubjectLeft); + AssertEquals('Right of match',' abbba abbbba zyx',Regex.SubjectRight); + AssertTrue('Second match found',Regex.MatchAgain); + AssertMatch('Match 2','abbba',10,5,['bbb']); + AssertTrue('Third match found',Regex.MatchAgain); + AssertMatch('Match 3','abbbba',16,6,['bbbb']); + AssertFalse('No more matches',Regex.MatchAgain); + AssertEquals('Match event called',3,FMatchEventCount); +end; + +procedure TTestRegExpCore.TestNamedGroups; + +Const + Rec1 = 'Name:"John" Surname:"Doe" Email:"john@example.com"'; + Rec2 = 'Name:"Jane" Surname:"Dolina" Email:"jane@doe.com"'; + +begin + Regex.Subject:=Rec1+#10+Rec2; + Regex.RegEx:='Name:"(?[\w]+?)".*?Surname:"(?[\w]+?)".*?Email:"(?\b[\w.%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,6}\b)"'; + AssertTrue('First match found',Regex.Match); + AssertMatch('Match 1',Rec1,1,Length(Rec1),['John','Doe','john@example.com']); + AssertEquals('Nonexisting group','',Regex.NamedGroups['nonexisting']); + AssertEquals('Name group','John',Regex.NamedGroups['Name']); + AssertEquals('Surname group','Doe',Regex.NamedGroups['Surname']); + AssertEquals('Email group','john@example.com',Regex.NamedGroups['Email']); + AssertTrue('Second match found',Regex.MatchAgain); + AssertMatch('Match 2',Rec2,Length(Rec1)+2,Length(Rec2),['Jane','Dolina','jane@doe.com']); + AssertFalse('No more matches',Regex.MatchAgain); +end; + +procedure TTestRegExpCore.TestReplace; +begin + Regex.subject:='xyz abba abbba abbbba zyx'; + Regex.RegEx:='a(b*)a'; + Regex.Replacement:='c'; + AssertTrue('First match found',Regex.Match); + AssertEquals('Replace','c',Regex.Replace); + AssertEquals('Replace result','xyz c abbba abbbba zyx',Regex.Subject); + AssertTrue('Second match found',Regex.MatchAgain); + AssertEquals('Replace 2','c',Regex.Replace); + AssertEquals('Replace 2 result','xyz c c abbbba zyx',Regex.Subject); + AssertTrue('Third match found',Regex.MatchAgain); + AssertEquals('Replace 3','c',Regex.Replace); + AssertEquals('Replace 3 result','xyz c c c zyx',Regex.Subject); + AssertFalse('No more matches',Regex.MatchAgain); +end; + +procedure TTestRegExpCore.TestReplaceAll; +begin + Regex.subject:='xyz abba abbba abbbba zyx'; + Regex.RegEx:='a(b*)a'; + Regex.Replacement:='c'; + AssertTrue('Replacements done',Regex.ReplaceAll); + AssertEquals('ReplaceAll result','xyz c c c zyx',Regex.Subject); +end; + + +procedure TTestRegExpCore.TestReplaceGroupBackslash; +// \n +begin + Regex.subject:='*abba*'; + Regex.RegEx:='a(b*)a'; + Regex.Replacement:='\1'; + AssertTrue('Match',Regex.Match); + AssertEquals('ReplaceText','bb',Regex.Replace); + AssertEquals('Result','*bb*',Regex.Subject); +end; + +procedure TTestRegExpCore.TestReplaceGroupDollar; +// $N +begin + Regex.subject:='*abba*'; + Regex.RegEx:='a(b*)a'; + Regex.Replacement:='$1'; + AssertTrue('Match',Regex.Match); + AssertEquals('ReplaceText','bb',Regex.Replace); + AssertEquals('Result','*bb*',Regex.Subject); +end; + +procedure TTestRegExpCore.TestReplaceGroupQuoted; +// \{N} +begin + Regex.subject:='*abba*'; + Regex.RegEx:='a(b*)a'; + Regex.Replacement:='\{1}'; + AssertTrue('Match',Regex.Match); + AssertEquals('ReplaceText','bb',Regex.Replace); + AssertEquals('Result','*bb*',Regex.Subject); +end; + +procedure TTestRegExpCore.TestReplaceGroupNamed; +// \{name} + +begin + Regex.subject:='*abba*'; + Regex.RegEx:='a(?b*?)a'; + Regex.Replacement:='\{Name}'; + AssertTrue('Match',Regex.Match); + AssertEquals('ReplaceText','bb',Regex.Replace); + AssertEquals('Result','*bb*',Regex.Subject); +end; + +procedure TTestRegExpCore.TestReplaceGroupNamed2; +// \{name} +begin + Regex.subject:='*abba*'; + Regex.RegEx:='a(?b*?)a'; + Regex.Replacement:='<\{Name}>'; + AssertTrue('Match',Regex.Match); + AssertEquals('ReplaceText','',Regex.Replace); + AssertEquals('Result','**',Regex.Subject); +end; + +procedure TTestRegExpCore.TestReplaceGroupNamedInvalidName; +// \{name} with invalid name +begin + Regex.subject:='*abba*'; + Regex.RegEx:='a(?b*?)a'; + Regex.Replacement:='<\{NameX}>'; + AssertTrue('Match',Regex.Match); + AssertEquals('ReplaceText','<>',Regex.Replace); + AssertEquals('Result','*<>*',Regex.Subject); +end; + +procedure TTestRegExpCore.TestReplaceWholeSubject; +begin + Regex.subject:='*abba*'; + Regex.RegEx:='a(b*)a'; + Regex.Replacement:='<\_>'; + AssertTrue('Match',Regex.Match); + AssertEquals('ReplaceText','<*abba*>',Regex.Replace); + AssertEquals('Result','*<*abba*>*',Regex.Subject); +end; + +procedure TTestRegExpCore.TestReplaceLeftOfMatch; +// \` +begin + Regex.subject:='x*abba*'; + Regex.RegEx:='a(b*)a'; + Regex.Replacement:='<\`>'; + AssertTrue('Match',Regex.Match); + AssertEquals('ReplaceText','',Regex.Replace); + AssertEquals('Result','x**',Regex.Subject); +end; + +procedure TTestRegExpCore.TestReplaceRightOfMatch; +// \' +begin + Regex.subject:='*abba*x'; + Regex.RegEx:='a(b*)a'; + Regex.Replacement:='<\''>'; + AssertTrue('Match',Regex.Match); + AssertEquals('ReplaceText','<*x>',Regex.Replace); + AssertEquals('Result','*<*x>*x',Regex.Subject); +end; + +procedure TTestRegExpCore.TestReplaceWholeMatch; +// \& +begin + Regex.subject:='*abba*'; + Regex.RegEx:='a(b*)a'; + Regex.Replacement:='<\&>'; + AssertTrue('Match',Regex.Match); + AssertEquals('ReplaceText','',Regex.Replace); + AssertEquals('Result','**',Regex.Subject); +end; + +procedure TTestRegExpCore.TestReplaceLastMatch; +// \& +begin + Regex.subject:='*abbcca*'; + Regex.RegEx:='a(b*)(c*)a'; + Regex.Replacement:='<\+>'; + AssertTrue('Match',Regex.Match); + AssertEquals('ReplaceText','',Regex.Replace); + AssertEquals('Result','**',Regex.Subject); +end; + + +procedure TTestRegExpCore.TestSplitAll; +begin + Regex.subject:='xyz abba abbba abbbba zyx'; + Regex.RegEx:='\s'; + Regex.Split(SplitSubject,0); + AssertEquals('Count',5,SplitSubject.Count); + AssertEquals('Item 0','xyz',SplitSubject[0]); + AssertEquals('Item 1','abba',SplitSubject[1]); + AssertEquals('Item 2','abbba',SplitSubject[2]); + AssertEquals('Item 3','abbbba',SplitSubject[3]); + AssertEquals('Item 4','zyx',SplitSubject[4]); +end; + +procedure TTestRegExpCore.TestSplitLimit; + +begin + Regex.subject:='xyz abba abbba abbbba zyx'; + Regex.RegEx:='\s'; + Regex.Split(SplitSubject,2); + AssertEquals('Count',2,SplitSubject.Count); + AssertEquals('Item 0','xyz',SplitSubject[0]); + AssertEquals('Item 1','abba abbba abbbba zyx',SplitSubject[1]); +end; + +procedure TTestRegExpCore.TestInitialCaps; +begin + AssertEquals('Initialcaps 1','Abc',InitialCaps('aBc')); + AssertEquals('Initialcaps 2',' Abc',InitialCaps(' aBc')); + AssertEquals('Initialcaps 3','Dad Abc',InitialCaps('dAd aBc')); + AssertEquals('Initialcaps 4','Dad Abc ',InitialCaps('dAd aBc ')); +end; + +procedure TTestRegExpCore.SetUp; +begin + FRegex:=TPerlRegEx.Create; + FRegEx.OnMatch:=@DoMatch; + FMatchEventCount:=0; + FSplitSubject:=TStringList.Create; +end; + +procedure TTestRegExpCore.TearDown; + +begin + FreeAndNil(FSplitSubject); + FreeAndNil(FRegex); +end; + +procedure TTestRegExpCore.TestHookup; +begin + AssertNotNull('Regex',Regex); + AssertTrue('Assigned OnMatch event',Assigned(Regex.OnMatch)); + AssertEquals('Match event count',0,FMatchEventCount); + +end; + +initialization + + RegisterTest(TTestRegExpCore); +end. +