diff --git a/components/lazedit/xregexpr.pas b/components/lazedit/xregexpr.pas index e72caf747a..4e0d220e8a 100644 --- a/components/lazedit/xregexpr.pas +++ b/components/lazedit/xregexpr.pas @@ -367,9 +367,10 @@ type regMustLen: integer; // length of regMust string regMustString: RegExprString; // string which must occur in match (got from regMust/regMustLen) LookAroundInfoList: PRegExprLookAroundInfo; - regNestedCalls: integer; // some attempt to prevent 'catastrophic backtracking' but not used + //regNestedCalls: integer; // some attempt to prevent 'catastrophic backtracking' but not used CurrentSubCalled: integer; + FMinMatchLen: integer; {$IFDEF UseFirstCharSet} FirstCharSet: TRegExprCharset; FirstCharArray: array[byte] of boolean; @@ -1722,6 +1723,8 @@ const OP_GBRANCH_EX = TREOp(68); OP_GBRANCH_EX_CI = TREOp(69); + OP_RESET_MATCHPOS = TReOp(70); + OP_NONE = high(TREOp); // We work with p-code through pointers, compatible with PRegExprChar. @@ -3156,7 +3159,8 @@ function TRegExpr.CompileRegExpr(ARegExp: PRegExprChar): boolean; var scan, scanTemp, longest, longestTemp: PRegExprChar; Len, LenTemp: integer; - FlagTemp: integer; + FlagTemp, MaxMatchLen: integer; + op: TREOp; begin Result := False; FlagTemp := 0; @@ -3219,6 +3223,7 @@ begin Exit; // Dig out information for optimizations. + IsFixedLengthEx(op, FMinMatchLen, MaxMatchLen); {$IFDEF UseFirstCharSet} FirstCharSet := []; FillFirstCharSet(regCodeWork); @@ -4866,6 +4871,11 @@ begin ret := EmitGroupRef(GrpIndex, fCompModifiers.I); FlagParse := FlagParse or FLAG_HASWIDTH or FLAG_SIMPLE; end; + 'K': + begin + ret := EmitNode(OP_RESET_MATCHPOS); + FlagParse := FlagParse or FLAG_NOT_QUANTIFIABLE; + end; {$IFDEF FastUnicodeData} 'p': begin @@ -5428,9 +5438,6 @@ end; type TRegExprMatchPrimLocals = record case TREOp of - OP_CLOSE_ATOMIC: ( - IsAtomic: Boolean; - ); {$IFDEF ComplexBraces} OP_LOOPENTRY: ( LoopInfo: TOpLoopInfo; @@ -5526,6 +5533,15 @@ begin Exit; end; + OP_RESET_MATCHPOS: + begin + save := GrpBounds[0].GrpStart[0]; + GrpBounds[0].GrpStart[0] := regInput; + Result := MatchPrim(next); + if not Result then + GrpBounds[0].GrpStart[0] := save; + exit; + end; OP_EOL: begin // \z matches at the very end @@ -5842,23 +5858,24 @@ begin begin no := PReGroupIndex((scan + REOpSz + RENextOffSz))^; save := GrpBounds[regRecursion].GrpStart[no]; + opnd := GrpBounds[regRecursion].GrpEnd[no]; // save2 GrpBounds[regRecursion].GrpStart[no] := regInput; Result := MatchPrim(next); if GrpBacktrackingAsAtom[no] then IsBacktrackingGroupAsAtom := False; GrpBacktrackingAsAtom[no] := False; - if not Result then + if not Result then begin GrpBounds[regRecursion].GrpStart[no] := save; + GrpBounds[regRecursion].GrpEnd[no] := opnd; + end; Exit; end; - OP_CLOSE, OP_CLOSE_ATOMIC: + OP_CLOSE: begin - Local.IsAtomic := scan^ = OP_CLOSE_ATOMIC; no := PReGroupIndex((scan + REOpSz + RENextOffSz))^; // handle atomic group, mark it as "done" // (we are here because some OP_BRANCH is matched) - save := GrpBounds[regRecursion].GrpEnd[no]; GrpBounds[regRecursion].GrpEnd[no] := regInput; // if we are in OP_SUBCALL* call, it called OP_OPEN*, so we must return @@ -5868,11 +5885,18 @@ begin Result := True; Exit; end; + end; + + OP_CLOSE_ATOMIC: + begin + no := PReGroupIndex((scan + REOpSz + RENextOffSz))^; + // handle atomic group, mark it as "done" + // (we are here because some OP_BRANCH is matched) + GrpBounds[regRecursion].GrpEnd[no] := regInput; Result := MatchPrim(next); if not Result then begin - GrpBounds[regRecursion].GrpEnd[no] := save; - if Local.IsAtomic and not IsBacktrackingGroupAsAtom then begin + if not IsBacktrackingGroupAsAtom then begin GrpBacktrackingAsAtom[no] := True; IsBacktrackingGroupAsAtom := True; end; @@ -5911,8 +5935,9 @@ begin if (next^ = OP_LOOKAROUND_OPTIONAL) then next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz; regInput := Local.LookAroundInfo.InputPos; - Result := MatchPrim(next); - Exit; + Result := False; + scan := next; + continue; end; end else @@ -5922,8 +5947,9 @@ begin if (next^ = OP_LOOKAROUND_OPTIONAL) then next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz; regInput := Local.LookAroundInfo.InputPos; - Result := MatchPrim(next); - Exit; + Result := False; + scan := next; + continue; end; end; @@ -6000,8 +6026,9 @@ begin if (next^ = OP_LOOKAROUND_OPTIONAL) then next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz; regInput := Local.LookAroundInfo.InputPos; - Result := MatchPrim(next); - Exit; + Result := False; + scan := next; + continue; end; end else @@ -6011,8 +6038,9 @@ begin if (next^ = OP_LOOKAROUND_OPTIONAL) then next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz; regInput := Local.LookAroundInfo.InputPos; - Result := MatchPrim(next); - Exit; + Result := False; + scan := next; + continue; end; end; @@ -6351,8 +6379,10 @@ begin end; no := FindRepeated(opnd, BracesMax); if no >= BracesMin then - if (nextch = #0) or (regInput^ = nextch) then - Result := MatchPrim(next); + if (nextch = #0) or (regInput^ = nextch) then begin + scan := next; + continue; + end; Exit; end; @@ -6493,21 +6523,16 @@ end; function TRegExpr.MatchAtOnePos(APos: PRegExprChar): boolean; begin regInput := APos; - regNestedCalls := 0; - regRecursion := 0; + //regNestedCalls := 0; fInputCurrentEnd := fInputEnd; - Result := False; - {$IFDEF RegExpWithStackOverflowCheck_DecStack_Frame} - StackLimit := StackBottom; - if StackLimit <> nil then - StackLimit := StackLimit + 36000; // Add for any calls within the current MatchPrim // FPC has "STACK_MARGIN = 16384;", but we need to call Error, ..., raise - {$ENDIF} + GrpBounds[0].GrpStart[0] := APos; Result := MatchPrim(regCodeWork); if Result then - begin - GrpBounds[0].GrpStart[0] := APos; - GrpBounds[0].GrpEnd[0] := regInput; - end; + Result := regInput >= GrpBounds[0].GrpStart[0]; + if Result then + GrpBounds[0].GrpEnd[0] := regInput + else + GrpBounds[0].GrpStart[0] := nil; end; procedure TRegExpr.ClearMatches; @@ -6530,6 +6555,7 @@ begin {$ENDIF} LookAroundInfoList := nil; CurrentSubCalled := -1; + regRecursion := 0; end; procedure TRegExpr.InitInternalGroupData; @@ -6583,7 +6609,7 @@ end; function TRegExpr.ExecPrimProtected(AOffset: Integer; ASlowChecks, ABackward: Boolean; ATryMatchOnlyStartingBefore: Integer): Boolean; var - Ptr: PRegExprChar; + Ptr, SearchEnd: PRegExprChar; begin Result := False; @@ -6631,6 +6657,12 @@ begin if StrLPos(fInputStart, PRegExprChar(regMustString), fInputEnd - fInputStart, length(regMustString)) = nil then exit; + {$IFDEF RegExpWithStackOverflowCheck_DecStack_Frame} + StackLimit := StackBottom; + if StackLimit <> nil then + StackLimit := StackLimit + 36000; // Add for any calls within the current MatchPrim // FPC has "STACK_MARGIN = 16384;", but we need to call Error, ..., raise + {$ENDIF} + FMatchesCleared := False; // ATryOnce or anchored match (it needs to be tried only once). if (ATryMatchOnlyStartingBefore = AOffset + 1) or (regAnchored in [raBOL, raOnlyOnce, raContinue]) then @@ -6651,40 +6683,53 @@ begin Exit; end; + // Messy cases: unanchored match. - if ABackward then - Inc(Ptr, 2) - else - Dec(Ptr); - repeat - if ABackward then - begin + if ABackward then begin + Inc(Ptr, 2); + repeat Dec(Ptr); if Ptr < fInputStart then Exit; - end - else - begin + + {$IFDEF UseFirstCharSet} + {$IFDEF UnicodeRE} + if Ord(Ptr^) <= $FF then + {$ENDIF} + if not FirstCharArray[byte(Ptr^)] then + Continue; + {$ENDIF} + + Result := MatchAtOnePos(Ptr); + // Exit on a match or after testing the end-of-string + if Result then + Exit; + until False; + end + else begin + Dec(Ptr); + SearchEnd := fInputEnd - FMinMatchLen; + if (ATryMatchOnlyStartingBefore > 0) and (fInputStart + ATryMatchOnlyStartingBefore < SearchEnd) then + SearchEnd := fInputStart + ATryMatchOnlyStartingBefore - 2; + repeat Inc(Ptr); - if Ptr > fInputEnd then + if Ptr > SearchEnd then Exit; - if (ATryMatchOnlyStartingBefore > 0) and (Ptr - fInputStart >= ATryMatchOnlyStartingBefore - 1) then + + {$IFDEF UseFirstCharSet} + {$IFDEF UnicodeRE} + if Ord(Ptr^) <= $FF then + {$ENDIF} + if not FirstCharArray[byte(Ptr^)] then + Continue; + {$ENDIF} + + Result := MatchAtOnePos(Ptr); + // Exit on a match or after testing the end-of-string + if Result then Exit; - end; - - {$IFDEF UseFirstCharSet} - {$IFDEF UnicodeRE} - if Ord(Ptr^) <= $FF then - {$ENDIF} - if not FirstCharArray[byte(Ptr^)] then - Continue; - {$ENDIF} - - Result := MatchAtOnePos(Ptr); - // Exit on a match or after testing the end-of-string - if Result then - Exit; - until False; + until False; + end; end; { of function TRegExpr.ExecPrim -------------------------------------------------------------- } @@ -7094,7 +7139,8 @@ begin OP_BOL, OP_BOL_ML, - OP_CONTINUE_POS: + OP_CONTINUE_POS, + OP_RESET_MATCHPOS: ; // Exit; OP_EOL, @@ -7669,6 +7715,8 @@ begin Result := 'SUBCALL'; OP_ANYLINEBREAK: Result := 'ANYLINEBREAK'; + OP_RESET_MATCHPOS: + Result := 'RESET_MATCHPOS'; else Error(reeDumpCorruptedOpcode); end; @@ -8082,7 +8130,7 @@ begin if (ABranchMaxLen = high(ABranchMaxLen)) and not(flfForceToStopAt in Flags) then Exit; end; - assert(s^=OP_CLOSE); + assert(s^=OP_CLOSE_ATOMIC); AMinLen := AMinLen + ASubLen; IncMaxLen(FndMaxLen, ASubMaxLen); Inc(s, REOpSz + RENextOffSz + ReGroupIndexSz); // consume the OP_CLOSE_ATOMIC;