mirror of
https://gitlab.com/freepascal.org/lazarus/lazarus.git
synced 2025-08-11 19:16:16 +02:00
LazEdit: update TRegExpr
This commit is contained in:
parent
7fd8824fbc
commit
327c5770ab
@ -367,9 +367,10 @@ type
|
||||
regMustLen: integer; // length of regMust string
|
||||
regMustString: RegExprString; // string which must occur in match (got from regMust/regMustLen)
|
||||
LookAroundInfoList: PRegExprLookAroundInfo;
|
||||
regNestedCalls: integer; // some attempt to prevent 'catastrophic backtracking' but not used
|
||||
//regNestedCalls: integer; // some attempt to prevent 'catastrophic backtracking' but not used
|
||||
CurrentSubCalled: integer;
|
||||
|
||||
FMinMatchLen: integer;
|
||||
{$IFDEF UseFirstCharSet}
|
||||
FirstCharSet: TRegExprCharset;
|
||||
FirstCharArray: array[byte] of boolean;
|
||||
@ -1722,6 +1723,8 @@ const
|
||||
OP_GBRANCH_EX = TREOp(68);
|
||||
OP_GBRANCH_EX_CI = TREOp(69);
|
||||
|
||||
OP_RESET_MATCHPOS = TReOp(70);
|
||||
|
||||
OP_NONE = high(TREOp);
|
||||
|
||||
// We work with p-code through pointers, compatible with PRegExprChar.
|
||||
@ -3156,7 +3159,8 @@ function TRegExpr.CompileRegExpr(ARegExp: PRegExprChar): boolean;
|
||||
var
|
||||
scan, scanTemp, longest, longestTemp: PRegExprChar;
|
||||
Len, LenTemp: integer;
|
||||
FlagTemp: integer;
|
||||
FlagTemp, MaxMatchLen: integer;
|
||||
op: TREOp;
|
||||
begin
|
||||
Result := False;
|
||||
FlagTemp := 0;
|
||||
@ -3219,6 +3223,7 @@ begin
|
||||
Exit;
|
||||
|
||||
// Dig out information for optimizations.
|
||||
IsFixedLengthEx(op, FMinMatchLen, MaxMatchLen);
|
||||
{$IFDEF UseFirstCharSet}
|
||||
FirstCharSet := [];
|
||||
FillFirstCharSet(regCodeWork);
|
||||
@ -4866,6 +4871,11 @@ begin
|
||||
ret := EmitGroupRef(GrpIndex, fCompModifiers.I);
|
||||
FlagParse := FlagParse or FLAG_HASWIDTH or FLAG_SIMPLE;
|
||||
end;
|
||||
'K':
|
||||
begin
|
||||
ret := EmitNode(OP_RESET_MATCHPOS);
|
||||
FlagParse := FlagParse or FLAG_NOT_QUANTIFIABLE;
|
||||
end;
|
||||
{$IFDEF FastUnicodeData}
|
||||
'p':
|
||||
begin
|
||||
@ -5428,9 +5438,6 @@ end;
|
||||
type
|
||||
TRegExprMatchPrimLocals = record
|
||||
case TREOp of
|
||||
OP_CLOSE_ATOMIC: (
|
||||
IsAtomic: Boolean;
|
||||
);
|
||||
{$IFDEF ComplexBraces}
|
||||
OP_LOOPENTRY: (
|
||||
LoopInfo: TOpLoopInfo;
|
||||
@ -5526,6 +5533,15 @@ begin
|
||||
Exit;
|
||||
end;
|
||||
|
||||
OP_RESET_MATCHPOS:
|
||||
begin
|
||||
save := GrpBounds[0].GrpStart[0];
|
||||
GrpBounds[0].GrpStart[0] := regInput;
|
||||
Result := MatchPrim(next);
|
||||
if not Result then
|
||||
GrpBounds[0].GrpStart[0] := save;
|
||||
exit;
|
||||
end;
|
||||
OP_EOL:
|
||||
begin
|
||||
// \z matches at the very end
|
||||
@ -5842,23 +5858,24 @@ begin
|
||||
begin
|
||||
no := PReGroupIndex((scan + REOpSz + RENextOffSz))^;
|
||||
save := GrpBounds[regRecursion].GrpStart[no];
|
||||
opnd := GrpBounds[regRecursion].GrpEnd[no]; // save2
|
||||
GrpBounds[regRecursion].GrpStart[no] := regInput;
|
||||
Result := MatchPrim(next);
|
||||
if GrpBacktrackingAsAtom[no] then
|
||||
IsBacktrackingGroupAsAtom := False;
|
||||
GrpBacktrackingAsAtom[no] := False;
|
||||
if not Result then
|
||||
if not Result then begin
|
||||
GrpBounds[regRecursion].GrpStart[no] := save;
|
||||
GrpBounds[regRecursion].GrpEnd[no] := opnd;
|
||||
end;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
OP_CLOSE, OP_CLOSE_ATOMIC:
|
||||
OP_CLOSE:
|
||||
begin
|
||||
Local.IsAtomic := scan^ = OP_CLOSE_ATOMIC;
|
||||
no := PReGroupIndex((scan + REOpSz + RENextOffSz))^;
|
||||
// handle atomic group, mark it as "done"
|
||||
// (we are here because some OP_BRANCH is matched)
|
||||
save := GrpBounds[regRecursion].GrpEnd[no];
|
||||
GrpBounds[regRecursion].GrpEnd[no] := regInput;
|
||||
|
||||
// if we are in OP_SUBCALL* call, it called OP_OPEN*, so we must return
|
||||
@ -5868,11 +5885,18 @@ begin
|
||||
Result := True;
|
||||
Exit;
|
||||
end;
|
||||
end;
|
||||
|
||||
OP_CLOSE_ATOMIC:
|
||||
begin
|
||||
no := PReGroupIndex((scan + REOpSz + RENextOffSz))^;
|
||||
// handle atomic group, mark it as "done"
|
||||
// (we are here because some OP_BRANCH is matched)
|
||||
GrpBounds[regRecursion].GrpEnd[no] := regInput;
|
||||
|
||||
Result := MatchPrim(next);
|
||||
if not Result then begin
|
||||
GrpBounds[regRecursion].GrpEnd[no] := save;
|
||||
if Local.IsAtomic and not IsBacktrackingGroupAsAtom then begin
|
||||
if not IsBacktrackingGroupAsAtom then begin
|
||||
GrpBacktrackingAsAtom[no] := True;
|
||||
IsBacktrackingGroupAsAtom := True;
|
||||
end;
|
||||
@ -5911,8 +5935,9 @@ begin
|
||||
if (next^ = OP_LOOKAROUND_OPTIONAL) then
|
||||
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
|
||||
regInput := Local.LookAroundInfo.InputPos;
|
||||
Result := MatchPrim(next);
|
||||
Exit;
|
||||
Result := False;
|
||||
scan := next;
|
||||
continue;
|
||||
end;
|
||||
end
|
||||
else
|
||||
@ -5922,8 +5947,9 @@ begin
|
||||
if (next^ = OP_LOOKAROUND_OPTIONAL) then
|
||||
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
|
||||
regInput := Local.LookAroundInfo.InputPos;
|
||||
Result := MatchPrim(next);
|
||||
Exit;
|
||||
Result := False;
|
||||
scan := next;
|
||||
continue;
|
||||
end;
|
||||
end;
|
||||
|
||||
@ -6000,8 +6026,9 @@ begin
|
||||
if (next^ = OP_LOOKAROUND_OPTIONAL) then
|
||||
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
|
||||
regInput := Local.LookAroundInfo.InputPos;
|
||||
Result := MatchPrim(next);
|
||||
Exit;
|
||||
Result := False;
|
||||
scan := next;
|
||||
continue;
|
||||
end;
|
||||
end
|
||||
else
|
||||
@ -6011,8 +6038,9 @@ begin
|
||||
if (next^ = OP_LOOKAROUND_OPTIONAL) then
|
||||
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
|
||||
regInput := Local.LookAroundInfo.InputPos;
|
||||
Result := MatchPrim(next);
|
||||
Exit;
|
||||
Result := False;
|
||||
scan := next;
|
||||
continue;
|
||||
end;
|
||||
end;
|
||||
|
||||
@ -6351,8 +6379,10 @@ begin
|
||||
end;
|
||||
no := FindRepeated(opnd, BracesMax);
|
||||
if no >= BracesMin then
|
||||
if (nextch = #0) or (regInput^ = nextch) then
|
||||
Result := MatchPrim(next);
|
||||
if (nextch = #0) or (regInput^ = nextch) then begin
|
||||
scan := next;
|
||||
continue;
|
||||
end;
|
||||
Exit;
|
||||
end;
|
||||
|
||||
@ -6493,21 +6523,16 @@ end;
|
||||
function TRegExpr.MatchAtOnePos(APos: PRegExprChar): boolean;
|
||||
begin
|
||||
regInput := APos;
|
||||
regNestedCalls := 0;
|
||||
regRecursion := 0;
|
||||
//regNestedCalls := 0;
|
||||
fInputCurrentEnd := fInputEnd;
|
||||
Result := False;
|
||||
{$IFDEF RegExpWithStackOverflowCheck_DecStack_Frame}
|
||||
StackLimit := StackBottom;
|
||||
if StackLimit <> nil then
|
||||
StackLimit := StackLimit + 36000; // Add for any calls within the current MatchPrim // FPC has "STACK_MARGIN = 16384;", but we need to call Error, ..., raise
|
||||
{$ENDIF}
|
||||
GrpBounds[0].GrpStart[0] := APos;
|
||||
Result := MatchPrim(regCodeWork);
|
||||
if Result then
|
||||
begin
|
||||
GrpBounds[0].GrpStart[0] := APos;
|
||||
GrpBounds[0].GrpEnd[0] := regInput;
|
||||
end;
|
||||
Result := regInput >= GrpBounds[0].GrpStart[0];
|
||||
if Result then
|
||||
GrpBounds[0].GrpEnd[0] := regInput
|
||||
else
|
||||
GrpBounds[0].GrpStart[0] := nil;
|
||||
end;
|
||||
|
||||
procedure TRegExpr.ClearMatches;
|
||||
@ -6530,6 +6555,7 @@ begin
|
||||
{$ENDIF}
|
||||
LookAroundInfoList := nil;
|
||||
CurrentSubCalled := -1;
|
||||
regRecursion := 0;
|
||||
end;
|
||||
|
||||
procedure TRegExpr.InitInternalGroupData;
|
||||
@ -6583,7 +6609,7 @@ end;
|
||||
function TRegExpr.ExecPrimProtected(AOffset: Integer; ASlowChecks,
|
||||
ABackward: Boolean; ATryMatchOnlyStartingBefore: Integer): Boolean;
|
||||
var
|
||||
Ptr: PRegExprChar;
|
||||
Ptr, SearchEnd: PRegExprChar;
|
||||
begin
|
||||
Result := False;
|
||||
|
||||
@ -6631,6 +6657,12 @@ begin
|
||||
if StrLPos(fInputStart, PRegExprChar(regMustString), fInputEnd - fInputStart, length(regMustString)) = nil then
|
||||
exit;
|
||||
|
||||
{$IFDEF RegExpWithStackOverflowCheck_DecStack_Frame}
|
||||
StackLimit := StackBottom;
|
||||
if StackLimit <> nil then
|
||||
StackLimit := StackLimit + 36000; // Add for any calls within the current MatchPrim // FPC has "STACK_MARGIN = 16384;", but we need to call Error, ..., raise
|
||||
{$ENDIF}
|
||||
|
||||
FMatchesCleared := False;
|
||||
// ATryOnce or anchored match (it needs to be tried only once).
|
||||
if (ATryMatchOnlyStartingBefore = AOffset + 1) or (regAnchored in [raBOL, raOnlyOnce, raContinue]) then
|
||||
@ -6651,40 +6683,53 @@ begin
|
||||
Exit;
|
||||
end;
|
||||
|
||||
|
||||
// Messy cases: unanchored match.
|
||||
if ABackward then
|
||||
Inc(Ptr, 2)
|
||||
else
|
||||
Dec(Ptr);
|
||||
repeat
|
||||
if ABackward then
|
||||
begin
|
||||
if ABackward then begin
|
||||
Inc(Ptr, 2);
|
||||
repeat
|
||||
Dec(Ptr);
|
||||
if Ptr < fInputStart then
|
||||
Exit;
|
||||
end
|
||||
else
|
||||
begin
|
||||
|
||||
{$IFDEF UseFirstCharSet}
|
||||
{$IFDEF UnicodeRE}
|
||||
if Ord(Ptr^) <= $FF then
|
||||
{$ENDIF}
|
||||
if not FirstCharArray[byte(Ptr^)] then
|
||||
Continue;
|
||||
{$ENDIF}
|
||||
|
||||
Result := MatchAtOnePos(Ptr);
|
||||
// Exit on a match or after testing the end-of-string
|
||||
if Result then
|
||||
Exit;
|
||||
until False;
|
||||
end
|
||||
else begin
|
||||
Dec(Ptr);
|
||||
SearchEnd := fInputEnd - FMinMatchLen;
|
||||
if (ATryMatchOnlyStartingBefore > 0) and (fInputStart + ATryMatchOnlyStartingBefore < SearchEnd) then
|
||||
SearchEnd := fInputStart + ATryMatchOnlyStartingBefore - 2;
|
||||
repeat
|
||||
Inc(Ptr);
|
||||
if Ptr > fInputEnd then
|
||||
if Ptr > SearchEnd then
|
||||
Exit;
|
||||
if (ATryMatchOnlyStartingBefore > 0) and (Ptr - fInputStart >= ATryMatchOnlyStartingBefore - 1) then
|
||||
|
||||
{$IFDEF UseFirstCharSet}
|
||||
{$IFDEF UnicodeRE}
|
||||
if Ord(Ptr^) <= $FF then
|
||||
{$ENDIF}
|
||||
if not FirstCharArray[byte(Ptr^)] then
|
||||
Continue;
|
||||
{$ENDIF}
|
||||
|
||||
Result := MatchAtOnePos(Ptr);
|
||||
// Exit on a match or after testing the end-of-string
|
||||
if Result then
|
||||
Exit;
|
||||
end;
|
||||
|
||||
{$IFDEF UseFirstCharSet}
|
||||
{$IFDEF UnicodeRE}
|
||||
if Ord(Ptr^) <= $FF then
|
||||
{$ENDIF}
|
||||
if not FirstCharArray[byte(Ptr^)] then
|
||||
Continue;
|
||||
{$ENDIF}
|
||||
|
||||
Result := MatchAtOnePos(Ptr);
|
||||
// Exit on a match or after testing the end-of-string
|
||||
if Result then
|
||||
Exit;
|
||||
until False;
|
||||
until False;
|
||||
end;
|
||||
end; { of function TRegExpr.ExecPrim
|
||||
-------------------------------------------------------------- }
|
||||
|
||||
@ -7094,7 +7139,8 @@ begin
|
||||
|
||||
OP_BOL,
|
||||
OP_BOL_ML,
|
||||
OP_CONTINUE_POS:
|
||||
OP_CONTINUE_POS,
|
||||
OP_RESET_MATCHPOS:
|
||||
; // Exit;
|
||||
|
||||
OP_EOL,
|
||||
@ -7669,6 +7715,8 @@ begin
|
||||
Result := 'SUBCALL';
|
||||
OP_ANYLINEBREAK:
|
||||
Result := 'ANYLINEBREAK';
|
||||
OP_RESET_MATCHPOS:
|
||||
Result := 'RESET_MATCHPOS';
|
||||
else
|
||||
Error(reeDumpCorruptedOpcode);
|
||||
end;
|
||||
@ -8082,7 +8130,7 @@ begin
|
||||
if (ABranchMaxLen = high(ABranchMaxLen)) and not(flfForceToStopAt in Flags) then
|
||||
Exit;
|
||||
end;
|
||||
assert(s^=OP_CLOSE);
|
||||
assert(s^=OP_CLOSE_ATOMIC);
|
||||
AMinLen := AMinLen + ASubLen;
|
||||
IncMaxLen(FndMaxLen, ASubMaxLen);
|
||||
Inc(s, REOpSz + RENextOffSz + ReGroupIndexSz); // consume the OP_CLOSE_ATOMIC;
|
||||
|
Loading…
Reference in New Issue
Block a user