LazEdit: update TRegExpr

This commit is contained in:
Martin 2023-11-20 23:54:24 +01:00
parent 7fd8824fbc
commit 327c5770ab

View File

@ -367,9 +367,10 @@ type
regMustLen: integer; // length of regMust string
regMustString: RegExprString; // string which must occur in match (got from regMust/regMustLen)
LookAroundInfoList: PRegExprLookAroundInfo;
regNestedCalls: integer; // some attempt to prevent 'catastrophic backtracking' but not used
//regNestedCalls: integer; // some attempt to prevent 'catastrophic backtracking' but not used
CurrentSubCalled: integer;
FMinMatchLen: integer;
{$IFDEF UseFirstCharSet}
FirstCharSet: TRegExprCharset;
FirstCharArray: array[byte] of boolean;
@ -1722,6 +1723,8 @@ const
OP_GBRANCH_EX = TREOp(68);
OP_GBRANCH_EX_CI = TREOp(69);
OP_RESET_MATCHPOS = TReOp(70);
OP_NONE = high(TREOp);
// We work with p-code through pointers, compatible with PRegExprChar.
@ -3156,7 +3159,8 @@ function TRegExpr.CompileRegExpr(ARegExp: PRegExprChar): boolean;
var
scan, scanTemp, longest, longestTemp: PRegExprChar;
Len, LenTemp: integer;
FlagTemp: integer;
FlagTemp, MaxMatchLen: integer;
op: TREOp;
begin
Result := False;
FlagTemp := 0;
@ -3219,6 +3223,7 @@ begin
Exit;
// Dig out information for optimizations.
IsFixedLengthEx(op, FMinMatchLen, MaxMatchLen);
{$IFDEF UseFirstCharSet}
FirstCharSet := [];
FillFirstCharSet(regCodeWork);
@ -4866,6 +4871,11 @@ begin
ret := EmitGroupRef(GrpIndex, fCompModifiers.I);
FlagParse := FlagParse or FLAG_HASWIDTH or FLAG_SIMPLE;
end;
'K':
begin
ret := EmitNode(OP_RESET_MATCHPOS);
FlagParse := FlagParse or FLAG_NOT_QUANTIFIABLE;
end;
{$IFDEF FastUnicodeData}
'p':
begin
@ -5428,9 +5438,6 @@ end;
type
TRegExprMatchPrimLocals = record
case TREOp of
OP_CLOSE_ATOMIC: (
IsAtomic: Boolean;
);
{$IFDEF ComplexBraces}
OP_LOOPENTRY: (
LoopInfo: TOpLoopInfo;
@ -5526,6 +5533,15 @@ begin
Exit;
end;
OP_RESET_MATCHPOS:
begin
save := GrpBounds[0].GrpStart[0];
GrpBounds[0].GrpStart[0] := regInput;
Result := MatchPrim(next);
if not Result then
GrpBounds[0].GrpStart[0] := save;
exit;
end;
OP_EOL:
begin
// \z matches at the very end
@ -5842,23 +5858,24 @@ begin
begin
no := PReGroupIndex((scan + REOpSz + RENextOffSz))^;
save := GrpBounds[regRecursion].GrpStart[no];
opnd := GrpBounds[regRecursion].GrpEnd[no]; // save2
GrpBounds[regRecursion].GrpStart[no] := regInput;
Result := MatchPrim(next);
if GrpBacktrackingAsAtom[no] then
IsBacktrackingGroupAsAtom := False;
GrpBacktrackingAsAtom[no] := False;
if not Result then
if not Result then begin
GrpBounds[regRecursion].GrpStart[no] := save;
GrpBounds[regRecursion].GrpEnd[no] := opnd;
end;
Exit;
end;
OP_CLOSE, OP_CLOSE_ATOMIC:
OP_CLOSE:
begin
Local.IsAtomic := scan^ = OP_CLOSE_ATOMIC;
no := PReGroupIndex((scan + REOpSz + RENextOffSz))^;
// handle atomic group, mark it as "done"
// (we are here because some OP_BRANCH is matched)
save := GrpBounds[regRecursion].GrpEnd[no];
GrpBounds[regRecursion].GrpEnd[no] := regInput;
// if we are in OP_SUBCALL* call, it called OP_OPEN*, so we must return
@ -5868,11 +5885,18 @@ begin
Result := True;
Exit;
end;
end;
OP_CLOSE_ATOMIC:
begin
no := PReGroupIndex((scan + REOpSz + RENextOffSz))^;
// handle atomic group, mark it as "done"
// (we are here because some OP_BRANCH is matched)
GrpBounds[regRecursion].GrpEnd[no] := regInput;
Result := MatchPrim(next);
if not Result then begin
GrpBounds[regRecursion].GrpEnd[no] := save;
if Local.IsAtomic and not IsBacktrackingGroupAsAtom then begin
if not IsBacktrackingGroupAsAtom then begin
GrpBacktrackingAsAtom[no] := True;
IsBacktrackingGroupAsAtom := True;
end;
@ -5911,8 +5935,9 @@ begin
if (next^ = OP_LOOKAROUND_OPTIONAL) then
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
regInput := Local.LookAroundInfo.InputPos;
Result := MatchPrim(next);
Exit;
Result := False;
scan := next;
continue;
end;
end
else
@ -5922,8 +5947,9 @@ begin
if (next^ = OP_LOOKAROUND_OPTIONAL) then
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
regInput := Local.LookAroundInfo.InputPos;
Result := MatchPrim(next);
Exit;
Result := False;
scan := next;
continue;
end;
end;
@ -6000,8 +6026,9 @@ begin
if (next^ = OP_LOOKAROUND_OPTIONAL) then
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
regInput := Local.LookAroundInfo.InputPos;
Result := MatchPrim(next);
Exit;
Result := False;
scan := next;
continue;
end;
end
else
@ -6011,8 +6038,9 @@ begin
if (next^ = OP_LOOKAROUND_OPTIONAL) then
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
regInput := Local.LookAroundInfo.InputPos;
Result := MatchPrim(next);
Exit;
Result := False;
scan := next;
continue;
end;
end;
@ -6351,8 +6379,10 @@ begin
end;
no := FindRepeated(opnd, BracesMax);
if no >= BracesMin then
if (nextch = #0) or (regInput^ = nextch) then
Result := MatchPrim(next);
if (nextch = #0) or (regInput^ = nextch) then begin
scan := next;
continue;
end;
Exit;
end;
@ -6493,21 +6523,16 @@ end;
function TRegExpr.MatchAtOnePos(APos: PRegExprChar): boolean;
begin
regInput := APos;
regNestedCalls := 0;
regRecursion := 0;
//regNestedCalls := 0;
fInputCurrentEnd := fInputEnd;
Result := False;
{$IFDEF RegExpWithStackOverflowCheck_DecStack_Frame}
StackLimit := StackBottom;
if StackLimit <> nil then
StackLimit := StackLimit + 36000; // Add for any calls within the current MatchPrim // FPC has "STACK_MARGIN = 16384;", but we need to call Error, ..., raise
{$ENDIF}
GrpBounds[0].GrpStart[0] := APos;
Result := MatchPrim(regCodeWork);
if Result then
begin
GrpBounds[0].GrpStart[0] := APos;
GrpBounds[0].GrpEnd[0] := regInput;
end;
Result := regInput >= GrpBounds[0].GrpStart[0];
if Result then
GrpBounds[0].GrpEnd[0] := regInput
else
GrpBounds[0].GrpStart[0] := nil;
end;
procedure TRegExpr.ClearMatches;
@ -6530,6 +6555,7 @@ begin
{$ENDIF}
LookAroundInfoList := nil;
CurrentSubCalled := -1;
regRecursion := 0;
end;
procedure TRegExpr.InitInternalGroupData;
@ -6583,7 +6609,7 @@ end;
function TRegExpr.ExecPrimProtected(AOffset: Integer; ASlowChecks,
ABackward: Boolean; ATryMatchOnlyStartingBefore: Integer): Boolean;
var
Ptr: PRegExprChar;
Ptr, SearchEnd: PRegExprChar;
begin
Result := False;
@ -6631,6 +6657,12 @@ begin
if StrLPos(fInputStart, PRegExprChar(regMustString), fInputEnd - fInputStart, length(regMustString)) = nil then
exit;
{$IFDEF RegExpWithStackOverflowCheck_DecStack_Frame}
StackLimit := StackBottom;
if StackLimit <> nil then
StackLimit := StackLimit + 36000; // Add for any calls within the current MatchPrim // FPC has "STACK_MARGIN = 16384;", but we need to call Error, ..., raise
{$ENDIF}
FMatchesCleared := False;
// ATryOnce or anchored match (it needs to be tried only once).
if (ATryMatchOnlyStartingBefore = AOffset + 1) or (regAnchored in [raBOL, raOnlyOnce, raContinue]) then
@ -6651,40 +6683,53 @@ begin
Exit;
end;
// Messy cases: unanchored match.
if ABackward then
Inc(Ptr, 2)
else
Dec(Ptr);
repeat
if ABackward then
begin
if ABackward then begin
Inc(Ptr, 2);
repeat
Dec(Ptr);
if Ptr < fInputStart then
Exit;
end
else
begin
{$IFDEF UseFirstCharSet}
{$IFDEF UnicodeRE}
if Ord(Ptr^) <= $FF then
{$ENDIF}
if not FirstCharArray[byte(Ptr^)] then
Continue;
{$ENDIF}
Result := MatchAtOnePos(Ptr);
// Exit on a match or after testing the end-of-string
if Result then
Exit;
until False;
end
else begin
Dec(Ptr);
SearchEnd := fInputEnd - FMinMatchLen;
if (ATryMatchOnlyStartingBefore > 0) and (fInputStart + ATryMatchOnlyStartingBefore < SearchEnd) then
SearchEnd := fInputStart + ATryMatchOnlyStartingBefore - 2;
repeat
Inc(Ptr);
if Ptr > fInputEnd then
if Ptr > SearchEnd then
Exit;
if (ATryMatchOnlyStartingBefore > 0) and (Ptr - fInputStart >= ATryMatchOnlyStartingBefore - 1) then
{$IFDEF UseFirstCharSet}
{$IFDEF UnicodeRE}
if Ord(Ptr^) <= $FF then
{$ENDIF}
if not FirstCharArray[byte(Ptr^)] then
Continue;
{$ENDIF}
Result := MatchAtOnePos(Ptr);
// Exit on a match or after testing the end-of-string
if Result then
Exit;
end;
{$IFDEF UseFirstCharSet}
{$IFDEF UnicodeRE}
if Ord(Ptr^) <= $FF then
{$ENDIF}
if not FirstCharArray[byte(Ptr^)] then
Continue;
{$ENDIF}
Result := MatchAtOnePos(Ptr);
// Exit on a match or after testing the end-of-string
if Result then
Exit;
until False;
until False;
end;
end; { of function TRegExpr.ExecPrim
-------------------------------------------------------------- }
@ -7094,7 +7139,8 @@ begin
OP_BOL,
OP_BOL_ML,
OP_CONTINUE_POS:
OP_CONTINUE_POS,
OP_RESET_MATCHPOS:
; // Exit;
OP_EOL,
@ -7669,6 +7715,8 @@ begin
Result := 'SUBCALL';
OP_ANYLINEBREAK:
Result := 'ANYLINEBREAK';
OP_RESET_MATCHPOS:
Result := 'RESET_MATCHPOS';
else
Error(reeDumpCorruptedOpcode);
end;
@ -8082,7 +8130,7 @@ begin
if (ABranchMaxLen = high(ABranchMaxLen)) and not(flfForceToStopAt in Flags) then
Exit;
end;
assert(s^=OP_CLOSE);
assert(s^=OP_CLOSE_ATOMIC);
AMinLen := AMinLen + ASubLen;
IncMaxLen(FndMaxLen, ASubMaxLen);
Inc(s, REOpSz + RENextOffSz + ReGroupIndexSz); // consume the OP_CLOSE_ATOMIC;