Mirror of https://gitlab.com/freepascal.org/lazarus/lazarus.git
(synced 2025-08-12 00:55:59 +02:00)
LazEdit: update TRegExpr
This commit is contained in:
Parent: 7fd8824fbc
Commit: 327c5770ab
@ -367,9 +367,10 @@ type
|
|||||||
regMustLen: integer; // length of regMust string
|
regMustLen: integer; // length of regMust string
|
||||||
regMustString: RegExprString; // string which must occur in match (got from regMust/regMustLen)
|
regMustString: RegExprString; // string which must occur in match (got from regMust/regMustLen)
|
||||||
LookAroundInfoList: PRegExprLookAroundInfo;
|
LookAroundInfoList: PRegExprLookAroundInfo;
|
||||||
regNestedCalls: integer; // some attempt to prevent 'catastrophic backtracking' but not used
|
//regNestedCalls: integer; // some attempt to prevent 'catastrophic backtracking' but not used
|
||||||
CurrentSubCalled: integer;
|
CurrentSubCalled: integer;
|
||||||
|
|
||||||
|
FMinMatchLen: integer;
|
||||||
{$IFDEF UseFirstCharSet}
|
{$IFDEF UseFirstCharSet}
|
||||||
FirstCharSet: TRegExprCharset;
|
FirstCharSet: TRegExprCharset;
|
||||||
FirstCharArray: array[byte] of boolean;
|
FirstCharArray: array[byte] of boolean;
|
||||||
@ -1722,6 +1723,8 @@ const
|
|||||||
OP_GBRANCH_EX = TREOp(68);
|
OP_GBRANCH_EX = TREOp(68);
|
||||||
OP_GBRANCH_EX_CI = TREOp(69);
|
OP_GBRANCH_EX_CI = TREOp(69);
|
||||||
|
|
||||||
|
OP_RESET_MATCHPOS = TReOp(70);
|
||||||
|
|
||||||
OP_NONE = high(TREOp);
|
OP_NONE = high(TREOp);
|
||||||
|
|
||||||
// We work with p-code through pointers, compatible with PRegExprChar.
|
// We work with p-code through pointers, compatible with PRegExprChar.
|
||||||
@ -3156,7 +3159,8 @@ function TRegExpr.CompileRegExpr(ARegExp: PRegExprChar): boolean;
|
|||||||
var
|
var
|
||||||
scan, scanTemp, longest, longestTemp: PRegExprChar;
|
scan, scanTemp, longest, longestTemp: PRegExprChar;
|
||||||
Len, LenTemp: integer;
|
Len, LenTemp: integer;
|
||||||
FlagTemp: integer;
|
FlagTemp, MaxMatchLen: integer;
|
||||||
|
op: TREOp;
|
||||||
begin
|
begin
|
||||||
Result := False;
|
Result := False;
|
||||||
FlagTemp := 0;
|
FlagTemp := 0;
|
||||||
@ -3219,6 +3223,7 @@ begin
|
|||||||
Exit;
|
Exit;
|
||||||
|
|
||||||
// Dig out information for optimizations.
|
// Dig out information for optimizations.
|
||||||
|
IsFixedLengthEx(op, FMinMatchLen, MaxMatchLen);
|
||||||
{$IFDEF UseFirstCharSet}
|
{$IFDEF UseFirstCharSet}
|
||||||
FirstCharSet := [];
|
FirstCharSet := [];
|
||||||
FillFirstCharSet(regCodeWork);
|
FillFirstCharSet(regCodeWork);
|
||||||
@ -4866,6 +4871,11 @@ begin
|
|||||||
ret := EmitGroupRef(GrpIndex, fCompModifiers.I);
|
ret := EmitGroupRef(GrpIndex, fCompModifiers.I);
|
||||||
FlagParse := FlagParse or FLAG_HASWIDTH or FLAG_SIMPLE;
|
FlagParse := FlagParse or FLAG_HASWIDTH or FLAG_SIMPLE;
|
||||||
end;
|
end;
|
||||||
|
'K':
|
||||||
|
begin
|
||||||
|
ret := EmitNode(OP_RESET_MATCHPOS);
|
||||||
|
FlagParse := FlagParse or FLAG_NOT_QUANTIFIABLE;
|
||||||
|
end;
|
||||||
{$IFDEF FastUnicodeData}
|
{$IFDEF FastUnicodeData}
|
||||||
'p':
|
'p':
|
||||||
begin
|
begin
|
||||||
@ -5428,9 +5438,6 @@ end;
|
|||||||
type
|
type
|
||||||
TRegExprMatchPrimLocals = record
|
TRegExprMatchPrimLocals = record
|
||||||
case TREOp of
|
case TREOp of
|
||||||
OP_CLOSE_ATOMIC: (
|
|
||||||
IsAtomic: Boolean;
|
|
||||||
);
|
|
||||||
{$IFDEF ComplexBraces}
|
{$IFDEF ComplexBraces}
|
||||||
OP_LOOPENTRY: (
|
OP_LOOPENTRY: (
|
||||||
LoopInfo: TOpLoopInfo;
|
LoopInfo: TOpLoopInfo;
|
||||||
@ -5526,6 +5533,15 @@ begin
|
|||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
OP_RESET_MATCHPOS:
|
||||||
|
begin
|
||||||
|
save := GrpBounds[0].GrpStart[0];
|
||||||
|
GrpBounds[0].GrpStart[0] := regInput;
|
||||||
|
Result := MatchPrim(next);
|
||||||
|
if not Result then
|
||||||
|
GrpBounds[0].GrpStart[0] := save;
|
||||||
|
exit;
|
||||||
|
end;
|
||||||
OP_EOL:
|
OP_EOL:
|
||||||
begin
|
begin
|
||||||
// \z matches at the very end
|
// \z matches at the very end
|
||||||
@ -5842,23 +5858,24 @@ begin
|
|||||||
begin
|
begin
|
||||||
no := PReGroupIndex((scan + REOpSz + RENextOffSz))^;
|
no := PReGroupIndex((scan + REOpSz + RENextOffSz))^;
|
||||||
save := GrpBounds[regRecursion].GrpStart[no];
|
save := GrpBounds[regRecursion].GrpStart[no];
|
||||||
|
opnd := GrpBounds[regRecursion].GrpEnd[no]; // save2
|
||||||
GrpBounds[regRecursion].GrpStart[no] := regInput;
|
GrpBounds[regRecursion].GrpStart[no] := regInput;
|
||||||
Result := MatchPrim(next);
|
Result := MatchPrim(next);
|
||||||
if GrpBacktrackingAsAtom[no] then
|
if GrpBacktrackingAsAtom[no] then
|
||||||
IsBacktrackingGroupAsAtom := False;
|
IsBacktrackingGroupAsAtom := False;
|
||||||
GrpBacktrackingAsAtom[no] := False;
|
GrpBacktrackingAsAtom[no] := False;
|
||||||
if not Result then
|
if not Result then begin
|
||||||
GrpBounds[regRecursion].GrpStart[no] := save;
|
GrpBounds[regRecursion].GrpStart[no] := save;
|
||||||
|
GrpBounds[regRecursion].GrpEnd[no] := opnd;
|
||||||
|
end;
|
||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
OP_CLOSE, OP_CLOSE_ATOMIC:
|
OP_CLOSE:
|
||||||
begin
|
begin
|
||||||
Local.IsAtomic := scan^ = OP_CLOSE_ATOMIC;
|
|
||||||
no := PReGroupIndex((scan + REOpSz + RENextOffSz))^;
|
no := PReGroupIndex((scan + REOpSz + RENextOffSz))^;
|
||||||
// handle atomic group, mark it as "done"
|
// handle atomic group, mark it as "done"
|
||||||
// (we are here because some OP_BRANCH is matched)
|
// (we are here because some OP_BRANCH is matched)
|
||||||
save := GrpBounds[regRecursion].GrpEnd[no];
|
|
||||||
GrpBounds[regRecursion].GrpEnd[no] := regInput;
|
GrpBounds[regRecursion].GrpEnd[no] := regInput;
|
||||||
|
|
||||||
// if we are in OP_SUBCALL* call, it called OP_OPEN*, so we must return
|
// if we are in OP_SUBCALL* call, it called OP_OPEN*, so we must return
|
||||||
@ -5868,11 +5885,18 @@ begin
|
|||||||
Result := True;
|
Result := True;
|
||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
|
end;
|
||||||
|
|
||||||
|
OP_CLOSE_ATOMIC:
|
||||||
|
begin
|
||||||
|
no := PReGroupIndex((scan + REOpSz + RENextOffSz))^;
|
||||||
|
// handle atomic group, mark it as "done"
|
||||||
|
// (we are here because some OP_BRANCH is matched)
|
||||||
|
GrpBounds[regRecursion].GrpEnd[no] := regInput;
|
||||||
|
|
||||||
Result := MatchPrim(next);
|
Result := MatchPrim(next);
|
||||||
if not Result then begin
|
if not Result then begin
|
||||||
GrpBounds[regRecursion].GrpEnd[no] := save;
|
if not IsBacktrackingGroupAsAtom then begin
|
||||||
if Local.IsAtomic and not IsBacktrackingGroupAsAtom then begin
|
|
||||||
GrpBacktrackingAsAtom[no] := True;
|
GrpBacktrackingAsAtom[no] := True;
|
||||||
IsBacktrackingGroupAsAtom := True;
|
IsBacktrackingGroupAsAtom := True;
|
||||||
end;
|
end;
|
||||||
@ -5911,8 +5935,9 @@ begin
|
|||||||
if (next^ = OP_LOOKAROUND_OPTIONAL) then
|
if (next^ = OP_LOOKAROUND_OPTIONAL) then
|
||||||
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
|
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
|
||||||
regInput := Local.LookAroundInfo.InputPos;
|
regInput := Local.LookAroundInfo.InputPos;
|
||||||
Result := MatchPrim(next);
|
Result := False;
|
||||||
Exit;
|
scan := next;
|
||||||
|
continue;
|
||||||
end;
|
end;
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
@ -5922,8 +5947,9 @@ begin
|
|||||||
if (next^ = OP_LOOKAROUND_OPTIONAL) then
|
if (next^ = OP_LOOKAROUND_OPTIONAL) then
|
||||||
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
|
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
|
||||||
regInput := Local.LookAroundInfo.InputPos;
|
regInput := Local.LookAroundInfo.InputPos;
|
||||||
Result := MatchPrim(next);
|
Result := False;
|
||||||
Exit;
|
scan := next;
|
||||||
|
continue;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -6000,8 +6026,9 @@ begin
|
|||||||
if (next^ = OP_LOOKAROUND_OPTIONAL) then
|
if (next^ = OP_LOOKAROUND_OPTIONAL) then
|
||||||
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
|
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
|
||||||
regInput := Local.LookAroundInfo.InputPos;
|
regInput := Local.LookAroundInfo.InputPos;
|
||||||
Result := MatchPrim(next);
|
Result := False;
|
||||||
Exit;
|
scan := next;
|
||||||
|
continue;
|
||||||
end;
|
end;
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
@ -6011,8 +6038,9 @@ begin
|
|||||||
if (next^ = OP_LOOKAROUND_OPTIONAL) then
|
if (next^ = OP_LOOKAROUND_OPTIONAL) then
|
||||||
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
|
next := PRegExprChar(AlignToPtr(next + 1)) + RENextOffSz;
|
||||||
regInput := Local.LookAroundInfo.InputPos;
|
regInput := Local.LookAroundInfo.InputPos;
|
||||||
Result := MatchPrim(next);
|
Result := False;
|
||||||
Exit;
|
scan := next;
|
||||||
|
continue;
|
||||||
end;
|
end;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -6351,8 +6379,10 @@ begin
|
|||||||
end;
|
end;
|
||||||
no := FindRepeated(opnd, BracesMax);
|
no := FindRepeated(opnd, BracesMax);
|
||||||
if no >= BracesMin then
|
if no >= BracesMin then
|
||||||
if (nextch = #0) or (regInput^ = nextch) then
|
if (nextch = #0) or (regInput^ = nextch) then begin
|
||||||
Result := MatchPrim(next);
|
scan := next;
|
||||||
|
continue;
|
||||||
|
end;
|
||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
@ -6493,21 +6523,16 @@ end;
|
|||||||
function TRegExpr.MatchAtOnePos(APos: PRegExprChar): boolean;
|
function TRegExpr.MatchAtOnePos(APos: PRegExprChar): boolean;
|
||||||
begin
|
begin
|
||||||
regInput := APos;
|
regInput := APos;
|
||||||
regNestedCalls := 0;
|
//regNestedCalls := 0;
|
||||||
regRecursion := 0;
|
|
||||||
fInputCurrentEnd := fInputEnd;
|
fInputCurrentEnd := fInputEnd;
|
||||||
Result := False;
|
GrpBounds[0].GrpStart[0] := APos;
|
||||||
{$IFDEF RegExpWithStackOverflowCheck_DecStack_Frame}
|
|
||||||
StackLimit := StackBottom;
|
|
||||||
if StackLimit <> nil then
|
|
||||||
StackLimit := StackLimit + 36000; // Add for any calls within the current MatchPrim // FPC has "STACK_MARGIN = 16384;", but we need to call Error, ..., raise
|
|
||||||
{$ENDIF}
|
|
||||||
Result := MatchPrim(regCodeWork);
|
Result := MatchPrim(regCodeWork);
|
||||||
if Result then
|
if Result then
|
||||||
begin
|
Result := regInput >= GrpBounds[0].GrpStart[0];
|
||||||
GrpBounds[0].GrpStart[0] := APos;
|
if Result then
|
||||||
GrpBounds[0].GrpEnd[0] := regInput;
|
GrpBounds[0].GrpEnd[0] := regInput
|
||||||
end;
|
else
|
||||||
|
GrpBounds[0].GrpStart[0] := nil;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TRegExpr.ClearMatches;
|
procedure TRegExpr.ClearMatches;
|
||||||
@ -6530,6 +6555,7 @@ begin
|
|||||||
{$ENDIF}
|
{$ENDIF}
|
||||||
LookAroundInfoList := nil;
|
LookAroundInfoList := nil;
|
||||||
CurrentSubCalled := -1;
|
CurrentSubCalled := -1;
|
||||||
|
regRecursion := 0;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure TRegExpr.InitInternalGroupData;
|
procedure TRegExpr.InitInternalGroupData;
|
||||||
@ -6583,7 +6609,7 @@ end;
|
|||||||
function TRegExpr.ExecPrimProtected(AOffset: Integer; ASlowChecks,
|
function TRegExpr.ExecPrimProtected(AOffset: Integer; ASlowChecks,
|
||||||
ABackward: Boolean; ATryMatchOnlyStartingBefore: Integer): Boolean;
|
ABackward: Boolean; ATryMatchOnlyStartingBefore: Integer): Boolean;
|
||||||
var
|
var
|
||||||
Ptr: PRegExprChar;
|
Ptr, SearchEnd: PRegExprChar;
|
||||||
begin
|
begin
|
||||||
Result := False;
|
Result := False;
|
||||||
|
|
||||||
@ -6631,6 +6657,12 @@ begin
|
|||||||
if StrLPos(fInputStart, PRegExprChar(regMustString), fInputEnd - fInputStart, length(regMustString)) = nil then
|
if StrLPos(fInputStart, PRegExprChar(regMustString), fInputEnd - fInputStart, length(regMustString)) = nil then
|
||||||
exit;
|
exit;
|
||||||
|
|
||||||
|
{$IFDEF RegExpWithStackOverflowCheck_DecStack_Frame}
|
||||||
|
StackLimit := StackBottom;
|
||||||
|
if StackLimit <> nil then
|
||||||
|
StackLimit := StackLimit + 36000; // Add for any calls within the current MatchPrim // FPC has "STACK_MARGIN = 16384;", but we need to call Error, ..., raise
|
||||||
|
{$ENDIF}
|
||||||
|
|
||||||
FMatchesCleared := False;
|
FMatchesCleared := False;
|
||||||
// ATryOnce or anchored match (it needs to be tried only once).
|
// ATryOnce or anchored match (it needs to be tried only once).
|
||||||
if (ATryMatchOnlyStartingBefore = AOffset + 1) or (regAnchored in [raBOL, raOnlyOnce, raContinue]) then
|
if (ATryMatchOnlyStartingBefore = AOffset + 1) or (regAnchored in [raBOL, raOnlyOnce, raContinue]) then
|
||||||
@ -6651,40 +6683,53 @@ begin
|
|||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
|
|
||||||
|
|
||||||
// Messy cases: unanchored match.
|
// Messy cases: unanchored match.
|
||||||
if ABackward then
|
if ABackward then begin
|
||||||
Inc(Ptr, 2)
|
Inc(Ptr, 2);
|
||||||
else
|
repeat
|
||||||
Dec(Ptr);
|
|
||||||
repeat
|
|
||||||
if ABackward then
|
|
||||||
begin
|
|
||||||
Dec(Ptr);
|
Dec(Ptr);
|
||||||
if Ptr < fInputStart then
|
if Ptr < fInputStart then
|
||||||
Exit;
|
Exit;
|
||||||
end
|
|
||||||
else
|
{$IFDEF UseFirstCharSet}
|
||||||
begin
|
{$IFDEF UnicodeRE}
|
||||||
|
if Ord(Ptr^) <= $FF then
|
||||||
|
{$ENDIF}
|
||||||
|
if not FirstCharArray[byte(Ptr^)] then
|
||||||
|
Continue;
|
||||||
|
{$ENDIF}
|
||||||
|
|
||||||
|
Result := MatchAtOnePos(Ptr);
|
||||||
|
// Exit on a match or after testing the end-of-string
|
||||||
|
if Result then
|
||||||
|
Exit;
|
||||||
|
until False;
|
||||||
|
end
|
||||||
|
else begin
|
||||||
|
Dec(Ptr);
|
||||||
|
SearchEnd := fInputEnd - FMinMatchLen;
|
||||||
|
if (ATryMatchOnlyStartingBefore > 0) and (fInputStart + ATryMatchOnlyStartingBefore < SearchEnd) then
|
||||||
|
SearchEnd := fInputStart + ATryMatchOnlyStartingBefore - 2;
|
||||||
|
repeat
|
||||||
Inc(Ptr);
|
Inc(Ptr);
|
||||||
if Ptr > fInputEnd then
|
if Ptr > SearchEnd then
|
||||||
Exit;
|
Exit;
|
||||||
if (ATryMatchOnlyStartingBefore > 0) and (Ptr - fInputStart >= ATryMatchOnlyStartingBefore - 1) then
|
|
||||||
|
{$IFDEF UseFirstCharSet}
|
||||||
|
{$IFDEF UnicodeRE}
|
||||||
|
if Ord(Ptr^) <= $FF then
|
||||||
|
{$ENDIF}
|
||||||
|
if not FirstCharArray[byte(Ptr^)] then
|
||||||
|
Continue;
|
||||||
|
{$ENDIF}
|
||||||
|
|
||||||
|
Result := MatchAtOnePos(Ptr);
|
||||||
|
// Exit on a match or after testing the end-of-string
|
||||||
|
if Result then
|
||||||
Exit;
|
Exit;
|
||||||
end;
|
until False;
|
||||||
|
end;
|
||||||
{$IFDEF UseFirstCharSet}
|
|
||||||
{$IFDEF UnicodeRE}
|
|
||||||
if Ord(Ptr^) <= $FF then
|
|
||||||
{$ENDIF}
|
|
||||||
if not FirstCharArray[byte(Ptr^)] then
|
|
||||||
Continue;
|
|
||||||
{$ENDIF}
|
|
||||||
|
|
||||||
Result := MatchAtOnePos(Ptr);
|
|
||||||
// Exit on a match or after testing the end-of-string
|
|
||||||
if Result then
|
|
||||||
Exit;
|
|
||||||
until False;
|
|
||||||
end; { of function TRegExpr.ExecPrim
|
end; { of function TRegExpr.ExecPrim
|
||||||
-------------------------------------------------------------- }
|
-------------------------------------------------------------- }
|
||||||
|
|
||||||
@ -7094,7 +7139,8 @@ begin
|
|||||||
|
|
||||||
OP_BOL,
|
OP_BOL,
|
||||||
OP_BOL_ML,
|
OP_BOL_ML,
|
||||||
OP_CONTINUE_POS:
|
OP_CONTINUE_POS,
|
||||||
|
OP_RESET_MATCHPOS:
|
||||||
; // Exit;
|
; // Exit;
|
||||||
|
|
||||||
OP_EOL,
|
OP_EOL,
|
||||||
@ -7669,6 +7715,8 @@ begin
|
|||||||
Result := 'SUBCALL';
|
Result := 'SUBCALL';
|
||||||
OP_ANYLINEBREAK:
|
OP_ANYLINEBREAK:
|
||||||
Result := 'ANYLINEBREAK';
|
Result := 'ANYLINEBREAK';
|
||||||
|
OP_RESET_MATCHPOS:
|
||||||
|
Result := 'RESET_MATCHPOS';
|
||||||
else
|
else
|
||||||
Error(reeDumpCorruptedOpcode);
|
Error(reeDumpCorruptedOpcode);
|
||||||
end;
|
end;
|
||||||
@ -8082,7 +8130,7 @@ begin
|
|||||||
if (ABranchMaxLen = high(ABranchMaxLen)) and not(flfForceToStopAt in Flags) then
|
if (ABranchMaxLen = high(ABranchMaxLen)) and not(flfForceToStopAt in Flags) then
|
||||||
Exit;
|
Exit;
|
||||||
end;
|
end;
|
||||||
assert(s^=OP_CLOSE);
|
assert(s^=OP_CLOSE_ATOMIC);
|
||||||
AMinLen := AMinLen + ASubLen;
|
AMinLen := AMinLen + ASubLen;
|
||||||
IncMaxLen(FndMaxLen, ASubMaxLen);
|
IncMaxLen(FndMaxLen, ASubMaxLen);
|
||||||
Inc(s, REOpSz + RENextOffSz + ReGroupIndexSz); // consume the OP_CLOSE_ATOMIC;
|
Inc(s, REOpSz + RENextOffSz + ReGroupIndexSz); // consume the OP_CLOSE_ATOMIC;
|
||||||
|
Loading…
Reference in New Issue
Block a user