LazEdit: update TRegExpr

This commit is contained in:
Martin 2023-11-20 16:00:11 +01:00
parent e764c0f684
commit 769cab6f8b

View File

@ -530,7 +530,7 @@ type
function EmitNode(op: TREOp): PRegExprChar;
// emit OP_BRANCH (and fillchars)
function EmitBranch: PRegExprChar;
function EmitBranch: PRegExprChar; {$IFDEF InlineFuncs}inline;{$ENDIF}
// emit (if appropriate) a byte of code
procedure EmitC(ch: REChar); {$IFDEF InlineFuncs}inline;{$ENDIF}
@ -552,6 +552,7 @@ type
// insert an operator in front of already-emitted operand
// Means relocating the operand.
procedure InsertOperator(op: TREOp; opnd: PRegExprChar; sz: integer);
procedure RemoveOperator(opnd: PRegExprChar; sz: Integer);
// regular expression, i.e. main body or parenthesized thing
function ParseReg(InBrackets: boolean; var FlagParse: integer): PRegExprChar;
@ -633,10 +634,10 @@ type
// For Delphi 5 and higher available overloaded versions - first without
// parameter (uses already assigned to InputString property value)
// and second that has int parameter and is same as ExecPos
function Exec(const AInputString: RegExprString): boolean;
function Exec(const AInputString: RegExprString): Boolean; {$IFDEF InlineFuncs}inline;{$ENDIF}
{$IFDEF OverMeth} overload;
function Exec: boolean; overload;
function Exec(AOffset: integer): boolean; overload;
function Exec: Boolean; overload; {$IFDEF InlineFuncs}inline;{$ENDIF}
function Exec(AOffset: Integer): Boolean; overload; {$IFDEF InlineFuncs}inline;{$ENDIF}
{$ENDIF}
// find next match:
@ -652,14 +653,14 @@ type
// find match for InputString starting from AOffset position
// (AOffset=1 - first char of InputString)
function ExecPos(AOffset: integer {$IFDEF DefParam} = 1{$ENDIF}): boolean;
function ExecPos(AOffset: Integer {$IFDEF DefParam} = 1{$ENDIF}): Boolean; {$IFDEF InlineFuncs}inline;{$ENDIF}
{$IFDEF OverMeth} overload;
// find match for InputString at AOffset.
// if ATryOnce=True then only match exactly at AOffset (like anchor \G)
// if ATryMatchOnlyStartingBefore then only when the match can start before
// that position: Result := MatchPos[0] < ATryMatchOnlyStartingBefore;
function ExecPos(AOffset: integer; ATryOnce, ABackward: boolean): boolean; overload;
function ExecPos(AOffset, ATryMatchOnlyStartingBefore: integer): boolean; overload;
function ExecPos(AOffset: Integer; ATryOnce, ABackward: Boolean): Boolean; overload; {$IFDEF InlineFuncs}inline;{$ENDIF}
function ExecPos(AOffset, ATryMatchOnlyStartingBefore: Integer): Boolean; overload; {$IFDEF InlineFuncs}inline;{$ENDIF}
{$ENDIF}
// Returns ATemplate with '$&' or '$0' replaced by whole r.e.
@ -2567,7 +2568,7 @@ begin
regExactlyLen := nil;
{$IFDEF DebugSynRegExpr}
if regcode - programm > regsize then
if regcode - programm > regCodeSize then
raise Exception.Create('TRegExpr.EmitNode buffer overrun');
{$ENDIF}
end
@ -2591,7 +2592,7 @@ begin
regCode^ := ch;
Inc(regCode);
{$IFDEF DebugSynRegExpr}
if regcode - programm > regsize then
if regcode - programm > regCodeSize then
raise Exception.Create('TRegExpr.EmitC buffer overrun');
{$ENDIF}
end
@ -2607,7 +2608,7 @@ begin
PLongInt(regCode)^ := AValue;
Inc(regCode, RENumberSz);
{$IFDEF DebugSynRegExpr}
if regcode - programm > regsize then
if regcode - programm > regCodeSize then
raise Exception.Create('TRegExpr.EmitInt buffer overrun');
{$ENDIF}
end
@ -2722,8 +2723,8 @@ begin
{$IFDEF DebugSynRegExpr}
if regCode - programm > regCodeSize then
raise Exception.Create('TRegExpr.InsertOperator buffer overrun');
// if (opnd<regCode) or (opnd-regCode>regCodeSize) then
// raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
if fSecondPass and ( (opnd<regCodeWork) or (opnd-regCodeWork>regCodeSize) ) then
raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
{$ENDIF}
dst := regCode;
while src > opnd do
@ -2746,6 +2747,39 @@ begin
end; { of procedure TRegExpr.InsertOperator
-------------------------------------------------------------- }
procedure TRegExpr.RemoveOperator(opnd: PRegExprChar; sz: Integer);
// remove an operator in front of already-emitted operand
// Means relocating the operand.
var
src, dst: PRegExprChar;
i: Integer;
begin
if regCode = @regDummy then
begin
// Do not decrement regCodeSize => the fSecondPass may temporary fill the extra memory;
Exit;
end;
// move code behind insert position
{$IFDEF DebugSynRegExpr}
if fSecondPass and ( (opnd<regCodeWork) or (opnd>=regCodeWork+regCodeSize) ) then
raise Exception.Create('TRegExpr.RemoveOperator() invalid opnd');
if (sz > regCodeSize-(opnd-regCodeWork)) then
raise Exception.Create('TRegExpr.RemoveOperator buffer underrun');
{$ENDIF}
src := opnd + sz;
dst := opnd;
while src < regCode do
begin
dst^ := src^;
Inc(dst);
Inc(src);
end;
Dec(regCode, sz);
for i := 0 to regNumBrackets - 1 do
if (GrpOpCodes[i] <> nil) and (GrpOpCodes[i] > opnd) then
GrpOpCodes[i] := GrpOpCodes[i] - sz;
end;
function FindSkippedMetaLen(PStart, PEnd: PRegExprChar): integer; {$IFDEF InlineFuncs}inline;{$ENDIF}
// find length of initial segment of PStart string consisting
// entirely of characters not from IsMetaSymbol1.
@ -3198,10 +3232,6 @@ begin
regMustString := '';
scan := regCodeWork; // First OP_BRANCH.
if PREOp(regNext(scan))^ = OP_EEND then
begin // Only one top-level choice.
scan := scan + REOpSz + RENextOffSz + REBranchArgSz;
// Starting-point info.
if PREOp(scan)^ = OP_BOL then
regAnchored := raBOL
@ -3264,7 +3294,6 @@ begin
if regMustLen > 1 then // don't use regMust if too short
SetString(regMustString, regMust, regMustLen);
end;
end;
Result := True;
@ -3365,7 +3394,7 @@ begin
end
else
if not HasChoice then
brStart^ := OP_BRANCH;
RemoveOperator(brStart, REOpSz + RENextOffSz + REBranchArgSz);
end;
// Make a closing node, and hook it on the end.
@ -3375,16 +3404,26 @@ begin
else
ender := EmitNode(EndGroupOP);
end
else
if (EndGroupOP = OP_NONE) then begin
if HasChoice then
ender := EmitNode(OP_COMMENT) // need something to hook the branches' tails too
else
ender := nil;
end
else
ender := EmitNode(OP_EEND);
Tail(ret, ender);
// Hook the tails of the branches to the closing node.
br := ret;
while br <> nil do
begin
OpTail(br, ender);
br := regNext(br);
if ender <> nil then begin
Tail(ret, ender);
// Hook the tails of the branches to the closing node.
br := ret;
while br <> nil do
begin
OpTail(br, ender);
br := regNext(br);
end;
end;
// Check for proper termination.
@ -3430,6 +3469,15 @@ begin
Result := nil;
Exit;
end;
if fSecondPass and
(latest <> nil) and (latest^ = OP_COMMENT) and
( ((regParse < fRegexEnd) and (regParse^ <> '|') and (regParse^ <> ')')) or
(chain <> nil)
)
then begin
regCode := latest;
continue;
end;
FlagParse := FlagParse or FlagTemp and (FLAG_HASWIDTH or FLAG_LOOP or FLAG_GREEDY);
if chain = nil // First piece.
then begin
@ -3524,7 +3572,7 @@ var
PRENextOff(AlignToPtr(regCode))^ := off;
Inc(regCode, RENextOffSz);
{$IFDEF DebugSynRegExpr}
if regcode - programm > regsize then
if regcode - programm > regCodeSize then
raise Exception.Create
('TRegExpr.ParsePiece.EmitComplexBraces buffer overrun');
{$ENDIF}
@ -4476,8 +4524,18 @@ begin
// B: process found kind of brackets
case GrpKind of
gkNonCapturingGroup:
begin
ret := DoParseReg(True, False, FlagTemp, OP_NONE, OP_NONE);
if ret = nil then
begin
Result := nil;
Exit;
end;
FlagParse := FlagParse or FlagTemp and (FLAG_HASWIDTH or FLAG_SPECSTART or FLAG_LOOP or FLAG_GREEDY);
end;
gkNormalGroup,
gkNonCapturingGroup,
gkAtomicGroup:
begin
// skip this block for one of passes, to not double groups count;