SynEdit: synregexpr: safer alignment, bug #19109

git-svn-id: trunk@32569 -
This commit is contained in:
mattias 2011-09-30 14:27:16 +00:00
parent dbb71f8379
commit 883880f37a

View File

@ -1,7 +1,6 @@
{$IFNDEF QSYNREGEXPR}
unit SynRegExpr;
{$ENDIF}
{
TRegExpr class library
Delphi Regular Expressions
@ -96,8 +95,10 @@ interface
{$ENDIF}
{$DEFINE ComplexBraces} // support braces in complex cases
{$IFNDEF UniCode} // the option applicable only for non-UniCode mode
{$IFNDEF FPC_REQUIRES_PROPER_ALIGNMENT} //sets have to be aligned
{$DEFINE UseSetOfChar} // Significant optimization by using set of char
{$ENDIF}
{$ENDIF}
{$IFDEF UseSetOfChar}
{$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars
{$ENDIF}
@ -139,8 +140,15 @@ type
const
REOpSz = SizeOf (TREOp) div SizeOf (REChar); // size of p-code in RegExprString units
RENextOffSz = SizeOf (TRENextOff) div SizeOf (REChar); // size of Next 'pointer' -"-
{$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
// add space for aligning pointer
// -1 is the correct max size but also needed for InsertOperator that needs a multiple of pointer size
RENextOffSz = (2 * SizeOf (TRENextOff) div SizeOf (REChar))-1;
REBracesArgSz = (2 * SizeOf (TREBracesArg) div SizeOf (REChar)); // add space for aligning pointer
{$ELSE}
RENextOffSz = (SizeOf (TRENextOff) div SizeOf (REChar)); // size of Next 'pointer' -"-
REBracesArgSz = SizeOf (TREBracesArg) div SizeOf (REChar); // size of BRACES arguments -"-
{$ENDIF}
type
TRegExprInvertCaseFunction = function (const Ch : REChar) : REChar
@ -679,6 +687,24 @@ const
XIgnoredChars = [' ', #9, #$d, #$a];
{$ENDIF}
{ AlignToPtr
  Yields the address at which a pointer-sized value should be accessed.
  On targets where FPC_REQUIRES_PROPER_ALIGNMENT is defined, p is passed
  through the RTL Align function with SizeOf(Pointer) as the alignment;
  on all other targets p is returned unchanged. }
function AlignToPtr(const p: Pointer): Pointer;
begin
  {$IFNDEF FPC_REQUIRES_PROPER_ALIGNMENT}
  // target tolerates unaligned access: use the address as given
  Result := p;
  {$ELSE}
  // strict-alignment target: move to a pointer-size boundary
  Result := Align(p, SizeOf(Pointer));
  {$ENDIF}
end;
{ AlignToInt
  Yields the address at which an integer-sized value should be accessed.
  On targets where FPC_REQUIRES_PROPER_ALIGNMENT is defined, p is passed
  through the RTL Align function with SizeOf(integer) as the alignment;
  on all other targets p is returned unchanged.
  Note: because the alignment used here is SizeOf(integer) and not
  SizeOf(Pointer), the result may differ from AlignToPtr(p) when the two
  sizes differ, so a value should be read back with the same helper that
  was used to store it. }
function AlignToInt(const p: Pointer): Pointer;
begin
  {$IFNDEF FPC_REQUIRES_PROPER_ALIGNMENT}
  // target tolerates unaligned access: use the address as given
  Result := p;
  {$ELSE}
  // strict-alignment target: move to an integer-size boundary
  Result := Align(p, SizeOf(integer));
  {$ENDIF}
end;
{=============================================================}
{=================== WideString functions ====================}
{=============================================================}
@ -1481,7 +1507,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
UNTIL false;
// Set Next 'pointer'
if val < scan
then PRENextOff (scan + REOpSz)^ := - (scan - val) //###0.948
then PRENextOff (AlignToPtr(scan + REOpSz))^ := - (scan - val) //###0.948
// work around PWideChar subtraction bug (Delphi uses
// shr after subtraction to calculate widechar distance %-( )
// so, if difference is negative we have .. the "feature" :(
@ -1489,7 +1515,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
// "P Q computes the difference between the address given
// by P (the higher address) and the address given by Q (the
// lower address)" - Delphi help quotation.
else PRENextOff (scan + REOpSz)^ := val - scan; //###0.933
else PRENextOff (AlignToPtr(scan + REOpSz))^ := val - scan; //###0.933
end; { of procedure TRegExpr.Tail
--------------------------------------------------------------}
@ -1510,7 +1536,7 @@ function TRegExpr.EmitNode (op : TREOp) : PRegExprChar; //###0.933
if Result <> @regdummy then begin
PREOp (regcode)^ := op;
inc (regcode, REOpSz);
PRENextOff (regcode)^ := 0; // Next "pointer" := nil
PRENextOff (AlignToPtr(regcode))^ := 0; // Next "pointer" := nil
inc (regcode, RENextOffSz);
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
@ -1553,8 +1579,8 @@ procedure TRegExpr.InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.InsertOperator buffer overrun');
if (opnd<regcode) or (opnd-regcode>regsize) then
raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
// if (opnd<regcode) or (opnd-regcode>regsize) then
// raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
{$ENDIF}
dst := regcode;
while src > opnd do begin
@ -1903,11 +1929,11 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
if regcode <> @regdummy then begin
off := (Result + REOpSz + RENextOffSz)
- (regcode - REOpSz - RENextOffSz); // back to Atom after LOOPENTRY
PREBracesArg (regcode)^ := ABracesMin;
PREBracesArg (AlignToInt(regcode))^ := ABracesMin;
inc (regcode, REBracesArgSz);
PREBracesArg (regcode)^ := ABracesMax;
PREBracesArg (AlignToInt(regcode))^ := ABracesMax;
inc (regcode, REBracesArgSz);
PRENextOff (regcode)^ := off;
PRENextOff (AlignToPtr(regcode))^ := off;
inc (regcode, RENextOffSz);
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
@ -1929,8 +1955,8 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
else TheOp := BRACES;
InsertOperator (TheOp, Result, REOpSz + RENextOffSz + REBracesArgSz * 2);
if regcode <> @regdummy then begin
PREBracesArg (Result + REOpSz + RENextOffSz)^ := ABracesMin;
PREBracesArg (Result + REOpSz + RENextOffSz + REBracesArgSz)^ := ABracesMax;
PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz))^ := ABracesMin;
PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz + REBracesArgSz))^ := ABracesMax;
end;
end;
@ -2808,7 +2834,7 @@ function TRegExpr.regnext (p : PRegExprChar) : PRegExprChar;
Result := nil;
EXIT;
end;
offset := PRENextOff (p + REOpSz)^; //###0.933 inlined NEXT
offset := PRENextOff (AlignToPtr(p + REOpSz))^; //###0.933 inlined NEXT
if offset = 0
then Result := nil
else Result := p + offset;
@ -2841,7 +2867,7 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
scan := prog;
while scan <> nil do begin
len := PRENextOff (scan + 1)^; //###0.932 inlined regnext
len := PRENextOff (AlignToPtr(scan + 1))^; //###0.932 inlined regnext
if len = 0
then next := nil
else next := scan + len;
@ -3130,9 +3156,9 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
Error (reeLoopWithoutEntry);
EXIT;
end;
opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + 2 * REBracesArgSz)^;
BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^;
BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^;
opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + 2 * REBracesArgSz))^;
BracesMin := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz))^;
BracesMax := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
save := reginput;
if LoopStack [LoopStackIdx] >= BracesMin then begin // Min alredy matched - we can work
if scan^ = LOOP then begin
@ -3196,8 +3222,8 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
else if (scan^ = PLUS) or (scan^ = PLUSNG)
then BracesMin := 1 // PLUS
else begin // BRACES
BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^;
BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^;
BracesMin := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^;
BracesMax := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
end;
save := reginput;
opnd := scan + REOpSz + RENextOffSz;
@ -3377,8 +3403,8 @@ procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar);
EXIT;
end;
LOOP, LOOPNG: begin //###0.940
opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + REBracesArgSz * 2)^;
min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^;
opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz * 2))^;
min_cnt := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^;
FillFirstCharSet (opnd);
if min_cnt = 0
then FillFirstCharSet (next);
@ -3393,7 +3419,7 @@ procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar);
end;
BRACES, BRACESNG: begin //###0.940
opnd := scan + REOpSz + RENextOffSz + REBracesArgSz * 2;
min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^; // BRACES
min_cnt := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^; // BRACES
FillFirstCharSet (opnd);
if min_cnt > 0
then EXIT;
@ -4071,14 +4097,14 @@ function TRegExpr.Dump : RegExprString;
{$ENDIF}
if (op = BRACES) or (op = BRACESNG) then begin //###0.941
// show min/max argument of BRACES operator
Result := Result + Format ('{%d,%d}', [PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]);
Result := Result + Format ('{%d,%d}', [PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]);
inc (s, REBracesArgSz * 2);
end;
{$IFDEF ComplexBraces}
if (op = LOOP) or (op = LOOPNG) then begin //###0.940
Result := Result + Format (' -> (%d) {%d,%d}', [
(s - programm - (REOpSz + RENextOffSz)) + PRENextOff (s + 2 * REBracesArgSz)^,
PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]);
(s - programm - (REOpSz + RENextOffSz)) + PRENextOff (AlignToPtr(s + 2 * REBracesArgSz))^,
PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]);
inc (s, 2 * REBracesArgSz + RENextOffSz);
end;
{$ENDIF}