SynEdit: synregrexpr: safer align, bug #19109

git-svn-id: trunk@32569 -
This commit is contained in:
mattias 2011-09-30 14:27:16 +00:00
parent dbb71f8379
commit 883880f37a

View File

@ -1,7 +1,6 @@
{$IFNDEF QSYNREGEXPR}
unit SynRegExpr;
{$ENDIF}
{
TRegExpr class library
Delphi Regular Expressions
@ -30,8 +29,8 @@ unit SynRegExpr;
not be charged seperatly.
4. Altered versions must be plainly marked as such, and must
not be misrepresented as being the original software.
5. RegExp Studio application and all the visual components as
well as documentation is not part of the TRegExpr library
5. RegExp Studio application and all the visual components as
well as documentation is not part of the TRegExpr library
and is not free for usage.
mailto:anso@mail.ru
@ -96,7 +95,9 @@ interface
{$ENDIF}
{$DEFINE ComplexBraces} // support braces in complex cases
{$IFNDEF UniCode} // the option applicable only for non-UniCode mode
{$IFNDEF FPC_REQUIRES_PROPER_ALIGNMENT} //sets have to be aligned
{$DEFINE UseSetOfChar} // Significant optimization by using set of char
{$ENDIF}
{$ENDIF}
{$IFDEF UseSetOfChar}
{$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars
@ -139,8 +140,15 @@ type
const
REOpSz = SizeOf (TREOp) div SizeOf (REChar); // size of p-code in RegExprString units
RENextOffSz = SizeOf (TRENextOff) div SizeOf (REChar); // size of Next 'pointer' -"-
{$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
// add space for aligning pointer
// -1 is the correct max size but also needed for InsertOperator that needs a multiple of pointer size
RENextOffSz = (2 * SizeOf (TRENextOff) div SizeOf (REChar))-1;
REBracesArgSz = (2 * SizeOf (TREBracesArg) div SizeOf (REChar)); // add space for aligning pointer
{$ELSE}
RENextOffSz = (SizeOf (TRENextOff) div SizeOf (REChar)); // size of Next 'pointer' -"-
REBracesArgSz = SizeOf (TREBracesArg) div SizeOf (REChar); // size of BRACES arguments -"-
{$ENDIF}
type
TRegExprInvertCaseFunction = function (const Ch : REChar) : REChar
@ -679,6 +687,24 @@ const
XIgnoredChars = [' ', #9, #$d, #$a];
{$ENDIF}
function AlignToPtr(const p: Pointer): Pointer;
begin
{$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
Result := Align(p, SizeOf(Pointer));
{$ELSE}
Result := p;
{$ENDIF}
end;
function AlignToInt(const p: Pointer): Pointer;
begin
{$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
Result := Align(p, SizeOf(integer));
{$ELSE}
Result := p;
{$ENDIF}
end;
{=============================================================}
{=================== WideString functions ====================}
{=============================================================}
@ -1481,7 +1507,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
UNTIL false;
// Set Next 'pointer'
if val < scan
then PRENextOff (scan + REOpSz)^ := - (scan - val) //###0.948
then PRENextOff (AlignToPtr(scan + REOpSz))^ := - (scan - val) //###0.948
// work around PWideChar subtraction bug (Delphi uses
// shr after subtraction to calculate widechar distance %-( )
// so, if difference is negative we have .. the "feature" :(
@ -1489,7 +1515,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
// "P Q computes the difference between the address given
// by P (the higher address) and the address given by Q (the
// lower address)" - Delphi help quotation.
else PRENextOff (scan + REOpSz)^ := val - scan; //###0.933
else PRENextOff (AlignToPtr(scan + REOpSz))^ := val - scan; //###0.933
end; { of procedure TRegExpr.Tail
--------------------------------------------------------------}
@ -1510,7 +1536,7 @@ function TRegExpr.EmitNode (op : TREOp) : PRegExprChar; //###0.933
if Result <> @regdummy then begin
PREOp (regcode)^ := op;
inc (regcode, REOpSz);
PRENextOff (regcode)^ := 0; // Next "pointer" := nil
PRENextOff (AlignToPtr(regcode))^ := 0; // Next "pointer" := nil
inc (regcode, RENextOffSz);
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
@ -1553,8 +1579,8 @@ procedure TRegExpr.InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
raise Exception.Create('TRegExpr.InsertOperator buffer overrun');
if (opnd<regcode) or (opnd-regcode>regsize) then
raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
// if (opnd<regcode) or (opnd-regcode>regsize) then
// raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
{$ENDIF}
dst := regcode;
while src > opnd do begin
@ -1903,11 +1929,11 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
if regcode <> @regdummy then begin
off := (Result + REOpSz + RENextOffSz)
- (regcode - REOpSz - RENextOffSz); // back to Atom after LOOPENTRY
PREBracesArg (regcode)^ := ABracesMin;
PREBracesArg (AlignToInt(regcode))^ := ABracesMin;
inc (regcode, REBracesArgSz);
PREBracesArg (regcode)^ := ABracesMax;
PREBracesArg (AlignToInt(regcode))^ := ABracesMax;
inc (regcode, REBracesArgSz);
PRENextOff (regcode)^ := off;
PRENextOff (AlignToPtr(regcode))^ := off;
inc (regcode, RENextOffSz);
{$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then
@ -1929,8 +1955,8 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
else TheOp := BRACES;
InsertOperator (TheOp, Result, REOpSz + RENextOffSz + REBracesArgSz * 2);
if regcode <> @regdummy then begin
PREBracesArg (Result + REOpSz + RENextOffSz)^ := ABracesMin;
PREBracesArg (Result + REOpSz + RENextOffSz + REBracesArgSz)^ := ABracesMax;
PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz))^ := ABracesMin;
PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz + REBracesArgSz))^ := ABracesMax;
end;
end;
@ -2808,7 +2834,7 @@ function TRegExpr.regnext (p : PRegExprChar) : PRegExprChar;
Result := nil;
EXIT;
end;
offset := PRENextOff (p + REOpSz)^; //###0.933 inlined NEXT
offset := PRENextOff (AlignToPtr(p + REOpSz))^; //###0.933 inlined NEXT
if offset = 0
then Result := nil
else Result := p + offset;
@ -2841,7 +2867,7 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
scan := prog;
while scan <> nil do begin
len := PRENextOff (scan + 1)^; //###0.932 inlined regnext
len := PRENextOff (AlignToPtr(scan + 1))^; //###0.932 inlined regnext
if len = 0
then next := nil
else next := scan + len;
@ -3130,9 +3156,9 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
Error (reeLoopWithoutEntry);
EXIT;
end;
opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + 2 * REBracesArgSz)^;
BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^;
BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^;
opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + 2 * REBracesArgSz))^;
BracesMin := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz))^;
BracesMax := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
save := reginput;
if LoopStack [LoopStackIdx] >= BracesMin then begin // Min alredy matched - we can work
if scan^ = LOOP then begin
@ -3196,8 +3222,8 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
else if (scan^ = PLUS) or (scan^ = PLUSNG)
then BracesMin := 1 // PLUS
else begin // BRACES
BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^;
BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^;
BracesMin := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^;
BracesMax := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
end;
save := reginput;
opnd := scan + REOpSz + RENextOffSz;
@ -3377,8 +3403,8 @@ procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar);
EXIT;
end;
LOOP, LOOPNG: begin //###0.940
opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + REBracesArgSz * 2)^;
min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^;
opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz * 2))^;
min_cnt := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^;
FillFirstCharSet (opnd);
if min_cnt = 0
then FillFirstCharSet (next);
@ -3393,7 +3419,7 @@ procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar);
end;
BRACES, BRACESNG: begin //###0.940
opnd := scan + REOpSz + RENextOffSz + REBracesArgSz * 2;
min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^; // BRACES
min_cnt := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^; // BRACES
FillFirstCharSet (opnd);
if min_cnt > 0
then EXIT;
@ -4071,14 +4097,14 @@ function TRegExpr.Dump : RegExprString;
{$ENDIF}
if (op = BRACES) or (op = BRACESNG) then begin //###0.941
// show min/max argument of BRACES operator
Result := Result + Format ('{%d,%d}', [PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]);
Result := Result + Format ('{%d,%d}', [PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]);
inc (s, REBracesArgSz * 2);
end;
{$IFDEF ComplexBraces}
if (op = LOOP) or (op = LOOPNG) then begin //###0.940
Result := Result + Format (' -> (%d) {%d,%d}', [
(s - programm - (REOpSz + RENextOffSz)) + PRENextOff (s + 2 * REBracesArgSz)^,
PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]);
(s - programm - (REOpSz + RENextOffSz)) + PRENextOff (AlignToPtr(s + 2 * REBracesArgSz))^,
PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]);
inc (s, 2 * REBracesArgSz + RENextOffSz);
end;
{$ENDIF}