SynEdit: synregrexpr: safer align, bug #19109

git-svn-id: trunk@32569 -
This commit is contained in:
mattias 2011-09-30 14:27:16 +00:00
parent dbb71f8379
commit 883880f37a

View File

@ -1,7 +1,6 @@
{$IFNDEF QSYNREGEXPR} {$IFNDEF QSYNREGEXPR}
unit SynRegExpr; unit SynRegExpr;
{$ENDIF} {$ENDIF}
{ {
TRegExpr class library TRegExpr class library
Delphi Regular Expressions Delphi Regular Expressions
@ -30,8 +29,8 @@ unit SynRegExpr;
not be charged seperatly. not be charged seperatly.
4. Altered versions must be plainly marked as such, and must 4. Altered versions must be plainly marked as such, and must
not be misrepresented as being the original software. not be misrepresented as being the original software.
5. RegExp Studio application and all the visual components as 5. RegExp Studio application and all the visual components as
well as documentation is not part of the TRegExpr library well as documentation is not part of the TRegExpr library
and is not free for usage. and is not free for usage.
mailto:anso@mail.ru mailto:anso@mail.ru
@ -96,7 +95,9 @@ interface
{$ENDIF} {$ENDIF}
{$DEFINE ComplexBraces} // support braces in complex cases {$DEFINE ComplexBraces} // support braces in complex cases
{$IFNDEF UniCode} // the option applicable only for non-UniCode mode {$IFNDEF UniCode} // the option applicable only for non-UniCode mode
{$IFNDEF FPC_REQUIRES_PROPER_ALIGNMENT} //sets have to be aligned
{$DEFINE UseSetOfChar} // Significant optimization by using set of char {$DEFINE UseSetOfChar} // Significant optimization by using set of char
{$ENDIF}
{$ENDIF} {$ENDIF}
{$IFDEF UseSetOfChar} {$IFDEF UseSetOfChar}
{$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars {$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars
@ -139,8 +140,15 @@ type
const const
REOpSz = SizeOf (TREOp) div SizeOf (REChar); // size of p-code in RegExprString units REOpSz = SizeOf (TREOp) div SizeOf (REChar); // size of p-code in RegExprString units
RENextOffSz = SizeOf (TRENextOff) div SizeOf (REChar); // size of Next 'pointer' -"- {$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
// add space for aligning pointer
// -1 is the correct max size but also needed for InsertOperator that needs a multiple of pointer size
RENextOffSz = (2 * SizeOf (TRENextOff) div SizeOf (REChar))-1;
REBracesArgSz = (2 * SizeOf (TREBracesArg) div SizeOf (REChar)); // add space for aligning pointer
{$ELSE}
RENextOffSz = (SizeOf (TRENextOff) div SizeOf (REChar)); // size of Next 'pointer' -"-
REBracesArgSz = SizeOf (TREBracesArg) div SizeOf (REChar); // size of BRACES arguments -"- REBracesArgSz = SizeOf (TREBracesArg) div SizeOf (REChar); // size of BRACES arguments -"-
{$ENDIF}
type type
TRegExprInvertCaseFunction = function (const Ch : REChar) : REChar TRegExprInvertCaseFunction = function (const Ch : REChar) : REChar
@ -679,6 +687,24 @@ const
XIgnoredChars = [' ', #9, #$d, #$a]; XIgnoredChars = [' ', #9, #$d, #$a];
{$ENDIF} {$ENDIF}
function AlignToPtr(const p: Pointer): Pointer;
begin
{$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
Result := Align(p, SizeOf(Pointer));
{$ELSE}
Result := p;
{$ENDIF}
end;
function AlignToInt(const p: Pointer): Pointer;
begin
{$IFDEF FPC_REQUIRES_PROPER_ALIGNMENT}
Result := Align(p, SizeOf(integer));
{$ELSE}
Result := p;
{$ENDIF}
end;
{=============================================================} {=============================================================}
{=================== WideString functions ====================} {=================== WideString functions ====================}
{=============================================================} {=============================================================}
@ -1481,7 +1507,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
UNTIL false; UNTIL false;
// Set Next 'pointer' // Set Next 'pointer'
if val < scan if val < scan
then PRENextOff (scan + REOpSz)^ := - (scan - val) //###0.948 then PRENextOff (AlignToPtr(scan + REOpSz))^ := - (scan - val) //###0.948
// work around PWideChar subtraction bug (Delphi uses // work around PWideChar subtraction bug (Delphi uses
// shr after subtraction to calculate widechar distance %-( ) // shr after subtraction to calculate widechar distance %-( )
// so, if difference is negative we have .. the "feature" :( // so, if difference is negative we have .. the "feature" :(
@ -1489,7 +1515,7 @@ procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
// "P Q computes the difference between the address given // "P Q computes the difference between the address given
// by P (the higher address) and the address given by Q (the // by P (the higher address) and the address given by Q (the
// lower address)" - Delphi help quotation. // lower address)" - Delphi help quotation.
else PRENextOff (scan + REOpSz)^ := val - scan; //###0.933 else PRENextOff (AlignToPtr(scan + REOpSz))^ := val - scan; //###0.933
end; { of procedure TRegExpr.Tail end; { of procedure TRegExpr.Tail
--------------------------------------------------------------} --------------------------------------------------------------}
@ -1510,7 +1536,7 @@ function TRegExpr.EmitNode (op : TREOp) : PRegExprChar; //###0.933
if Result <> @regdummy then begin if Result <> @regdummy then begin
PREOp (regcode)^ := op; PREOp (regcode)^ := op;
inc (regcode, REOpSz); inc (regcode, REOpSz);
PRENextOff (regcode)^ := 0; // Next "pointer" := nil PRENextOff (AlignToPtr(regcode))^ := 0; // Next "pointer" := nil
inc (regcode, RENextOffSz); inc (regcode, RENextOffSz);
{$IFDEF DebugSynRegExpr} {$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then if regcode-programm>regsize then
@ -1553,8 +1579,8 @@ procedure TRegExpr.InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer
{$IFDEF DebugSynRegExpr} {$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then if regcode-programm>regsize then
raise Exception.Create('TRegExpr.InsertOperator buffer overrun'); raise Exception.Create('TRegExpr.InsertOperator buffer overrun');
if (opnd<regcode) or (opnd-regcode>regsize) then // if (opnd<regcode) or (opnd-regcode>regsize) then
raise Exception.Create('TRegExpr.InsertOperator invalid opnd'); // raise Exception.Create('TRegExpr.InsertOperator invalid opnd');
{$ENDIF} {$ENDIF}
dst := regcode; dst := regcode;
while src > opnd do begin while src > opnd do begin
@ -1903,11 +1929,11 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
if regcode <> @regdummy then begin if regcode <> @regdummy then begin
off := (Result + REOpSz + RENextOffSz) off := (Result + REOpSz + RENextOffSz)
- (regcode - REOpSz - RENextOffSz); // back to Atom after LOOPENTRY - (regcode - REOpSz - RENextOffSz); // back to Atom after LOOPENTRY
PREBracesArg (regcode)^ := ABracesMin; PREBracesArg (AlignToInt(regcode))^ := ABracesMin;
inc (regcode, REBracesArgSz); inc (regcode, REBracesArgSz);
PREBracesArg (regcode)^ := ABracesMax; PREBracesArg (AlignToInt(regcode))^ := ABracesMax;
inc (regcode, REBracesArgSz); inc (regcode, REBracesArgSz);
PRENextOff (regcode)^ := off; PRENextOff (AlignToPtr(regcode))^ := off;
inc (regcode, RENextOffSz); inc (regcode, RENextOffSz);
{$IFDEF DebugSynRegExpr} {$IFDEF DebugSynRegExpr}
if regcode-programm>regsize then if regcode-programm>regsize then
@ -1929,8 +1955,8 @@ function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
else TheOp := BRACES; else TheOp := BRACES;
InsertOperator (TheOp, Result, REOpSz + RENextOffSz + REBracesArgSz * 2); InsertOperator (TheOp, Result, REOpSz + RENextOffSz + REBracesArgSz * 2);
if regcode <> @regdummy then begin if regcode <> @regdummy then begin
PREBracesArg (Result + REOpSz + RENextOffSz)^ := ABracesMin; PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz))^ := ABracesMin;
PREBracesArg (Result + REOpSz + RENextOffSz + REBracesArgSz)^ := ABracesMax; PREBracesArg (AlignToInt(Result + REOpSz + RENextOffSz + REBracesArgSz))^ := ABracesMax;
end; end;
end; end;
@ -2808,7 +2834,7 @@ function TRegExpr.regnext (p : PRegExprChar) : PRegExprChar;
Result := nil; Result := nil;
EXIT; EXIT;
end; end;
offset := PRENextOff (p + REOpSz)^; //###0.933 inlined NEXT offset := PRENextOff (AlignToPtr(p + REOpSz))^; //###0.933 inlined NEXT
if offset = 0 if offset = 0
then Result := nil then Result := nil
else Result := p + offset; else Result := p + offset;
@ -2841,7 +2867,7 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
scan := prog; scan := prog;
while scan <> nil do begin while scan <> nil do begin
len := PRENextOff (scan + 1)^; //###0.932 inlined regnext len := PRENextOff (AlignToPtr(scan + 1))^; //###0.932 inlined regnext
if len = 0 if len = 0
then next := nil then next := nil
else next := scan + len; else next := scan + len;
@ -3130,9 +3156,9 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
Error (reeLoopWithoutEntry); Error (reeLoopWithoutEntry);
EXIT; EXIT;
end; end;
opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + 2 * REBracesArgSz)^; opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + 2 * REBracesArgSz))^;
BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^; BracesMin := PREBracesArg (AlignToInt(scan + REOpSz + RENextOffSz))^;
BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^; BracesMax := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
save := reginput; save := reginput;
if LoopStack [LoopStackIdx] >= BracesMin then begin // Min alredy matched - we can work if LoopStack [LoopStackIdx] >= BracesMin then begin // Min alredy matched - we can work
if scan^ = LOOP then begin if scan^ = LOOP then begin
@ -3196,8 +3222,8 @@ function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
else if (scan^ = PLUS) or (scan^ = PLUSNG) else if (scan^ = PLUS) or (scan^ = PLUSNG)
then BracesMin := 1 // PLUS then BracesMin := 1 // PLUS
else begin // BRACES else begin // BRACES
BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^; BracesMin := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^;
BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^; BracesMax := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz))^;
end; end;
save := reginput; save := reginput;
opnd := scan + REOpSz + RENextOffSz; opnd := scan + REOpSz + RENextOffSz;
@ -3377,8 +3403,8 @@ procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar);
EXIT; EXIT;
end; end;
LOOP, LOOPNG: begin //###0.940 LOOP, LOOPNG: begin //###0.940
opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + REBracesArgSz * 2)^; opnd := scan + PRENextOff (AlignToPtr(scan + REOpSz + RENextOffSz + REBracesArgSz * 2))^;
min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^; min_cnt := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^;
FillFirstCharSet (opnd); FillFirstCharSet (opnd);
if min_cnt = 0 if min_cnt = 0
then FillFirstCharSet (next); then FillFirstCharSet (next);
@ -3393,7 +3419,7 @@ procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar);
end; end;
BRACES, BRACESNG: begin //###0.940 BRACES, BRACESNG: begin //###0.940
opnd := scan + REOpSz + RENextOffSz + REBracesArgSz * 2; opnd := scan + REOpSz + RENextOffSz + REBracesArgSz * 2;
min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^; // BRACES min_cnt := PREBracesArg (AlignToPtr(scan + REOpSz + RENextOffSz))^; // BRACES
FillFirstCharSet (opnd); FillFirstCharSet (opnd);
if min_cnt > 0 if min_cnt > 0
then EXIT; then EXIT;
@ -4071,14 +4097,14 @@ function TRegExpr.Dump : RegExprString;
{$ENDIF} {$ENDIF}
if (op = BRACES) or (op = BRACESNG) then begin //###0.941 if (op = BRACES) or (op = BRACESNG) then begin //###0.941
// show min/max argument of BRACES operator // show min/max argument of BRACES operator
Result := Result + Format ('{%d,%d}', [PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]); Result := Result + Format ('{%d,%d}', [PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]);
inc (s, REBracesArgSz * 2); inc (s, REBracesArgSz * 2);
end; end;
{$IFDEF ComplexBraces} {$IFDEF ComplexBraces}
if (op = LOOP) or (op = LOOPNG) then begin //###0.940 if (op = LOOP) or (op = LOOPNG) then begin //###0.940
Result := Result + Format (' -> (%d) {%d,%d}', [ Result := Result + Format (' -> (%d) {%d,%d}', [
(s - programm - (REOpSz + RENextOffSz)) + PRENextOff (s + 2 * REBracesArgSz)^, (s - programm - (REOpSz + RENextOffSz)) + PRENextOff (AlignToPtr(s + 2 * REBracesArgSz))^,
PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]); PREBracesArg (AlignToInt(s))^, PREBracesArg (AlignToInt(s + REBracesArgSz))^]);
inc (s, 2 * REBracesArgSz + RENextOffSz); inc (s, 2 * REBracesArgSz + RENextOffSz);
end; end;
{$ENDIF} {$ENDIF}