mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-07 20:08:12 +02:00

r40619 | pierre | 2018-12-23 16:17:49 +0000 (Sun, 23 Dec 2018) | 8 lines * in lexbase and yaccbase units, use DirectorySeparator instead of '\' in functions path, root and addext. * in plex and pyacc programs, for unix, try to find installed yyparse.cod using path of paramstr(0). ------------------------------------------------------------------------ --- Merging r40619 into '.': U utils/tply/pyacc.pas U utils/tply/plex.pas U utils/tply/yaccbase.pas U utils/tply/lexbase.pas U utils/tply/pyacc.y --- Recording mergeinfo for merge of r40619 into '.': U . git-svn-id: branches/fixes_3_2@40627 -
1169 lines
32 KiB
ObjectPascal
1169 lines
32 KiB
ObjectPascal
{
|
|
This module collects the basic data types and operations used in the TP
|
|
Lex program, and other basic stuff that does not belong anywhere else:
|
|
- Lex input and output files and corresponding bookkeeping information
|
|
used by the parser
|
|
- symbolic character constants
|
|
- dynamically allocated strings and character classes
|
|
- integer sets
|
|
- generic quicksort and hash table routines
|
|
- utilities for list-generating
|
|
- other tiny utilities
|
|
|
|
|
|
Copyright (c) 1990-92 Albert Graef <ag@muwiinfa.geschichte.uni-mainz.de>
|
|
Copyright (C) 1996 Berend de Boer <berend@pobox.com>
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
|
|
$Revision: 1.2 $
|
|
$Modtime: 96-08-01 10:21 $
|
|
|
|
$History: LEXBASE.PAS $
|
|
*
|
|
* ***************** Version 2 *****************
|
|
* User: Berend Date: 96-10-10 Time: 21:16
|
|
* Updated in $/Lex and Yacc/tply
|
|
* Updated for protected mode, windows and Delphi 1.X and 2.X.
|
|
|
|
}
|
|
|
|
|
|
unit LexBase;
|
|
|
|
interface
|
|
|
|
|
|
const
|
|
|
|
(* symbolic character constants: *)
|
|
|
|
bs = #8; (* backspace character *)
|
|
tab = #9; (* tab character *)
|
|
nl = #10; (* newline character *)
|
|
cr = #13; (* carriage return *)
|
|
ff = #12; (* form feed character *)
|
|
|
|
var
|
|
|
|
(* Filenames: *)
|
|
|
|
lfilename : String;
|
|
pasfilename : String;
|
|
lstfilename : String;
|
|
codfilename : String;
|
|
codfilepath1,
|
|
codfilepath2 : String; { Under linux, binary and conf file
|
|
are not in the same path}
|
|
|
|
(* Lex input, output, list and code template file: *)
|
|
|
|
yyin, yylst, yyout, yycod : Text;
|
|
|
|
(* the following values are initialized and updated by the parser: *)
|
|
|
|
line : String; (* current input line *)
|
|
lno : Integer; (* current line number *)
|
|
|
|
const
|
|
|
|
max_elems = 1000; (* maximum size of integer sets *)
|
|
|
|
type
|
|
|
|
(* String and character class pointers: *)
|
|
|
|
StrPtr = ^String;
|
|
CClass = set of Char;
|
|
CClassPtr = ^CClass;
|
|
|
|
(* Sorted integer sets: *)
|
|
|
|
IntSet = array [0..max_elems] of Integer;
|
|
(* word 0 is size *)
|
|
IntSetPtr = ^IntSet;
|
|
|
|
(* Regular expressions: *)
|
|
|
|
RegExpr = ^Node;
|
|
|
|
NodeType = (mark_node, (* marker node *)
|
|
char_node, (* character node *)
|
|
str_node, (* string node *)
|
|
cclass_node, (* character class node *)
|
|
star_node, (* star node *)
|
|
plus_node, (* plus node *)
|
|
opt_node, (* option node *)
|
|
cat_node, (* concatenation node *)
|
|
alt_node); (* alternatives node (|) *)
|
|
|
|
Node = record case node_type : NodeType of
|
|
mark_node : (rule, pos : Integer);
|
|
char_node : (c : Char);
|
|
str_node : (str : StrPtr);
|
|
cclass_node : (cc : CClassPtr);
|
|
star_node, plus_node, opt_node : (r : RegExpr);
|
|
cat_node, alt_node : (r1, r2 : RegExpr);
|
|
end;
|
|
|
|
(* Some standard character classes: *)
|
|
|
|
const
|
|
|
|
letters : CClass = ['A'..'Z','a'..'z','_'];
|
|
digits : CClass = ['0'..'9'];
|
|
alphanums : CClass = ['A'..'Z','a'..'z','_','0'..'9'];
|
|
|
|
(* Operations: *)
|
|
|
|
(* Strings and character classes: *)
|
|
|
|
function newStr(str : String) : StrPtr;
|
|
(* creates a string pointer (only the space actually needed for the given
|
|
string is allocated) *)
|
|
function newCClass(cc : CClass) : CClassPtr;
|
|
(* creates a CClass pointer *)
|
|
|
|
(* Integer sets (set arguments are passed by reference even if they are not
|
|
modified, for greater efficiency): *)
|
|
|
|
procedure empty(var M : IntSet);
|
|
(* initializes M as empty *)
|
|
procedure singleton(var M : IntSet; i : Integer);
|
|
(* initializes M as a singleton set containing the element i *)
|
|
procedure include(var M : IntSet; i : Integer);
|
|
(* include i in M *)
|
|
procedure exclude(var M : IntSet; i : Integer);
|
|
(* exclude i from M *)
|
|
procedure setunion(var M, N : IntSet);
|
|
(* adds N to M *)
|
|
procedure setminus(var M, N : IntSet);
|
|
(* removes N from M *)
|
|
procedure intersect(var M, N : IntSet);
|
|
(* removes from M all elements NOT in N *)
|
|
function size(var M : IntSet) : Integer;
|
|
(* cardinality of set M *)
|
|
function member(i : Integer; var M : IntSet) : Boolean;
|
|
(* tests for membership of i in M *)
|
|
function isempty(var M : IntSet) : Boolean;
|
|
(* checks whether M is an empty set *)
|
|
function equal(var M, N : IntSet) : Boolean;
|
|
(* checks whether M and N are equal *)
|
|
function subseteq(var M, N : IntSet) : Boolean;
|
|
(* checks whether M is a subset of N *)
|
|
function newIntSet : IntSetPtr;
|
|
(* creates a pointer to an empty integer set *)
|
|
|
|
(* Constructors for regular expressions: *)
|
|
|
|
const epsExpr : RegExpr = nil;
|
|
(* empty regular expression *)
|
|
function markExpr(rule, pos : Integer) : RegExpr;
|
|
(* markers are used to denote endmarkers of rules, as well as other
|
|
special positions in rules, e.g. the position of the lookahead
|
|
operator; they are considered nullable; by convention, we use
|
|
the following pos numbers:
|
|
- 0: endmarker position
|
|
- 1: lookahead operator position *)
|
|
function charExpr(c : Char) : RegExpr;
|
|
(* character c *)
|
|
function strExpr(str : StrPtr) : RegExpr;
|
|
(* "str" *)
|
|
function cclassExpr(cc : CClassPtr) : RegExpr;
|
|
(* [str] where str are the literals in cc *)
|
|
function starExpr(r : RegExpr) : RegExpr;
|
|
(* r* *)
|
|
function plusExpr(r : RegExpr) : RegExpr;
|
|
(* r+ *)
|
|
function optExpr(r : RegExpr) : RegExpr;
|
|
(* r? *)
|
|
function mnExpr(r : RegExpr; m, n : Integer) : RegExpr;
|
|
(* constructor expanding expression r{m,n} to the corresponding
|
|
alt expression r^m|...|r^n *)
|
|
function catExpr(r1, r2 : RegExpr) : RegExpr;
|
|
(* r1r2 *)
|
|
function altExpr(r1, r2 : RegExpr) : RegExpr;
|
|
(* r1|r2 *)
|
|
|
|
(* Unifiers for regular expressions:
|
|
The following predicates check whether the specified regular
|
|
expression r is of the denoted type; if the predicate succeeds,
|
|
the other arguments of the predicate are instantiated to the
|
|
corresponding values. *)
|
|
|
|
function is_epsExpr(r : RegExpr) : Boolean;
|
|
(* empty regular expression *)
|
|
function is_markExpr(r : RegExpr; var rule, pos : Integer) : Boolean;
|
|
(* marker expression *)
|
|
function is_charExpr(r : RegExpr; var c : Char) : Boolean;
|
|
(* character c *)
|
|
function is_strExpr(r : RegExpr; var str : StrPtr) : Boolean;
|
|
(* "str" *)
|
|
function is_cclassExpr(r : RegExpr; var cc : CClassPtr) : Boolean;
|
|
(* [str] where str are the literals in cc *)
|
|
function is_starExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
|
|
(* r1* *)
|
|
function is_plusExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
|
|
(* r1+ *)
|
|
function is_optExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
|
|
(* r1? *)
|
|
function is_catExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
|
|
(* r1r2 *)
|
|
function is_altExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
|
|
(* r1|r2 *)
|
|
|
|
(* Quicksort: *)
|
|
|
|
type
|
|
|
|
OrderPredicate = function (i, j : Integer) : Boolean;
|
|
SwapProc = procedure (i, j : Integer);
|
|
|
|
procedure quicksort(lo, hi: Integer;
|
|
less : OrderPredicate;
|
|
swap : SwapProc);
|
|
(* General inplace sorting procedure based on the quicksort algorithm.
|
|
This procedure can be applied to any sequential data structure;
|
|
only the corresponding routines less which compares, and swap which
|
|
swaps two elements i,j of the target data structure, must be
|
|
supplied as appropriate for the target data structure.
|
|
- lo, hi: the lower and higher indices, indicating the elements to
|
|
be sorted
|
|
- less(i, j): should return true if element no. i `is less than'
|
|
element no. j, and false otherwise; any total quasi-ordering may
|
|
be supplied here (if neither less(i, j) nor less(j, i) then elements
|
|
i and j are assumed to be `equal').
|
|
- swap(i, j): should swap the elements with index i and j *)
|
|
|
|
(* Generic hash table routines (based on quadratic rehashing; hence the
|
|
table size must be a prime number): *)
|
|
|
|
type
|
|
|
|
TableLookupProc = function(k : Integer) : String;
|
|
TableEntryProc = procedure(k : Integer; symbol : String);
|
|
|
|
function key(symbol : String;
|
|
table_size : Integer;
|
|
lookup : TableLookupProc;
|
|
entry : TableEntryProc) : Integer;
|
|
(* returns a hash table key for symbol; inserts the symbol into the
|
|
table if necessary
|
|
- table_size is the symbol table size and must be a fixed prime number
|
|
- lookup is the table lookup procedure which should return the string
|
|
at key k in the table ('' if entry is empty)
|
|
- entry is the table entry procedure which is assumed to store the
|
|
given symbol at the given location *)
|
|
|
|
function definedKey(symbol : String;
|
|
table_size : Integer;
|
|
lookup : TableLookupProc) : Boolean;
|
|
(* checks the table to see if symbol is in the table *)
|
|
|
|
(* Utility routines: *)
|
|
|
|
function min(i, j : Integer) : Integer;
|
|
function max(i, j : Integer) : Integer;
|
|
(* minimum and maximum of two integers *)
|
|
function nchars(cc : CClass) : Integer;
|
|
(* returns the cardinality (number of characters) of a character class *)
|
|
function upper(str : String) : String;
|
|
(* returns str converted to uppercase *)
|
|
function strip(str : String) : String;
|
|
(* returns str with leading and trailing blanks stripped off *)
|
|
function blankStr(str : String) : String;
|
|
(* returns string of same length as str, with all non-whitespace characters
|
|
replaced by blanks *)
|
|
function intStr(i : Integer) : String;
|
|
(* returns the string representation of i *)
|
|
function isInt(str : String; var i : Integer) : Boolean;
|
|
(* checks whether str represents an integer; if so, returns the
|
|
value of it in i *)
|
|
function path(filename : String) : String;
|
|
(* returns the path in filename *)
|
|
function root(filename : String) : String;
|
|
(* returns root (i.e. extension stripped from filename) of
|
|
filename *)
|
|
function addExt(filename, ext : String) : String;
|
|
(* if filename has no extension and last filename character is not '.',
|
|
add extension ext to filename *)
|
|
function file_size(filename : String) : LongInt;
|
|
(* determines file size in bytes *)
|
|
|
|
(* Utility functions for list generating routines: *)
|
|
|
|
function charStr(c : char; reserved : CClass) : String;
|
|
(* returns a print name for character c, using the standard escape
|
|
conventions; reserved is the class of `reserved' special characters
|
|
which should be quoted with \ (\ itself is always quoted) *)
|
|
function singleQuoteStr(str : String) : String;
|
|
(* returns print name of str enclosed in single quotes, using the
|
|
standard escape conventions *)
|
|
function doubleQuoteStr(str : String) : String;
|
|
(* returns print name of str enclosed in double quotes, using the
|
|
standard escape conventions *)
|
|
function cclassStr(cc : CClass) : String;
|
|
(* returns print name of character class cc, using the standard escape
|
|
conventions; if cc contains more than 128 elements, the complement
|
|
notation (^) is used; if cc is the class of all (non-null) characters
|
|
except newline, the period notation is used *)
|
|
function cclassOrCharStr(cc : CClass) : String;
|
|
(* returns a print name for character class cc (either cclassStr, or,
|
|
if cc contains only one element, character in single quotes) *)
|
|
function regExprStr(r : RegExpr) : String;
|
|
(* unparses a regular expression *)
|
|
|
|
implementation
|
|
|
|
uses LexMsgs;
|
|
|
|
(* String and character class pointers: *)
|
|
|
|
function newStr(str : String) : StrPtr;
|
|
var strp : StrPtr;
|
|
begin
|
|
getmem(strp, succ(length(str)));
|
|
move(str, strp^, succ(length(str)));
|
|
newStr := strp;
|
|
end(*newStr*);
|
|
|
|
function newCClass(cc : CClass) : CClassPtr;
|
|
var ccp : CClassPtr;
|
|
begin
|
|
new(ccp);
|
|
ccp^ := cc;
|
|
newCClass := ccp;
|
|
end(*newCClass*);
|
|
|
|
(* Integer sets: *)
|
|
|
|
procedure empty(var M : IntSet);
|
|
begin
|
|
M[0] := 0;
|
|
end(*empty*);
|
|
|
|
procedure singleton(var M : IntSet; i : Integer);
|
|
begin
|
|
M[0] := 1; M[1] := i;
|
|
end(*singleton*);
|
|
|
|
procedure include(var M : IntSet; i : Integer);
|
|
var l, r, k : Integer;
|
|
begin
|
|
(* binary search: *)
|
|
l := 1; r := M[0];
|
|
k := l + (r-l) div 2;
|
|
while (l<r) and (M[k]<>i) do
|
|
begin
|
|
if M[k]<i then
|
|
l := succ(k)
|
|
else
|
|
r := pred(k);
|
|
k := l + (r-l) div 2;
|
|
end;
|
|
if (k>M[0]) or (M[k]<>i) then
|
|
begin
|
|
if M[0]>=max_elems then fatal(intset_overflow);
|
|
if (k<=M[0]) and (M[k]<i) then
|
|
begin
|
|
move(M[k+1], M[k+2], (M[0]-k)*sizeOf(Integer));
|
|
M[k+1] := i;
|
|
end
|
|
else
|
|
begin
|
|
move(M[k], M[k+1], (M[0]-k+1)*sizeOf(Integer));
|
|
M[k] := i;
|
|
end;
|
|
inc(M[0]);
|
|
end;
|
|
end(*include*);
|
|
|
|
procedure exclude(var M : IntSet; i : Integer);
|
|
var l, r, k : Integer;
|
|
begin
|
|
(* binary search: *)
|
|
l := 1; r := M[0];
|
|
k := l + (r-l) div 2;
|
|
while (l<r) and (M[k]<>i) do
|
|
begin
|
|
if M[k]<i then
|
|
l := succ(k)
|
|
else
|
|
r := pred(k);
|
|
k := l + (r-l) div 2;
|
|
end;
|
|
if (k<=M[0]) and (M[k]=i) then
|
|
begin
|
|
move(M[k+1], M[k], (M[0]-k)*sizeOf(Integer));
|
|
dec(M[0]);
|
|
end;
|
|
end(*exclude*);
|
|
|
|
procedure setunion(var M, N : IntSet);
|
|
var
|
|
K : IntSet;
|
|
i, j, i_M, i_N : Integer;
|
|
begin
|
|
(* merge sort: *)
|
|
i := 0; i_M := 1; i_N := 1;
|
|
while (i_M<=M[0]) and (i_N<=N[0]) do
|
|
begin
|
|
inc(i);
|
|
if i>max_elems then fatal(intset_overflow);
|
|
if M[i_M]<N[i_N] then
|
|
begin
|
|
K[i] := M[i_M]; inc(i_M);
|
|
end
|
|
else if N[i_N]<M[i_M] then
|
|
begin
|
|
K[i] := N[i_N]; inc(i_N);
|
|
end
|
|
else
|
|
begin
|
|
K[i] := M[i_M]; inc(i_M); inc(i_N);
|
|
end
|
|
end;
|
|
for j := i_M to M[0] do
|
|
begin
|
|
inc(i);
|
|
if i>max_elems then fatal(intset_overflow);
|
|
K[i] := M[j];
|
|
end;
|
|
for j := i_N to N[0] do
|
|
begin
|
|
inc(i);
|
|
if i>max_elems then fatal(intset_overflow);
|
|
K[i] := N[j];
|
|
end;
|
|
K[0] := i;
|
|
move(K, M, succ(i)*sizeOf(Integer));
|
|
end(*setunion*);
|
|
|
|
procedure setminus(var M, N : IntSet);
|
|
var
|
|
K : IntSet;
|
|
i, i_M, i_N : Integer;
|
|
begin
|
|
i := 0; i_N := 1;
|
|
for i_M := 1 to M[0] do
|
|
begin
|
|
while (i_N<=N[0]) and (N[i_N]<M[i_M]) do inc(i_N);
|
|
if (i_N>N[0]) or (N[i_N]>M[i_M]) then
|
|
begin
|
|
inc(i);
|
|
K[i] := M[i_M];
|
|
end
|
|
else
|
|
inc(i_N);
|
|
end;
|
|
K[0] := i;
|
|
move(K, M, succ(i)*sizeOf(Integer));
|
|
end(*setminus*);
|
|
|
|
procedure intersect(var M, N : IntSet);
|
|
var
|
|
K : IntSet;
|
|
i, i_M, i_N : Integer;
|
|
begin
|
|
i := 0; i_N := 1;
|
|
for i_M := 1 to M[0] do
|
|
begin
|
|
while (i_N<=N[0]) and (N[i_N]<M[i_M]) do inc(i_N);
|
|
if (i_N<=N[0]) and (N[i_N]=M[i_M]) then
|
|
begin
|
|
inc(i);
|
|
K[i] := M[i_M];
|
|
inc(i_N);
|
|
end
|
|
end;
|
|
K[0] := i;
|
|
move(K, M, succ(i)*sizeOf(Integer));
|
|
end(*intersect*);
|
|
|
|
function size(var M : IntSet) : Integer;
|
|
begin
|
|
size := M[0]
|
|
end(*size*);
|
|
|
|
function member(i : Integer; var M : IntSet) : Boolean;
|
|
var l, r, k : Integer;
|
|
begin
|
|
(* binary search: *)
|
|
l := 1; r := M[0];
|
|
k := l + (r-l) div 2;
|
|
while (l<r) and (M[k]<>i) do
|
|
begin
|
|
if M[k]<i then
|
|
l := succ(k)
|
|
else
|
|
r := pred(k);
|
|
k := l + (r-l) div 2;
|
|
end;
|
|
member := (k<=M[0]) and (M[k]=i);
|
|
end(*member*);
|
|
|
|
function isempty(var M : IntSet) : Boolean;
|
|
begin
|
|
isempty := M[0]=0
|
|
end(*isempty*);
|
|
|
|
function equal(var M, N : IntSet) : Boolean;
|
|
var i : Integer;
|
|
begin
|
|
if M[0]<>N[0] then
|
|
equal := false
|
|
else
|
|
begin
|
|
for i := 1 to M[0] do
|
|
if M[i]<>N[i] then
|
|
begin
|
|
equal := false;
|
|
exit
|
|
end;
|
|
equal := true
|
|
end
|
|
end(*equal*);
|
|
|
|
function subseteq(var M, N : IntSet) : Boolean;
|
|
var
|
|
i_M, i_N : Integer;
|
|
begin
|
|
if M[0]>N[0] then
|
|
subseteq := false
|
|
else
|
|
begin
|
|
i_N := 1;
|
|
for i_M := 1 to M[0] do
|
|
begin
|
|
while (i_N<=N[0]) and (N[i_N]<M[i_M]) do inc(i_N);
|
|
if (i_N>N[0]) or (N[i_N]>M[i_M]) then
|
|
begin
|
|
subseteq := false;
|
|
exit
|
|
end
|
|
else
|
|
inc(i_N);
|
|
end;
|
|
subseteq := true
|
|
end;
|
|
end(*subseteq*);
|
|
|
|
function newIntSet : IntSetPtr;
|
|
var
|
|
MP : IntSetPtr;
|
|
begin
|
|
getmem(MP, (max_elems+1)*sizeOf(Integer));
|
|
MP^[0] := 0;
|
|
newIntSet := MP
|
|
end(*newIntSet*);
|
|
|
|
(* Constructors for regular expressions: *)
|
|
|
|
function newExpr(node_type : NodeType; n : Integer) : RegExpr;
|
|
(* returns new RegExpr node (n: number of bytes to allocate) *)
|
|
var x : RegExpr;
|
|
begin
|
|
getmem(x, sizeOf(NodeType)+n);
|
|
x^.node_type := node_type;
|
|
newExpr := x
|
|
end(*newExpr*);
|
|
function markExpr(rule, pos : Integer) : RegExpr;
|
|
var x : RegExpr;
|
|
begin
|
|
x := newExpr(mark_node, 2*sizeOf(Integer));
|
|
x^.rule := rule;
|
|
x^.pos := pos;
|
|
markExpr := x
|
|
end(*markExpr*);
|
|
function charExpr(c : Char) : RegExpr;
|
|
var x : RegExpr;
|
|
begin
|
|
x := newExpr(char_node, sizeOf(Char));
|
|
x^.c := c;
|
|
charExpr := x
|
|
end(*charExpr*);
|
|
function strExpr(str : StrPtr) : RegExpr;
|
|
var x : RegExpr;
|
|
begin
|
|
x := newExpr(str_node, sizeOf(StrPtr));
|
|
x^.str := str;
|
|
strExpr := x
|
|
end(*strExpr*);
|
|
function cclassExpr(cc : CClassPtr) : RegExpr;
|
|
var x : RegExpr;
|
|
begin
|
|
x := newExpr(cclass_node, sizeOf(CClassPtr));
|
|
x^.cc := cc;
|
|
cclassExpr := x
|
|
end(*cclassExpr*);
|
|
function starExpr(r : RegExpr) : RegExpr;
|
|
var x : RegExpr;
|
|
begin
|
|
x := newExpr(star_node, sizeOf(RegExpr));
|
|
x^.r := r;
|
|
starExpr := x
|
|
end(*starExpr*);
|
|
function plusExpr(r : RegExpr) : RegExpr;
|
|
var x : RegExpr;
|
|
begin
|
|
x := newExpr(plus_node, sizeOf(RegExpr));
|
|
x^.r := r;
|
|
plusExpr := x
|
|
end(*plusExpr*);
|
|
function optExpr(r : RegExpr) : RegExpr;
|
|
var x : RegExpr;
|
|
begin
|
|
x := newExpr(opt_node, sizeOf(RegExpr));
|
|
x^.r := r;
|
|
optExpr := x
|
|
end(*optExpr*);
|
|
function mnExpr(r : RegExpr; m, n : Integer) : RegExpr;
|
|
var
|
|
ri, rmn : RegExpr;
|
|
i : Integer;
|
|
begin
|
|
if (m>n) or (n=0) then
|
|
mnExpr := epsExpr
|
|
else
|
|
begin
|
|
(* construct r^m: *)
|
|
if m=0 then
|
|
ri := epsExpr
|
|
else
|
|
begin
|
|
ri := r;
|
|
for i := 2 to m do
|
|
ri := catExpr(ri, r);
|
|
end;
|
|
(* construct r{m,n}: *)
|
|
rmn := ri; (* r{m,n} := r^m *)
|
|
for i := m+1 to n do
|
|
begin
|
|
if is_epsExpr(ri) then
|
|
ri := r
|
|
else
|
|
ri := catExpr(ri, r);
|
|
rmn := altExpr(rmn, ri) (* r{m,n} := r{m,n} | r^i,
|
|
i=m+1,...,n *)
|
|
end;
|
|
mnExpr := rmn
|
|
end
|
|
end(*mnExpr*);
|
|
function catExpr(r1, r2 : RegExpr) : RegExpr;
|
|
var x : RegExpr;
|
|
begin
|
|
x := newExpr(cat_node, 2*sizeOf(RegExpr));
|
|
x^.r1 := r1;
|
|
x^.r2 := r2;
|
|
catExpr := x
|
|
end(*catExpr*);
|
|
function altExpr(r1, r2 : RegExpr) : RegExpr;
|
|
var x : RegExpr;
|
|
begin
|
|
x := newExpr(alt_node, 2*sizeOf(RegExpr));
|
|
x^.r1 := r1;
|
|
x^.r2 := r2;
|
|
altExpr := x
|
|
end(*altExpr*);
|
|
|
|
(* Unifiers for regular expressions: *)
|
|
|
|
function is_epsExpr(r : RegExpr) : Boolean;
|
|
begin
|
|
is_epsExpr := r=epsExpr
|
|
end(*is_epsExpr*);
|
|
function is_markExpr(r : RegExpr; var rule, pos : Integer) : Boolean;
|
|
begin
|
|
if r=epsExpr then
|
|
is_markExpr := false
|
|
else if r^.node_type=mark_node then
|
|
begin
|
|
is_markExpr := true;
|
|
rule := r^.rule;
|
|
pos := r^.pos;
|
|
end
|
|
else
|
|
is_markExpr := false
|
|
end(*is_markExpr*);
|
|
function is_charExpr(r : RegExpr; var c : Char) : Boolean;
|
|
begin
|
|
if r=epsExpr then
|
|
is_charExpr := false
|
|
else if r^.node_type=char_node then
|
|
begin
|
|
is_charExpr := true;
|
|
c := r^.c
|
|
end
|
|
else
|
|
is_charExpr := false
|
|
end(*is_charExpr*);
|
|
function is_strExpr(r : RegExpr; var str : StrPtr) : Boolean;
|
|
begin
|
|
if r=epsExpr then
|
|
is_strExpr := false
|
|
else if r^.node_type=str_node then
|
|
begin
|
|
is_strExpr := true;
|
|
str := r^.str;
|
|
end
|
|
else
|
|
is_strExpr := false
|
|
end(*is_strExpr*);
|
|
function is_cclassExpr(r : RegExpr; var cc : CClassPtr) : Boolean;
|
|
begin
|
|
if r=epsExpr then
|
|
is_cclassExpr := false
|
|
else if r^.node_type=cclass_node then
|
|
begin
|
|
is_cclassExpr := true;
|
|
cc := r^.cc
|
|
end
|
|
else
|
|
is_cclassExpr := false
|
|
end(*is_cclassExpr*);
|
|
function is_starExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
|
|
begin
|
|
if r=epsExpr then
|
|
is_starExpr := false
|
|
else if r^.node_type=star_node then
|
|
begin
|
|
is_starExpr := true;
|
|
r1 := r^.r
|
|
end
|
|
else
|
|
is_starExpr := false
|
|
end(*is_starExpr*);
|
|
function is_plusExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
|
|
begin
|
|
if r=epsExpr then
|
|
is_plusExpr := false
|
|
else if r^.node_type=plus_node then
|
|
begin
|
|
is_plusExpr := true;
|
|
r1 := r^.r
|
|
end
|
|
else
|
|
is_plusExpr := false
|
|
end(*is_plusExpr*);
|
|
function is_optExpr(r : RegExpr; var r1 : RegExpr) : Boolean;
|
|
begin
|
|
if r=epsExpr then
|
|
is_optExpr := false
|
|
else if r^.node_type=opt_node then
|
|
begin
|
|
is_optExpr := true;
|
|
r1 := r^.r
|
|
end
|
|
else
|
|
is_optExpr := false
|
|
end(*is_optExpr*);
|
|
function is_catExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
|
|
begin
|
|
if r=epsExpr then
|
|
is_catExpr := false
|
|
else if r^.node_type=cat_node then
|
|
begin
|
|
is_catExpr := true;
|
|
r1 := r^.r1;
|
|
r2 := r^.r2
|
|
end
|
|
else
|
|
is_catExpr := false
|
|
end(*is_catExpr*);
|
|
function is_altExpr(r : RegExpr; var r1, r2 : RegExpr) : Boolean;
|
|
begin
|
|
if r=epsExpr then
|
|
is_altExpr := false
|
|
else if r^.node_type=alt_node then
|
|
begin
|
|
is_altExpr := true;
|
|
r1 := r^.r1;
|
|
r2 := r^.r2
|
|
end
|
|
else
|
|
is_altExpr := false
|
|
end(*is_altExpr*);
|
|
|
|
(* Quicksort: *)
|
|
|
|
procedure quicksort(lo, hi: Integer;
|
|
less : OrderPredicate;
|
|
swap : SwapProc);
|
|
(* derived from the quicksort routine in QSORT.PAS in the Turbo Pascal
|
|
distribution *)
|
|
procedure sort(l, r: Integer);
|
|
var i, j, k : Integer;
|
|
begin
|
|
i := l; j := r; k := (l+r) DIV 2;
|
|
repeat
|
|
while less(i, k) do inc(i);
|
|
while less(k, j) do dec(j);
|
|
if i<=j then
|
|
begin
|
|
swap(i, j);
|
|
if k=i then k := j (* pivot element swapped! *)
|
|
else if k=j then k := i;
|
|
inc(i); dec(j);
|
|
end;
|
|
until i>j;
|
|
if l<j then sort(l,j);
|
|
if i<r then sort(i,r);
|
|
end(*sort*);
|
|
begin
|
|
if lo<hi then sort(lo,hi);
|
|
end(*quicksort*);
|
|
|
|
(* Generic hash table routines: *)
|
|
|
|
function hash(str : String; table_size : Integer) : Integer;
|
|
(* computes a hash key for str *)
|
|
var i, key : Integer;
|
|
begin
|
|
key := 0;
|
|
for i := 1 to length(str) do
|
|
inc(key, ord(str[i]));
|
|
hash := key mod table_size + 1;
|
|
end(*hash*);
|
|
|
|
procedure newPos(var pos, incr, count : Integer; table_size : Integer);
|
|
(* computes a new position in the table (quadratic collision strategy)
|
|
- pos: current position (+inc)
|
|
- incr: current increment (+2)
|
|
- count: current number of collisions (+1)
|
|
quadratic collision formula for position of str after n collisions:
|
|
pos(str, n) = (hash(str)+n^2) mod table_size +1
|
|
note that n^2-(n-1)^2 = 2n-1 <=> n^2 = (n-1)^2 + (2n-1) for n>0,
|
|
i.e. the increment inc=2n-1 increments by two in each collision *)
|
|
begin
|
|
inc(count);
|
|
inc(pos, incr);
|
|
if pos>table_size then pos := pos mod table_size + 1;
|
|
inc(incr, 2)
|
|
end(*newPos*);
|
|
|
|
function key(symbol : String;
|
|
table_size : Integer;
|
|
lookup : TableLookupProc;
|
|
entry : TableEntryProc) : Integer;
|
|
var pos, incr, count : Integer;
|
|
begin
|
|
pos := hash(symbol, table_size);
|
|
incr := 1;
|
|
count := 0;
|
|
while count<=table_size do
|
|
if lookup(pos)='' then
|
|
begin
|
|
entry(pos, symbol);
|
|
key := pos;
|
|
exit
|
|
end
|
|
else if lookup(pos)=symbol then
|
|
begin
|
|
key := pos;
|
|
exit
|
|
end
|
|
else
|
|
newPos(pos, incr, count, table_size);
|
|
fatal(sym_table_overflow)
|
|
end(*key*);
|
|
|
|
function definedKey(symbol : String;
|
|
table_size : Integer;
|
|
lookup : TableLookupProc) : Boolean;
|
|
var pos, incr, count : Integer;
|
|
begin
|
|
pos := hash(symbol, table_size);
|
|
incr := 1;
|
|
count := 0;
|
|
while count<=table_size do
|
|
if lookup(pos)='' then
|
|
begin
|
|
definedKey := false;
|
|
exit
|
|
end
|
|
else if lookup(pos)=symbol then
|
|
begin
|
|
definedKey := true;
|
|
exit
|
|
end
|
|
else
|
|
newPos(pos, incr, count, table_size);
|
|
definedKey := false
|
|
end(*definedKey*);
|
|
|
|
(* Utility routines: *)
|
|
|
|
function min(i, j : Integer) : Integer;
|
|
begin
|
|
if i<j then
|
|
min := i
|
|
else
|
|
min := j
|
|
end(*min*);
|
|
function max(i, j : Integer) : Integer;
|
|
begin
|
|
if i>j then
|
|
max := i
|
|
else
|
|
max := j
|
|
end(*max*);
|
|
function nchars(cc : CClass) : Integer;
|
|
var
|
|
c : Char;
|
|
count : Integer;
|
|
begin
|
|
count := 0;
|
|
for c := #0 to #255 do if c in cc then inc(count);
|
|
nchars := count;
|
|
end(*nchars*);
|
|
function upper(str : String) : String;
|
|
var i : Integer;
|
|
begin
|
|
for i := 1 to length(str) do
|
|
str[i] := upCase(str[i]);
|
|
upper := str
|
|
end(*upper*);
|
|
function strip(str : String) : String;
|
|
begin
|
|
while (length(str)>0) and ((str[1]=' ') or (str[1]=tab)) do
|
|
delete(str, 1, 1);
|
|
while (length(str)>0) and
|
|
((str[length(str)]= ' ') or
|
|
(str[length(str)]=tab)) do
|
|
delete(str, length(str), 1);
|
|
strip := str;
|
|
end(*strip*);
|
|
function blankStr(str : String) : String;
|
|
var i : Integer;
|
|
begin
|
|
for i := 1 to length(str) do
|
|
if str[i]<>tab then str[i] := ' ';
|
|
blankStr := str;
|
|
end(*blankStr*);
|
|
function intStr(i : Integer) : String;
|
|
var s : String;
|
|
begin
|
|
str(i, s);
|
|
intStr := s
|
|
end(*intStr*);
|
|
function isInt(str : String; var i : Integer) : Boolean;
|
|
var res : Integer;
|
|
begin
|
|
val(str, i, res);
|
|
isInt := res = 0;
|
|
end(*isInt*);
|
|
function path(filename : String) : String;
|
|
var i : Integer;
|
|
begin
|
|
i := length(filename);
|
|
while (i>0) and (filename[i]<>DirectorySeparator) and (filename[i]<>':') do
|
|
dec(i);
|
|
path := copy(filename, 1, i);
|
|
end(*path*);
|
|
function root(filename : String) : String;
|
|
var
|
|
i : Integer;
|
|
begin
|
|
root := filename;
|
|
for i := length(filename) downto 1 do
|
|
case filename[i] of
|
|
'.' :
|
|
begin
|
|
root := copy(filename, 1, i-1);
|
|
exit
|
|
end;
|
|
DirectorySeparator : exit;
|
|
else
|
|
end;
|
|
end(*root*);
|
|
function addExt(filename, ext : String) : String;
|
|
(* implemented with goto for maximum efficiency *)
|
|
label x;
|
|
var
|
|
i : Integer;
|
|
begin
|
|
addExt := filename;
|
|
for i := length(filename) downto 1 do
|
|
case filename[i] of
|
|
'.' : exit;
|
|
DirectorySeparator: goto x;
|
|
else
|
|
end;
|
|
x : addExt := filename+'.'+ext
|
|
end(*addExt*);
|
|
function file_size(filename : String) : LongInt;
|
|
var f : File;
|
|
begin
|
|
assign(f, filename);
|
|
reset(f, 1);
|
|
if ioresult=0 then
|
|
file_size := fileSize(f)
|
|
else
|
|
file_size := 0;
|
|
close(f);
|
|
end(*file_size*);
|
|
|
|
(* Utility functions for list generating routines: *)
|
|
|
|
function charStr(c : char; reserved : CClass) : String;
|
|
function octStr(c : char) : String;
|
|
(* return octal string representation of character c *)
|
|
begin
|
|
octStr := intStr(ord(c) div 64)+intStr((ord(c) mod 64) div 8)+
|
|
intStr(ord(c) mod 8);
|
|
end(*octStr*);
|
|
begin
|
|
case c of
|
|
#0..#7, (* nonprintable characters *)
|
|
#11,#14..#31,
|
|
#127..#255 : charStr := '\'+octStr(c);
|
|
bs : charStr := '\b';
|
|
tab : charStr := '\t';
|
|
nl : charStr := '\n';
|
|
cr : charStr := '\c';
|
|
ff : charStr := '\f';
|
|
'\' : charStr := '\\';
|
|
else if c in reserved then
|
|
charStr := '\'+c
|
|
else
|
|
charStr := c
|
|
end
|
|
end(*charStr*);
|
|
|
|
function singleQuoteStr(str : String) : String;
|
|
var
|
|
i : Integer;
|
|
str1 : String;
|
|
begin
|
|
str1 := '';
|
|
for i := 1 to length(str) do
|
|
str1 := str1+charStr(str[i], ['''']);
|
|
singleQuoteStr := ''''+str1+''''
|
|
end(*singleQuoteStr*);
|
|
|
|
function doubleQuoteStr(str : String) : String;
|
|
var
|
|
i : Integer;
|
|
str1 : String;
|
|
begin
|
|
str1 := '';
|
|
for i := 1 to length(str) do
|
|
str1 := str1+charStr(str[i], ['"']);
|
|
doubleQuoteStr := '"'+str1+'"'
|
|
end(*doubleQuoteStr*);
|
|
|
|
function cclassStr(cc : CClass) : String;
|
|
const
|
|
reserved : CClass = ['^','-',']'];
|
|
MaxChar = #255;
|
|
var
|
|
c1, c2 : Char;
|
|
str : String;
|
|
Quit: Boolean;
|
|
begin
|
|
if cc=[#1..#255]-[nl] then
|
|
cclassStr := '.'
|
|
else
|
|
begin
|
|
str := '';
|
|
if nchars(cc)>128 then
|
|
begin
|
|
str := '^';
|
|
cc := [#0..#255]-cc;
|
|
end;
|
|
c1 := chr(0);
|
|
Quit := False;
|
|
while not Quit do begin
|
|
if c1 in cc then begin
|
|
c2 := c1;
|
|
while (c2<MaxChar) and (succ(c2) in cc) do
|
|
c2 := succ(c2);
|
|
if c1=c2
|
|
then str := str+charStr(c1, reserved)
|
|
else
|
|
if c2=succ(c1)
|
|
then str := str+charStr(c1, reserved)+charStr(c2, reserved)
|
|
else str := str+charStr(c1, reserved)+'-'+charStr(c2, reserved);
|
|
c1 := c2;
|
|
end;
|
|
Quit := c1 = MaxChar;
|
|
if not Quit then
|
|
c1 := Succ(c1);
|
|
end; { of while }
|
|
cclassStr := '['+str+']'
|
|
end
|
|
end(*cclassStr*);
|
|
|
|
function cclassOrCharStr(cc : CClass) : String;
|
|
var count : Integer;
|
|
c, c1 : Char;
|
|
begin
|
|
count := 0;
|
|
for c := #0 to #255 do
|
|
if c in cc then
|
|
begin
|
|
c1 := c;
|
|
inc(count);
|
|
if count>1 then
|
|
begin
|
|
cclassOrCharStr := cclassStr(cc);
|
|
exit;
|
|
end;
|
|
end;
|
|
if count=1 then
|
|
cclassOrCharStr := singleQuoteStr(c1)
|
|
else
|
|
cclassOrCharStr := '[]';
|
|
end(*cclassOrCharStr*);
|
|
|
|
function regExprStr(r : RegExpr) : String;
|
|
function unparseExpr(r : RegExpr) : String;
|
|
var rule_no, pos : Integer;
|
|
c : Char;
|
|
str : StrPtr;
|
|
cc : CClassPtr;
|
|
r1, r2 : RegExpr;
|
|
begin
|
|
if is_epsExpr(r) then
|
|
unparseExpr := ''
|
|
else if is_markExpr(r, rule_no, pos) then
|
|
unparseExpr := '#('+intStr(rule_no)+','+intStr(pos)+')'
|
|
else if is_charExpr(r, c) then
|
|
unparseExpr := charStr(c, [ '"','.','^','$','[',']','*','+','?',
|
|
'{','}','|','(',')','/','<','>'])
|
|
else if is_strExpr(r, str) then
|
|
unparseExpr := doubleQuoteStr(str^)
|
|
else if is_cclassExpr(r, cc) then
|
|
unparseExpr := cclassStr(cc^)
|
|
else if is_starExpr(r, r1) then
|
|
unparseExpr := unparseExpr(r1)+'*'
|
|
else if is_plusExpr(r, r1) then
|
|
unparseExpr := unparseExpr(r1)+'+'
|
|
else if is_optExpr(r, r1) then
|
|
unparseExpr := unparseExpr(r1)+'?'
|
|
else if is_catExpr(r, r1, r2) then
|
|
unparseExpr := '('+unparseExpr(r1)+unparseExpr(r2)+')'
|
|
else if is_altExpr(r, r1, r2) then
|
|
unparseExpr := '('+unparseExpr(r1)+'|'+unparseExpr(r2)+')'
|
|
else
|
|
fatal('invalid expression');
|
|
end(*unparseExpr*);
|
|
begin
|
|
regExprStr := unparseExpr(r);
|
|
end(*regExprStr*);
|
|
|
|
end(*LexBase*).
|