mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-04-05 04:58:00 +02:00
136 lines
3.4 KiB
C
136 lines
3.4 KiB
C
/* This file is part of the software similarity tester SIM.
|
|
Written by Dick Grune, Vrije Universiteit, Amsterdam.
|
|
$Id: algollike.c,v 2.4 2005/02/20 17:02:59 dick Exp $
|
|
*/
|
|
|
|
/* This module implements the routines InitLanguage, MayBeStartOfRun
|
|
and CheckRun for ALGOL-like languages, in which it is meaningful
|
|
and useful to isolate function bodies.
|
|
|
|
It requires the user to define, preferably in Xlang.l, four token
|
|
sets, represented as TOKEN[] and terminated by NOTOKEN:
|
|
|
|
TOKEN NonFinals[] tokens that may not end a chunk
|
|
TOKEN NonInitials[] tokens that may not start a chunk
|
|
TOKEN Openers[] openers of parentheses that must balance
|
|
in functions
|
|
TOKEN Closers[] the corresponding closers, in the same order
|
|
*/
|
|
|
|
#include "options.h"
|
|
#include "token.h"
|
|
#include "algollike.h"
|
|
|
|
/* Arrays for fast identification tests for tokens. Each token is
|
|
identified by its position in the set + 1. For example, if T is
|
|
the n-th Opener, openers[TOKEN2int(tk)] == n+1.
|
|
*/
|
|
static char non_finals[256];
|
|
static char non_initials[256];
|
|
static char openers[256];
|
|
static char closers[256];
|
|
|
|
static void cvt2bittable(const TOKEN *tl, char bt[256]);
|
|
static unsigned int largest_function(const TOKEN *str, unsigned int size);
|
|
|
|
void
|
|
InitLanguage(void) {
|
|
/* convert the token sets to bitmaps */
|
|
cvt2bittable(NonFinals, non_finals);
|
|
cvt2bittable(NonInitials, non_initials);
|
|
cvt2bittable(Openers, openers);
|
|
cvt2bittable(Closers, closers);
|
|
}
|
|
|
|
static void
|
|
cvt2bittable(const TOKEN *tl, char bt[256]) {
|
|
int i;
|
|
int cnt = 1;
|
|
|
|
for (i = 0; !TOKEN_EQ(tl[i], NOTOKEN); i++) {
|
|
bt[TOKEN2int(tl[i])] = cnt++;
|
|
}
|
|
}
|
|
|
|
int
|
|
MayBeStartOfRun(TOKEN tk) {
|
|
return !non_initials[TOKEN2int(tk)];
|
|
}
|
|
|
|
unsigned int
|
|
CheckRun(const TOKEN *str, unsigned int size) {
|
|
/* Checks the run starting at str with length size for
|
|
acceptability in the language. Cuts from the end if
|
|
necessary and returns the accepted length, which may
|
|
be zero.
|
|
*/
|
|
|
|
if (option_set('f')) {
|
|
/* reduce to a function-like form first */
|
|
size = largest_function(str, size);
|
|
}
|
|
|
|
while ( /* there is trailing garbage */
|
|
size != 0 && non_finals[TOKEN2int(str[size-1])]
|
|
) {
|
|
/* remove it */
|
|
size--;
|
|
}
|
|
|
|
return size;
|
|
}
|
|
|
|
static unsigned int
|
|
largest_function(const TOKEN *str, unsigned int size) {
|
|
/* Returns the size of the longest sequence starting at
|
|
str[0] and not containing unbalanced parentheses.
|
|
Does not check the nesting of the parentheses, but then,
|
|
sim is syntax-free anyway.
|
|
*/
|
|
register unsigned int mrb_size = 0; /* most recent balancing size */
|
|
register unsigned int pos;
|
|
register int i;
|
|
int balance_count[256];
|
|
int n_imbalances;
|
|
|
|
/* clear administration */
|
|
n_imbalances = 0;
|
|
for (i = 0; i < 255; i++) {
|
|
balance_count[i] = 0;
|
|
}
|
|
|
|
/* scan str[] and see how far we get */
|
|
for (pos = 0; pos < size; pos++) {
|
|
register int tkval = TOKEN2int(str[pos]);
|
|
register int pp; /* parenthesis position */
|
|
|
|
/* account for openers */
|
|
if ((pp = openers[tkval])) {
|
|
if (balance_count[pp] == 0) {
|
|
/* about to create an imbalance */
|
|
n_imbalances++;
|
|
}
|
|
balance_count[pp]++;
|
|
}
|
|
|
|
/* account for closers */
|
|
if ((pp = closers[tkval])) {
|
|
if (balance_count[pp] == 0) {
|
|
/* this is one Closer too many */
|
|
return mrb_size;
|
|
}
|
|
balance_count[pp]--;
|
|
if (balance_count[pp] == 0) {
|
|
/* we just cleared an imbalance */
|
|
n_imbalances--;
|
|
}
|
|
}
|
|
|
|
if (n_imbalances == 0) {
|
|
/* register balance point */
|
|
mrb_size = pos + 1;
|
|
}
|
|
}
|
|
return mrb_size;
|
|
}
|