* regex implementation from S. Fisher

git-svn-id: trunk@9142 -
This commit is contained in:
peter 2007-11-05 19:40:33 +00:00
parent 5284b1694f
commit b741d5f2a3

View File

@ -1,45 +1,132 @@
{$mode objfpc}
{$H-}
uses
regexpr;
var
buffer : PChar;
buffer2 : ansistring;
seqlen : longint;
TextBuf: array[0..$FFF] of byte;
{ The Computer Language Benchmarks Game
http://shootout.alioth.debian.org
procedure Load;
contributed by Steve Fisher
compile with
fpc -O3 regex-dna.pp
}
uses regexpr, strutils;
function replace_matches( const target: pchar; const repl: pchar;
const str: ansistring; var dest: ansistring ): longint;
var
len : longint;
buffersize, bufferptr: longint;
s : Shortstring;
engine : tRegexprEngine;
substr : ansistring;
count, index, size : longint;
begin
buffersize:=1024;
buffer:=getmem(buffersize);
bufferptr :=0;
while not eof do begin
readln(s);
len:=length(s);
if (bufferptr+len+1)>buffersize then begin
inc(buffersize,buffersize);
reallocmem(buffer,buffersize);
end;
move (s[1],buffer[bufferptr],len);
inc(bufferptr,len);
if not GenerateRegExprEngine( target, [], engine) then
begin
writeln( 'Failed to generate regex. engine for "',target,'".' );
halt(1)
end;
buffer[bufferptr] := #0;
seqlen:=bufferptr;
writeln(seqlen);
count := 0;
dest := '';
substr := str;
while length(substr) > 0 do
begin
if RegExprPos(engine, pchar(substr), index, size ) then
begin
count += 1;
dest += ansiLeftStr( substr, index) + repl;
substr := ansiRightStr(substr,length(substr)-index-size);
end
else
break
end;
DestroyRegExprEngine( engine );
dest += substr;
exit(count)
end;
procedure ReplaceNewline;
function count_matches( target: pchar; const str: ansistring ): longint;
var
engine : tRegexprEngine;
substr : ansistring;
count, index, size : longint;
begin
if not GenerateRegExprEngine( target, [ref_caseinsensitive], engine) then
begin
GenerateRegExprEngine('>.*\n|\n',[],RegExprEngine);
writeln(RegExprReplace(RegExprEngine,buffer,'',buffer2));
DestroyRegExprEngine(RegExprEngine);
writeln( 'Failed to generate regex. engine for "',target,'".' );
halt(1)
end;
count := 0;
substr := str;
while length(substr) > 0 do
begin
if RegExprPos(engine, pchar(substr), index, size ) then
begin
count += 1;
substr := ansiRightStr(substr,length(substr)-index-size);
end
else
break
end;
DestroyRegExprEngine( engine );
exit(count)
end;
const
patterns : array[1..9] of pchar =
(
'(agggtaaa)|(tttaccct)',
'([cgt]gggtaaa)|(tttaccc[acg])',
'(a[act]ggtaaa)|(tttacc[agt]t)',
'(ag[act]gtaaa)|(tttac[agt]ct)',
'(agg[act]taaa)|(ttta[agt]cct)',
'(aggg[acg]aaa)|(ttt[cgt]ccct)',
'(agggt[cgt]aa)|(tt[acg]accct)',
'(agggta[cgt]a)|(t[acg]taccct)',
'(agggtaa[cgt])|([acg]ttaccct)'
);
replacements : array[1..11,1..2] of pchar =
(
('B', '(c|g|t)'), ('D', '(a|g|t)'), ('H', '(a|c|t)'), ('K', '(g|t)'),
('M', '(a|c)'), ('N', '(a|c|g|t)'), ('R', '(a|g)'), ('S', '(c|t)'),
('V', '(a|c|g)'), ('W', '(a|t)'), ('Y', '(c|t)')
);
var
pattern : pchar;
sequence, new_seq : ansiString;
line, tmp: string[255];
letter, repl : pchar;
i, count, init_length, clean_length, reps : longint;
begin
SetTextBuf(input, TextBuf, sizeof(TextBuf));
Load;
sequence := '';
init_length := 0;
while not eof do
begin
readln( line );
init_length += length( line ) + 1;
if line[1] <> '>' then
sequence := sequence + line;
end;
clean_length := length(sequence);
for i := low(patterns) to high(patterns) do
begin
pattern := patterns[i];
count := count_matches( pattern, sequence );
tmp := delChars( delChars(pattern,'('), ')' );
writeln( tmp, ' ', count);
end;
// Replace.
for i := low(replacements) to high(replacements) do
begin
letter := replacements[i][1]; repl := replacements[i][2];
reps := replace_matches(letter,repl,sequence,new_seq);
sequence := new_seq;
end;
writeln;
writeln( init_length );
writeln( clean_length );
writeln( length(sequence) );
end.