mirror of
https://gitlab.com/freepascal.org/fpc/source.git
synced 2025-08-18 05:49:23 +02:00
* regex implementation from S. Fisher
git-svn-id: trunk@9142 -
This commit is contained in:
parent
5284b1694f
commit
b741d5f2a3
@ -1,45 +1,132 @@
|
|||||||
{$mode objfpc}
|
{ The Computer Language Benchmarks Game
|
||||||
{$H-}
|
http://shootout.alioth.debian.org
|
||||||
uses
|
|
||||||
regexpr;
|
|
||||||
var
|
|
||||||
buffer : PChar;
|
|
||||||
buffer2 : ansistring;
|
|
||||||
seqlen : longint;
|
|
||||||
TextBuf: array[0..$FFF] of byte;
|
|
||||||
|
|
||||||
procedure Load;
|
contributed by Steve Fisher
|
||||||
|
|
||||||
|
compile with
|
||||||
|
fpc -O3 regex-dna.pp
|
||||||
|
}
|
||||||
|
|
||||||
|
uses regexpr, strutils;
|
||||||
|
|
||||||
|
function replace_matches( const target: pchar; const repl: pchar;
|
||||||
|
const str: ansistring; var dest: ansistring ): longint;
|
||||||
var
|
var
|
||||||
len : longint;
|
engine : tRegexprEngine;
|
||||||
buffersize, bufferptr: longint;
|
substr : ansistring;
|
||||||
s : Shortstring;
|
count, index, size : longint;
|
||||||
begin
|
begin
|
||||||
buffersize:=1024;
|
if not GenerateRegExprEngine( target, [], engine) then
|
||||||
buffer:=getmem(buffersize);
|
begin
|
||||||
bufferptr :=0;
|
writeln( 'Failed to generate regex. engine for "',target,'".' );
|
||||||
while not eof do begin
|
halt(1)
|
||||||
readln(s);
|
|
||||||
len:=length(s);
|
|
||||||
if (bufferptr+len+1)>buffersize then begin
|
|
||||||
inc(buffersize,buffersize);
|
|
||||||
reallocmem(buffer,buffersize);
|
|
||||||
end;
|
|
||||||
move (s[1],buffer[bufferptr],len);
|
|
||||||
inc(bufferptr,len);
|
|
||||||
end;
|
end;
|
||||||
buffer[bufferptr] := #0;
|
count := 0;
|
||||||
seqlen:=bufferptr;
|
dest := '';
|
||||||
writeln(seqlen);
|
substr := str;
|
||||||
|
while length(substr) > 0 do
|
||||||
|
begin
|
||||||
|
if RegExprPos(engine, pchar(substr), index, size ) then
|
||||||
|
begin
|
||||||
|
count += 1;
|
||||||
|
dest += ansiLeftStr( substr, index) + repl;
|
||||||
|
substr := ansiRightStr(substr,length(substr)-index-size);
|
||||||
|
end
|
||||||
|
else
|
||||||
|
break
|
||||||
|
end;
|
||||||
|
DestroyRegExprEngine( engine );
|
||||||
|
dest += substr;
|
||||||
|
exit(count)
|
||||||
end;
|
end;
|
||||||
|
|
||||||
procedure ReplaceNewline;
|
function count_matches( target: pchar; const str: ansistring ): longint;
|
||||||
|
var
|
||||||
|
engine : tRegexprEngine;
|
||||||
|
substr : ansistring;
|
||||||
|
count, index, size : longint;
|
||||||
|
begin
|
||||||
|
if not GenerateRegExprEngine( target, [ref_caseinsensitive], engine) then
|
||||||
begin
|
begin
|
||||||
GenerateRegExprEngine('>.*\n|\n',[],RegExprEngine);
|
writeln( 'Failed to generate regex. engine for "',target,'".' );
|
||||||
writeln(RegExprReplace(RegExprEngine,buffer,'',buffer2));
|
halt(1)
|
||||||
DestroyRegExprEngine(RegExprEngine);
|
|
||||||
end;
|
end;
|
||||||
|
count := 0;
|
||||||
|
substr := str;
|
||||||
|
while length(substr) > 0 do
|
||||||
|
begin
|
||||||
|
if RegExprPos(engine, pchar(substr), index, size ) then
|
||||||
|
begin
|
||||||
|
count += 1;
|
||||||
|
substr := ansiRightStr(substr,length(substr)-index-size);
|
||||||
|
end
|
||||||
|
else
|
||||||
|
break
|
||||||
|
end;
|
||||||
|
DestroyRegExprEngine( engine );
|
||||||
|
exit(count)
|
||||||
|
end;
|
||||||
|
|
||||||
|
const
|
||||||
|
patterns : array[1..9] of pchar =
|
||||||
|
(
|
||||||
|
'(agggtaaa)|(tttaccct)',
|
||||||
|
'([cgt]gggtaaa)|(tttaccc[acg])',
|
||||||
|
'(a[act]ggtaaa)|(tttacc[agt]t)',
|
||||||
|
'(ag[act]gtaaa)|(tttac[agt]ct)',
|
||||||
|
'(agg[act]taaa)|(ttta[agt]cct)',
|
||||||
|
'(aggg[acg]aaa)|(ttt[cgt]ccct)',
|
||||||
|
'(agggt[cgt]aa)|(tt[acg]accct)',
|
||||||
|
'(agggta[cgt]a)|(t[acg]taccct)',
|
||||||
|
'(agggtaa[cgt])|([acg]ttaccct)'
|
||||||
|
);
|
||||||
|
replacements : array[1..11,1..2] of pchar =
|
||||||
|
(
|
||||||
|
('B', '(c|g|t)'), ('D', '(a|g|t)'), ('H', '(a|c|t)'), ('K', '(g|t)'),
|
||||||
|
('M', '(a|c)'), ('N', '(a|c|g|t)'), ('R', '(a|g)'), ('S', '(c|t)'),
|
||||||
|
('V', '(a|c|g)'), ('W', '(a|t)'), ('Y', '(c|t)')
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
var
|
||||||
|
pattern : pchar;
|
||||||
|
sequence, new_seq : ansiString;
|
||||||
|
line, tmp: string[255];
|
||||||
|
letter, repl : pchar;
|
||||||
|
i, count, init_length, clean_length, reps : longint;
|
||||||
|
|
||||||
begin
|
begin
|
||||||
SetTextBuf(input, TextBuf, sizeof(TextBuf));
|
sequence := '';
|
||||||
Load;
|
init_length := 0;
|
||||||
|
while not eof do
|
||||||
|
begin
|
||||||
|
readln( line );
|
||||||
|
init_length += length( line ) + 1;
|
||||||
|
if line[1] <> '>' then
|
||||||
|
sequence := sequence + line;
|
||||||
|
end;
|
||||||
|
clean_length := length(sequence);
|
||||||
|
|
||||||
|
for i := low(patterns) to high(patterns) do
|
||||||
|
begin
|
||||||
|
pattern := patterns[i];
|
||||||
|
count := count_matches( pattern, sequence );
|
||||||
|
tmp := delChars( delChars(pattern,'('), ')' );
|
||||||
|
writeln( tmp, ' ', count);
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
// Replace.
|
||||||
|
for i := low(replacements) to high(replacements) do
|
||||||
|
begin
|
||||||
|
letter := replacements[i][1]; repl := replacements[i][2];
|
||||||
|
reps := replace_matches(letter,repl,sequence,new_seq);
|
||||||
|
sequence := new_seq;
|
||||||
|
end;
|
||||||
|
|
||||||
|
|
||||||
|
writeln;
|
||||||
|
writeln( init_length );
|
||||||
|
writeln( clean_length );
|
||||||
|
writeln( length(sequence) );
|
||||||
end.
|
end.
|
||||||
|
Loading…
Reference in New Issue
Block a user