[Shootout-list] FW: Fasta benchmark (perl)
Shoe Lace
shoelace_822695@hotmail.com
Sat, 26 Mar 2005 16:58:19 +1100
This is a multi-part message in MIME format.
------=_NextPart_000_0000_01C53225.031DA3A0
Content-Type: text/plain;
charset="us-ascii"
Content-Transfer-Encoding: 7bit
Please find attached a perl implementation of the Fasta benchmark.
Im not sure how quick it is.. But it does producethe correct output.
I based this code of the C implemetation. But made some changes in the
"print" routines because a) I coult quite understand what it was doing. And
b) I could replicate it anyway.
I havent tested but I think imporvements could be made in that area.
Shoe Lace.
------=_NextPart_000_0000_01C53225.031DA3A0
Content-Type: application/octet-stream;
name="fasta.perl"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
filename="fasta.perl"
#!/cygdrive/c/Perl/bin/perl -w=0A=
#/usr/bin/perl=0A=
# http://www.bagley.org/~doug/shootout/=0A=
# =0A=
# contributed by David Pyke=0A=
=0A=
use constant IM =3D> 139968;=0A=
use constant IA =3D> 3877;=0A=
use constant IC =3D> 29573;=0A=
=0A=
use constant LINELENGTH =3D> 60;=0A=
=0A=
my $LAST =3D 42;=0A=
sub gen_random ($) { ($_[0] * ($LAST =3D ($LAST * IA + IC) % IM)) / IM } =0A=
=0A=
sub makeCumulative(@){=0A=
my(@genelist) =3D @_;=0A=
$cp =3D 0.0;=0A=
=0A=
foreach $gene (@genelist){=0A=
$gene->[1] =3D $cp +=3D $gene->[1];=0A=
}=0A=
}=0A=
=0A=
sub selectRandom(@){=0A=
my(@genelist) =3D @_;=0A=
$r =3D gen_random (1);=0A=
=0A=
if ($r < $genelist[0][1]){ return $genelist[0][0]; }=0A=
=0A=
foreach my $gene (@genelist){=0A=
if ($r < $gene->[1]){ return $gene->[0]; }=0A=
}=0A=
}=0A=
=0A=
=0A=
sub makeRandomFasta($$$@){=0A=
#void makeRandomFasta (const char * id, const char * desc, const struct =
aminoacids * genelist, int count, int n) {=0A=
my ($id,$desc,$n,@genelist) =3D @_;=0A=
=0A=
print ">$id $desc\n";=0A=
$pick=3D"";=0A=
=0A=
#print whole lines=0A=
for my $i (1 .. int($n / LINELENGTH) ){=0A=
for (1 .. LINELENGTH ){=0A=
$pick .=3D selectRandom(@genelist);=0A=
}=0A=
print "$pick\n";=0A=
$pick =3D "";=0A=
}=0A=
#print remaining line (if required)=0A=
if ($n % LINELENGTH > 0 ){=0A=
for (1 .. $n % LINELENGTH ){=0A=
$pick .=3D selectRandom(@genelist);=0A=
}=0A=
print "$pick\n";=0A=
}=0A=
}=0A=
=0A=
sub makeRepeatFasta($$$$){=0A=
my($id,$desc,$s,$n) =3D @_;=0A=
# we want to print $n characters of $s (repeated if nessary) with =
newlines every LINELENGTH=0A=
#void makeRepeatFasta (const char * id, const char * desc, const char * =
s, int n) {=0A=
=0A=
print ">$id $desc\n";=0A=
=0A=
my $ss =3D $s;=0A=
for my $i (1 .. int($n / LINELENGTH) ){=0A=
while (length $ss < LINELENGTH){=0A=
$ss .=3D $s=0A=
}=0A=
print substr( $ss,0,LINELENGTH)."\n";=0A=
$ss =3D substr($ss,LINELENGTH);=0A=
}=0A=
#final_line=0A=
while (length $ss < LINELENGTH){=0A=
$ss .=3D $s=0A=
}=0A=
print substr( $ss, 0, ($n % LINELENGTH)) . "\n";=0A=
=0A=
}=0A=
=0A=
=0A=
my @iub =3D (=0A=
[ 'a', 0.27 ],=0A=
[ 'c', 0.12 ],=0A=
[ 'g', 0.12 ],=0A=
[ 't', 0.27 ],=0A=
[ 'B', 0.02 ],=0A=
[ 'D', 0.02 ],=0A=
[ 'H', 0.02 ],=0A=
[ 'K', 0.02 ],=0A=
[ 'M', 0.02 ],=0A=
[ 'N', 0.02 ],=0A=
[ 'R', 0.02 ],=0A=
[ 'S', 0.02 ],=0A=
[ 'V', 0.02 ],=0A=
[ 'W', 0.02 ],=0A=
[ 'Y', 0.02 ]=0A=
=0A=
); =0A=
=0A=
my @homosapiens =3D (=0A=
[ 'a', 0.3029549426680 ],=0A=
[ 'c', 0.1979883004921 ],=0A=
[ 'g', 0.1975473066391 ],=0A=
[ 't', 0.3015094502008 ],=0A=
);=0A=
=0A=
$alu =3D=0A=
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" .=0A=
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" .=0A=
"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" .=0A=
"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" .=0A=
"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" .=0A=
"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" .=0A=
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";=0A=
=0A=
######################################################################=0A=
#main=0A=
=0A=
my $n =3D ($ARGV[0] || 1000) ;=0A=
=0A=
makeCumulative @iub;=0A=
makeCumulative @homosapiens;=0A=
=0A=
makeRepeatFasta ("ONE", "Homo sapiens alu", $alu, $n*2);=0A=
makeRandomFasta ("TWO", "IUB ambiguity codes", $n*3, @iub);=0A=
makeRandomFasta ("THREE", "Homo sapiens frequency", $n*5, =
@homosapiens);=0A=
=0A=
exit 0;=0A=
=0A=
#END OF FILE =0A=
------=_NextPart_000_0000_01C53225.031DA3A0--