[Shootout-list] FW: Fasta benchmark (perl)

Shoe Lace shoelace_822695@hotmail.com
Sat, 26 Mar 2005 16:58:19 +1100


This is a multi-part message in MIME format.

------=_NextPart_000_0000_01C53225.031DA3A0
Content-Type: text/plain;
	charset="us-ascii"
Content-Transfer-Encoding: 7bit



Please find attached a perl implementation of the Fasta benchmark.

Im not sure how quick it is.. But it does producethe correct output.

I based this code of the C implemetation.  But made some changes in the
"print" routines because a) I coult quite understand what it was doing. And
b) I could replicate it anyway.

I havent tested but I think imporvements could be made in that area.

Shoe Lace.

------=_NextPart_000_0000_01C53225.031DA3A0
Content-Type: application/octet-stream;
	name="fasta.perl"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
	filename="fasta.perl"

#!/cygdrive/c/Perl/bin/perl -w=0A=
#/usr/bin/perl=0A=
# http://www.bagley.org/~doug/shootout/=0A=
# =0A=
# contributed by David Pyke=0A=
=0A=
use constant IM =3D> 139968;=0A=
use constant IA =3D> 3877;=0A=
use constant IC =3D> 29573;=0A=
=0A=
use constant LINELENGTH =3D> 60;=0A=
=0A=
my $LAST =3D 42;=0A=
sub gen_random ($) { ($_[0] * ($LAST =3D ($LAST * IA + IC) % IM)) / IM } =0A=
=0A=
sub makeCumulative(@){=0A=
	my(@genelist) =3D @_;=0A=
    $cp =3D 0.0;=0A=
=0A=
	foreach $gene (@genelist){=0A=
		 $gene->[1] =3D $cp +=3D $gene->[1];=0A=
	}=0A=
}=0A=
=0A=
sub selectRandom(@){=0A=
	my(@genelist) =3D @_;=0A=
    $r =3D gen_random (1);=0A=
=0A=
    if ($r < $genelist[0][1]){ return $genelist[0][0]; }=0A=
=0A=
	foreach my $gene (@genelist){=0A=
		if ($r < $gene->[1]){ return $gene->[0]; }=0A=
	}=0A=
}=0A=
=0A=
=0A=
sub makeRandomFasta($$$@){=0A=
#void makeRandomFasta (const char * id, const char * desc, const struct =
aminoacids * genelist, int count, int n) {=0A=
	my ($id,$desc,$n,@genelist) =3D @_;=0A=
=0A=
	print ">$id $desc\n";=0A=
	$pick=3D"";=0A=
=0A=
	#print whole lines=0A=
	for my $i (1 .. int($n / LINELENGTH) ){=0A=
	   for (1 ..  LINELENGTH ){=0A=
		   $pick .=3D selectRandom(@genelist);=0A=
	   }=0A=
	   print "$pick\n";=0A=
	   $pick =3D "";=0A=
   }=0A=
   	#print remaining line (if required)=0A=
	   if ($n % LINELENGTH  > 0 ){=0A=
		   for (1 ..  $n % LINELENGTH ){=0A=
			   $pick .=3D selectRandom(@genelist);=0A=
		   }=0A=
		   print "$pick\n";=0A=
	   }=0A=
}=0A=
=0A=
sub makeRepeatFasta($$$$){=0A=
	 my($id,$desc,$s,$n) =3D @_;=0A=
   # we want to print $n characters of $s (repeated if nessary) with =
newlines every LINELENGTH=0A=
#void makeRepeatFasta (const char * id, const char * desc, const char * =
s, int n) {=0A=
=0A=
   print ">$id $desc\n";=0A=
=0A=
   my $ss =3D $s;=0A=
   for my $i (1 .. int($n / LINELENGTH) ){=0A=
	   while (length $ss < LINELENGTH){=0A=
		   $ss .=3D $s=0A=
	   }=0A=
	   print substr( $ss,0,LINELENGTH)."\n";=0A=
	   $ss =3D substr($ss,LINELENGTH);=0A=
   }=0A=
   #final_line=0A=
	   while (length $ss < LINELENGTH){=0A=
		   $ss .=3D $s=0A=
	   }=0A=
   print substr( $ss, 0, ($n % LINELENGTH)) . "\n";=0A=
=0A=
}=0A=
=0A=
=0A=
my @iub =3D  (=0A=
	[ 'a', 0.27 ],=0A=
	[ 'c', 0.12 ],=0A=
	[ 'g', 0.12 ],=0A=
	[ 't', 0.27 ],=0A=
    [ 'B', 0.02 ],=0A=
    [ 'D', 0.02 ],=0A=
    [ 'H', 0.02 ],=0A=
    [ 'K', 0.02 ],=0A=
    [ 'M', 0.02 ],=0A=
    [ 'N', 0.02 ],=0A=
    [ 'R', 0.02 ],=0A=
    [ 'S', 0.02 ],=0A=
    [ 'V', 0.02 ],=0A=
    [ 'W', 0.02 ],=0A=
    [ 'Y', 0.02 ]=0A=
	  =0A=
	); =0A=
=0A=
my @homosapiens =3D (=0A=
    [ 'a', 0.3029549426680 ],=0A=
    [ 'c', 0.1979883004921 ],=0A=
    [ 'g', 0.1975473066391 ],=0A=
    [ 't', 0.3015094502008 ],=0A=
);=0A=
	=0A=
$alu =3D=0A=
   "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" .=0A=
   "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" .=0A=
   "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" .=0A=
   "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" .=0A=
   "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" .=0A=
   "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" .=0A=
   "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";=0A=
=0A=
######################################################################=0A=
#main=0A=
=0A=
	my $n =3D ($ARGV[0] || 1000) ;=0A=
=0A=
    makeCumulative @iub;=0A=
    makeCumulative @homosapiens;=0A=
=0A=
    makeRepeatFasta ("ONE", "Homo sapiens alu", $alu, $n*2);=0A=
    makeRandomFasta ("TWO", "IUB ambiguity codes", $n*3, @iub);=0A=
    makeRandomFasta ("THREE", "Homo sapiens frequency", $n*5, =
@homosapiens);=0A=
=0A=
    exit 0;=0A=
=0A=
#END OF FILE	=0A=

------=_NextPart_000_0000_01C53225.031DA3A0--