[Shootout-list] fasta
Christophe TROESTLER
del-con@tiscali.be
Tue, 15 Feb 2005 15:35:34 +0100 (CET)
----Next_Part(Tue_Feb_15_15_35_34_2005_587)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Hi,
Attached in an OCaml version of the fasta program. Also, isn't there
a mistake in the "statement": the sum of probas of iub is 1.01 ...
Cheers,
ChriS
----Next_Part(Tue_Feb_15_15_35_34_2005_587)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="fasta.ml"
(* http://shootout.alioth.debian.org/benchmark.php?test=fasta&lang=all *)
(* This version is only marginally faster (2%) -- but less elegant --
than the HOF one. *)
(* Random number generator (Shootout version) *)
let rec gen_random =
let im = 139968
and ia = 3877
and ic = 29573 in
let last = ref 42
and inv_im = 1. /. float im in
fun max ->
last := (!last * ia + ic) mod im;
max *. float !last *. inv_im
let make_cumulative =
let rec cumul prob = function
| [] -> []
| (c, p) :: tl -> let prob = prob +. p in (c, prob) :: cumul prob tl in
fun table -> cumul 0. table
let rec rand_char prob = function
| [] -> failwith "rand_c"
| (c, p) :: tl -> if prob <= p then c else rand_char prob tl
let width = 60
(* [write s i0 l w] outputs [w] chars of [s.[0 .. l]] starting with
[s.[i0]] and considering the substring [s.[0 .. l]] as a "circle".
One assumes [0 <= i0 <= l <= String.length s].
@return [i0] needed for subsequent writes. *)
let rec write s i0 l w =
let len = l - i0 in
if w <= len then (output stdout s i0 w; i0 + w)
else (output stdout s i0 len; write s 0 l (w - len))
let make_repeat_fasta id desc src n =
Printf.printf ">%s %s\n" id desc;
let i0 = ref 0
and l = String.length src in
for i = 1 to n / width do
i0 := write src !i0 l width;
print_char '\n';
done;
i0 := write src !i0 l (n mod width);
print_char '\n'
let make_random_fasta id desc table n =
Printf.printf ">%s %s\n" id desc;
let table = make_cumulative table in
for i = 1 to n / width do
for j = 1 to width do print_char(rand_char (gen_random 1.) table); done;
print_char '\n';
done;
for j = 1 to (n mod width) do
print_char(rand_char (gen_random 1.) table);
done;
print_char '\n'
let alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
let iub = [
('a', 0.270); ('c', 0.125); ('g', 0.125); ('t', 0.270);
('B', 0.02); ('D', 0.02); ('H', 0.02); ('K', 0.02);
('M', 0.02); ('N', 0.02); ('R', 0.02); ('S', 0.02);
('V', 0.02); ('W', 0.02); ('Y', 0.02);
]
let homosapiens = [
('a', 0.3029549426680); ('c', 0.1979883004921);
('g', 0.1975473066391); ('t', 0.3015094502008);
]
let () =
let n = int_of_string Sys.argv.(1) in
make_repeat_fasta "ONE" "Homo sapiens alu" alu (n*2);
make_random_fasta "TWO" "IUB ambiguity codes" iub (n*3);
make_random_fasta "THREE" "Homo sapiens frequency" homosapiens (n*5)
----Next_Part(Tue_Feb_15_15_35_34_2005_587)----