[Shootout-list] fasta (OCaml)
ChriS
del-con@tiscali.be
Fri, 04 Mar 2005 11:04:09 +0100 (CET)
----Next_Part(Fri_Mar__4_11_04_09_2005_867)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Hi,
The OCaml fasta previously sent is failing because the probabilities
have been corrected (but AFAIK that was not said on this list).
Here is the version accordingly modified.
ChriS
----Next_Part(Fri_Mar__4_11_04_09_2005_867)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="fasta.ml"
(* http://shootout.alioth.debian.org/benchmark.php?test=fasta&lang=all *)
(* This version is only marginally faster (2%) -- but less elegant --
than the HOF one. *)
(* Random number generator (Shootout version) *)
let rec gen_random =
let im = 139968
and ia = 3877
and ic = 29573 in
let last = ref 42
and inv_im = 1. /. float im in
fun max ->
last := (!last * ia + ic) mod im;
max *. float !last *. inv_im
let make_cumulative =
let rec cumul prob = function
| [] -> []
| (c, p) :: tl -> let prob = prob +. p in (c, prob) :: cumul prob tl in
fun table -> cumul 0. table
let rec rand_char (prob : float) = function
| [] -> failwith "rand_c"
| (c, p) :: tl -> if prob <= p then c else rand_char prob tl
let width = 60
(* [write s i0 l w] outputs [w] chars of [s.[0 .. l]] starting with
[s.[i0]] and considering the substring [s.[0 .. l]] as a "circle".
One assumes [0 <= i0 <= l <= String.length s].
@return [i0] needed for subsequent writes. *)
let rec write s i0 l w =
let len = l - i0 in
if w <= len then (output stdout s i0 w; i0 + w)
else (output stdout s i0 len; write s 0 l (w - len))
let make_repeat_fasta id desc src n =
Printf.printf ">%s %s\n" id desc;
let i0 = ref 0
and l = String.length src in
for i = 1 to n / width do
i0 := write src !i0 l width;
print_char '\n';
done;
i0 := write src !i0 l (n mod width);
print_char '\n'
let make_random_fasta id desc table n =
Printf.printf ">%s %s\n" id desc;
let table = make_cumulative table in
for i = 1 to n / width do
for j = 1 to width do print_char(rand_char (gen_random 1.) table); done;
print_char '\n';
done;
for j = 1 to n mod width do
print_char(rand_char (gen_random 1.) table);
done;
print_char '\n'
let alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
let iub = [
('a', 0.27); ('c', 0.12); ('g', 0.12); ('t', 0.27);
('B', 0.02); ('D', 0.02); ('H', 0.02); ('K', 0.02);
('M', 0.02); ('N', 0.02); ('R', 0.02); ('S', 0.02);
('V', 0.02); ('W', 0.02); ('Y', 0.02);
]
let homosapiens = [
('a', 0.3029549426680); ('c', 0.1979883004921);
('g', 0.1975473066391); ('t', 0.3015094502008);
]
let () =
let n = int_of_string Sys.argv.(1) in
make_repeat_fasta "ONE" "Homo sapiens alu" alu (n*2);
make_random_fasta "TWO" "IUB ambiguity codes" iub (n*3);
make_random_fasta "THREE" "Homo sapiens frequency" homosapiens (n*5)
----Next_Part(Fri_Mar__4_11_04_09_2005_867)----