[Shootout-list] fasta (OCaml)

ChriS del-con@tiscali.be
Fri, 04 Mar 2005 11:04:09 +0100 (CET)


----Next_Part(Fri_Mar__4_11_04_09_2005_867)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Hi,

The OCaml fasta previously sent is failing because the probabilities
have been corrected (but AFAIK that was not said on this list).

Here is the version accordingly modified.

ChriS

----Next_Part(Fri_Mar__4_11_04_09_2005_867)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="fasta.ml"

(* http://shootout.alioth.debian.org/benchmark.php?test=fasta&lang=all *)
(* This version is only marginally faster (2%) -- but less elegant --
   than the HOF one. *)

(* Random number generator (Shootout version) *)
let rec gen_random =
  let im = 139968
  and ia = 3877
  and ic = 29573 in
  let last = ref 42
  and inv_im = 1. /. float im in
  fun max ->
    last := (!last * ia + ic) mod im;
    max *. float !last *. inv_im


let make_cumulative =
  let rec cumul prob = function
    | [] -> []
    | (c, p) :: tl -> let prob = prob +. p in (c, prob) :: cumul prob tl  in
  fun table -> cumul 0. table

let rec rand_char (prob : float) = function
  | [] -> failwith "rand_c"
  | (c, p) :: tl -> if prob <= p then c else rand_char prob tl

let width = 60

(* [write s i0 l w] outputs [w] chars of [s.[0 .. l]] starting with
   [s.[i0]] and considering the substring [s.[0 .. l]] as a "circle".
   One assumes [0 <= i0 <= l <= String.length s].
   @return [i0] needed for subsequent writes.  *)
let rec write s i0 l w =
  let len = l - i0 in
  if w <= len then (output stdout s i0 w; i0 + w)
  else (output stdout s i0 len; write s 0 l (w - len))

let make_repeat_fasta id desc src n =
  Printf.printf ">%s %s\n" id desc;
  let i0 = ref 0
  and l = String.length src in
  for i = 1 to n / width do
    i0 := write src !i0 l width;
    print_char '\n';
  done;
  i0 := write src !i0 l (n mod width);
  print_char '\n'

let make_random_fasta id desc table n =
  Printf.printf ">%s %s\n" id desc;
  let table = make_cumulative table in
  for i = 1 to n / width do
    for j = 1 to width do print_char(rand_char (gen_random 1.) table); done;
    print_char '\n';
  done;
  for j = 1 to n mod width do
    print_char(rand_char (gen_random 1.) table);
  done;
  print_char '\n'



let alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"

let iub = [
  ('a', 0.27);	('c', 0.12);	('g', 0.12);	('t', 0.27);

  ('B', 0.02);	('D', 0.02);	('H', 0.02);	('K', 0.02);
  ('M', 0.02);	('N', 0.02);	('R', 0.02);	('S', 0.02);
  ('V', 0.02);	('W', 0.02);	('Y', 0.02);
]

let homosapiens = [
    ('a', 0.3029549426680);    ('c', 0.1979883004921);
    ('g', 0.1975473066391);    ('t', 0.3015094502008);
]

let () =
  let n = int_of_string Sys.argv.(1) in
  make_repeat_fasta "ONE" "Homo sapiens alu" alu (n*2);
  make_random_fasta "TWO" "IUB ambiguity codes" iub (n*3);
  make_random_fasta "THREE" "Homo sapiens frequency" homosapiens (n*5)

----Next_Part(Fri_Mar__4_11_04_09_2005_867)----