[Shootout-list] fasta - expected output

Christophe TROESTLER del-con@tiscali.be
Sat, 05 Mar 2005 11:43:09 +0100 (CET)


----Next_Part(Sat_Mar__5_11_43_09_2005_716)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

On Fri, 4 Mar 2005, Isaac Gouy <igouy2@yahoo.com> wrote:
> 
> Several of the fasta programs write extra line feeds after the end
> of section TWO and section THREE.

In case it is considered an error, here is a version of the OCaml
program that does not do this.

ChriS

----Next_Part(Sat_Mar__5_11_43_09_2005_716)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="fasta.ml"

(* http://shootout.alioth.debian.org/benchmark.php?test=fasta&lang=all *)

(* Random number generator (Shootout version) *)
let rec gen_random =
  let im = 139968
  and ia = 3877
  and ic = 29573 in
  let last = ref 42
  and inv_im = 1. /. float im in
  fun max ->
    last := (!last * ia + ic) mod im;
    max *. float !last *. inv_im


let make_cumulative =
  let rec cumul prob = function
    | [] -> []
    | (c, p) :: tl -> let prob = prob +. p in (c, prob) :: cumul prob tl  in
  fun table -> cumul 0. table

let rec rand_char (prob : float) = function
  | [] -> failwith "rand_c"
  | (c, p) :: tl -> if prob <= p then c else rand_char prob tl

let width = 60

(* [write s i0 l w] outputs [w] chars of [s.[0 .. l]] starting with
   [s.[i0]] and considering the substring [s.[0 .. l]] as a "circle".
   One assumes [0 <= i0 <= l <= String.length s].
   @return [i0] needed for subsequent writes.  *)
let rec write s i0 l w =
  let len = l - i0 in
  if w <= len then (output stdout s i0 w; i0 + w)
  else (output stdout s i0 len; write s 0 l (w - len))

let make_repeat_fasta id desc src n =
  Printf.printf ">%s %s\n" id desc;
  let i0 = ref 0
  and l = String.length src in
  for i = 1 to n / width do
    i0 := write src !i0 l width;
    print_char '\n';
  done;
  let w = n mod width in
  if w > 0 then (ignore(write src !i0 l w);  print_char '\n')

let make_random_fasta id desc table n =
  Printf.printf ">%s %s\n" id desc;
  let table = make_cumulative table in
  for i = 1 to n / width do
    for j = 1 to width do print_char(rand_char (gen_random 1.) table); done;
    print_char '\n';
  done;
  let w = n mod width in
  if w > 0 then (
    for j = 1 to w do print_char(rand_char (gen_random 1.) table); done;
    print_char '\n'
  )



let alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"

let iub = [
  ('a', 0.27);	('c', 0.12);	('g', 0.12);	('t', 0.27);

  ('B', 0.02);	('D', 0.02);	('H', 0.02);	('K', 0.02);
  ('M', 0.02);	('N', 0.02);	('R', 0.02);	('S', 0.02);
  ('V', 0.02);	('W', 0.02);	('Y', 0.02);
]

let homosapiens = [
    ('a', 0.3029549426680);    ('c', 0.1979883004921);
    ('g', 0.1975473066391);    ('t', 0.3015094502008);
]

let () =
  let n = int_of_string Sys.argv.(1) in
  make_repeat_fasta "ONE" "Homo sapiens alu" alu (n*2);
  make_random_fasta "TWO" "IUB ambiguity codes" iub (n*3);
  make_random_fasta "THREE" "Homo sapiens frequency" homosapiens (n*5)

----Next_Part(Sat_Mar__5_11_43_09_2005_716)----