[Shootout-list] OCaml implementation for fasta test.

Olivier Andrieu oliv__a at users.sourceforge.net
Sun Apr 23 22:54:51 UTC 2006


Hi guys,

I've made this in OCaml for the fasta test.

-- 
   Olivier
-------------- next part --------------

(** Linear congruential pseudo-random number generator with a fixed seed.

    The state [last] starts at 42 and is advanced on every call as
    [(last * ia + ic) mod im]. *)
module Rand =
  struct
    let im = 139968
    let im' = 1. /. float im
    let ia = 3877
    let ic = 29573
    let last = ref 42

    (** [gen_random max] advances the generator state and returns
        [max *. float state *. im'], i.e. the new state scaled by
        [max / im]. *)
    let gen_random max =
      let next = (ia * !last + ic) mod im in
      last := next ;
      max *. float_of_int next *. im'
  end

(** [make_cumulative tbl] turns a list of [(symbol, probability)] pairs into
    a list of [(cumulative_probability, symbol)] pairs, in the same order,
    where each cumulative value is the running sum of the probabilities up to
    and including that entry. *)
let make_cumulative tbl =
  let rec accumulate running acc = function
    | [] -> List.rev acc
    | (symbol, weight) :: rest ->
      let running = running +. weight in
      accumulate running ((running, symbol) :: acc) rest
  in
  accumulate 0. [] tbl

(** Source sequence repeated by the "Homo sapiens alu" section of the
    output. It is stored as one contiguous string; the pieces below only
    mirror the layout of the literal. *)
let alu =
  String.concat ""
    [ "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG";
      "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA";
      "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT";
      "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA";
      "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG";
      "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC";
      "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA" ]

(** Cumulative probability table for the "IUB ambiguity codes" section:
    the four lowercase bases followed by eleven uppercase codes that each
    carry a probability of 0.02. *)
let iub =
  let bases = [ ('a', 0.270); ('c', 0.125); ('g', 0.125); ('t', 0.270) ] in
  let ambiguity_codes =
    List.map
      (fun code -> (code, 0.02))
      [ 'B'; 'D'; 'H'; 'K'; 'M'; 'N'; 'R'; 'S'; 'V'; 'W'; 'Y' ]
  in
  make_cumulative (bases @ ambiguity_codes)

(** Cumulative probability table for the "Homo sapiens frequency" section,
    built from the per-base frequencies listed below. *)
let homosapiens =
  let frequencies =
    [ ('a', 0.3029549426680);
      ('c', 0.1979883004921);
      ('g', 0.1975473066391);
      ('t', 0.3015094502008) ]
  in
  make_cumulative frequencies

(** Maximum number of sequence characters per output line; both
    [make_repeat_fasta] and [make_random_fasta] break lines at this length. *)
let width = 60

(** [make_repeat_fasta id desc src n] writes a FASTA record to stdout: a
    [">id desc"] header line followed by [n] characters obtained by cycling
    through [src], split into lines of at most [width] characters.

    When [n <= 0] only the header line is written.

    @raise Invalid_argument if [src] is empty while [n > 0] (there is
    nothing to repeat); the check happens before any output. *)
let make_repeat_fasta id desc src n =
  let src_len = String.length src in
  if n > 0 && src_len = 0 then
    invalid_arg "make_repeat_fasta: empty source sequence" ;
  Printf.printf ">%s %s\n" id desc ;
  (* [written]: characters emitted so far; [off_s]: read position in [src];
     [off_l]: column reached on the current output line. *)
  let rec proc written off_s off_l =
    if written < n then begin
      (* Stop at whichever comes first: the requested total, the end of the
         current line, or the end of [src] (after which we wrap around). *)
      let to_write =
        min (n - written) (min (width - off_l) (src_len - off_s)) in
      (* [src] is an immutable string, so use [output_substring] rather than
         [output], which operates on [bytes]. *)
      output_substring stdout src off_s to_write ;
      let written = written + to_write in
      let off_l = (off_l + to_write) mod width in
      (* Terminate the line when it is full or when the sequence is done. *)
      if off_l = 0 || written >= n then output_char stdout '\n' ;
      proc written ((off_s + to_write) mod src_len) off_l
    end
  in
  proc 0 0 0

(** [pick_char tbl] draws a number [r] from [Rand.gen_random 1.] and returns
    the symbol of the first entry [(p, c)] of the cumulative table [tbl]
    with [r < p].

    If [r] is not below any bound (for instance because the cumulative sums
    fall slightly short of 1.0 through floating-point rounding), the symbol
    of the last entry is returned instead of failing.

    @raise Not_found if [tbl] is empty. *)
let pick_char tbl =
  let r = Rand.gen_random 1. in
  let rec search = function
    | [] -> raise Not_found
    | [ (_, c) ] -> c (* last entry: also absorbs any rounding shortfall *)
    | (p, c) :: rest -> if (r : float) < p then c else search rest
  in
  search tbl

(** [fill_random s nb tbl] overwrites the first [nb] bytes of the buffer [s]
    with symbols drawn by [pick_char tbl]. Does nothing when [nb <= 0]. *)
let fill_random s nb tbl =
  for i = 0 to nb - 1 do
    Bytes.set s i (pick_char tbl)
  done

(** [make_random_fasta id desc tbl n] writes a FASTA record to stdout: a
    [">id desc"] header line followed by [n] symbols drawn from the
    cumulative table [tbl], split into lines of at most [width] characters.

    When [n <= 0] only the header line is written. *)
let make_random_fasta id desc tbl n =
  Printf.printf ">%s %s\n" id desc ;
  (* One line-sized mutable buffer, reused for every output line. *)
  let buff = Bytes.create width in
  let rec proc written =
    if written < n then begin
      let to_write = min width (n - written) in
      fill_random buff to_write tbl ;
      output stdout buff 0 to_write ;
      output_char stdout '\n' ;
      proc (written + to_write)
    end
  in
  proc 0

(** [main n] writes the three sections of the output to stdout: the
    repeated [alu] sequence ([2 * n] characters), then random sequences drawn
    from [iub] ([3 * n] characters) and from [homosapiens] ([5 * n]
    characters). *)
let main n =
  make_repeat_fasta "ONE" "Homo sapiens alu" alu (2 * n) ;
  List.iter
    (fun (id, desc, tbl, factor) -> make_random_fasta id desc tbl (factor * n))
    [ ("TWO", "IUB ambiguity codes", iub, 3);
      ("THREE", "Homo sapiens frequency", homosapiens, 5) ]

(* Program entry point: read the size N from the first command-line argument
   and run [main]. A missing argument ([Invalid_argument] from the array
   access) or a non-numeric one ([Failure] from [int_of_string]) prints a
   usage message on stderr and exits with status 1; any other exception is
   left to propagate. *)
let () =
  let n =
    try int_of_string Sys.argv.(1)
    with Failure _ | Invalid_argument _ ->
      Printf.eprintf "Usage: %s <N>\n" Sys.argv.(0) ;
      exit 1
  in
  main n


More information about the Shootout-list mailing list