[Shootout-list] Fasta in OCaml

William Douglas Neumann wdnx@unm.edu
Mon, 20 Dec 2004 13:13:09 -0700 (MST)


On Mon, 20 Dec 2004, Isaac Gouy wrote:

>>>> the sample output file doesn't jive with the benchmark program
>>> The output file is correct, the Oberon-2 program is not.
>>> (The Lua program is correct.)
>>
>> Shouldn't you guys really remove incorrect programs?  Especially if
>> the test has been promoted to the front page?
>
> No we shouldn't remove incorrect programs - they should fail.
> In this case, they didn't fail because the output is being checked
> against an obsolete output file.

OK... then shouldn't you show that the program resulted in an error, 
rather than completing successfully with a full CPU time of 0.60 seconds? 
And perhaps it might be nice if the "correct" implementation reflected a 
successful run rather than an error, as the Lua program currently does?

Hey... it might just be me, but I prefer to use a program that has 
supposedly passed the test as a reference for my implementation rather 
than one that looks as if it has failed...

in any case, the theoretically correct OCaml version is below:

(* fasta.ml *)

let length = 60;;
let s = String.create length;;

type freq = {c : char; mutable p : float};;

let makeCumulative arr =
   ignore (Array.fold_left
     (fun b v -> let cp = (b +. v.p) in v.p <- cp; cp) 0.0 arr);
   arr;;

let gen_random =
   let last = ref 42 and im = 139968 and ia = 3877 and ic = 29573 in
   let fim = float im in
   fun () -> last := (!last * ia + ic) mod im; (float !last) /. fim;;

let makeRandomFasta id desc arr n =
   let selectRandom () =
     let rec afh r i =
       if r <= arr.(i).p then arr.(i).c else afh r (succ i) in
     afh (gen_random ()) 0 in
   let rec loop n =
     if n > 0 then
     begin
       let x = n < length in
       for i = 0 to pred (if x then n else length) do
         s.[i] <- (selectRandom ())
       done;
       if x then String.fill s n (length - n) ' ';
       print_endline s;
       loop (n - length)
     end in
   Printf.printf ">%s %s\n" id desc;
   loop n;;

let makeRepFasta id desc seq n =
   let repString s length =
     let rec reph rep =
       if String.length rep >= length then rep else reph (s ^ rep) in
     reph s in
   let rep = repString seq length in
   let replen = String.length rep in
   let rec write pos todo =
     if todo > 0 then
     let len = min length todo and left = replen - pos in
     write
       ( if left >= len then
           (print_endline (String.sub rep pos len); pos + len)
         else
           (let ll = len - left in
             print_string (String.sub rep pos left);
             print_endline (String.sub rep 0 ll); ll))
       (todo - len) in
   Printf.printf ">%s %s\n" id desc;
   write 0 n;;

let _ =
   let n = try int_of_string Sys.argv.(1) with _ -> 1 in
   let homosapiens =
     makeCumulative
     [| {c = 'a'; p = 0.3029549426680}; {c = 'c'; p = 0.1979883004921};
        {c = 'g'; p = 0.1975473066391}; {c = 't'; p = 0.3015094502008} |]
   and iub =
     makeCumulative
     [| {c = 'a'; p = 0.270}; {c = 'c'; p = 0.125};
        {c = 'g'; p = 0.125}; {c = 't'; p = 0.270};
        {c = 'B'; p = 0.02}; {c = 'D'; p = 0.02}; {c = 'H'; p = 0.02};
        {c = 'K'; p = 0.02}; {c = 'M'; p = 0.02}; {c = 'N'; p = 0.02};
        {c = 'R'; p = 0.02}; {c = 'S'; p = 0.02}; {c = 'V'; p = 0.02};
        {c = 'W'; p = 0.02}; {c = 'Y'; p = 0.02} |]
   and alu = String.concat ""
     ["GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG";
      "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA";
      "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT";
      "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA";
      "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG";
      "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC";
      "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"] in
   makeRepFasta "ONE" "Homo sapiens alu" alu (n*2);
   makeRandomFasta "TWO" "IUB ambiguity codes" iub (n*3);
   makeRandomFasta "THREE" "Homo sapiens frequency" homosapiens (n*5);;


William D. Neumann
<wdnx@unm.edu>

FWO to the Nth degree!!!
---
Dear Lord, please make me the kind of person
my dog thinks I am.