[Shootout-list] Fasta in OCaml
William Douglas Neumann
wdnx@unm.edu
Mon, 20 Dec 2004 13:13:09 -0700 (MST)
On Mon, 20 Dec 2004, Isaac Gouy wrote:
>>>> the sample output file doesn't jive with the benchmark program
>>> The output file is correct, the Oberon-2 program is not.
>>> (The Lua program is correct.)
>>
>> Shouldn't you guys really remove incorrect programs? Especially if
>> the test has been promoted to the front page?
>
> No we shouldn't remove incorrect programs - they should fail.
> In this case, they didn't fail because the output is being checked
> against an obsolete output file.
OK... then shouldn't you show that the program resulted in an error,
rather than completing successfully with a full CPU time of 0.60 seconds?
And perhaps it might be nice if the "correct" implementation reflected a
successful run rather than an error, as the Lua program currently does?
Hey... it might just be me, but I prefer to use a program that has
supposedly passed the test as a reference for my implementation rather
than one that looks as if it has failed...
in any case, the theoretically correct OCaml version is below:
(* fasta.ml *)
let length = 60;;
let s = String.create length;;
type freq = {c : char; mutable p : float};;
let makeCumulative arr =
ignore (Array.fold_left
(fun b v -> let cp = (b +. v.p) in v.p <- cp; cp) 0.0 arr);
arr;;
let gen_random =
let last = ref 42 and im = 139968 and ia = 3877 and ic = 29573 in
let fim = float im in
fun () -> last := (!last * ia + ic) mod im; (float !last) /. fim;;
let makeRandomFasta id desc arr n =
let selectRandom () =
let rec afh r i =
if r <= arr.(i).p then arr.(i).c else afh r (succ i) in
afh (gen_random ()) 0 in
let rec loop n =
if n > 0 then
begin
let x = n < length in
for i = 0 to pred (if x then n else length) do
s.[i] <- (selectRandom ())
done;
if x then String.fill s n (length - n) ' ';
print_endline s;
loop (n - length)
end in
Printf.printf ">%s %s\n" id desc;
loop n;;
let makeRepFasta id desc seq n =
let repString s length =
let rec reph rep =
if String.length rep >= length then rep else reph (s ^ rep) in
reph s in
let rep = repString seq length in
let replen = String.length rep in
let rec write pos todo =
if todo > 0 then
let len = min length todo and left = replen - pos in
write
( if left >= len then
(print_endline (String.sub rep pos len); pos + len)
else
(let ll = len - left in
print_string (String.sub rep pos left);
print_endline (String.sub rep 0 ll); ll))
(todo - len) in
Printf.printf ">%s %s\n" id desc;
write 0 n;;
let _ =
let n = try int_of_string Sys.argv.(1) with _ -> 1 in
let homosapiens =
makeCumulative
[| {c = 'a'; p = 0.3029549426680}; {c = 'c'; p = 0.1979883004921};
{c = 'g'; p = 0.1975473066391}; {c = 't'; p = 0.3015094502008} |]
and iub =
makeCumulative
[| {c = 'a'; p = 0.270}; {c = 'c'; p = 0.125};
{c = 'g'; p = 0.125}; {c = 't'; p = 0.270};
{c = 'B'; p = 0.02}; {c = 'D'; p = 0.02}; {c = 'H'; p = 0.02};
{c = 'K'; p = 0.02}; {c = 'M'; p = 0.02}; {c = 'N'; p = 0.02};
{c = 'R'; p = 0.02}; {c = 'S'; p = 0.02}; {c = 'V'; p = 0.02};
{c = 'W'; p = 0.02}; {c = 'Y'; p = 0.02} |]
and alu = String.concat ""
["GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG";
"GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA";
"CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT";
"ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA";
"GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG";
"AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC";
"AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"] in
makeRepFasta "ONE" "Homo sapiens alu" alu (n*2);
makeRandomFasta "TWO" "IUB ambiguity codes" iub (n*3);
makeRandomFasta "THREE" "Homo sapiens frequency" homosapiens (n*5);;
William D. Neumann
<wdnx@unm.edu>
FWO to the Nth degree!!!
---
Dear Lord, please make me the kind of person
my dog thinks I am.