[Shootout-list] OCaml implementation for fasta test.
Olivier Andrieu
oliv__a at users.sourceforge.net
Sun Apr 23 22:54:51 UTC 2006
Hi guys,
I've made this in OCaml for the fasta test.
--
Olivier
-------------- next part --------------
(** Deterministic pseudo-random source: a linear congruential generator
    with modulus [im], multiplier [ia] and increment [ic].  The current
    state lives in [last], which starts at 42. *)
module Rand = struct
  let im = 139968

  (* Reciprocal of the modulus, computed once so that scaling the state
     is a multiplication rather than a division. *)
  let im' = 1. /. float im

  let ia = 3877
  let ic = 29573
  let last = ref 42

  (** [gen_random max] advances the generator state and returns the new
      state scaled by [max /. im]. *)
  let gen_random max =
    let next = (!last * ia + ic) mod im in
    last := next;
    max *. float next *. im'
end
(** [make_cumulative tbl] turns a list of [(char, probability)] pairs into
    a list of [(cumulative_probability, char)] pairs in the same order,
    where each cumulative value is the running sum of the probabilities
    up to and including that entry. *)
let make_cumulative tbl =
  let rec accumulate running acc = function
    | [] -> List.rev acc
    | (char, p) :: rest ->
      let running = running +. p in
      accumulate running ((running, char) :: acc) rest
  in
  accumulate 0. [] tbl
(* Source sequence that [main] passes to [make_repeat_fasta], which writes
   it out repeatedly.  Written as one string literal: each trailing
   backslash continues the literal onto the next line without inserting a
   newline character. *)
let alu =
"GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
(* Cumulative probability table for the IUB ambiguity codes, used by
   [main] for the "TWO" sequence.

   The individual weights must sum to 1.0 so that the cumulative
   thresholds line up with the reference table of the fasta benchmark:
   0.27 + 0.12 + 0.12 + 0.27 + 11 * 0.02 = 1.00.  (With 0.125 for 'c' and
   'g' the total was 1.01 and every threshold from 'c' onwards was
   shifted, changing which character a given random draw selects.) *)
let iub = make_cumulative [
  'a', 0.270 ;
  'c', 0.120 ;
  'g', 0.120 ;
  't', 0.270 ;
  'B', 0.02 ;
  'D', 0.02 ;
  'H', 0.02 ;
  'K', 0.02 ;
  'M', 0.02 ;
  'N', 0.02 ;
  'R', 0.02 ;
  'S', 0.02 ;
  'V', 0.02 ;
  'W', 0.02 ;
  'Y', 0.02 ;
]
(* Cumulative probability table over the four nucleotides, used by [main]
   for the "THREE" sequence. *)
let homosapiens =
  let frequencies =
    [ ('a', 0.3029549426680);
      ('c', 0.1979883004921);
      ('g', 0.1975473066391);
      ('t', 0.3015094502008) ]
  in
  make_cumulative frequencies
(* Maximum number of sequence characters written per output line. *)
let width = 60
(** [make_repeat_fasta id desc src n] prints the header line
    [">id desc"] to standard output, followed by [n] characters obtained
    by cycling through [src], broken into lines of at most [width]
    characters.

    When [n <= 0] only the header is printed.

    @raise Invalid_argument if [src] is empty while [n > 0] (there is
    nothing to repeat). *)
let make_repeat_fasta id desc src n =
  let src_len = String.length src in
  if src_len = 0 && n > 0 then
    invalid_arg "make_repeat_fasta: empty source sequence" ;
  Printf.printf ">%s %s\n" id desc ;
  (* [written]: characters emitted so far; [off_s]: read position in
     [src]; [off_l]: column reached on the current output line. *)
  let rec proc written off_s off_l =
    (* Stop at whichever comes first: the requested total, the end of
       the current output line, or the end of [src]. *)
    let to_write =
      min (n - written) (min (width - off_l) (src_len - off_s)) in
    (* [src] is an immutable string, so [output_substring] is required;
       [output] only accepts [bytes]. *)
    output_substring stdout src off_s to_write ;
    let new_written = written + to_write in
    let new_off_s = (off_s + to_write) mod src_len in
    let new_off_l = (off_l + to_write) mod width in
    (* Terminate the line when it is full or when the sequence is done. *)
    if new_off_l = 0 || new_written >= n
    then output_char stdout '\n' ;
    if new_written < n
    then proc new_written new_off_s new_off_l in
  if n > 0 then proc 0 0 0
(** [pick_char tbl] draws a number from [Rand.gen_random 1.] and returns
    the character of the first entry of [tbl] whose threshold is strictly
    greater than that number.

    @raise Not_found if no entry of [tbl] has a large enough threshold. *)
let pick_char tbl =
  let r = Rand.gen_random 1. in
  let rec scan = function
    | [] -> raise Not_found
    | (p, c) :: rest -> if (r : float) < p then c else scan rest
  in
  scan tbl
(** [fill_random s nb tbl] overwrites the first [nb] bytes of the buffer
    [s] with characters chosen by [pick_char tbl].  Does nothing when
    [nb <= 0]. *)
let fill_random s nb tbl =
  for i = 0 to nb - 1 do
    (* [s] is a mutable [bytes] buffer; strings are immutable. *)
    Bytes.set s i (pick_char tbl)
  done

(** [make_random_fasta id desc tbl n] prints the header line
    [">id desc"] to standard output, followed by [n] characters chosen
    with [pick_char tbl], broken into lines of at most [width]
    characters.

    When [n <= 0] only the header is printed. *)
let make_random_fasta id desc tbl n =
  Printf.printf ">%s %s\n" id desc ;
  if n > 0 then begin
    (* One line-sized buffer, reused for every output line. *)
    let buff = Bytes.create width in
    let rec proc written =
      (* The final line may be shorter than [width]. *)
      let to_write = min width (n - written) in
      fill_random buff to_write tbl ;
      output stdout buff 0 to_write ;
      output_char stdout '\n' ;
      if written + to_write < n
      then proc (written + to_write) in
    proc 0
  end
(** [main n] writes the three sequences to standard output, in order:
    "ONE" (the repeated [alu] sequence, [2 * n] characters), "TWO"
    (random draws from [iub], [3 * n] characters) and "THREE" (random
    draws from [homosapiens], [5 * n] characters). *)
let main n =
  let () = make_repeat_fasta "ONE" "Homo sapiens alu" alu (2 * n) in
  let () = make_random_fasta "TWO" "IUB ambiguity codes" iub (3 * n) in
  make_random_fasta "THREE" "Homo sapiens frequency" homosapiens (5 * n)
(* Program entry point: reads the size N from the first command-line
   argument and runs [main] with it.  If the argument is missing or is
   not an integer, a usage message is written to standard error (standard
   output is reserved for the generated sequences) and the program exits
   with status 1. *)
let () =
  let n =
    try int_of_string Sys.argv.(1)
    with
    (* [Failure]: the argument is not an integer literal;
       [Invalid_argument]: no argument was given (index out of bounds). *)
    | Failure _ | Invalid_argument _ ->
      Printf.eprintf "Usage: %s <N>\n" Sys.argv.(0) ;
      exit 1
  in
  main n
More information about the Shootout-list
mailing list