[Shootout-list] OCaml k-nucleotide
Christophe TROESTLER
del-con@tiscali.be
Sat, 26 Mar 2005 22:34:00 +0100 (CET)
----Next_Part(Sat_Mar_26_22_34_00_2005_490)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Hi,
Here is an OCaml implementation of k-nucleotide.
ChriS
----Next_Part(Sat_Mar_26_22_34_00_2005_490)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="k-nucleotide.ml"
(* k-nucleotide.ml
*
* The Great Computer Language Shootout
* http://shootout.alioth.debian.org/
*
* Contributed by Troestler Christophe
*)
open Printf
module H = Hashtbl
(* Single table shared by every call to [counts]; it is emptied at the
   start of each call and handed back as the result. *)
let count = H.create 100000

(* [counts k dna] tallies every length-[k] substring of [dna] (overlapping
   windows, one per start position) into the shared table [count] and
   returns that table.  Because the table is shared, the result of one call
   is overwritten by the next call.  When [k] exceeds the length of [dna]
   there are no windows and the table comes back empty. *)
let counts k dna =
  H.clear count;
  let last_start = String.length dna - k in
  let bump fragment =
    (* Stored counts are always >= 1, so 0 can only mean "not seen yet". *)
    let seen = try H.find count fragment with Not_found -> 0 in
    if seen = 0 then H.add count fragment 1
    else H.replace count fragment (seen + 1)
  in
  for start = 0 to last_start do
    bump (String.sub dna start k)
  done;
  count
(* Ordering on (key, frequency) pairs for [List.sort]: the pair with the
   higher frequency sorts first; when neither frequency is strictly greater
   than the other, the keys decide, in ascending [compare] order. *)
let compare_freq ((key : string), (freq : float)) (key', freq') =
  match freq > freq', freq < freq' with
  | true, _ -> -1
  | false, true -> 1
  | false, false -> compare key key'
(* [write_frequencies k dna] prints one line per distinct length-[k]
   fragment of [dna], formatted as "<fragment> <percentage>" with two
   decimals, where the percentage is the fragment's share of all counted
   fragments.  Lines are ordered by [compare_freq]; a single empty line is
   printed after the listing. *)
let write_frequencies k dna =
  let table = counts k dna in
  let total = float (H.fold (fun _ occurrences sum -> occurrences + sum) table 0) in
  let percentage occurrences = 100. *. float occurrences /. total in
  let rows =
    H.fold
      (fun fragment occurrences acc -> (fragment, percentage occurrences) :: acc)
      table []
  in
  let sorted = List.sort compare_freq rows in
  List.iter (fun (fragment, pct) -> printf "%s %.2f\n" fragment pct) sorted;
  print_string "\n"
(* [write_count seq dna] prints "<n>\t<seq>" on its own line, where [n] is
   the number of times [seq] occurs in [dna] according to [counts] run with
   a window as long as [seq]; [n] is 0 when [seq] never occurs. *)
let write_count seq dna =
  let table = counts (String.length seq) dna in
  let occurrences = try H.find table seq with Not_found -> 0 in
  printf "%d\t%s\n" occurrences seq
(* [dna_three] is the sequence found on standard input after the first line
   that starts with ">THREE": every following line, up to end of input, is
   upper-cased and concatenated (line terminators are already stripped by
   [input_line]).  It is computed once, when this definition is evaluated.

   Raises [End_of_file] if standard input ends before a ">THREE" line is
   seen. *)
let dna_three =
  let header = ">THREE" in
  let header_len = String.length header in
  (* Explicit length guard instead of catching whatever [String.sub] might
     raise on a short line. *)
  let is_three line =
    String.length line >= header_len
    && String.sub line 0 header_len = header
  in
  (* Skip everything up to and including the header line. *)
  while not (is_three (input_line stdin)) do () done;
  let buf = Buffer.create 1000 in
  (try
     while true do
       (* [String.uppercase] was deprecated in OCaml 4.03 and removed in
          5.0; the ASCII variant is sufficient for nucleotide letters. *)
       Buffer.add_string buf (String.uppercase_ascii (input_line stdin))
     done
   with End_of_file -> ());
  Buffer.contents buf
(* Program entry point: print the 1- and 2-nucleotide frequency tables of
   [dna_three], then the occurrence count of each fixed fragment below, in
   that order. *)
let () =
  List.iter (fun k -> write_frequencies k dna_three) [1; 2];
  let fragments =
    ["GGT"; "GGTA"; "GGTATT"; "GGTATTTTAATT"; "GGTATTTTAATTTATAGT"]
  in
  List.iter (fun fragment -> write_count fragment dna_three) fragments
----Next_Part(Sat_Mar_26_22_34_00_2005_490)----