[Shootout-list] Re: OCaml k-nucleotide
Christophe TROESTLER
del-con@tiscali.be
Mon, 28 Mar 2005 01:24:39 +0200 (CEST)
----Next_Part(Mon_Mar_28_01_24_39_2005_815)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
On Sun, 27 Mar 2005, Joel Hoffman <hoffmanj@pacifier.com> wrote:
>
> I'm no OCaml expert, but it doesn't appear to ignore comments (lines
> starting with ;).
Ah, ok, I did not realize we had to implement that part of the
standard. If the test file contains comments, so should the "trial
file" to point out mistakes.
Here is a corrected version.
ChriS
----Next_Part(Mon_Mar_28_01_24_39_2005_815)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="knucleotide.ml"
(* k-nucleotide.ml
*
* The Great Computer Language Shootout
* http://shootout.alioth.debian.org/
*
* Contributed by Troestler Christophe
*)
open Printf
(* Table of substring occurrence counts.  It is shared: every call to
   [counts] empties it and fills it again. *)
let count = Hashtbl.create 100000

(* [counts k dna] tallies every length-[k] substring of [dna] (overlapping
   ones included) and returns the table mapping each substring to its
   number of occurrences.

   The returned value is the global [count] table itself, so the result of
   one call is overwritten by the next call.  When [k] is larger than the
   length of [dna] the loop body never runs and the table is left empty. *)
let counts k dna =
  Hashtbl.clear count;
  (* Record one more occurrence of [fragment]. *)
  let bump fragment =
    let seen = try Hashtbl.find count fragment with Not_found -> 0 in
    Hashtbl.replace count fragment (seen + 1)
  in
  let last_start = String.length dna - k in
  for start = 0 to last_start do
    bump (String.sub dna start k)
  done;
  count
(* Ordering on (key, frequency) pairs for [List.sort]: pairs with the
   higher frequency come first; pairs whose frequencies are neither
   greater nor smaller than each other are ordered by ascending key. *)
let compare_freq ((key_a : string), (freq_a : float)) (key_b, freq_b) =
  if freq_a < freq_b then 1
  else if freq_a > freq_b then -1
  else String.compare key_a key_b
(* [write_frequencies k dna] prints, one per line, every length-[k]
   substring of [dna] followed by its share of all length-[k] substrings
   as a percentage with two decimals.  Lines are ordered by [compare_freq]
   (highest percentage first, then by substring) and the listing is
   terminated by an empty line. *)
let write_frequencies k dna =
  let table = counts k dna in
  (* Total number of substrings counted, as a float for the division. *)
  let total = float (Hashtbl.fold (fun _ n acc -> n + acc) table 0) in
  let percent n = 100. *. float n /. total in
  let entries =
    Hashtbl.fold (fun kmer n acc -> (kmer, percent n) :: acc) table []
  in
  List.sort compare_freq entries
  |> List.iter (fun (kmer, pct) -> printf "%s %.2f\n" kmer pct);
  print_string "\n"
(* [write_count seq dna] prints the number of (possibly overlapping)
   occurrences of [seq] in [dna], a tab, and [seq] itself.  A sequence
   that never occurs is reported with a count of 0. *)
let write_count seq dna =
  let table = counts (String.length seq) dna in
  let occurrences = try Hashtbl.find table seq with Not_found -> 0 in
  printf "%d\t%s\n" occurrences seq
(* [dna_three] is the sequence of the ">THREE" section of standard input,
   upper-cased and with its lines concatenated.  It is read once, when the
   program starts.

   Lines are skipped until one starting with ">THREE" is found; if standard
   input ends before that, [End_of_file] escapes from [input_line].  The
   lines that follow are then accumulated until the next line starting
   with '>' or the end of input.  Lines starting with ';' (comments) and
   empty lines contribute nothing. *)
let dna_three =
  let header = ">THREE" in
  let header_len = String.length header in
  (* Test the length explicitly rather than catching every exception that
     [String.sub] could raise on a short line. *)
  let is_three s =
    String.length s >= header_len && String.sub s 0 header_len = header
  in
  while not (is_three (input_line stdin)) do () done;
  let buf = Buffer.create 1000 in
  (* Skip possible comments and read the protein/DNA sequence. *)
  (try
     while true do
       let line = input_line stdin in
       (* An empty line has no first character: guard the [line.[0]]
          accesses so that a blank line is ignored instead of raising
          [Invalid_argument]. *)
       if String.length line > 0 then begin
         (* A new section header ends the sequence; reuse [End_of_file] to
            leave the loop through the same handler as a real end of
            input. *)
         if line.[0] = '>' then raise End_of_file;
         (* [String.uppercase] was removed in OCaml 5.0;
            [String.uppercase_ascii] is the replacement and is equivalent
            on ASCII input. *)
         if line.[0] <> ';' then
           Buffer.add_string buf (String.uppercase_ascii line)
       end
     done
   with End_of_file -> ());
  Buffer.contents buf
(* Program entry point: print the frequency tables for substrings of
   length 1 and 2 of [dna_three], then the occurrence count of each of the
   fixed sequences below, in that order. *)
let () =
  List.iter (fun len -> write_frequencies len dna_three) [1; 2];
  let patterns =
    [ "GGT";
      "GGTA";
      "GGTATT";
      "GGTATTTTAATT";
      "GGTATTTTAATTTATAGT" ]
  in
  List.iter (fun pattern -> write_count pattern dna_three) patterns
----Next_Part(Mon_Mar_28_01_24_39_2005_815)----