[Shootout-list] Re: OCaml k-nucleotide

Christophe TROESTLER del-con@tiscali.be
Mon, 28 Mar 2005 01:24:39 +0200 (CEST)


----Next_Part(Mon_Mar_28_01_24_39_2005_815)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

On Sun, 27 Mar 2005, Joel Hoffman <hoffmanj@pacifier.com> wrote:
> 
> I'm no OCaml expert, but it doesn't appear to ignore comments (lines
> starting with ;).

Ah, OK, I did not realize we had to implement that part of the
standard.  If the test file contains comments, the "trial file" should
contain them too, so that mistakes like this one are caught.

Here is a corrected version.

ChriS

----Next_Part(Mon_Mar_28_01_24_39_2005_815)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="knucleotide.ml"

(* k-nucleotide.ml
 *
 * The Great Computer Language Shootout
 * http://shootout.alioth.debian.org/
 *
 * Contributed by Troestler Christophe
 *)

open Printf

(* Single table shared by every call to [counts]; it is emptied at the
   start of each call rather than reallocated. *)
let count = Hashtbl.create 100000

(* [counts k dna] tallies every length-[k] substring of [dna] (overlapping
   windows included) and returns the shared [count] table mapping each
   substring to its number of occurrences.  The returned table is the same
   physical table on every call, so a later call overwrites earlier
   results.  When [k] exceeds the length of [dna] the table comes back
   empty. *)
let counts k dna =
  Hashtbl.clear count;
  let last_start = String.length dna - k in
  for pos = 0 to last_start do
    let fragment = String.sub dna pos k in
    (* A fragment not seen yet counts as zero previous occurrences. *)
    let previous = try Hashtbl.find count fragment with Not_found -> 0 in
    Hashtbl.replace count fragment (previous + 1)
  done;
  count

(* Ordering on (key, frequency) pairs for use with [List.sort]: the pair
   with the larger frequency sorts first; when neither frequency is
   strictly greater than the other, the keys are compared in ascending
   order. *)
let compare_freq ((key_a : string), (freq_a : float)) (key_b, freq_b) =
  match freq_a > freq_b, freq_a < freq_b with
  | true, _ -> -1
  | false, true -> 1
  | false, false -> compare key_a key_b

(* [write_frequencies k dna] prints, one per line, every distinct length-[k]
   substring of [dna] followed by its share of all length-[k] windows as a
   percentage with two decimals.  Lines are ordered by [compare_freq]
   (highest percentage first, then by key), and a blank line terminates
   the listing. *)
let write_frequencies k dna =
  let table = counts k dna in
  let total =
    float_of_int
      (Hashtbl.fold (fun _ occurrences acc -> occurrences + acc) table 0) in
  (* Prepend the (key, percentage) pair for one table entry. *)
  let percentage_of key occurrences rest =
    (key, 100. *. float_of_int occurrences /. total) :: rest in
  let ranked = List.sort compare_freq (Hashtbl.fold percentage_of table []) in
  List.iter (fun (key, pct) -> printf "%s %.2f\n" key pct) ranked;
  print_string "\n"

(* [write_count seq dna] prints the number of (possibly overlapping)
   occurrences of [seq] in [dna], a tab, and then [seq] itself.  A
   sequence that never occurs is reported with a count of 0. *)
let write_count seq dna =
  let table = counts (String.length seq) dna in
  let occurrences = try Hashtbl.find table seq with Not_found -> 0 in
  printf "%d\t%s\n" occurrences seq


(* [dna_three] is the sequence of the ">THREE" record read from standard
   input when this module is initialised, upper-cased and with its lines
   concatenated.

   Input lines are discarded until one starting with ">THREE" is found.
   The following lines are then accumulated until either end of input or
   the next line starting with '>'.  Lines starting with ';' (comments)
   and empty lines are skipped.

   If the input ends before a ">THREE" line is seen, the [End_of_file]
   raised by [input_line] is not caught here and propagates. *)
let dna_three =
  (* Explicit length check so that short lines simply do not match,
     instead of relying on a catch-all exception handler. *)
  let is_three s =
    String.length s >= 6 && String.sub s 0 6 = ">THREE" in
  while not(is_three(input_line stdin)) do () done;
  let buf = Buffer.create 1000 in
  (* Skip possible comments and read the protein/DNA sequence.
     [End_of_file] doubles as the loop exit: it is raised by [input_line]
     at end of input and explicitly when the next record header is met. *)
  (try while true do
       let line = input_line stdin in
       (* An empty line has no first character: inspecting line.[0] would
          raise Invalid_argument, so blank lines are ignored. *)
       if line <> "" then begin
         if line.[0] = '>' then raise End_of_file;
         (* NOTE(review): String.uppercase is deprecated in recent OCaml
            releases in favour of String.uppercase_ascii; kept here for
            compatibility with the compiler this file was written for. *)
         if line.[0] <> ';' then Buffer.add_string buf (String.uppercase line)
       end
   done with End_of_file -> ());
  Buffer.contents buf

(* Program entry point: print the 1- and 2-nucleotide frequency tables of
   the THREE sequence, then the occurrence count of each fixed fragment. *)
let () =
  List.iter (fun k -> write_frequencies k dna_three) [1; 2];
  let fragments =
    ["GGT"; "GGTA"; "GGTATT"; "GGTATTTTAATT"; "GGTATTTTAATTTATAGT"] in
  List.iter (fun fragment -> write_count fragment dna_three) fragments

----Next_Part(Mon_Mar_28_01_24_39_2005_815)----