[Shootout-list] Java k-nucleotide

James McIlree ovrskeek@mac.com
Sun, 27 Mar 2005 02:30:38 -0800


--Apple-Mail-12-468252712
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
	charset=WINDOWS-1252;
	format=flowed


	I wrote this using a conservative reading of the specification.

	I would like to ask, though, was it intended to require two =
loops
(and methods) to generate keys for the hashtable?

	The relevant portion of the spec reads:

	"define a procedure/function to update a hashtable of =
k-nucleotide
keys and count values, for a particular reading-frame =97 even though=20
we'll
combine k-nucleotide counts for all reading-frames"

	Some of the submitted programs use two (D Digital Mars, C#),
others appear not to (perl, OCaml)


--Apple-Mail-12-468252712
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
	x-unix-mode=0644;
	name="knucleotide.java"
Content-Disposition: attachment;
	filename=knucleotide.java

/* The Great Computer Language Shootout
   http://shootout.alioth.debian.org/

   contributed by James McIlree
*/

import java.util.*;
import java.io.*;
import java.text.*;

public class knucleotide {
  String sequence;
  int count = 1;

  knucleotide(String s) {
    sequence = s;
  }

  public static void main(String[] args) throws Exception
  {
    StringBuffer sbuffer = new StringBuffer();
    String line;
    
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
    while ((line = in.readLine()) != null) {
      if (line.startsWith(">THREE")) break;
    }
    
    while ((line = in.readLine()) != null) {
      char c = line.charAt(0);
      if (c == '>')
        break;
      else if (c != ';')
        sbuffer.append(line.toUpperCase());
    }
    
    knucleotide kn = new knucleotide(sbuffer.toString());
    kn.writeFrequencies(1);
    kn.writeFrequencies(2);

    kn.writeCount("GGT");
    kn.writeCount("GGTA");
    kn.writeCount("GGTATT");
    kn.writeCount("GGTATTTTAATT");
    kn.writeCount("GGTATTTTAATTTATAGT");
  }

  void writeFrequencies(int nucleotideLength) {
    Map frequencies = calculateFrequencies(nucleotideLength);
    ArrayList list = new ArrayList(frequencies.size());
    Iterator it = frequencies.entrySet().iterator();

    int sum = 0;
    while (it.hasNext()) {
      knucleotide fragment = (knucleotide)((Map.Entry)it.next()).getValue();
      list.add(fragment);
      sum += fragment.count;
    }

    Collections.sort(list, new Comparator() {
        public int compare(Object o1, Object o2) {
          int c = ((knucleotide)o2).count - ((knucleotide)o1).count;
          if (c == 0) {
            c = ((knucleotide)o1).sequence.compareTo(((knucleotide)o2).sequence);
          }
          return c;
        }
      });

    NumberFormat nf = NumberFormat.getInstance();
    nf.setMaximumFractionDigits(2);
    nf.setMinimumFractionDigits(2);

    for (int i=0; i<list.size(); i++) {
      knucleotide fragment = (knucleotide)list.get(i);
      double percent = (double)fragment.count/(double)sum * 100.0;
      System.out.println(fragment.sequence + " " + nf.format(percent));
    }
    System.out.println("");
  }

  void writeCount(String nucleotideFragment) {
    Map frequencies = calculateFrequencies(nucleotideFragment.length());

    knucleotide found = (knucleotide)frequencies.get(nucleotideFragment);
    int count = (found == null) ? 0 : found.count;
    System.out.println(count + "\t" + nucleotideFragment);
  }

  Map calculateFrequencies(int fragmentLength) {
    HashMap map = new HashMap();
    for (int offset=0; offset<fragmentLength; offset++)
      calculateFrequencies(map, offset, fragmentLength);

    return map;
  }

  // Is this method really needed? The benchmark specification seems to
  // indicate so, but it is not entirely clear. This method could easily
  // be folded up.
  void calculateFrequencies(Map map, int offset, int fragmentLength) {
    int lastIndex = sequence.length() - fragmentLength;
    for (int index=offset; index<lastIndex; index+=fragmentLength) {
      String temp = sequence.substring(index, index + fragmentLength);
      knucleotide fragment = (knucleotide)map.get(temp);
      if (fragment != null)
        fragment.count++;
      else
        map.put(temp, new knucleotide(temp));
    }
  }
}


--Apple-Mail-12-468252712--