[Shootout-list] fasta - expected output
Christophe TROESTLER
del-con@tiscali.be
Sat, 05 Mar 2005 11:43:09 +0100 (CET)
----Next_Part(Sat_Mar__5_11_43_09_2005_716)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
On Fri, 4 Mar 2005, Isaac Gouy <igouy2@yahoo.com> wrote:
>
> Several of the fasta programs write extra line feeds after the end
> of section TWO and section THREE.
In case it is considered an error, here is a version of the OCaml
program that does not do this.
ChriS
----Next_Part(Sat_Mar__5_11_43_09_2005_716)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="fasta.ml"
(* http://shootout.alioth.debian.org/benchmark.php?test=fasta&lang=all *)
(* Random number generator (Shootout version) *)
let rec gen_random =
let im = 139968
and ia = 3877
and ic = 29573 in
let last = ref 42
and inv_im = 1. /. float im in
fun max ->
last := (!last * ia + ic) mod im;
max *. float !last *. inv_im
let make_cumulative =
let rec cumul prob = function
| [] -> []
| (c, p) :: tl -> let prob = prob +. p in (c, prob) :: cumul prob tl in
fun table -> cumul 0. table
let rec rand_char (prob : float) = function
| [] -> failwith "rand_c"
| (c, p) :: tl -> if prob <= p then c else rand_char prob tl
let width = 60
(* [write s i0 l w] outputs [w] chars of [s.[0 .. l]] starting with
[s.[i0]] and considering the substring [s.[0 .. l]] as a "circle".
One assumes [0 <= i0 <= l <= String.length s].
@return [i0] needed for subsequent writes. *)
let rec write s i0 l w =
let len = l - i0 in
if w <= len then (output stdout s i0 w; i0 + w)
else (output stdout s i0 len; write s 0 l (w - len))
let make_repeat_fasta id desc src n =
Printf.printf ">%s %s\n" id desc;
let i0 = ref 0
and l = String.length src in
for i = 1 to n / width do
i0 := write src !i0 l width;
print_char '\n';
done;
let w = n mod width in
if w > 0 then (ignore(write src !i0 l w); print_char '\n')
let make_random_fasta id desc table n =
Printf.printf ">%s %s\n" id desc;
let table = make_cumulative table in
for i = 1 to n / width do
for j = 1 to width do print_char(rand_char (gen_random 1.) table); done;
print_char '\n';
done;
let w = n mod width in
if w > 0 then (
for j = 1 to w do print_char(rand_char (gen_random 1.) table); done;
print_char '\n'
)
let alu = "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG\
GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA\
CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT\
ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA\
GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG\
AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC\
AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"
let iub = [
('a', 0.27); ('c', 0.12); ('g', 0.12); ('t', 0.27);
('B', 0.02); ('D', 0.02); ('H', 0.02); ('K', 0.02);
('M', 0.02); ('N', 0.02); ('R', 0.02); ('S', 0.02);
('V', 0.02); ('W', 0.02); ('Y', 0.02);
]
let homosapiens = [
('a', 0.3029549426680); ('c', 0.1979883004921);
('g', 0.1975473066391); ('t', 0.3015094502008);
]
let () =
let n = int_of_string Sys.argv.(1) in
make_repeat_fasta "ONE" "Homo sapiens alu" alu (n*2);
make_random_fasta "TWO" "IUB ambiguity codes" iub (n*3);
make_random_fasta "THREE" "Homo sapiens frequency" homosapiens (n*5)
----Next_Part(Sat_Mar__5_11_43_09_2005_716)----