-
-
Notifications
You must be signed in to change notification settings - Fork 37
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add nucleotide-count exercise (#286)
- Loading branch information
1 parent
b2bcdef
commit 2b5ae3f
Showing
8 changed files
with
278 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Instructions | ||
|
||
Each of us inherits from our biological parents a set of chemical instructions known as DNA that influence how our bodies are constructed. | ||
All known life depends on DNA! | ||
|
||
> Note: You do not need to understand anything about nucleotides or DNA to complete this exercise. | ||
DNA is a long chain of other chemicals, the most important of which are the four nucleotides: adenine, cytosine, guanine, and thymine. | ||
A single DNA chain can contain billions of these four nucleotides and the order in which they occur is important! | ||
We call the order of these nucleotides in a bit of DNA a "DNA sequence". | ||
|
||
We represent a DNA sequence as an ordered collection of these four nucleotides and a common way to do that is with a string of characters such as "ATTACG" for a DNA sequence of 6 nucleotides. | ||
'A' for adenine, 'C' for cytosine, 'G' for guanine, and 'T' for thymine. | ||
|
||
Given a string representing a DNA sequence, count how many of each nucleotide is present. | ||
If the string contains characters that aren't A, C, G, or T then it is invalid and you should signal an error. | ||
|
||
For example: | ||
|
||
```text | ||
"GATTACA" -> 'A': 3, 'C': 1, 'G': 1, 'T': 2 | ||
"INVALID" -> error | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
{ | ||
"authors": [ | ||
"keiravillekode" | ||
], | ||
"files": { | ||
"solution": [ | ||
"nucleotide-count.sml" | ||
], | ||
"test": [ | ||
"test.sml" | ||
], | ||
"example": [ | ||
".meta/example.sml" | ||
] | ||
}, | ||
"blurb": "Given a DNA string, compute how many times each nucleotide occurs in the string.", | ||
"source": "The Calculating DNA Nucleotides problem at Rosalind", | ||
"source_url": "https://rosalind.info/problems/dna/" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
(* Count how often each of the nucleotides A, C, G and T occurs in `strand`.
   Returns a record holding the four tallies.
   Raises Fail "Invalid nucleotide in strand" when the first character that
   is not one of A, C, G, T is reached. *)
fun nucleotideCounts (strand: string): {a: int, c: int, g: int, t: int} =
  let
    (* Fold step: bump the tally that matches the current character. *)
    fun tally (#"A", {a, c, g, t}) = {a = a + 1, c = c, g = g, t = t}
      | tally (#"C", {a, c, g, t}) = {a = a, c = c + 1, g = g, t = t}
      | tally (#"G", {a, c, g, t}) = {a = a, c = c, g = g + 1, t = t}
      | tally (#"T", {a, c, g, t}) = {a = a, c = c, g = g, t = t + 1}
      | tally _ = raise Fail "Invalid nucleotide in strand"
  in
    (* Characters are visited left to right, starting from all-zero tallies. *)
    foldl tally {a = 0, c = 0, g = 0, t = 0} (explode strand)
  end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# This is an auto-generated file. | ||
# | ||
# Regenerating this file via `configlet sync` will: | ||
# - Recreate every `description` key/value pair | ||
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications | ||
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion) | ||
# - Preserve any other key/value pair | ||
# | ||
# As user-added comments (using the # character) will be removed when this file | ||
# is regenerated, comments can be added via a `comment` key. | ||
|
||
[3e5c30a8-87e2-4845-a815-a49671ade970] | ||
description = "empty strand" | ||
|
||
[a0ea42a6-06d9-4ac6-828c-7ccaccf98fec] | ||
description = "can count one nucleotide in single-character input" | ||
|
||
[eca0d565-ed8c-43e7-9033-6cefbf5115b5] | ||
description = "strand with repeated nucleotide" | ||
|
||
[40a45eac-c83f-4740-901a-20b22d15a39f] | ||
description = "strand with multiple nucleotides" | ||
|
||
[b4c47851-ee9e-4b0a-be70-a86e343bd851] | ||
description = "strand with invalid nucleotides" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
(* Exercise stub.  Replace the body with code that tallies the nucleotides
   in `strand`; until then every call raises Fail. *)
fun nucleotideCounts (strand: string): {a: int, c: int, g: int, t: int} =
  raise Fail "'nucleotideCounts' is not implemented"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
(* version 1.0.0 *) | ||
|
||
use "testlib.sml"; | ||
use "nucleotide-count.sml"; | ||
|
||
(* Forward-pipe operator: `x |> f` applies `f` to `x`.
   Declared left-associative so that a chain `x |> f |> g` is read left to
   right as `g (f x)`.  A right-associative declaration would parse the same
   chain as `x |> (f |> g)`, which does not behave as a pipeline.  A single
   `x |> f` is unaffected by the associativity. *)
infix |>
fun x |> f = f x
|
||
(* Test suite for `nucleotideCounts`: four counting scenarios followed by one
   invalid-input scenario. *)
val testsuite =
  let
    (* A test that passes when counting `strand` yields exactly `expected`. *)
    fun countsCase (label, strand, expected) =
      test label (fn _ => Expect.equalTo expected (nucleotideCounts strand))
  in
    describe "nucleotide-count" [
      countsCase ("empty strand",
                  "",
                  {a = 0, c = 0, g = 0, t = 0}),

      countsCase ("can count one nucleotide in single-character input",
                  "G",
                  {a = 0, c = 0, g = 1, t = 0}),

      countsCase ("strand with repeated nucleotide",
                  "GGGGGGG",
                  {a = 0, c = 0, g = 7, t = 0}),

      countsCase ("strand with multiple nucleotides",
                  "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC",
                  {a = 20, c = 12, g = 17, t = 21}),

      (* The call is wrapped in a thunk so that Expect.error is the one
         that triggers it and observes the exception. *)
      test "strand with invalid nucleotides"
        (fn _ => Expect.error (Fail "Invalid nucleotide in strand")
                              (fn _ => nucleotideCounts "AGXXACT"))
    ]
  end
|
||
val _ = Test.run testsuite |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
(* Assertion helpers used inside test thunks.  Each helper yields an
   `expectation`: `Pass`, or `Fail` carrying two report lines (what was
   expected, and what was actually seen). *)
structure Expect =
struct
  datatype expectation = Pass | Fail of string * string

  local
    (* Failure for a value mismatch. *)
    fun failEq wanted actual =
      Fail ("Expected: " ^ wanted, "Got: " ^ actual)

    (* Failure for an exception mismatch. *)
    fun failExn wanted actual =
      Fail ("Expected: " ^ wanted, "Raised: " ^ actual)

    fun exnName (e: exn): string = General.exnName e
  in
    (* Passes only for `true`. *)
    fun truthy true = Pass
      | truthy false = failEq "true" "false"

    (* Passes only for `false`. *)
    fun falsy false = Pass
      | falsy true = failEq "false" "true"

    (* Passes when the actual value `a` equals the expected value `b`;
       otherwise reports both, rendered with PolyML.makestring. *)
    fun equalTo b a =
      case a = b of
        true => Pass
      | false => failEq (PolyML.makestring b) (PolyML.makestring a)

    (* Passes when `a` and `b` differ by at most `delta` times the magnitude
       of either one (a relative tolerance). *)
    fun nearTo delta b a =
      let
        val gap = Real.abs (a - b)
        fun within reference = gap <= delta * Real.abs reference
      in
        if within a orelse within b
        then Pass
        else failEq (Real.toString b ^ " +/- " ^ Real.toString delta) (Real.toString a)
      end

    (* Passes when calling `f ()` raises any exception at all. *)
    fun anyError f =
      (f (); failExn "an exception" "Nothing") handle _ => Pass

    (* Passes when calling `f ()` raises an exception whose `exnMessage`
       equals that of `e`.  If nothing is raised, the report names `e` by
       its exception name. *)
    fun error e f =
      (f (); failExn (exnName e) "Nothing")
      handle raised =>
        let
          val wanted = exnMessage e
          val actual = exnMessage raised
        in
          if actual = wanted then Pass else failExn wanted actual
        end
  end
end
|
||
(* Terminal colouring via ANSI escape sequences. *)
structure TermColor =
struct
  datatype color = Red | Green | Yellow | Normal

  (* Escape sequence that switches the terminal to `color`
     (`Normal` resets the attributes). *)
  fun f color =
    case color of
      Red => "\027[31m"
    | Green => "\027[32m"
    | Yellow => "\027[33m"
    | Normal => "\027[0m"

  (* Wrap `s` so it is shown in `color`, then reset afterwards. *)
  fun colorize color s = String.concat [f color, s, f Normal]

  val redit = colorize Red
  val greenit = colorize Green
  val yellowit = colorize Yellow
end
|
||
(* Minimal test runner: a tree of named groups and test thunks that is
   evaluated, pretty-printed, summarised, and turned into a process exit
   status. *)
structure Test =
struct
  datatype testnode = TestGroup of string * testnode list
                    | Test of string * (unit -> Expect.expectation)

  local
    (* Outcome of running one test; each variant starts with the test's
       description. *)
    datatype evaluation = Success of string
                        | Failure of string * string * string
                        | Error of string * string

    (* Prefix `s` with `n` spaces. *)
    fun indent n s = CharVector.tabulate (n, fn _ => #" ") ^ s

    (* Render one outcome at nesting level `level` (two spaces per level);
       detail lines are indented two further spaces. *)
    fun fmt level outcome =
      let
        val pad = level * 2
        val passMark = TermColor.greenit "\226\156\148 " (* ✔ *)
        val failMark = TermColor.redit "\226\156\150 " (* ✖ *)
        fun headline mark descr = indent pad (mark ^ descr)
        fun detail line = indent (pad + 2) line
      in
        case outcome of
          Success descr => headline passMark descr
        | Failure (descr, expected, actual) =>
            String.concatWith "\n"
              [headline failMark descr, detail expected, detail actual]
        | Error (descr, reason) =>
            String.concatWith "\n"
              [headline failMark descr, detail (TermColor.redit reason)]
      end

    (* Run a single test.  The result pairs a (passed, failed, errored)
       increment with the outcome; an exception escaping the thunk is
       counted as an error rather than propagated. *)
    fun eval (Test (descr, thunk)) =
          (case thunk () of
             Expect.Pass => ((1, 0, 0), Success descr)
           | Expect.Fail (expected, actual) =>
               ((0, 1, 0), Failure (descr, expected, actual)))
          handle problem =>
            ((0, 0, 1), Error (descr, "Unexpected error: " ^ exnMessage problem))
      | eval (TestGroup _) = raise Fail "Only a 'Test' can be evaluated"

    (* Walk the tree rooted at `node`, returning the summed counters and the
       report lines in display order. *)
    fun flatten depth node =
      case node of
        Test _ =>
          let
            val (tally, outcome) = eval node
          in
            (tally, [fmt depth outcome])
          end
      | TestGroup (title, children) =>
          let
            (* foldr applies this to the last child first, so children are
               evaluated right to left while their lines stay in order. *)
            fun merge (child, ((p, f, e), linesSoFar)) =
              let
                val ((p', f', e'), childLines) = flatten (depth + 1) child
              in
                ((p' + p, f' + f, e' + e), childLines @ linesSoFar)
              end

            val (tally, body) = foldr merge ((0, 0, 0), []) children
          in
            (tally, indent (depth * 2) title :: body)
          end

    fun println s = print (s ^ "\n")
  in
    (* Run every test in `suite`, print the report and a summary line, then
       exit the process: success only if nothing failed or errored. *)
    fun run suite =
      let
        val ((succeeded, failed, errored), texts) = flatten 0 suite

        fun counted n label = Int.toString n ^ label

        val summary = String.concatWith ", "
          [ TermColor.greenit (counted succeeded " passed")
          , TermColor.redit (counted failed " failed")
          , TermColor.redit (counted errored " errored")
          , counted (succeeded + failed + errored) " total"
          ]

        val status =
          case (failed, errored) of
            (0, 0) => OS.Process.success
          | _ => OS.Process.failure
      in
        List.app println (texts @ ["", "Tests: " ^ summary]);
        OS.Process.exit status
      end
  end
end
|
||
(* Build a named group node containing the test nodes `tests`. *)
fun describe description tests =
  Test.TestGroup (description, tests)
(* Build a single test node that runs `thunk` when the suite is evaluated. *)
fun test description thunk =
  Test.Test (description, thunk)