Skip to content

Commit

Permalink
Add nucleotide-count exercise (#286)
Browse files Browse the repository at this point in the history
  • Loading branch information
keiravillekode authored Dec 16, 2024
1 parent b2bcdef commit 2b5ae3f
Show file tree
Hide file tree
Showing 8 changed files with 278 additions and 0 deletions.
9 changes: 9 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,15 @@
"math"
]
},
{
"slug": "nucleotide-count",
"name": "Nucleotide Count",
"uuid": "4ce578b2-9cfb-498b-947f-79f97abeb224",
"practices": [],
"prerequisites": [],
"difficulty": 2,
"topics": []
},
{
"slug": "eliuds-eggs",
"name": "Eliud's Eggs",
Expand Down
23 changes: 23 additions & 0 deletions exercises/practice/nucleotide-count/.docs/instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Instructions

Each of us inherits from our biological parents a set of chemical instructions known as DNA that influence how our bodies are constructed.
All known life depends on DNA!

> Note: You do not need to understand anything about nucleotides or DNA to complete this exercise.

DNA is a long chain of other chemicals and the most important are the four nucleotides, adenine, cytosine, guanine and thymine.
A single DNA chain can contain billions of these four nucleotides and the order in which they occur is important!
We call the order of these nucleotides in a bit of DNA a "DNA sequence".

We represent a DNA sequence as an ordered collection of these four nucleotides and a common way to do that is with a string of characters such as "ATTACG" for a DNA sequence of 6 nucleotides.
'A' for adenine, 'C' for cytosine, 'G' for guanine, and 'T' for thymine.

Given a string representing a DNA sequence, count how many of each nucleotide is present.
If the string contains characters that aren't A, C, G, or T then it is invalid and you should signal an error.

For example:

```text
"GATTACA" -> 'A': 3, 'C': 1, 'G': 1, 'T': 2
"INVALID" -> error
```
19 changes: 19 additions & 0 deletions exercises/practice/nucleotide-count/.meta/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"authors": [
"keiravillekode"
],
"files": {
"solution": [
"nucleotide-count.sml"
],
"test": [
"test.sml"
],
"example": [
".meta/example.sml"
]
},
"blurb": "Given a DNA string, compute how many times each nucleotide occurs in the string.",
"source": "The Calculating DNA Nucleotides_problem at Rosalind",
"source_url": "https://rosalind.info/problems/dna/"
}
13 changes: 13 additions & 0 deletions exercises/practice/nucleotide-count/.meta/example.sml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
(* Counts how often each of the nucleotides A, C, G and T occurs in `strand`.

   Returns a record with one counter per nucleotide; the empty strand gives
   all zeros.  Raises `Fail "Invalid nucleotide in strand"` as soon as a
   character other than A, C, G or T is met (scanning left to right). *)
fun nucleotideCounts (strand: string): {a: int, c: int, g: int, t: int} =
  let
    (* Adds a single nucleotide to the running totals. *)
    fun tally (#"A", {a, c, g, t}) = {a = a + 1, c = c, g = g, t = t}
      | tally (#"C", {a, c, g, t}) = {a = a, c = c + 1, g = g, t = t}
      | tally (#"G", {a, c, g, t}) = {a = a, c = c, g = g + 1, t = t}
      | tally (#"T", {a, c, g, t}) = {a = a, c = c, g = g, t = t + 1}
      | tally _ = raise Fail "Invalid nucleotide in strand"
  in
    foldl tally {a = 0, c = 0, g = 0, t = 0} (explode strand)
  end
25 changes: 25 additions & 0 deletions exercises/practice/nucleotide-count/.meta/tests.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# This is an auto-generated file.
#
# Regenerating this file via `configlet sync` will:
# - Recreate every `description` key/value pair
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
# - Preserve any other key/value pair
#
# As user-added comments (using the # character) will be removed when this file
# is regenerated, comments can be added via a `comment` key.

[3e5c30a8-87e2-4845-a815-a49671ade970]
description = "empty strand"

[a0ea42a6-06d9-4ac6-828c-7ccaccf98fec]
description = "can count one nucleotide in single-character input"

[eca0d565-ed8c-43e7-9033-6cefbf5115b5]
description = "strand with repeated nucleotide"

[40a45eac-c83f-4740-901a-20b22d15a39f]
description = "strand with multiple nucleotides"

[b4c47851-ee9e-4b0a-be70-a86e343bd851]
description = "strand with invalid nucleotides"
2 changes: 2 additions & 0 deletions exercises/practice/nucleotide-count/nucleotide-count.sml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
(* Placeholder for the exercise solution.

   Expected contract (per the exercise instructions and test suite): return
   how many times each of A, C, G and T occurs in `strand`, and raise
   `Fail "Invalid nucleotide in strand"` when any other character appears.
   Until implemented, every call raises the `Fail` below. *)
fun nucleotideCounts (strand: string): {a: int, c: int, g: int, t: int} =
raise Fail "'nucleotideCounts' is not implemented"
27 changes: 27 additions & 0 deletions exercises/practice/nucleotide-count/test.sml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
(* version 1.0.0 *)

(* Load the test helpers (Expect, Test, describe, test) and then the solution
   under test.  The order matters: both must be loaded before the suite below
   is elaborated. *)
use "testlib.sml";
use "nucleotide-count.sml";

(* Reverse application: `x |> f` evaluates to `f x`, so the value under test
   reads first and the expectation second. *)
infixr |>
fun x |> f = f x

(* One test per canonical case.  Each test body is a thunk so the call to
   nucleotideCounts only happens when the runner evaluates the test. *)
val testsuite =
describe "nucleotide-count" [
test "empty strand"
(fn _ => nucleotideCounts "" |> Expect.equalTo {a = 0, c = 0, g = 0, t = 0}),

test "can count one nucleotide in single-character input"
(fn _ => nucleotideCounts "G" |> Expect.equalTo {a = 0, c = 0, g = 1, t = 0}),

test "strand with repeated nucleotide"
(fn _ => nucleotideCounts "GGGGGGG" |> Expect.equalTo {a = 0, c = 0, g = 7, t = 0}),

test "strand with multiple nucleotides"
(fn _ => nucleotideCounts "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC" |> Expect.equalTo {a = 20, c = 12, g = 17, t = 21}),

(* The inner thunk delays the failing call so that Expect.error can run it
   itself and compare the message of the raised exception. *)
test "strand with invalid nucleotides"
(fn _ => (fn _ => nucleotideCounts "AGXXACT") |> Expect.error (Fail "Invalid nucleotide in strand"))
]

(* Runs every test, prints the report and exits the process with a status
   reflecting the outcome. *)
val _ = Test.run testsuite
160 changes: 160 additions & 0 deletions exercises/practice/nucleotide-count/testlib.sml
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
(* Assertion helpers for the test suite.

   Every helper yields an `expectation`: either `Pass`, or `Fail` carrying two
   report lines, "Expected: ..." followed by "Got: ..." or "Raised: ...". *)
structure Expect =
struct
  datatype expectation = Pass | Fail of string * string

  local
    (* Builds a failure whose second report line starts with `label`. *)
    fun mismatch label expected actual =
      Fail ("Expected: " ^ expected, label ^ actual)

    (* Failure for a value that differs from the expected one. *)
    val failEq = mismatch "Got: "

    (* Failure for an exception that differs from (or is missing instead of)
       the expected one. *)
    val failExn = mismatch "Raised: "
  in
    (* Passes only for `true`. *)
    fun truthy true = Pass
      | truthy false = failEq "true" "false"

    (* Passes only for `false`. *)
    fun falsy false = Pass
      | falsy true = failEq "false" "true"

    (* `equalTo expected actual` passes when both values are equal; otherwise
       both are rendered with PolyML.makestring for the report. *)
    fun equalTo expected actual =
      if actual <> expected
      then failEq (PolyML.makestring expected) (PolyML.makestring actual)
      else Pass

    (* `nearTo delta expected actual` passes when the absolute difference is
       at most `delta` times the magnitude of either operand. *)
    fun nearTo delta expected actual =
      let
        val gap = Real.abs (actual - expected)
        fun within reference = gap <= delta * Real.abs reference
      in
        if within actual orelse within expected
        then Pass
        else failEq (Real.toString expected ^ " +/- " ^ Real.toString delta)
                    (Real.toString actual)
      end

    (* Passes when running `f ()` raises any exception at all. *)
    fun anyError f =
      (ignore (f ()); failExn "an exception" "Nothing")
      handle _ => Pass

    (* `error expected f` passes when `f ()` raises an exception whose
       exnMessage equals that of `expected`.  When nothing is raised, the
       report names the expected exception via General.exnName. *)
    fun error expected f =
      (ignore (f ()); failExn (General.exnName expected) "Nothing")
      handle raised =>
        let
          val wanted = exnMessage expected
          val actual = exnMessage raised
        in
          if actual = wanted then Pass else failExn wanted actual
        end
  end
end

(* Terminal colouring for the test report, using escape sequences that start
   with character 27 (ESC). *)
structure TermColor =
struct
  datatype color = Red | Green | Yellow | Normal

  (* Escape sequence that switches the terminal to `color`; `Normal` resets. *)
  fun f color =
    case color of
      Red => "\027[31m"
    | Green => "\027[32m"
    | Yellow => "\027[33m"
    | Normal => "\027[0m"

  (* Wraps `s` in the sequence for `color` and resets afterwards. *)
  fun colorize color s = String.concat [f color, s, f Normal]

  (* Shorthands for the colours used by the report. *)
  fun redit s = colorize Red s

  fun greenit s = colorize Green s

  fun yellowit s = colorize Yellow s
end

(* Test tree and runner.

   A suite is a tree of named groups whose leaves are named thunks producing
   an Expect.expectation.  `run` evaluates the tree, prints an indented report
   plus a summary line, and exits the process. *)
structure Test =
struct
  datatype testnode = TestGroup of string * testnode list
                    | Test of string * (unit -> Expect.expectation)

  local
    (* Outcome of one leaf: passed, failed an expectation (with the two
       report lines), or raised an unexpected exception. *)
    datatype evaluation = Success of string
                        | Failure of string * string * string
                        | Error of string * string

    (* Prefixes `s` with `n` spaces. *)
    fun indent n s = CharVector.tabulate (n, fn _ => #" ") ^ s

    (* Renders one outcome at nesting level `level` (two spaces per level);
       detail lines are indented two further spaces. *)
    fun fmt level outcome =
      let
        val margin = level * 2
        (* UTF-8 bytes of U+2714 (check mark) and U+2716 (cross mark). *)
        val passMark = TermColor.greenit "\226\156\148 "
        val failMark = TermColor.redit "\226\156\150 "

        fun headline mark descr = indent margin (mark ^ descr)
        fun detail text = indent (margin + 2) text

        val reportLines =
          case outcome of
            Success descr =>
              [headline passMark descr]
          | Failure (descr, expected, got) =>
              [headline failMark descr, detail expected, detail got]
          | Error (descr, reason) =>
              [headline failMark descr, detail (TermColor.redit reason)]
      in
        String.concatWith "\n" reportLines
      end

    (* Runs a single leaf and returns its (passed, failed, errored) tally
       together with the outcome.  Any exception escaping the thunk is
       reported as an Error rather than propagated. *)
    fun eval node =
      case node of
        TestGroup _ => raise Fail "Only a 'Test' can be evaluated"
      | Test (descr, thunk) =>
          (case thunk () of
             Expect.Pass => ((1, 0, 0), Success descr)
           | Expect.Fail (expected, got) =>
               ((0, 1, 0), Failure (descr, expected, got)))
          handle e =>
            ((0, 0, 1), Error (descr, "Unexpected error: " ^ exnMessage e))

    (* Walks the tree, returning the accumulated tally and the report lines.
       Children are folded from the right, so they are evaluated last to
       first while the resulting lines stay in declaration order. *)
    fun flatten depth (node as Test _) =
          let
            val (tally, outcome) = eval node
          in
            (tally, [fmt depth outcome])
          end
      | flatten depth (TestGroup (title, children)) =
          let
            fun visit (child, ((passed, failed, errored), blocks)) =
              let
                val ((p, f, e), block) = flatten (depth + 1) child
              in
                ((p + passed, f + failed, e + errored), block :: blocks)
              end

            val (tally, blocks) = foldr visit ((0, 0, 0), []) children
          in
            (tally, indent (depth * 2) title :: List.concat blocks)
          end

    fun println s = print (s ^ "\n")
  in
    (* Evaluates `suite`, prints the report followed by a blank line and a
       "Tests: ..." summary, then exits with OS.Process.success when nothing
       failed or errored and OS.Process.failure otherwise. *)
    fun run suite =
      let
        val ((succeeded, failed, errored), texts) = flatten 0 suite

        fun counted n suffix = Int.toString n ^ suffix

        val summary = String.concatWith ", " [
          TermColor.greenit (counted succeeded " passed"),
          TermColor.redit (counted failed " failed"),
          TermColor.redit (counted errored " errored"),
          counted (succeeded + failed + errored) " total"
        ]

        val status =
          case (failed, errored) of
            (0, 0) => OS.Process.success
          | _ => OS.Process.failure
      in
        List.app println texts;
        println "";
        println ("Tests: " ^ summary);
        OS.Process.exit status
      end
  end
end

(* `describe name children` builds a named group of tests. *)
fun describe name children = Test.TestGroup (name, children)

(* `test name body` builds a single named test; `body` is run lazily by the
   test runner. *)
fun test name body = Test.Test (name, body)

0 comments on commit 2b5ae3f

Please sign in to comment.