From 2b5ae3f57e486dcf3ba3c156a7b966c92a4a8eaf Mon Sep 17 00:00:00 2001
From: keiravillekode
Date: Tue, 17 Dec 2024 06:55:29 +1100
Subject: [PATCH] Add nucleotide-count exercise (#286)

---
 config.json                                   |   9 +
 .../nucleotide-count/.docs/instructions.md    |  23 +++
 .../nucleotide-count/.meta/config.json        |  19 +++
 .../nucleotide-count/.meta/example.sml        |  13 ++
 .../nucleotide-count/.meta/tests.toml         |  25 +++
 .../nucleotide-count/nucleotide-count.sml     |   2 +
 exercises/practice/nucleotide-count/test.sml  |  27 +++
 .../practice/nucleotide-count/testlib.sml     | 160 ++++++++++++++++++
 8 files changed, 278 insertions(+)
 create mode 100644 exercises/practice/nucleotide-count/.docs/instructions.md
 create mode 100644 exercises/practice/nucleotide-count/.meta/config.json
 create mode 100644 exercises/practice/nucleotide-count/.meta/example.sml
 create mode 100644 exercises/practice/nucleotide-count/.meta/tests.toml
 create mode 100644 exercises/practice/nucleotide-count/nucleotide-count.sml
 create mode 100644 exercises/practice/nucleotide-count/test.sml
 create mode 100644 exercises/practice/nucleotide-count/testlib.sml

diff --git a/config.json b/config.json
index da24a9c0..d0e69847 100644
--- a/config.json
+++ b/config.json
@@ -333,6 +333,15 @@
           "math"
         ]
       },
+      {
+        "slug": "nucleotide-count",
+        "name": "Nucleotide Count",
+        "uuid": "4ce578b2-9cfb-498b-947f-79f97abeb224",
+        "practices": [],
+        "prerequisites": [],
+        "difficulty": 2,
+        "topics": []
+      },
       {
         "slug": "eliuds-eggs",
         "name": "Eliud's Eggs",
diff --git a/exercises/practice/nucleotide-count/.docs/instructions.md b/exercises/practice/nucleotide-count/.docs/instructions.md
new file mode 100644
index 00000000..548d9ba5
--- /dev/null
+++ b/exercises/practice/nucleotide-count/.docs/instructions.md
@@ -0,0 +1,23 @@
+# Instructions
+
+Each of us inherits from our biological parents a set of chemical instructions known as DNA that influence how our bodies are constructed.
+All known life depends on DNA!
+
+> Note: You do not need to understand anything about nucleotides or DNA to complete this exercise.
+
+DNA is a long chain of other chemicals and the most important are the four nucleotides, adenine, cytosine, guanine and thymine.
+A single DNA chain can contain billions of these four nucleotides and the order in which they occur is important!
+We call the order of these nucleotides in a bit of DNA a "DNA sequence".
+
+We represent a DNA sequence as an ordered collection of these four nucleotides and a common way to do that is with a string of characters such as "ATTACG" for a DNA sequence of 6 nucleotides.
+'A' for adenine, 'C' for cytosine, 'G' for guanine, and 'T' for thymine.
+
+Given a string representing a DNA sequence, count how many of each nucleotide is present.
+If the string contains characters that aren't A, C, G, or T then it is invalid and you should signal an error.
+
+For example:
+
+```text
+"GATTACA" -> 'A': 3, 'C': 1, 'G': 1, 'T': 2
+"INVALID" -> error
+```
diff --git a/exercises/practice/nucleotide-count/.meta/config.json b/exercises/practice/nucleotide-count/.meta/config.json
new file mode 100644
index 00000000..24435e60
--- /dev/null
+++ b/exercises/practice/nucleotide-count/.meta/config.json
@@ -0,0 +1,19 @@
+{
+  "authors": [
+    "keiravillekode"
+  ],
+  "files": {
+    "solution": [
+      "nucleotide-count.sml"
+    ],
+    "test": [
+      "test.sml"
+    ],
+    "example": [
+      ".meta/example.sml"
+    ]
+  },
+  "blurb": "Given a DNA string, compute how many times each nucleotide occurs in the string.",
+  "source": "The Calculating DNA Nucleotides_problem at Rosalind",
+  "source_url": "https://rosalind.info/problems/dna/"
+}
diff --git a/exercises/practice/nucleotide-count/.meta/example.sml b/exercises/practice/nucleotide-count/.meta/example.sml
new file mode 100644
index 00000000..44583e9d
--- /dev/null
+++ b/exercises/practice/nucleotide-count/.meta/example.sml
@@ -0,0 +1,13 @@
+fun nucleotideCounts (strand: string): {a: int, c: int, g: int, t: int} = (* tallies A, C, G and T in strand; raises Fail on any other character *)
+  let
+    fun recurse(a: int, c: int, g: int, t: int, nil) = {a = a, c = c, g = g, t = t} (* input exhausted: return the accumulated counts *)
+      | recurse(a: int, c: int, g: int, t: int, hd :: tl) = (* bump the counter matching hd, then continue with tl *)
+          case hd of
+              #"A" => recurse(a + 1, c, g, t, tl)
+            | #"C" => recurse(a, c + 1, g, t, tl)
+            | #"G" => recurse(a, c, g + 1, t, tl)
+            | #"T" => recurse(a, c, g, t + 1, tl)
+            | _ => raise Fail "Invalid nucleotide in strand" (* test.sml expects exactly this message *)
+  in
+    recurse (0, 0, 0, 0, explode strand) (* start all four counters at zero over the strand's characters *)
+  end
diff --git a/exercises/practice/nucleotide-count/.meta/tests.toml b/exercises/practice/nucleotide-count/.meta/tests.toml
new file mode 100644
index 00000000..7c55e53f
--- /dev/null
+++ b/exercises/practice/nucleotide-count/.meta/tests.toml
@@ -0,0 +1,25 @@
+# This is an auto-generated file.
+#
+# Regenerating this file via `configlet sync` will:
+# - Recreate every `description` key/value pair
+# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
+# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
+# - Preserve any other key/value pair
+#
+# As user-added comments (using the # character) will be removed when this file
+# is regenerated, comments can be added via a `comment` key.
+
+[3e5c30a8-87e2-4845-a815-a49671ade970]
+description = "empty strand"
+
+[a0ea42a6-06d9-4ac6-828c-7ccaccf98fec]
+description = "can count one nucleotide in single-character input"
+
+[eca0d565-ed8c-43e7-9033-6cefbf5115b5]
+description = "strand with repeated nucleotide"
+
+[40a45eac-c83f-4740-901a-20b22d15a39f]
+description = "strand with multiple nucleotides"
+
+[b4c47851-ee9e-4b0a-be70-a86e343bd851]
+description = "strand with invalid nucleotides"
diff --git a/exercises/practice/nucleotide-count/nucleotide-count.sml b/exercises/practice/nucleotide-count/nucleotide-count.sml
new file mode 100644
index 00000000..ebd9a650
--- /dev/null
+++ b/exercises/practice/nucleotide-count/nucleotide-count.sml
@@ -0,0 +1,2 @@
+fun nucleotideCounts (strand: string): {a: int, c: int, g: int, t: int} =
+  raise Fail "'nucleotideCounts' is not implemented"
diff --git a/exercises/practice/nucleotide-count/test.sml b/exercises/practice/nucleotide-count/test.sml
new file mode 100644
index 00000000..33c468ae
--- /dev/null
+++ b/exercises/practice/nucleotide-count/test.sml
@@ -0,0 +1,27 @@
+(* version 1.0.0 *)
+
+use "testlib.sml";
+use "nucleotide-count.sml";
+
+infixr |>
+fun x |> f = f x (* pipe: feeds the value on the left into the function on the right *)
+
+val testsuite =
+  describe "nucleotide-count" [
+    test "empty strand"
+      (fn _ => nucleotideCounts "" |> Expect.equalTo {a = 0, c = 0, g = 0, t = 0}),
+
+    test "can count one nucleotide in single-character input"
+      (fn _ => nucleotideCounts "G" |> Expect.equalTo {a = 0, c = 0, g = 1, t = 0}),
+
+    test "strand with repeated nucleotide"
+      (fn _ => nucleotideCounts "GGGGGGG" |> Expect.equalTo {a = 0, c = 0, g = 7, t = 0}),
+
+    test "strand with multiple nucleotides"
+      (fn _ => nucleotideCounts "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC" |> Expect.equalTo {a = 20, c = 12, g = 17, t = 21}),
+
+    test "strand with invalid nucleotides"
+      (fn _ => (fn _ => nucleotideCounts "AGXXACT") |> Expect.error (Fail "Invalid nucleotide in strand"))
+  ]
+
+val _ = Test.run testsuite
diff --git a/exercises/practice/nucleotide-count/testlib.sml b/exercises/practice/nucleotide-count/testlib.sml
new file mode 100644
index 00000000..0c8370c0
--- /dev/null
+++ b/exercises/practice/nucleotide-count/testlib.sml
@@ -0,0 +1,160 @@
+structure Expect =
+struct
+  datatype expectation = Pass | Fail of string * string (* Fail carries the "expected" line and the "got"/"raised" line *)
+
+  local
+    fun failEq b a =
+      Fail ("Expected: " ^ b, "Got: " ^ a)
+
+    fun failExn b a =
+      Fail ("Expected: " ^ b, "Raised: " ^ a)
+
+    fun exnName (e: exn): string = General.exnName e
+  in
+    fun truthy a =
+      if a
+      then Pass
+      else failEq "true" "false"
+
+    fun falsy a =
+      if a
+      then failEq "false" "true"
+      else Pass
+
+    fun equalTo b a = (* b is the expected value, a the actual one *)
+      if a = b
+      then Pass
+      else failEq (PolyML.makestring b) (PolyML.makestring a)
+
+    fun nearTo delta b a = (* passes when |a - b| is within delta times |a| or delta times |b| *)
+      if Real.abs (a - b) <= delta * Real.abs a orelse
+         Real.abs (a - b) <= delta * Real.abs b
+      then Pass
+      else failEq (Real.toString b ^ " +/- " ^ Real.toString delta) (Real.toString a)
+
+    fun anyError f = (* passes when f () raises any exception *)
+      (
+        f ();
+        failExn "an exception" "Nothing"
+      ) handle _ => Pass
+
+    fun error e f = (* passes when f () raises an exception whose exnMessage equals that of e *)
+      (
+        f ();
+        failExn (exnName e) "Nothing"
+      ) handle e' => if exnMessage e' = exnMessage e
+                     then Pass
+                     else failExn (exnMessage e) (exnMessage e')
+  end
+end
+
+structure TermColor =
+struct
+  datatype color = Red | Green | Yellow | Normal
+
+  fun f Red    = "\027[31m"
+    | f Green  = "\027[32m"
+    | f Yellow = "\027[33m"
+    | f Normal = "\027[0m"
+
+  fun colorize color s = (f color) ^ s ^ (f Normal) (* prefix s with the colour's escape code and append the Normal one *)
+
+  val redit = colorize Red
+
+  val greenit = colorize Green
+
+  val yellowit = colorize Yellow
+end
+
+structure Test =
+struct
+  datatype testnode = TestGroup of string * testnode list
+                    | Test of string * (unit -> Expect.expectation)
+
+  local
+    datatype evaluation = Success of string
+                        | Failure of string * string * string
+                        | Error of string * string
+
+    fun indent n s = (implode (List.tabulate (n, fn _ => #" "))) ^ s (* prepend n spaces to s *)
+
+    fun fmt indentlvl ev = (* render one evaluation as one or more indented output lines *)
+      let
+        val check = TermColor.greenit "\226\156\148 " (* ✔ *)
+        val cross = TermColor.redit "\226\156\150 " (* ✖ *)
+        val indentlvl = indentlvl * 2
+      in
+        case ev of
+          Success descr => indent indentlvl (check ^ descr)
+        | Failure (descr, exp, got) =>
+            String.concatWith "\n" [indent indentlvl (cross ^ descr),
+                                    indent (indentlvl + 2) exp,
+                                    indent (indentlvl + 2) got]
+        | Error (descr, reason) =>
+            String.concatWith "\n" [indent indentlvl (cross ^ descr),
+                                    indent (indentlvl + 2) (TermColor.redit reason)]
+      end
+
+    fun eval (TestGroup _) = raise Fail "Only a 'Test' can be evaluated"
+      | eval (Test (descr, thunk)) = (* yields ((passed, failed, errored), evaluation) for a single test *)
+          (
+            case thunk () of
+              Expect.Pass => ((1, 0, 0), Success descr)
+            | Expect.Fail (s, s') => ((0, 1, 0), Failure (descr, s, s'))
+          )
+          handle e => ((0, 0, 1), Error (descr, "Unexpected error: " ^ exnMessage e)) (* an exception escaping the thunk counts as errored *)
+
+    fun flatten depth testnode = (* walk the tree; returns the summed counters and the list of output lines *)
+      let
+        fun sum (x, y, z) (a, b, c) = (x + a, y + b, z + c)
+
+        fun aux (t, (counter, acc)) =
+          let
+            val (counter', texts) = flatten (depth + 1) t
+          in
+            (sum counter' counter, texts :: acc)
+          end
+      in
+        case testnode of
+          TestGroup (descr, ts) =>
+            let
+              val (counter, texts) = foldr aux ((0, 0, 0), []) ts
+            in
+              (counter, (indent (depth * 2) descr) :: List.concat texts)
+            end
+        | Test _ =>
+            let
+              val (counter, evaluation) = eval testnode
+            in
+              (counter, [fmt depth evaluation])
+            end
+      end
+
+    fun println s = print (s ^ "\n")
+  in
+    fun run suite = (* print every result line and a summary, then exit the process with the matching status *)
+      let
+        val ((succeeded, failed, errored), texts) = flatten 0 suite
+
+        val summary = String.concatWith ", " [
+          TermColor.greenit ((Int.toString succeeded) ^ " passed"),
+          TermColor.redit ((Int.toString failed) ^ " failed"),
+          TermColor.redit ((Int.toString errored) ^ " errored"),
+          (Int.toString (succeeded + failed + errored)) ^ " total"
+        ]
+
+        val status = if failed = 0 andalso errored = 0
+                     then OS.Process.success
+                     else OS.Process.failure
+
+      in
+        List.app println texts;
+        println "";
+        println ("Tests: " ^ summary);
+        OS.Process.exit status
+      end
+  end
+end
+
+fun describe description tests = Test.TestGroup (description, tests)
+fun test description thunk = Test.Test (description, thunk)