From 0308ba8786fcd48823affeb6a740ad6329d3339d Mon Sep 17 00:00:00 2001
From: Sourabh Kashyap
Date: Mon, 17 Oct 2022 09:56:36 +0530
Subject: [PATCH 01/14] Update CodonFrame.java
Refactor CodonFrame.equals(Object) to reduce branch points
Signed-off-by: Sourabh Kashyap
---
src/main/java/DNAnalyzer/CodonFrame.java | 15 ++++++++-------
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/src/main/java/DNAnalyzer/CodonFrame.java b/src/main/java/DNAnalyzer/CodonFrame.java
index 04d58399..f275a093 100644
--- a/src/main/java/DNAnalyzer/CodonFrame.java
+++ b/src/main/java/DNAnalyzer/CodonFrame.java
@@ -60,13 +60,14 @@ public int getMax() {
@Override
public boolean equals(final Object o) {
- if (this == o)
- return true;
- if (!(o instanceof CodonFrame))
- return false;
- final CodonFrame that = (CodonFrame) o;
- return getReadingFrame() == that.getReadingFrame() && getMin() == that.getMin() && getMax() == that.getMax()
- && getDna().equals(that.getDna());
+ boolean result = false;
+ if (this == o) {
+ result = true;
+ } else if (o instanceof final CodonFrame inputFrame) {
+ result = getReadingFrame() == inputFrame.getReadingFrame() && getMin() == inputFrame.getMin() && getMax() == inputFrame.getMax()
+ && getDna().equals(inputFrame.getDna());
+ }
+ return result;
}
@Override
From 44e45b3805ba7b0d0068631cc71289232af6e9ff Mon Sep 17 00:00:00 2001
From: boaglio
Date: Tue, 18 Oct 2022 07:30:44 -0300
Subject: [PATCH 02/14] refactor: new package protein
---
.gitignore | 1 +
src/main/java/DNAnalyzer/CmdArgs.java | 233 +++++++++---------
.../DNAnalyzer/protein/ProteinFinder.java | 94 ++++---
3 files changed, 170 insertions(+), 158 deletions(-)
diff --git a/.gitignore b/.gitignore
index f82fa3c7..1bc3a3ef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
.config/*
bin/*
.vs/*
+.settings/*
### Java ###
*.class
diff --git a/src/main/java/DNAnalyzer/CmdArgs.java b/src/main/java/DNAnalyzer/CmdArgs.java
index c5038c1b..89715340 100644
--- a/src/main/java/DNAnalyzer/CmdArgs.java
+++ b/src/main/java/DNAnalyzer/CmdArgs.java
@@ -18,141 +18,134 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import DNAnalyzer.protein.ProteinAnalysis;
+import DNAnalyzer.protein.ProteinFinder;
+import DNAnalyzer.codon.CodonFrame;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;
import picocli.CommandLine.Parameters;
-import DNAnalyzer.codon.*;
-import DNAnalyzer.protein.*;
-
/**
* Class for handling command-line arguments.
*
* @version 1.2.1
*/
-@Command(
- name = "DNAnalyzer",
- mixinStandardHelpOptions = true,
- description = "A program to analyze DNA sequences.")
+@Command(name = "DNAnalyzer", mixinStandardHelpOptions = true, description = "A program to analyze DNA sequences.")
public class CmdArgs implements Runnable {
- private static final short READING_FRAME = 1;
-
- @Option(
- required = true,
- names = {"--amino"},
- description = "The amino acid representing the start of a gene.")
- String aminoAcid;
-
- @Option(
- names = {"--min"},
- description = "The minimum count of the reading frame.")
- int minCount = 0;
-
- @Option(
- names = {"--max"},
- description = "The maximum count of the reading frame.")
- int maxCount = 0;
-
- @Parameters(paramLabel = "DNA", description = "The FASTA file to be analyzed.")
- File dnaFile;
-
- @Option(
- names = {"--find"},
- description = "The DNA sequence to be found within the FASTA file.")
- File proteinFile;
-
- @Option(
- names = {"--reverse", "-r"},
- description = "Reverse the DNA sequence before processing.")
- boolean reverse;
-
- /**
- * Output a list of proteins, GC content, Nucleotide content, and other information found in a DNA
- * sequence.
- *
- * @throws IllegalArgumentException when the DNA FASTA file contains an invalid DNA sequence
- */
- @Override
- public void run() {
- try {
- Main.clearTerminal();
-
- final String dna = readDNA();
- final List proteins = new ProteinFinder().getProtein(dna, aminoAcid);
- // Output the proteins, GC content, and nucleotide cnt found in the DNA
- Properties.printProteinList(proteins, aminoAcid);
- System.out.println("\nGC-content (genome): " + Properties.getGCContent(dna) + "\n");
- Properties.printNucleotideCount(dna);
-
- // Output the number of codons based on the reading frame the user wants to look
- // at, and minimum and maximum filters
- final CodonFrame codonFrame = new CodonFrame(dna, READING_FRAME, minCount, maxCount);
- final ReadingFrames aap = new ReadingFrames(codonFrame);
- System.out.println();
- aap.printCodonCounts();
-
- // Find protein sequence in DNA if necessary
- readProtein().ifPresent(pr -> findProtein(dna, pr));
-
- // Find the longest protein in DNA
- ProteinAnalysis.printLongestProtein(proteins);
-
- // Print if DNA is random
- if (Properties.isRandomDNA(dna)) {
- System.out.println("\n" + dnaFile.getName() + " has been detected to be random.");
- }
- } catch (IOException | InterruptedException e) {
- e.printStackTrace();
+ private static final short READING_FRAME = 1;
+
+ @Option(required = true, names = { "--amino" }, description = "The amino acid representing the start of a gene.")
+ String aminoAcid;
+
+ @Option(names = { "--min" }, description = "The minimum count of the reading frame.")
+ int minCount = 0;
+
+ @Option(names = { "--max" }, description = "The maximum count of the reading frame.")
+ int maxCount = 0;
+
+ @Parameters(paramLabel = "DNA", description = "The FASTA file to be analyzed.")
+ File dnaFile;
+
+ @Option(names = { "--find" }, description = "The DNA sequence to be found within the FASTA file.")
+ File proteinFile;
+
+ @Option(names = { "--reverse", "-r" }, description = "Reverse the DNA sequence before processing.")
+ boolean reverse;
+
+ /**
+ * Output a list of proteins, GC content, Nucleotide content, and other
+ * information found in a DNA
+ * sequence.
+ *
+ * @throws IllegalArgumentException
+ * when the DNA FASTA file contains an invalid DNA sequence
+ */
+ @Override
+ public void run() {
+ try {
+ Main.clearTerminal();
+
+ final String dna = readDNA();
+ final List proteins = ProteinFinder.getProtein(dna, aminoAcid);
+ // Output the proteins, GC content, and nucleotide cnt found in the DNA
+ Properties.printProteinList(proteins, aminoAcid);
+ System.out.println("\nGC-content (genome): " + Properties.getGCContent(dna) + "\n");
+ Properties.printNucleotideCount(dna);
+
+ // Output the number of codons based on the reading frame the user wants to look
+ // at, and minimum and maximum filters
+ final CodonFrame codonFrame = new CodonFrame(dna, READING_FRAME, minCount, maxCount);
+ final ReadingFrames aap = new ReadingFrames(codonFrame);
+ System.out.println();
+ aap.printCodonCounts();
+
+ // Find protein sequence in DNA if necessary
+ readProtein().ifPresent(pr -> findProtein(dna, pr));
+
+ // Find the longest protein in DNA
+ ProteinAnalysis.printLongestProtein(proteins);
+
+ // Print if DNA is random
+ if (Properties.isRandomDNA(dna)) {
+ System.out.println("\n" + dnaFile.getName() + " has been detected to be random.");
+ }
+ } catch (IOException | InterruptedException e) {
+ e.printStackTrace();
+ }
}
- }
-
- /**
- * Reads the contents of a file, stripping out newlines and converting everything to lowercase.
- *
- * @param file the file to read
- * @throws IOException if there is an error reading the file
- * @return String with the contents of the file (newlines removed and converted to lowercase)
- */
- private static String readFile(File file) throws IOException {
- return Files.readString(file.toPath()).replace("\n", "").toLowerCase();
- }
-
- /**
- * Find protein sequence in DNA and print to stdout its position.
- *
- * @param dna The DNA string
- * @param protein The protein string
- */
- private void findProtein(String dna, String protein) {
- Matcher m = Pattern.compile(protein).matcher(dna);
- if (m.find()) {
- System.out.println(
- "\nProtein sequence found at index " + m.start() + " in the DNA sequence.");
- } else {
- System.out.println("\nProtein sequence not found in the DNA sequence.");
+
+ /**
+ * Reads the contents of a file, stripping out newlines and converting
+ * everything to lowercase.
+ *
+ * @param file
+ * the file to read
+ * @throws IOException
+ * if there is an error reading the file
+ * @return String with the contents of the file (newlines removed and converted
+ * to lowercase)
+ */
+ private static String readFile(File file) throws IOException {
+ return Files.readString(file.toPath()).replace("\n", "").toLowerCase();
}
- }
- /** Read protein form the proteinFile */
- private Optional readProtein() throws IOException {
- if (proteinFile == null) {
- return Optional.empty();
+ /**
+ * Find protein sequence in DNA and print to stdout its position.
+ *
+ * @param dna
+ * The DNA string
+ * @param protein
+ * The protein string
+ */
+ private void findProtein(String dna, String protein) {
+ Matcher m = Pattern.compile(protein).matcher(dna);
+ if (m.find()) {
+ System.out.println("\nProtein sequence found at index " + m.start() + " in the DNA sequence.");
+ } else {
+ System.out.println("\nProtein sequence not found in the DNA sequence.");
+ }
}
- return Optional.of(readFile(proteinFile));
- }
-
- /** Load and preprocess DNA data */
- private String readDNA() throws IOException { // Valid DNA?
- String dna = readFile(dnaFile);
- if (!dna.matches("[atgc]+")) {
- throw new IllegalArgumentException("Invalid characters present in DNA sequence.");
+
+ /** Read protein form the proteinFile */
+ private Optional readProtein() throws IOException {
+ if (proteinFile == null) {
+ return Optional.empty();
+ }
+ return Optional.of(readFile(proteinFile));
}
- // Replace Uracil with Thymine (in case user entered RNA and not DNA)
- dna = dna.replace("u", "t");
- if (reverse) {
- dna = new StringBuilder(dna).reverse().toString();
+ /** Load and preprocess DNA data */
+ private String readDNA() throws IOException { // Valid DNA?
+ String dna = readFile(dnaFile);
+ if (!dna.matches("[atgc]+")) {
+ throw new IllegalArgumentException("Invalid characters present in DNA sequence.");
+ }
+ // Replace Uracil with Thymine (in case user entered RNA and not DNA)
+ dna = dna.replace("u", "t");
+
+ if (reverse) {
+ dna = new StringBuilder(dna).reverse().toString();
+ }
+ return dna;
}
- return dna;
- }
}
diff --git a/src/main/java/DNAnalyzer/protein/ProteinFinder.java b/src/main/java/DNAnalyzer/protein/ProteinFinder.java
index 336e55ea..48928678 100644
--- a/src/main/java/DNAnalyzer/protein/ProteinFinder.java
+++ b/src/main/java/DNAnalyzer/protein/ProteinFinder.java
@@ -12,77 +12,95 @@
package DNAnalyzer.protein;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
-import DNAnalyzer.codon.*;
-import DNAnalyzer.aminoAcid.*;
+
+import DNAnalyzer.aminoAcid.AminoAcidMapping;
+import DNAnalyzer.aminoAcid.AminoAcidNames;
+import DNAnalyzer.codon.CodonDataUtils;
/**
* Find proteins in a DNA sequence (contains the main algorithm).
*
* @author Piyush Acharya (@Verisimilitude11)
+ * @author Fernando Boaglio (@boaglio)
* @version 1.2.1
*/
public class ProteinFinder {
- /**
- * Amino acid list
- */
- private final List aminoAcidList;
/**
- * protein list
+ * Utility classes should not have public constructors
*/
- private final List proteinList;
-
- /**
- * ProteinFinder default constructor to initialize aminoAcidList and proteinList
- */
- public ProteinFinder() {
- aminoAcidList = new ArrayList<>();
- proteinList = new ArrayList<>();
+ private ProteinFinder() {
}
+ private static final int DNA_SIZE = 3;
+
/**
* Gets proteins from dna and amino acid
*
- * @param dna dna
- * @param aminoAcid amino acid
+ * @param dna
+ * dna
+ * @param aminoAcid
+ * amino acid
* @return list of proteins
*/
- public List getProtein(final String dna, final String aminoAcid) {
- this.aminoAcidList.addAll(CodonDataUtils.getAminoAcid(AminoAcidMapping.getAminoAcidMapping(aminoAcid)));
+ public static List getProtein(final String dna, final String aminoAcid) {
- int start_index;
- int stop_index;
- final List stop = CodonDataUtils.getAminoAcid(AminoAcidNames.STOP);
+ List aminoAcidList = new ArrayList<>();
+ List proteinList = new ArrayList<>();
+
+ aminoAcidList.addAll(CodonDataUtils.getAminoAcid(AminoAcidMapping.getAminoAcidMapping(aminoAcid)));
+
+ int startIndex;
+ final List stopCodonList = CodonDataUtils.getAminoAcid(AminoAcidNames.STOP);
// Outer loop loops through the start codons for the amino acids that the user
// entered.
// store the start index
- // Inner loop loops through the stop that the user entered.
- // store the stop_index
- // if index is not -1 then store the substring of dna with start and stop index
- // in the protein list
- for (final String start_codon : this.aminoAcidList) {
- start_index = dna.indexOf(start_codon.toLowerCase());
- for (final String stop_codon : stop) {
- stop_index = dna.indexOf(stop_codon.toLowerCase(), start_index + 3);
- if ((start_index != -1) && (stop_index != -1)) {
- this.proteinList.add(dna.substring(start_index, stop_index + 3).toUpperCase());
- break;
- }
- }
+ for (final String startCodon : aminoAcidList) {
+ startIndex = dna.indexOf(startCodon.toLowerCase());
+ addProtein(dna, proteinList, startIndex, stopCodonList);
}
// if no proteins are found, return null
- if (this.proteinList.isEmpty()) {
+ if (proteinList.isEmpty()) {
// Return null if no protein found in DNA sequence
System.out.println("No proteins found");
- return null;
+ return Collections.emptyList();
}
// Return list of proteins found in the DNA sequence
- return this.proteinList;
+ return proteinList;
+ }
+
+ /**
+ * Add protein to protein list
+ *
+ * @param dna
+ * @param proteinList
+ * @param startIndex
+ * @param stopCodonList
+ */
+ private static void addProtein(final String dna, List proteinList, int startIndex,
+ final List stopCodonList) {
+
+ // Inner loop loops through the stop that the user entered.
+ // store the stopIndex
+ // if index is not -1 then store the substring of dna with start and stop index
+ // in the protein list
+ int stopIndex;
+ for (final String stopCodon : stopCodonList) {
+
+ stopIndex = dna.indexOf(stopCodon.toLowerCase(), startIndex + DNA_SIZE);
+
+ if ((startIndex != -1) && (stopIndex != -1)) {
+ proteinList.add(dna.substring(startIndex, stopIndex + DNA_SIZE).toUpperCase());
+ break;
+ }
+
+ }
}
}
From e9e72e2e8d1d655462e74a6f3e6cd7612934020b Mon Sep 17 00:00:00 2001
From: boaglio
Date: Wed, 19 Oct 2022 23:45:06 -0300
Subject: [PATCH 03/14] fix: conflicting files
---
src/main/java/DNAnalyzer/CmdArgs.java | 172 +++++++++-------------
src/main/java/DNAnalyzer/DnaAnalyzer.java | 78 ++++++++++
2 files changed, 146 insertions(+), 104 deletions(-)
create mode 100644 src/main/java/DNAnalyzer/DnaAnalyzer.java
diff --git a/src/main/java/DNAnalyzer/CmdArgs.java b/src/main/java/DNAnalyzer/CmdArgs.java
index 89715340..09697ecd 100644
--- a/src/main/java/DNAnalyzer/CmdArgs.java
+++ b/src/main/java/DNAnalyzer/CmdArgs.java
@@ -10,142 +10,106 @@
*/
package DNAnalyzer;
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.util.List;
-import java.util.Optional;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import DNAnalyzer.protein.ProteinAnalysis;
-import DNAnalyzer.protein.ProteinFinder;
-import DNAnalyzer.codon.CodonFrame;
import picocli.CommandLine.Command;
import picocli.CommandLine.Option;
import picocli.CommandLine.Parameters;
+import java.io.File;
+import java.io.IOException;
+import java.nio.file.Files;
+
/**
* Class for handling command-line arguments.
*
* @version 1.2.1
*/
-@Command(name = "DNAnalyzer", mixinStandardHelpOptions = true, description = "A program to analyze DNA sequences.")
+@Command(
+ name = "DNAnalyzer",
+ mixinStandardHelpOptions = true,
+ description = "A program to analyze DNA sequences.")
public class CmdArgs implements Runnable {
- private static final short READING_FRAME = 1;
-
- @Option(required = true, names = { "--amino" }, description = "The amino acid representing the start of a gene.")
+ @Option(
+ required = true,
+ names = {"--amino"},
+ description = "The amino acid representing the start of a gene.")
String aminoAcid;
- @Option(names = { "--min" }, description = "The minimum count of the reading frame.")
+ @Option(
+ names = {"--min"},
+ description = "The minimum count of the reading frame.")
int minCount = 0;
- @Option(names = { "--max" }, description = "The maximum count of the reading frame.")
+ @Option(
+ names = {"--max"},
+ description = "The maximum count of the reading frame.")
int maxCount = 0;
@Parameters(paramLabel = "DNA", description = "The FASTA file to be analyzed.")
File dnaFile;
- @Option(names = { "--find" }, description = "The DNA sequence to be found within the FASTA file.")
+ @Option(
+ names = {"--find"},
+ description = "The DNA sequence to be found within the FASTA file.")
File proteinFile;
- @Option(names = { "--reverse", "-r" }, description = "Reverse the DNA sequence before processing.")
+ @Option(
+ names = {"--reverse", "-r"},
+ description = "Reverse the DNA sequence before processing.")
boolean reverse;
/**
- * Output a list of proteins, GC content, Nucleotide content, and other
- * information found in a DNA
- * sequence.
- *
- * @throws IllegalArgumentException
- * when the DNA FASTA file contains an invalid DNA sequence
- */
- @Override
- public void run() {
- try {
- Main.clearTerminal();
-
- final String dna = readDNA();
- final List proteins = ProteinFinder.getProtein(dna, aminoAcid);
- // Output the proteins, GC content, and nucleotide cnt found in the DNA
- Properties.printProteinList(proteins, aminoAcid);
- System.out.println("\nGC-content (genome): " + Properties.getGCContent(dna) + "\n");
- Properties.printNucleotideCount(dna);
-
- // Output the number of codons based on the reading frame the user wants to look
- // at, and minimum and maximum filters
- final CodonFrame codonFrame = new CodonFrame(dna, READING_FRAME, minCount, maxCount);
- final ReadingFrames aap = new ReadingFrames(codonFrame);
- System.out.println();
- aap.printCodonCounts();
-
- // Find protein sequence in DNA if necessary
- readProtein().ifPresent(pr -> findProtein(dna, pr));
-
- // Find the longest protein in DNA
- ProteinAnalysis.printLongestProtein(proteins);
-
- // Print if DNA is random
- if (Properties.isRandomDNA(dna)) {
- System.out.println("\n" + dnaFile.getName() + " has been detected to be random.");
- }
- } catch (IOException | InterruptedException e) {
- e.printStackTrace();
- }
- }
-
- /**
- * Reads the contents of a file, stripping out newlines and converting
- * everything to lowercase.
+ * Reads the contents of a file, stripping out newlines and converting everything to lowercase.
*
- * @param file
- * the file to read
- * @throws IOException
- * if there is an error reading the file
- * @return String with the contents of the file (newlines removed and converted
- * to lowercase)
+ * @param file the file to read
+ * @return String with the contents of the file (newlines removed and converted to lowercase)
+ * @throws IOException if there is an error reading the file
*/
- private static String readFile(File file) throws IOException {
+ String readFile(final File file) throws IOException {
return Files.readString(file.toPath()).replace("\n", "").toLowerCase();
}
- /**
- * Find protein sequence in DNA and print to stdout its position.
- *
- * @param dna
- * The DNA string
- * @param protein
- * The protein string
- */
- private void findProtein(String dna, String protein) {
- Matcher m = Pattern.compile(protein).matcher(dna);
- if (m.find()) {
- System.out.println("\nProtein sequence found at index " + m.start() + " in the DNA sequence.");
- } else {
- System.out.println("\nProtein sequence not found in the DNA sequence.");
- }
+ /**
+ * Output a list of proteins, GC content, Nucleotide content, and other information found in a DNA
+ * sequence.
+ *
+ * @throws IllegalArgumentException when the DNA FASTA file contains an invalid DNA sequence
+ */
+ @Override
+ public void run() {
+ DnaAnalyzer dnaAnalyzer = dnaAnalyzer(aminoAcid)
+ .isValidDna()
+ .replaceDNA("u", "t");
+
+ if (reverse) {
+ dnaAnalyzer = dnaAnalyzer.reverseDna();
}
- /** Read protein form the proteinFile */
- private Optional readProtein() throws IOException {
- if (proteinFile == null) {
- return Optional.empty();
- }
- return Optional.of(readFile(proteinFile));
- }
+ dnaAnalyzer
+ .printProteins()
+ .outPutCodons(minCount, maxCount)
+ .printLongestProtein();
- /** Load and preprocess DNA data */
- private String readDNA() throws IOException { // Valid DNA?
- String dna = readFile(dnaFile);
- if (!dna.matches("[atgc]+")) {
- throw new IllegalArgumentException("Invalid characters present in DNA sequence.");
- }
- // Replace Uracil with Thymine (in case user entered RNA and not DNA)
- dna = dna.replace("u", "t");
+ if (Properties.isRandomDNA(dnaAnalyzer.dna().getDna())) {
+ System.out.println("\n" + dnaFile.getName() + " has been detected to be random.");
+ }
+ }
- if (reverse) {
- dna = new StringBuilder(dna).reverse().toString();
- }
- return dna;
+ /**
+ * @param aminoAcid representing the start of the gene
+ * @return DnaAnalyzer which provides functions to analyze the dnaFile, protein file and supplied aminoAcid
+ */
+ private DnaAnalyzer dnaAnalyzer(String aminoAcid) {
+ try {
+ String protein = null;
+ Main.clearTerminal();
+ final String dna = readFile(dnaFile);
+ if (proteinFile != null) {
+ protein = readFile(proteinFile);
+ }
+ return new DnaAnalyzer(new Dna(dna), protein, aminoAcid);
+ } catch (IOException | InterruptedException e) {
+ e.printStackTrace();
+ return new DnaAnalyzer(null, null, aminoAcid);
}
+ }
}
diff --git a/src/main/java/DNAnalyzer/DnaAnalyzer.java b/src/main/java/DNAnalyzer/DnaAnalyzer.java
new file mode 100644
index 00000000..bc3ab7f5
--- /dev/null
+++ b/src/main/java/DNAnalyzer/DnaAnalyzer.java
@@ -0,0 +1,78 @@
+package DNAnalyzer;
+
+import DNAnalyzer.codon.CodonFrame;
+import DNAnalyzer.protein.ProteinAnalysis;
+import DNAnalyzer.protein.ProteinFinder;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import static java.util.Optional.ofNullable;
+
+/**
+ * Provides functionality to analyze the DNA
+ *
+ * @param dna the DNA to be analyzed
+ * @param protein the sequence to search for within the DNA (may be null)
+ * @param aminoAcid name of amino acid
+ */
+public record DnaAnalyzer(Dna dna, String protein, String aminoAcid) {
+ public DnaAnalyzer isValidDna() {
+ dna.isValid();
+ return this;
+ }
+
+ public DnaAnalyzer replaceDNA(String input, String replacement) {
+ return new DnaAnalyzer(dna.replace(input, replacement), protein, aminoAcid);
+ }
+
+ public DnaAnalyzer reverseDna() {
+ return new DnaAnalyzer(dna.reverse(), protein, aminoAcid);
+ }
+
+ // Create protein list
+ // Output the proteins, GC content, and nucleotide cnt found in the DNA
+ public DnaAnalyzer printProteins() {
+ ofNullable(dna).map(Dna::getDna).ifPresent(dna -> {
+ Properties.printProteinList(getProteins(aminoAcid), aminoAcid);
+
+ System.out.println("\nGC-content (genome): " + Properties.getGCContent(dna) + "\n");
+ Properties.printNucleotideCount(dna);
+ });
+ return this;
+ }
+
+ // Output the number of codons based on the reading frame the user wants to look
+ // at, and minimum and maximum filters
+ public DnaAnalyzer outPutCodons(int minCount, int maxCount) {
+ final short READING_FRAME = 1;
+ final String dna = this.dna.getDna();
+ final ReadingFrames aap =
+ new ReadingFrames(new CodonFrame(dna, READING_FRAME, minCount, maxCount));
+ System.out.print("\n");
+ aap.printCodonCounts();
+
+ // Find protein sequence in DNA
+ if (protein != null) {
+ final Pattern p = Pattern.compile(protein);
+ final Matcher m = p.matcher(dna);
+ if (m.find()) {
+ System.out.println(
+ "\nProtein sequence found at index " + m.start() + " in the DNA sequence.");
+ } else {
+ System.out.println("\nProtein sequence not found in the DNA sequence.");
+ }
+ }
+ return this;
+ }
+
+ public DnaAnalyzer printLongestProtein() {
+ ProteinAnalysis.printLongestProtein(getProteins(aminoAcid));
+ return this;
+ }
+
+ private List getProteins(String aminoAcid) {
+ return ProteinFinder.getProtein(dna.getDna(), aminoAcid);
+ }
+}
From 2fb25983cd290583c6079ca07ed88467546eaf6d Mon Sep 17 00:00:00 2001
From: boaglio
Date: Thu, 20 Oct 2022 07:09:11 -0300
Subject: [PATCH 04/14] refactor: renamed variable
---
src/main/java/DNAnalyzer/protein/ProteinFinder.java | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/main/java/DNAnalyzer/protein/ProteinFinder.java b/src/main/java/DNAnalyzer/protein/ProteinFinder.java
index 48928678..b6a2e0c7 100644
--- a/src/main/java/DNAnalyzer/protein/ProteinFinder.java
+++ b/src/main/java/DNAnalyzer/protein/ProteinFinder.java
@@ -35,7 +35,7 @@ public class ProteinFinder {
private ProteinFinder() {
}
- private static final int DNA_SIZE = 3;
+ private static final int QT_NUCLEOTIDES = 3;
/**
* Gets proteins from dna and amino acid
@@ -93,10 +93,10 @@ private static void addProtein(final String dna, List proteinList, int s
int stopIndex;
for (final String stopCodon : stopCodonList) {
- stopIndex = dna.indexOf(stopCodon.toLowerCase(), startIndex + DNA_SIZE);
+ stopIndex = dna.indexOf(stopCodon.toLowerCase(), startIndex + QT_NUCLEOTIDES);
if ((startIndex != -1) && (stopIndex != -1)) {
- proteinList.add(dna.substring(startIndex, stopIndex + DNA_SIZE).toUpperCase());
+ proteinList.add(dna.substring(startIndex, stopIndex + QT_NUCLEOTIDES).toUpperCase());
break;
}
From a7bd335f44a44b92781ad4f348f7a9f3361a2aa9 Mon Sep 17 00:00:00 2001
From: Kit PANG
Date: Thu, 20 Oct 2022 19:07:06 +0800
Subject: [PATCH 05/14] Fix deploy github page workflow
Signed-off-by: Kit PANG
---
.github/workflows/static.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.github/workflows/static.yml b/.github/workflows/static.yml
index 5828616f..aa1fb103 100644
--- a/.github/workflows/static.yml
+++ b/.github/workflows/static.yml
@@ -35,8 +35,8 @@ jobs:
- name: Upload artifact
uses: actions/upload-pages-artifact@v1
with:
- # Upload entire repository
- path: '.'
+ # Upload all content under the web/ directory
+ path: "web/"
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v1
From 83fa711ed500aef863e6c2b8a89c81a9b18d6189 Mon Sep 17 00:00:00 2001
From: Piyush Acharya
Date: Thu, 20 Oct 2022 23:37:23 -0700
Subject: [PATCH 06/14] Update index.html
Signed-off-by: Piyush Acharya
---
web/index.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/web/index.html b/web/index.html
index 5903a120..7b64d911 100644
--- a/web/index.html
+++ b/web/index.html
@@ -71,7 +71,7 @@
Features
From 9ed5d241fd0473b142fdc0d4567b36e05293cb63 Mon Sep 17 00:00:00 2001
From: Piyush Acharya
Date: Fri, 21 Oct 2022 21:43:00 -0700
Subject: [PATCH 07/14] Added Citation file
---
CITATION.cff | 11 +++++++++++
1 file changed, 11 insertions(+)
create mode 100644 CITATION.cff
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 00000000..e34be228
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,11 @@
+cff-version: "1.1.0"
+authors:
+ -
+ family-names: Acharya
+ given-names: Piyush
+
+date-released: 2022-10-10
+doi: "10.1038/s41592-021-01101-x"
+message: "If you use this software, please cite it using these metadata."
+title: "A highly efficient, powerful, and feature-rich machine learning algorithm for analyzing DNA sequences"
+version: "2.0.0"
\ No newline at end of file
From 6ec9874ee0dc923a8d77a1dec19335f6b24b7450 Mon Sep 17 00:00:00 2001
From: LimesKey <85136735+LimesKey@users.noreply.github.com>
Date: Sat, 22 Oct 2022 01:41:06 -0400
Subject: [PATCH 08/14] Changing wording, Gradle may not work with JDK 19.
https://docs.gradle.org/current/userguide/compatibility.html
Also adding link on how to download gradle
Signed-off-by: LimesKey <85136735+LimesKey@users.noreply.github.com>
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 72bf7b98..817dc27e 100644
--- a/README.md
+++ b/README.md
@@ -97,7 +97,7 @@
To build and run the DNAnalyzer, you need
-* JDK 17 or greater
+* JDK 17
* A JAVA\_HOME environment variable pointing to your JDK, or the Java executable in your PATH
* Gradle
From f89e28a360a37de2a26029fefca443d35b529978 Mon Sep 17 00:00:00 2001
From: Piyush Acharya
Date: Sat, 22 Oct 2022 00:30:32 -0700
Subject: [PATCH 09/14] Update README.md
Signed-off-by: Piyush Acharya
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 817dc27e..47f96314 100644
--- a/README.md
+++ b/README.md
@@ -264,5 +264,5 @@ https://user-images.githubusercontent.com/27987685/194954560-5f470ecc-e733-4757-