From d75fe39fb880150ce317707c33ab2f9b2e91e221 Mon Sep 17 00:00:00 2001 From: Binglan Li Date: Wed, 6 Mar 2024 18:36:39 -0800 Subject: [PATCH] test(autogenerated): sort alleles in diplotype and skip slow genes --- .../haplotype/AutogeneratedVcfTester.java | 46 +++++++++++-------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/AutogeneratedVcfTester.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/AutogeneratedVcfTester.java index e05bf9c00..11859f14c 100644 --- a/src/main/java/org/pharmgkb/pharmcat/haplotype/AutogeneratedVcfTester.java +++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/AutogeneratedVcfTester.java @@ -29,6 +29,7 @@ import org.pharmgkb.pharmcat.haplotype.model.GeneCall; import org.pharmgkb.pharmcat.haplotype.model.Result; import org.pharmgkb.pharmcat.util.DataManager; +import org.pharmgkb.pharmcat.util.HaplotypeNameComparator; import static org.pharmgkb.pharmcat.Constants.isLowestFunctionGene; @@ -47,12 +48,14 @@ * @author Mark Woon */ public class AutogeneratedVcfTester implements AutoCloseable { + private static final List sf_slowGenes = List.of("RYR1"); private static final ResultSerializer sf_resultSerializer = new ResultSerializer(); private final Path m_outputDir; private final boolean m_saveData; private final boolean m_exactMatchOnly; private final boolean m_fuzzyMatch; private final boolean m_testCyp2d6; + private final boolean m_skipSlowGenes; private int m_numTests; private final SortedMap m_geneTestTotals = new TreeMap<>(); private final SortedMap m_geneTestWarnings = new TreeMap<>(); @@ -62,7 +65,7 @@ public class AutogeneratedVcfTester implements AutoCloseable { private AutogeneratedVcfTester(Path outputDir, boolean saveData, boolean exactMatchOnly, boolean fuzzyMatch, - boolean testCyp2d6) throws IOException { + boolean testCyp2d6, boolean skipSlowGenes) throws IOException { Preconditions.checkArgument(!(exactMatchOnly && fuzzyMatch)); m_outputDir = outputDir; m_errorWriter = new ErrorWriter(m_outputDir); @@ -70,6 +73,7 @@ private AutogeneratedVcfTester(Path outputDir, boolean saveData, boolean exactMa m_exactMatchOnly = exactMatchOnly; m_fuzzyMatch = fuzzyMatch; m_testCyp2d6 = testCyp2d6; + m_skipSlowGenes = skipSlowGenes; m_quiet = Boolean.parseBoolean(System.getenv("PHARMCAT_TEST_QUIET")); } @@ -89,6 +93,7 @@ public static void main(String[] args) { .addOption("e", "exact-match-only", "only pass if matcher produces single exact match") .addOption("f", "fuzzy-match", "pass if matcher produces any match") .addOption("cyp2d6", "cyp2d6", "run CYP2D6 tests") + .addOption("skipSlowGenes", "skip-slow-genes", "skip slow genes") ; cliHelper.execute(args, cli -> { @@ -103,7 +108,7 @@ public static void main(String[] args) { } try (AutogeneratedVcfTester tester = new AutogeneratedVcfTester(outputDir, cliHelper.hasOption("s"), - exact, fuzzy, cliHelper.hasOption("nocyp2d6"))) { + exact, fuzzy, cliHelper.hasOption("cyp2d6"), cliHelper.hasOption("skipSlowGenes"))) { Stopwatch stopwatch = Stopwatch.createStarted(); if (cliHelper.hasOption("g")) { @@ -123,11 +128,13 @@ public static void main(String[] args) { return 0; } catch (Exception ex) { + //noinspection CallToPrintStackTrace ex.printStackTrace(); return 1; } }); } catch (Exception ex) { + //noinspection CallToPrintStackTrace ex.printStackTrace(); } } @@ -139,7 +146,11 @@ private void testAllGenes(Path vcfDir) throws Exception { try (DirectoryStream geneDirStream = Files.newDirectoryStream(vcfDir)) { for (Path geneDir : geneDirStream) { if (Files.isDirectory(geneDir)) { - if (geneDir.getFileName().toString().equals("CYP2D6") && !m_testCyp2d6) { + String gene = geneDir.getFileName().toString(); + if (gene.equals("CYP2D6") && !m_testCyp2d6) { + continue; + } + if (m_skipSlowGenes && sf_slowGenes.contains(gene)) { continue; } testGene(geneDir); @@ -198,7 +209,7 @@ private void test(String gene, NamedAlleleMatcher namedAlleleMatcher, VcfFile vc String expectedDiplotype = vcfReader.getVcfMetadata().getRawProperties().get("PharmCATnamedAlleles").get(0); List expectedAlleles = Arrays.asList(expectedDiplotype.split("/")); - Collections.sort(expectedAlleles); + expectedAlleles.sort(new HaplotypeNameComparator()); expectedDiplotype = String.join("/", expectedAlleles); boolean hasUnknownCall = expectedAlleles.contains("?"); @@ -257,7 +268,7 @@ private void test(String gene, Path vcfFile, DefinitionReader definitionReader, boolean gotExpectedInTopPair = false; List topPairs = new ArrayList<>(); List alternatePairs = new ArrayList<>(); - if (matches.size() > 0) { + if (!matches.isEmpty()) { int topScore = matches.iterator().next().getScore(); for (DiplotypeMatch match : matches) { boolean isMatch = isMatch(expectedDiplotype, match); @@ -276,7 +287,7 @@ private void test(String gene, Path vcfFile, DefinitionReader definitionReader, } if (hasUnknownCall || hasComboCall) { - if (topPairs.size() > 0) { + if (!topPairs.isEmpty()) { fail(gene, vcfFile, result, topPairs, alternatePairs, "no call (" + expectedDiplotype + ")", null, exemption); } return; @@ -288,7 +299,7 @@ private void test(String gene, Path vcfFile, DefinitionReader definitionReader, String extraMsg = null; if (gotExpected) { List errors = checkOverlaps(definitionReader.getPositions(gene), matches); - if (errors.size() > 0) { + if (!errors.isEmpty()) { if (errors.size() == 1) { extraMsg = errors.get(0); } else { @@ -319,7 +330,6 @@ private void test(String gene, Path vcfFile, DefinitionReader definitionReader, } private List checkOverlaps(VariantLocus[] positions, Set matches) { - //noinspection UnstableApiUsage Set> combinations = Sets.combinations(matches, 2); List overlaps = new ArrayList<>(); for (Set combo : combinations) { @@ -337,7 +347,7 @@ private List checkOverlaps(VariantLocus[] positions, Set StringBuilder errBuilder = new StringBuilder(); for (int x = 0; x < size; x += 1) { if (!Objects.requireNonNull(alleles1[x]).equals(alleles2[x])) { - if (errBuilder.length() > 0) { + if (!errBuilder.isEmpty()) { errBuilder.append("\n"); } errBuilder.append(" Mismatch in ") @@ -372,7 +382,7 @@ private String buildAllele(DiplotypeMatch m, int x) { bases.add(allele); } } - if (bases.size() > 0) { + if (!bases.isEmpty()) { return String.join(",", bases); } return null; @@ -384,7 +394,7 @@ private String buildAllele(DiplotypeMatch m, int x) { */ private static boolean isMatch(String expected, DiplotypeMatch match) { List rezAlleles = Arrays.asList(match.getName().split("/")); - Collections.sort(rezAlleles); + rezAlleles.sort(new HaplotypeNameComparator()); String rez = String.join("/", rezAlleles); return rez.equals(expected); } @@ -423,7 +433,7 @@ private void saveData(Path vcfFile, Result result) throws IOException { } private static boolean isFuzzyMatch(List expectedAlleles, Collection matches) { - Collections.sort(expectedAlleles); + expectedAlleles.sort(new HaplotypeNameComparator()); String expected = String.join("/", expectedAlleles); return matches.stream() .anyMatch(dm -> isMatch(expected, dm)); @@ -454,7 +464,7 @@ private static class ErrorMessage implements Comparable { gene = geneCall.getGene(); MatchData matchData = geneCall.getMatchData(); StringBuilder keyBuilder = new StringBuilder(geneCall.getGene()); - if (matchData.getMissingPositions().size() > 0) { + if (!matchData.getMissingPositions().isEmpty()) { missingPositions = matchData.getMissingPositions().stream() .map(VariantLocus::toString) .collect(Collectors.joining(", ")); @@ -474,12 +484,12 @@ private static class ErrorMessage implements Comparable { isWarning = warn; actual = topPairs.stream() .map(DiplotypeMatch::getName) - .collect(Collectors.joining(", ")); + .collect(Collectors.joining("; ")); actualFuzzyMatch = isFuzzyMatch(expectedAlleles, topPairs); - if (alternatePairs.size() > 0) { + if (!alternatePairs.isEmpty()) { alt = alternatePairs.stream() .map(m -> m.getName() + " (" + m.getScore() + ")") - .collect(Collectors.joining(", ")); + .collect(Collectors.joining("; ")); altFuzzyMatch = isFuzzyMatch(expectedAlleles, alternatePairs); } else { noAlt = true; @@ -522,7 +532,7 @@ private static class ErrorMessage implements Comparable { .append(" Expected: ").append(expectedDiplotype) .append("\n") .append(" Actual: ").append(actual); - if (topPairs.size() > 0) { + if (!topPairs.isEmpty()) { errBuilder.append(" (") .append(topPairs.get(0).getScore()) .append(")"); @@ -677,7 +687,7 @@ private void printSummary(int numTests, SortedMap geneTestTotal m_writer.println("| ---------- | -------- | -------- |"); for (String gene : geneTestTotals.keySet()) { int warnings = geneTestWarnings.computeIfAbsent(gene, c -> 0); - int failures = geneTestFailures.computeIfAbsent(gene, c -> 0);; + int failures = geneTestFailures.computeIfAbsent(gene, c -> 0); m_writer.println(String.format("| %-10s | %7s | %7s |", gene, numFormatter.format(warnings), numFormatter.format(failures))); } }