diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/CombinationUtil.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/CombinationUtil.java index 6adcc9a5..1c8c10cb 100644 --- a/src/main/java/org/pharmgkb/pharmcat/haplotype/CombinationUtil.java +++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/CombinationUtil.java @@ -22,10 +22,25 @@ public static Set generatePermutations(List alleles) { Preconditions.checkNotNull(alleles); Preconditions.checkArgument(!alleles.isEmpty(), "No alleles to generate permutations for"); - boolean isHaploid = alleles.stream().allMatch(sa -> sa.getAllele2() == null); - Set rez = generatePermutations(alleles, 0, isHaploid, true, ""); - if (!isHaploid) { - rez.addAll(generatePermutations(alleles, 0, false, false, "")); + boolean isS1Blank = true; + boolean isS2Blank = true; + for (SampleAllele sa : alleles) { + if (sa.getAllele1() != null) { + isS1Blank = false; + } + if (sa.getAllele2() != null) { + isS2Blank = false; + } + if (!isS1Blank && !isS2Blank) { + break; + } + } + Set rez = new HashSet<>(); + if (!isS1Blank) { + rez.addAll(generatePermutations(alleles, 0, isS2Blank, true, "")); + } + if (!isS2Blank) { + rez.addAll(generatePermutations(alleles, 0, isS1Blank, false, "")); } if (rez.isEmpty()) { throw new IllegalStateException("No permutations generated from " + alleles.size() + " alleles"); diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/DpydHapB3Matcher.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/DpydHapB3Matcher.java index bd493b57..6ede8b5a 100644 --- a/src/main/java/org/pharmgkb/pharmcat/haplotype/DpydHapB3Matcher.java +++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/DpydHapB3Matcher.java @@ -70,59 +70,71 @@ public DpydHapB3Matcher(Env env, SortedMap alleleMap, bool m_isMissingHapB3 = false; if (hapB3IntronSample != null) { List intronicHapB3 = callHapB3(hapB3Allele, hapB3IntronLocus, hapB3IntronSample); + long numIntronicStrands = intronicHapB3.stream().filter(a -> !a.equals(".")).count(); if (hapB3ExonSample == null) { m_hapB3IntronCall = intronicHapB3; } else { // we have both intron and exon variants List exonicHapB3 = callHapB3(hapB3Allele, hapB3ExonLocus, hapB3ExonSample); + long numExonicStrands = exonicHapB3.stream().filter(a -> !a.equals(".")).count(); - if (intronicHapB3.isEmpty()) { + if (numIntronicStrands == 0) { m_hapB3Call = exonicHapB3; - if (!exonicHapB3.isEmpty()) { + if (numExonicStrands != 0) { m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC); } - } else if (intronicHapB3.size() == 1) { - // ignore this case - } else if (intronicHapB3.size() == 2) { - if (exonicHapB3.isEmpty()) { + } else if (numIntronicStrands == 1) { + if (numExonicStrands == 0) { m_hapB3IntronCall = intronicHapB3; - } else if (exonicHapB3.size() == 1) { - // ignore this case - } else if (exonicHapB3.size() == 2) { - m_hapB3IntronCall = new ArrayList<>(); - m_hapB3Call = new ArrayList<>(); + } else { + // ignore if not phased if (isEffectivelyPhased) { - handlePhasedCall(env, intronicHapB3.get(0), exonicHapB3.get(0)); - handlePhasedCall(env, intronicHapB3.get(1), exonicHapB3.get(1)); + handlePhasedCall(env, intronicHapB3, exonicHapB3, 0); + handlePhasedCall(env, intronicHapB3, exonicHapB3, 1); + } + } + } else if (numIntronicStrands == 2) { + if (numExonicStrands == 0) { + m_hapB3IntronCall = intronicHapB3; + } else { + if (isEffectivelyPhased) { + handlePhasedCall(env, intronicHapB3, exonicHapB3, 0); + handlePhasedCall(env, intronicHapB3, exonicHapB3, 1); } else { - long numIntrons = intronicHapB3.stream().filter(c -> c.equals("1")).count(); - long numExons = exonicHapB3.stream().filter(c -> c.equals("1")).count(); - - if (numIntrons == numExons) { - for (int x = 0; x < numIntrons; x += 1) { - m_hapB3Call.add("1"); - } - } else if (numIntrons > numExons) { - // 2 intron, 1 exon - // 2 intron, 0 exon - // 1 intron, 0 exon - m_hapB3IntronCall.add("1"); - if (numExons == 1) { - m_hapB3Call.add("1"); - } else if (numIntrons == 2) { + if (numExonicStrands == 1) { + // ignore this case + } else if (numExonicStrands == 2) { + m_hapB3IntronCall = new ArrayList<>(); + m_hapB3Call = new ArrayList<>(); + long numIntronic = intronicHapB3.stream().filter(c -> c.equals("1")).count(); + long numExonic = exonicHapB3.stream().filter(c -> c.equals("1")).count(); + + if (numIntronic == numExonic) { + for (int x = 0; x < numIntronic; x += 1) { + m_hapB3Call.add("1"); + } + } else if (numIntronic > numExonic) { + // 2 intron, 1 exon + // 2 intron, 0 exon + // 1 intron, 0 exon m_hapB3IntronCall.add("1"); - } - } else { - // intronic call trumps exonic call - if (numIntrons == 1) { - // 2 exons, 1 intron - call ref/hapB3 - m_hapB3Call.add("1"); - m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC); + if (numExonic == 1) { + m_hapB3Call.add("1"); + } else if (numIntronic == 2) { + m_hapB3IntronCall.add("1"); + } } else { - // 1 exon, 0 intron - call reference - // 2 exon, 0 intron - call reference - m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC); + // intronic call trumps exonic call + if (numIntronic == 1) { + // 2 exons, 1 intron - call ref/hapB3 + m_hapB3Call.add("1"); + m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC); + } else { + // 1 exon, 0 intron - call reference + // 2 exon, 0 intron - call reference + m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC); + } } } } @@ -131,7 +143,8 @@ public DpydHapB3Matcher(Env env, SortedMap alleleMap, bool } } else { m_hapB3Call = callHapB3(hapB3Allele, hapB3ExonLocus, hapB3ExonSample); - if (m_hapB3Call.size() == 2) { + long numAlleles = m_hapB3Call.stream().filter(a -> !a.equals(".")).count(); + if (numAlleles == 2) { m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_EXONIC_ONLY); } } @@ -146,7 +159,25 @@ public DpydHapB3Matcher(Env env, SortedMap alleleMap, bool m_isHapB3Present = m_numHapB3Called > 0; } - private void handlePhasedCall(Env env, String intronicCall, String exonicCall) { + + private void handlePhasedCall(Env env, List intronicCalls, List exonicCalls, int idx) { + + String intronicCall = "."; + if (intronicCalls.size() > idx) { + intronicCall = intronicCalls.get(idx); + } + String exonicCall = "."; + if (exonicCalls.size() > idx) { + exonicCall = exonicCalls.get(idx); + } + + if (m_hapB3IntronCall == null) { + m_hapB3IntronCall = new ArrayList<>(); + } + if (m_hapB3Call == null) { + m_hapB3Call = new ArrayList<>(); + } + // intronic call trumps exonic call if (intronicCall.equals("0")) { m_hapB3IntronCall.add("0"); @@ -154,7 +185,7 @@ private void handlePhasedCall(Env env, String intronicCall, String exonicCall) { if (!exonicCall.equals("0")) { m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC); } - } else { + } else if (intronicCall.equals("1")) { if (exonicCall.equals("0")) { m_hapB3IntronCall.add("1"); m_hapB3Call.add("0"); @@ -162,6 +193,19 @@ private void handlePhasedCall(Env env, String intronicCall, String exonicCall) { m_hapB3IntronCall.add("0"); m_hapB3Call.add("1"); } + } else { + // intronic call is "." + m_hapB3IntronCall.add("."); + if (exonicCall.equals("1")) { + m_hapB3Call.add("1"); + } else { + // if intronic is "0" or "." + m_hapB3Call.add("."); + } + if (!exonicCall.equals("0")) { + m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC); + } + } } @@ -171,14 +215,25 @@ private void handlePhasedCall(Env env, String intronicCall, String exonicCall) { private List callHapB3(NamedAllele hapB3Allele, VariantLocus locus, SampleAllele sampleAllele) { List rez = new ArrayList<>(); String exonAllele = Objects.requireNonNull(hapB3Allele.getAllele(locus)); - rez.add(exonAllele.equals(sampleAllele.getAllele1()) ? "1" : "0"); + int count = 0; + if (sampleAllele.getAllele1() == null) { + if (sampleAllele.isPhased()) { + // we only care if this is phased + rez.add("."); + } + } else { + rez.add(exonAllele.equals(sampleAllele.getAllele1()) ? "1" : "0"); + count += 1; + } if (sampleAllele.getAllele2() != null) { rez.add(exonAllele.equals(sampleAllele.getAllele2()) ? "1" : "0"); - } else { + count += 1; + } + if (count < 2) { // this warning can get overwritten! // but we don't really care m_warning = new MessageAnnotation(MessageAnnotation.TYPE_NOTE, "warn.alleleCount", - "Only found 1 allele for " + locus.getRsid()); + "Only found " + count + " allele for " + locus.getRsid()); } return rez; } diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/MatchData.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/MatchData.java index 1a23ae85..7de3da32 100644 --- a/src/main/java/org/pharmgkb/pharmcat/haplotype/MatchData.java +++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/MatchData.java @@ -1,17 +1,7 @@ package org.pharmgkb.pharmcat.haplotype; import java.lang.invoke.MethodHandles; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.SortedMap; -import java.util.SortedSet; -import java.util.TreeMap; -import java.util.TreeSet; +import java.util.*; import java.util.stream.Collectors; import com.google.common.base.Preconditions; import com.google.gson.annotations.Expose; @@ -114,7 +104,7 @@ public MatchData(String sampleId, String gene, SortedMap a } } } - m_isHaploid = m_sampleMap.values().stream().allMatch(sa -> sa.getAllele2() == null); + m_isHaploid = areSampleAllelesHaploid(m_sampleMap.values()); m_isPhased = m_sampleMap.values().stream().allMatch(SampleAllele::isPhased); m_isHomozygous = m_isHaploid || m_sampleMap.values().stream().allMatch(SampleAllele::isHomozygous); @@ -376,4 +366,19 @@ protected SortedSet comparePermutations() { public String toString() { return m_gene + " match data for " + m_sampleId; } + + + private static boolean areSampleAllelesHaploid(Collection sampleAlleles) { + int s1 = 0; + int s2 = 0; + for (SampleAllele a : sampleAlleles) { + if (a.getAllele1() == null) { + s1 += 1; + } + if (a.getAllele2() == null) { + s2 += 1; + } + } + return s1 == sampleAlleles.size() || s2 == sampleAlleles.size(); + } } diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/SampleAllele.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/SampleAllele.java index fe16a232..7ecd9d6d 100644 --- a/src/main/java/org/pharmgkb/pharmcat/haplotype/SampleAllele.java +++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/SampleAllele.java @@ -47,6 +47,12 @@ public class SampleAllele implements Comparable { @SerializedName("vcfAlleles") private final List m_vcfAlleles; @Expose + @SerializedName("gt") + private final String m_gt; + @Expose + @SerializedName("vcfCall") + private final String m_vcfCall; + @Expose @SerializedName("undocumentedVariations") private final Set m_undocumentedVariations = new HashSet<>(); @Expose @@ -54,8 +60,8 @@ public class SampleAllele implements Comparable { private boolean m_treatUndocumentedVariationsAsReference; - public SampleAllele(String chromosome, long position, String a1, @Nullable String a2, boolean isPhased, - boolean isEffectivelyPhased, List vcfAlleles, @Nullable Set undocumentedVariations, + public SampleAllele(String chromosome, long position, @Nullable String a1, @Nullable String a2, boolean isPhased, + boolean isEffectivelyPhased, List vcfAlleles, String gt, @Nullable Set undocumentedVariations, boolean treatUndocumentedAsReference) { Preconditions.checkNotNull(vcfAlleles); m_chromosome = chromosome; @@ -66,16 +72,32 @@ public SampleAllele(String chromosome, long position, String a1, @Nullable Strin m_undocumentedVariations.addAll(undocumentedVariations); } m_vcfAlleles = vcfAlleles; + m_gt = gt; - m_allele1 = a1.toUpperCase(); - m_computedAllele1 = computeAllele(m_allele1, treatUndocumentedAsReference); + StringBuilder callBuilder = new StringBuilder(); + if (a1 != null) { + m_allele1 = a1.toUpperCase(); + m_computedAllele1 = computeAllele(m_allele1, treatUndocumentedAsReference); + callBuilder.append(m_allele1); + } else { + m_allele1 = null; + m_computedAllele1 = null; + callBuilder.append("."); + } if (a2 != null) { m_allele2 = a2.toUpperCase(); m_computedAllele2 = computeAllele(m_allele2, treatUndocumentedAsReference); + if (isPhased) { + callBuilder.append("|"); + } else { + callBuilder.append("/"); + } + callBuilder.append(m_allele2); } else { m_allele2 = null; m_computedAllele2 = null; } + m_vcfCall = callBuilder.toString(); m_isPhased = isPhased; m_isEffectivelyPhased = isEffectivelyPhased; } @@ -89,11 +111,11 @@ private String computeAllele(String allele, boolean treatUndocumentedAsReference } /** - * This constructor is primarily for use in tests only. + * This constructor should only be use in tests. */ - protected SampleAllele(String chromosome, long position, String a1, @Nullable String a2, - boolean isPhased, List vcfAlleles) { - this(chromosome, position, a1, a2, isPhased, isPhased, vcfAlleles, null, false); + protected SampleAllele(String chromosome, long position, @Nullable String a1, @Nullable String a2, + boolean isPhased, List vcfAlleles, String gt) { + this(chromosome, position, a1, a2, isPhased, isPhased, vcfAlleles, gt, null, false); } @@ -112,7 +134,7 @@ public String getChrPosition() { /** * Gets the first allele from the VCF (per GT column). */ - public String getAllele1() { + public @Nullable String getAllele1() { return m_allele1; } @@ -127,7 +149,7 @@ public String getAllele1() { /** * Gets the first allele, taking treatUndocumentedVariationsAsReference into account. */ - public String getComputedAllele1() { + public @Nullable String getComputedAllele1() { return m_computedAllele1; } @@ -160,8 +182,27 @@ public List getVcfAlleles() { return m_vcfAlleles; } + /** + * Gets the GT value from the VCF. + */ + public String getGt() { + return m_gt; + } + + /** + * Gets the alleles separated by VCF phasing delimiter (i.e. "/" or "|"). + * Missing allele will be represented by ".". + */ + public String getVcfCall() { + return m_vcfCall; + } public boolean isHomozygous() { + if (m_computedAllele1 == null) { + // both alleles can never be null + Preconditions.checkState(m_computedAllele2 != null); + return false; + } return m_computedAllele1.equals(m_computedAllele2); } @@ -180,7 +221,7 @@ public boolean isTreatUndocumentedVariationsAsReference() { @Override public String toString() { - return m_allele1 + (m_allele2 == null ? "" : "/" + m_allele2) + " @ " + m_chromosome + ":" + m_position; + return m_vcfCall + " @ " + m_chromosome + ":" + m_position; } @Override diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/VcfReader.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/VcfReader.java index 3ab35540..afbb37c7 100644 --- a/src/main/java/org/pharmgkb/pharmcat/haplotype/VcfReader.java +++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/VcfReader.java @@ -19,6 +19,7 @@ import java.util.zip.GZIPInputStream; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.SortedSetMultimap; import com.google.common.collect.TreeMultimap; import org.checkerframework.checker.nullness.qual.Nullable; @@ -31,6 +32,7 @@ import org.pharmgkb.parser.vcf.model.VcfMetadata; import org.pharmgkb.parser.vcf.model.VcfPosition; import org.pharmgkb.parser.vcf.model.VcfSample; +import org.pharmgkb.pharmcat.ParseException; import org.pharmgkb.pharmcat.VcfFile; import org.pharmgkb.pharmcat.definition.DefinitionReader; import org.pharmgkb.pharmcat.definition.model.VariantLocus; @@ -49,8 +51,8 @@ public class VcfReader implements VcfLineParser { public static final String MSG_AD_FORMAT_MISSING = "AD format is not defined. Assuming AD field is valid."; private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + private static final Set sf_haploidChromosomes = ImmutableSet.of("chrY", "chrM"); private static final Pattern sf_gtDelimiter = Pattern.compile("[|/]"); - private static final Pattern sf_noCallPattern = Pattern.compile("^[.|/]+$"); private static final Pattern sf_allelePattern = Pattern.compile("^[AaCcGgTt]+$"); private static final String sf_filterCodeRef = "PCATxREF"; private static final String sf_filterCodeAlt = "PCATxALT"; @@ -75,9 +77,11 @@ public class VcfReader implements VcfLineParser { /** * Constructor. * Reads in a VCF file and pulls the sample's alleles at positions of interest. + * + * @throws ParseException if there are no samples in the VCF file */ public VcfReader(DefinitionReader definitionReader, BufferedReader vcfReader, @Nullable String sampleId, - boolean findCombinations) throws IOException { + boolean findCombinations) throws IOException, ParseException { m_locationsOfInterest = definitionReader.getLocationsOfInterest(); m_locationsByGene = definitionReader.getLocationsByGene(); m_sampleId = sampleId; @@ -90,6 +94,8 @@ public VcfReader(DefinitionReader definitionReader, BufferedReader vcfReader, @N /** * Constructor. Primarily used for testing. * This will read in the VCF file and pull the (first) sample's alleles at positions of interest. + * + * @throws ParseException if there are no samples in the VCF file */ public VcfReader(DefinitionReader definitionReader, Path vcfFile) throws IOException { m_locationsOfInterest = definitionReader.getLocationsOfInterest(); @@ -103,6 +109,8 @@ public VcfReader(DefinitionReader definitionReader, Path vcfFile) throws IOExcep /** * Constructor. Primarily for testing. * This will read in the VCF file and pull the sample's alleles at ALL positions. + * + * @throws ParseException if there are no samples in the VCF file */ VcfReader(Path vcfFile) throws IOException { m_locationsOfInterest = null; @@ -153,8 +161,10 @@ public SortedSetMultimap getWarnings() { /** * Reads the VCF file. + * + * @throws ParseException if there are no samples in the VCF file */ - private void read(Path vcfFile) throws IOException { + private void read(Path vcfFile) throws IOException, ParseException { Preconditions.checkNotNull(vcfFile); Preconditions.checkArgument(VcfFile.isVcfFile(vcfFile), "%s is not a VCF file", vcfFile); @@ -165,8 +175,10 @@ private void read(Path vcfFile) throws IOException { /** * Read VCF data via reader. + * + * @throws ParseException if there are no samples in the VCF file */ - private void read(BufferedReader reader) throws IOException { + private void read(BufferedReader reader) throws IOException, ParseException { // read VCF try (VcfParser vcfParser = new VcfParser.Builder() .fromReader(reader) @@ -221,29 +233,32 @@ private void addWarning(String chrPos, String msg) { private void addWarning(String chrPos, String reportMsg, String clMsg) { // saved to report m_warnings.put(chrPos, reportMsg); - // prints to command line + // prints to the command line sf_logger.warn(clMsg); } + /** + * Parses a single line from VCF. + * + * @throws ParseException if there are no samples in the VCF file + */ @Override - public void parseLine(VcfMetadata metadata, VcfPosition position, List sampleData) { - - final String chrPos = position.getChromosome() + ":" + position.getPosition(); + public void parseLine(VcfMetadata metadata, VcfPosition position, List sampleData) throws ParseException { if (sampleData.isEmpty()) { - sf_logger.warn("Missing sample data on {}", chrPos); - return; + throw new ParseException("VCF does not contain sample data"); } + + final String chrPos = position.getChromosome() + ":" + position.getPosition(); if (m_alleleMap.containsKey(chrPos)) { addWarning(chrPos, "Duplicate entry found in VCF; this entry trumps others.", "Duplicate entry: first valid position wins"); return; } - String gt = sampleData.get(m_sampleIdx).getProperty("GT"); - VariantLocus varLoc; - Set undocumentedVariations = new HashSet<>(); + // only care about locations of interest + VariantLocus varLoc = null; if (m_locationsOfInterest != null) { varLoc = m_locationsOfInterest.get(chrPos); if (varLoc == null) { @@ -259,15 +274,68 @@ public void parseLine(VcfMetadata metadata, VcfPosition position, List gtNonMissing = Arrays.stream(gtArray) + .filter(g -> !g.equals(".")) + .map(Integer::parseInt) + .toList(); + if (gtNonMissing.isEmpty()) { + addWarning(chrPos, "Ignoring: no call (" + gt + ")"); + m_discardedPositions.add(chrPos); + return; + } - Set gtCalls = new HashSet<>(); - if (gt != null && !sf_noCallPattern.matcher(gt).matches()) { - Arrays.stream(sf_gtDelimiter.split(gt)) - .filter(a -> !a.equals(".")) - .map(Integer::parseInt) - .forEach(gtCalls::add); + if (sf_haploidChromosomes.contains(position.getChromosome())) { + // expect a single allele + if (gtNonMissing.size() > 1) { + addWarning(chrPos, gtNonMissing.size() + " genotypes found (" + getGeneForWarning(chrPos) + + ") for haploid chromosome. Will only use first non-missing genotype."); } + } else { + // diploid chromosome + if (gtNonMissing.size() > 2) { + addWarning(chrPos, gtNonMissing.size() + " genotypes found" + getGeneForWarning(chrPos) + + ". Will only use first two genotypes."); + } else if (gtNonMissing.size() == 1) { + if (!position.getChromosome().equals("chrX")) { + if (gtNonMissing.get(0) == 0) { + // treating "./0" like any other missing position + addWarning(chrPos, "Ignoring: only a single genotype found (GT=" + gt + + "). Since it's reference, treating this as a missing position."); + m_discardedPositions.add(chrPos); + return; + } + addWarning(chrPos, gtNonMissing.size() + " genotype found" + getGeneForWarning(chrPos) + ", expecting 2."); + } + } + } + Set undocumentedVariations = new HashSet<>(); + Set gtUnique = new HashSet<>(gtNonMissing); + if (varLoc != null) { // strip out structural alts (e.g. <*>) List altBases = position.getAltBases().stream() .filter(a -> !a.startsWith("<")) @@ -285,7 +353,7 @@ public void parseLine(VcfMetadata metadata, VcfPosition position, List 1 && !m_useSpecificSample) { - addWarning(chrPos, "Multiple samples found, only using first entry."); + // only warn once + if (m_alleleMap.isEmpty()) { + addWarning(chrPos, "Multiple samples found, only using first entry."); + } } - if (gt == null) { - addWarning(chrPos, "Ignoring: no genotype"); - m_discardedPositions.add(chrPos); - return; - } - if (sf_noCallPattern.matcher(gt).matches()) { - addWarning(chrPos, "Ignoring: no call (" + gt + ")"); - m_discardedPositions.add(chrPos); - return; - } - // already filtered out no-calls, guaranteed to have at least 1 GT here - String[] gtArray = sf_gtDelimiter.split(gt); - int[] alleleIndices = Arrays.stream(gtArray) - .filter(a -> !a.equals(".")) - .mapToInt(Integer::parseInt) - .toArray(); - boolean isHaploid = gtArray.length == 1; - // normalize alleles to use the same syntax as haplotype definition List alleles = new ArrayList<>(); if (position.getAltBases().isEmpty()) { @@ -364,10 +402,9 @@ public void parseLine(VcfMetadata metadata, VcfPosition position, List selected = Arrays.stream(alleleIndices).boxed().toList(); for (int x = 1; x <= position.getAltBases().size(); x += 1) { String gt2 = position.getAllele(x); - boolean isSelected = selected.contains(x); + boolean isSelected = gtNonMissing.contains(x); if (!validateAlleles(chrPos, gt1, gt2, isSelected)) { m_discardedPositions.add(chrPos); return; @@ -406,8 +443,7 @@ public void parseLine(VcfMetadata metadata, VcfPosition position, List genotype = new HashMap<>(); - Arrays.stream(alleleIndices) - .forEach(g -> genotype.merge(g, 1, Integer::sum)); + gtNonMissing.forEach(g -> genotype.merge(g, 1, Integer::sum)); // GT is het, but AD is not (only one side has any reads) if (genotype.size() != 1 && depths.stream().filter(d -> d > 0).count() == 1) { addWarning(chrPos, "Discarding genotype at this position because GT field indicates heterozygous (" + @@ -422,32 +458,20 @@ public void parseLine(VcfMetadata metadata, VcfPosition position, List= alleles.size()) { throw new VcfFormatException("Invalid GT allele value (" + alleleIdx + ") for " + chrPos + " (only " + (alleles.size() - 1) + " ALT allele" + (alleles.size() > 1 ? "s" : "") + " specified)"); } } - String a1 = alleles.get(alleleIndices[0]); - String a2 = isHaploid ? null : "."; - if (alleleIndices.length > 1) { - a2 = alleles.get(alleleIndices[1]); - if (alleleIndices.length > 2) { - addWarning(chrPos, alleleIndices.length + " genotypes found. Only using first 2 genotypes."); - } - } else if (!position.getChromosome().equals("chrM") && - !position.getChromosome().equals("chrY") && - !position.getChromosome().equals("chrX")) { - if (alleleIndices[0] == 0) { - // treating "./0" like any other missing position - addWarning(chrPos, "Ignoring: only a single genotype found. " + - "Since it's reference, treating this as a missing position."); - m_discardedPositions.add(chrPos); - return; - } - // going to accept "./1" because there's a variation, and we want to pass this on to the reporter - addWarning(chrPos, "Only a single genotype found."); + String a1 = null; + if (!gtArray[0].equals(".")) { + a1 = alleles.get(Integer.parseInt(gtArray[0])); + } + String a2 = null; + if (gtArray.length > 1 && !gtArray[1].equals(".")) { + a2 = alleles.get(Integer.parseInt(gtArray[1])); } // genotype divided by "|" if phased and "/" if unphased @@ -455,9 +479,9 @@ public void parseLine(VcfMetadata metadata, VcfPosition position, List 1) { + // we'll also treat homozygous as phased + isEffectivelyPhased = false; } } @@ -467,7 +491,7 @@ public void parseLine(VcfMetadata metadata, VcfPosition position, List + * Example: {@code " (TPMT)"} + */ + private String getGeneForWarning(String chrPos) { + if (m_locationsByGene == null) { + return ""; + } + return " (" + m_locationsByGene.get(chrPos) + ")"; + } private boolean treatUndocumentedAsReference(String chrPos) { String gene = m_locationsByGene.get(chrPos); diff --git a/src/main/java/org/pharmgkb/pharmcat/haplotype/model/Variant.java b/src/main/java/org/pharmgkb/pharmcat/haplotype/model/Variant.java index 2378abf6..be0f1863 100644 --- a/src/main/java/org/pharmgkb/pharmcat/haplotype/model/Variant.java +++ b/src/main/java/org/pharmgkb/pharmcat/haplotype/model/Variant.java @@ -35,30 +35,19 @@ public class Variant implements Comparable { */ public Variant(VariantLocus variant, SampleAllele allele) { String vcfAlleles = sf_vcfAlleleJoiner.join(allele.getVcfAlleles()); - - StringBuilder callBuilder = new StringBuilder() - .append(allele.getAllele1()); - if (allele.getAllele2() != null) { - if (allele.isPhased()) { - callBuilder.append("|"); - } else { - callBuilder.append("/"); - } - callBuilder.append(allele.getAllele2()); - } - initialize(variant.getPosition(), variant.getRsid(), callBuilder.toString(), vcfAlleles); + initialize(variant.getPosition(), variant.getRsid(), allele.getVcfCall(), vcfAlleles); } /** * Constructor for creating faux-{@link Variant}s for extra positions and tests. */ - public Variant(long pos, @Nullable String rsids, @Nullable String call, @Nullable String vcfAlleles) { - initialize(pos, rsids, call, vcfAlleles); + public Variant(long pos, @Nullable String rsid, @Nullable String call, @Nullable String vcfAlleles) { + initialize(pos, rsid, call, vcfAlleles); } - private void initialize(long pos, @Nullable String rsids, @Nullable String call, @Nullable String vcfAlleles) { + private void initialize(long pos, @Nullable String rsid, @Nullable String call, @Nullable String vcfAlleles) { m_position = pos; - m_rsid = rsids; + m_rsid = rsid; m_vcfCall = call; if (call != null) { // check for phasing by lack of "/" to match behavior in VCF reader @@ -76,6 +65,10 @@ public long getPosition() { return m_rsid; } + /** + * Gets the alleles separated by VCF phasing delimiter (i.e. "/" or "|"). + * Missing allele will be represented by ".". + */ public @Nullable String getVcfCall() { return m_vcfCall; } @@ -84,6 +77,9 @@ public boolean isPhased() { return m_isPhased; } + /** + * Gets the comma-separated list of all possible alleles from the VCF (i.e. REF and ALT). + */ public @Nullable String getVcfAlleles() { return m_vcfAlleles; } diff --git a/src/test/java/org/pharmgkb/pharmcat/DpydTest.java b/src/test/java/org/pharmgkb/pharmcat/DpydTest.java index 67cc91a4..17619fc2 100644 --- a/src/test/java/org/pharmgkb/pharmcat/DpydTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/DpydTest.java @@ -47,12 +47,12 @@ class DpydTest { @BeforeAll static void prepare() { ReportHelpers.setDebugMode(true); - //TestUtils.setSaveTestOutput(true); + TestUtils.setSaveTestOutput(true); } @AfterEach void deleteDirectory(TestInfo testInfo) { - TestUtils.deleteTestOutputDirectory(testInfo); + //TestUtils.deleteTestOutputDirectory(testInfo); } @@ -253,9 +253,10 @@ static void dpydHtmlChecks(Document document, @Nullable List expectedCal assertEquals(1, capecitabineSection.size()); Elements missingVariantsWarning = capecitabineSection.get(0).select(".alert-info.missing-variants"); if (hasMissingPositions) { - assertEquals(1, missingVariantsWarning.size()); + assertEquals(1, missingVariantsWarning.size(), "Expected missing variants, but found none"); } else { - assertEquals(0, missingVariantsWarning.size()); + assertEquals(0, missingVariantsWarning.size(), "Expected no missing variants, but found " + + missingVariantsWarning.size()); } List expectedRxCalls = cpicStyleCalls == null ? @@ -771,6 +772,40 @@ void hapB3AndIntronicC(TestInfo testInfo) throws Exception { dpydHtmlChecks(document, expectedCalls, false, hasDpwgAnnotations); } + @Test + void hapB3AndIntronicC_missingFirstAllele(TestInfo testInfo) throws Exception { + + PipelineWrapper testWrapper = new PipelineWrapper(testInfo, true, false, false) + .saveIntermediateFiles(); + testWrapper.getVcfBuilder() + .phased() + // hapB3 exon C>T + .variation("DPYD", "rs56038477", ".", "T") + // hapB3 intron G>C + .variation("DPYD", "rs75017182", ".", "C") + ; + + Path vcfFile = testWrapper.execute(); + + List expectedCalls = List.of( + "Reference/c.1129-5923C>G, c.1236G>A (HapB3)" + ); + List cpicStyleCalls = List.of( + "c.1129-5923C>G, c.1236G>A (HapB3) (heterozygous)" + ); + RecPresence hasDpwgAnnotations = RecPresence.YES; + + testWrapper.testCalledByMatcher("DPYD"); + testWrapper.testSourceDiplotypes(DataSource.CPIC, "DPYD", expectedCalls, cpicStyleCalls); + testWrapper.testRecommendedDiplotypes(DataSource.CPIC, "DPYD", List.of("Reference", DpydHapB3Matcher.HAPB3_ALLELE)); + testWrapper.testPrintCalls(DataSource.CPIC, "DPYD", cpicStyleCalls); + + dpydHasReports(testWrapper, hasDpwgAnnotations); + + Document document = readHtmlReport(vcfFile); + dpydHtmlChecks(document, expectedCalls, cpicStyleCalls, true, RecPresence.YES, hasDpwgAnnotations); + } + @Test void hapB3AndIntronicD(TestInfo testInfo) throws Exception { diff --git a/src/test/java/org/pharmgkb/pharmcat/TestVcfBuilder.java b/src/test/java/org/pharmgkb/pharmcat/TestVcfBuilder.java index 49d0ce31..bb249854 100644 --- a/src/test/java/org/pharmgkb/pharmcat/TestVcfBuilder.java +++ b/src/test/java/org/pharmgkb/pharmcat/TestVcfBuilder.java @@ -10,7 +10,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Optional; import org.checkerframework.checker.nullness.qual.Nullable; import org.junit.jupiter.api.TestInfo; import org.pharmgkb.pharmcat.definition.DefinitionReader; @@ -343,7 +342,9 @@ private void handleCustomEdit(DefinitionFile definitionFile, PrintWriter writer, } String vcfAllele = vl.getCpicToVcfAlleleMap().get(cpicAllele); if (vcfAllele == null) { - if (m_allowUnknownAllele) { + if (cpicAllele.equals(".")) { + vcfAllele = "."; + } else if (m_allowUnknownAllele) { vcfAllele = cpicAllele; alts.add(vcfAllele); } else { @@ -351,7 +352,9 @@ private void handleCustomEdit(DefinitionFile definitionFile, PrintWriter writer, ", expected one of " + vl.getCpicToVcfAlleleMap().keySet()); } } - if (vcfAllele.equals(vl.getRef())) { + if (vcfAllele.equals(".")) { + builder.append("."); + } else if (vcfAllele.equals(vl.getRef())) { builder.append("0"); } else { int idx = alts.indexOf(vcfAllele); diff --git a/src/test/java/org/pharmgkb/pharmcat/haplotype/CombinationUtilTest.java b/src/test/java/org/pharmgkb/pharmcat/haplotype/CombinationUtilTest.java index 79691c87..2ff974c7 100644 --- a/src/test/java/org/pharmgkb/pharmcat/haplotype/CombinationUtilTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/haplotype/CombinationUtilTest.java @@ -26,10 +26,10 @@ class CombinationUtilTest { void testGeneratePermutationsNotPhased() { List alleles = Arrays.asList( - new SampleAllele("chr1", 1, "T", "T", false, Lists.newArrayList("T", "C")), - new SampleAllele("chr1", 2, "A", "T", false, Lists.newArrayList("A", "T")), - new SampleAllele("chr1", 3, "C", "C", false, Lists.newArrayList("C", "C")), - new SampleAllele("chr1", 4, "C", "G", false, Lists.newArrayList("C", "G")) + new SampleAllele("chr1", 1, "T", "T", false, Lists.newArrayList("T", "C"), "0/0"), + new SampleAllele("chr1", 2, "A", "T", false, Lists.newArrayList("A", "T"), "0/1"), + new SampleAllele("chr1", 3, "C", "C", false, Lists.newArrayList("C", "C"), "0/0"), + new SampleAllele("chr1", 4, "C", "G", false, Lists.newArrayList("C", "G"), "0/1") ); Set expectedPermutations = Sets.newHashSet( @@ -49,10 +49,10 @@ void testGeneratePermutationsNotPhased() { void testGeneratePermutationPhased() { List alleles = Arrays.asList( - new SampleAllele("chr1", 1, "T", "T", true, Lists.newArrayList("T", "C")), - new SampleAllele("chr1", 2, "A", "T", true, Lists.newArrayList("A", "T")), - new SampleAllele("chr1", 3, "C", "C", true, Lists.newArrayList("C", "C")), - new SampleAllele("chr1", 4, "C", "G", true, Lists.newArrayList("C", "G")) + new SampleAllele("chr1", 1, "T", "T", true, Lists.newArrayList("T", "C"), "0|0"), + new SampleAllele("chr1", 2, "A", "T", true, Lists.newArrayList("A", "T"), "0|1"), + new SampleAllele("chr1", 3, "C", "C", true, Lists.newArrayList("C", "C"), "0|0"), + new SampleAllele("chr1", 4, "C", "G", true, Lists.newArrayList("C", "G"), "0|1") ); Set expectedPermutations = Sets.newHashSet( diff --git a/src/test/java/org/pharmgkb/pharmcat/haplotype/DiplotypeMatcherTest.java b/src/test/java/org/pharmgkb/pharmcat/haplotype/DiplotypeMatcherTest.java index 70752bce..e7e00e05 100644 --- a/src/test/java/org/pharmgkb/pharmcat/haplotype/DiplotypeMatcherTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/haplotype/DiplotypeMatcherTest.java @@ -85,9 +85,9 @@ static void beforeClass() { void test1() { SortedSet alleles = new TreeSet<>(Arrays.asList( - new SampleAllele("chr1", 1, "A", "G", false, Lists.newArrayList("A", "G")), - new SampleAllele("chr1", 2, "C", "C", false, Lists.newArrayList("C", "T")), - new SampleAllele("chr1", 3, "C", "C", false, Lists.newArrayList("C", "T")) + new SampleAllele("chr1", 1, "A", "G", false, Lists.newArrayList("A", "G"), "0/1"), + new SampleAllele("chr1", 2, "C", "C", false, Lists.newArrayList("C", "T"), "0/0"), + new SampleAllele("chr1", 3, "C", "C", false, Lists.newArrayList("C", "T"), "0/0") )); SortedSet pairMatches = computeHaplotypes(alleles); @@ -100,9 +100,9 @@ void test1() { void test2() { SortedSet alleles = new TreeSet<>(Arrays.asList( - new SampleAllele("chr1", 1, "A", "G", false, Lists.newArrayList("A", "G")), - new SampleAllele("chr1", 2, "C", "T", false, Lists.newArrayList("C", "T")), - new SampleAllele("chr1", 3, "C", "T", false, Lists.newArrayList("C", "T")) + new SampleAllele("chr1", 1, "A", "G", false, Lists.newArrayList("A", "G"), "0/1"), + new SampleAllele("chr1", 2, "C", "T", false, Lists.newArrayList("C", "T"), "0/1"), + new SampleAllele("chr1", 3, "C", "T", false, Lists.newArrayList("C", "T"), "0/1") )); SortedSet pairMatches = computeHaplotypes(alleles); @@ -115,9 +115,9 @@ void test2() { void test3() { SortedSet alleles = new TreeSet<>(Arrays.asList( - new SampleAllele("chr1", 1, "A", "A", false, Lists.newArrayList("A", "G")), - new SampleAllele("chr1", 2, "C", "T", false, Lists.newArrayList("C", "T")), - new SampleAllele("chr1", 3, "C", "T", false, Lists.newArrayList("C", "T")) + new SampleAllele("chr1", 1, "A", "A", false, Lists.newArrayList("A", "G"), "0/0"), + new SampleAllele("chr1", 2, "C", "T", false, Lists.newArrayList("C", "T"), "0/1"), + new SampleAllele("chr1", 3, "C", "T", false, Lists.newArrayList("C", "T"), "0/1") )); SortedSet pairMatches = computeHaplotypes(alleles); @@ -130,9 +130,9 @@ void test3() { void test4() { SortedSet alleles = new TreeSet<>(Arrays.asList( - new SampleAllele("chr1", 1, "G", "G", false, Lists.newArrayList("A", "G")), - new SampleAllele("chr1", 2, "T", "T", false, Lists.newArrayList("C", "T")), - new SampleAllele("chr1", 3, "C", "T", false, Lists.newArrayList("C", "T")) + new SampleAllele("chr1", 1, "G", "G", false, Lists.newArrayList("A", "G"), "1/1"), + new SampleAllele("chr1", 2, "T", "T", false, Lists.newArrayList("C", "T"), "1/1"), + new SampleAllele("chr1", 3, "C", "T", false, Lists.newArrayList("C", "T"), "0/1") )); SortedSet pairMatches = computeHaplotypes(alleles); @@ -145,9 +145,9 @@ void test4() { void test5() { SortedSet alleles = new TreeSet<>(Arrays.asList( - new SampleAllele("chr1", 1, "G", "G", false, Lists.newArrayList("A", "G")), - new SampleAllele("chr1", 2, "C", "T", false, Lists.newArrayList("C", "T")), - new SampleAllele("chr1", 3, "C", "T", false, Lists.newArrayList("C", "T")) + new SampleAllele("chr1", 1, "G", "G", false, Lists.newArrayList("A", "G"), "1/1"), + new SampleAllele("chr1", 2, "C", "T", false, Lists.newArrayList("C", "T"), "0/1"), + new SampleAllele("chr1", 3, "C", "T", false, Lists.newArrayList("C", "T"), "0/1") )); SortedSet pairMatches = computeHaplotypes(alleles); List expectedMatches = Lists.newArrayList("*4a/*4b", "*4a/*17", "*4a/*4a"); @@ -216,10 +216,10 @@ void testComparePermutations() { "1:T;2:T;3:C;4:G;" ); SortedMap sampleAlleleMap = new TreeMap<>(); - sampleAlleleMap.put("chr1:1", new SampleAllele("chr1", 1, "T", "T", true, Lists.newArrayList("T"))); - sampleAlleleMap.put("chr1:2", new SampleAllele("chr1", 2, "A", "T", false, Lists.newArrayList("T"))); - sampleAlleleMap.put("chr1:3", new SampleAllele("chr1", 3, "C", "C", false, Lists.newArrayList("C"))); - sampleAlleleMap.put("chr1:4", new SampleAllele("chr1", 4, "C", "G", false, Lists.newArrayList("C"))); + sampleAlleleMap.put("chr1:1", new SampleAllele("chr1", 1, "T", "T", true, Lists.newArrayList("T"), "0/0")); + sampleAlleleMap.put("chr1:2", new SampleAllele("chr1", 2, "A", "T", false, Lists.newArrayList("T"), "1/0")); + sampleAlleleMap.put("chr1:3", new SampleAllele("chr1", 3, "C", "C", false, Lists.newArrayList("C"), "0/0")); + sampleAlleleMap.put("chr1:4", new SampleAllele("chr1", 4, "C", "G", false, Lists.newArrayList("C"), "0/1")); MatchData dataset = new MatchData("Sample_1", "GENE", sampleAlleleMap, variants, null, null); dataset.marshallHaplotypes("TEST", new TreeSet<>(Lists.newArrayList(hap1, hap2, hap3)), false); diff --git a/src/test/java/org/pharmgkb/pharmcat/haplotype/SampleAlleleTest.java b/src/test/java/org/pharmgkb/pharmcat/haplotype/SampleAlleleTest.java index dcc2c73a..1d61fbe0 100644 --- a/src/test/java/org/pharmgkb/pharmcat/haplotype/SampleAlleleTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/haplotype/SampleAlleleTest.java @@ -18,12 +18,12 @@ class SampleAlleleTest { @Test void testCompare() { - SampleAllele sa1 = new SampleAllele("chr1", 1, "A", "A", true, Lists.newArrayList("A", "G")); - SampleAllele sa2 = new SampleAllele("chr1", 2, "A", "A", true, Lists.newArrayList("A", "G")); - SampleAllele sa3 = new SampleAllele("chr2", 1, "A", "A", true, Lists.newArrayList("A", "G")); - SampleAllele sa4 = new SampleAllele("chrX", 1, "A", "A", true, Lists.newArrayList("A", "G")); - SampleAllele sa5 = new SampleAllele("chrY", 1, "A", "A", true, Lists.newArrayList("A", "G")); - SampleAllele sa6 = new SampleAllele("chr11", 1, "A", "A", true, Lists.newArrayList("A", "G")); + SampleAllele sa1 = new SampleAllele("chr1", 1, "A", "A", true, Lists.newArrayList("A", "G"), "0/0"); + SampleAllele sa2 = new SampleAllele("chr1", 2, "A", "A", true, Lists.newArrayList("A", "G"), "0/0"); + SampleAllele sa3 = new SampleAllele("chr2", 1, "A", "A", true, Lists.newArrayList("A", "G"), "0/0"); + SampleAllele sa4 = new SampleAllele("chrX", 1, "A", "A", true, Lists.newArrayList("A", "G"), "0/0"); + SampleAllele sa5 = new SampleAllele("chrY", 1, "A", "A", true, Lists.newArrayList("A", "G"), "0/0"); + SampleAllele sa6 = new SampleAllele("chr11", 1, "A", "A", true, Lists.newArrayList("A", "G"), "0/0"); assertSame(Sets.newTreeSet(Sets.newHashSet(sa1, sa2)).first(), sa1); assertSame(Sets.newTreeSet(Sets.newHashSet(sa2, sa1)).first(), sa1); diff --git a/src/test/java/org/pharmgkb/pharmcat/haplotype/VcfReaderTest.java b/src/test/java/org/pharmgkb/pharmcat/haplotype/VcfReaderTest.java index fc33d1bc..ecbb1816 100644 --- a/src/test/java/org/pharmgkb/pharmcat/haplotype/VcfReaderTest.java +++ b/src/test/java/org/pharmgkb/pharmcat/haplotype/VcfReaderTest.java @@ -31,6 +31,47 @@ void testCompressed() throws Exception { } + @Test + void testAlleleOrder() throws Exception { + VcfReader reader = new VcfReader(PathUtils.getPathToResource("org/pharmgkb/pharmcat/haplotype/VcfReaderTest-alleleOrder.vcf")); + Map alleleMap = reader.getAlleleMap(); + + assertEquals(4, alleleMap.size()); + + reader.getWarnings().keySet() + .forEach(p -> { + System.out.println(p); + for (String warning : reader.getWarnings().get(p)) { + System.out.println(" * " + warning); + } + }); + + // .|1 + String chrPos = "chr1:97079005"; + assertTrue(reader.getWarnings().containsKey(chrPos)); + assertEquals(1, reader.getWarnings().get(chrPos).size()); + assertTrue(reader.getWarnings().get(chrPos).contains("1 genotype found, expecting 2.")); + SampleAllele sa = alleleMap.get(chrPos); + assertNotNull(sa); + assertNull(sa.getAllele1()); + assertEquals("T", sa.getAllele2()); + + // .|1 + assertTrue(reader.getWarnings().containsKey("chr1:97079071")); + assertEquals(1, reader.getWarnings().get("chr1:97079071").size()); + assertTrue(reader.getWarnings().get("chr1:97079071").contains("1 genotype found, expecting 2.")); + + // 0|. + assertTrue(reader.getWarnings().containsKey("chr1:97079076")); + assertEquals(1, reader.getWarnings().get("chr1:97079076").size()); + assertTrue(reader.getWarnings().get("chr1:97079076").contains("Ignoring: only a single genotype found (GT=0|.). Since it's reference, treating this as a missing position.")); + // .|0 + assertTrue(reader.getWarnings().containsKey("chr1:97079077")); + assertEquals(1, reader.getWarnings().get("chr1:97079077").size()); + assertTrue(reader.getWarnings().get("chr1:97079077").contains("Ignoring: only a single genotype found (GT=.|0). Since it's reference, treating this as a missing position.")); + } + + @Test void testPhasing() throws Exception {