fix: improve handling of missing first allele in phased data
markwoon committed Jan 28, 2025
1 parent 4c18435 commit ca2ca7a
Showing 12 changed files with 435 additions and 209 deletions.
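In phased VCF data, a genotype such as ".|1" has a missing first allele; the matcher previously assumed that only the second allele could be absent. The sketch below is illustrative only (not PharmCAT code, names are hypothetical) and shows how such a genotype maps to a nullable first-allele slot.

import java.util.Arrays;
import java.util.List;

public class PhasedGtExample {

  // Splits a phased VCF GT field (e.g. ".|1") into two allele slots;
  // null marks a missing allele, mirroring the "missing first allele" case.
  static List<String> splitPhasedGt(String gt, List<String> alleles) {
    String[] parts = gt.split("\\|");
    String a1 = parts[0].equals(".") ? null : alleles.get(Integer.parseInt(parts[0]));
    String a2 = parts[1].equals(".") ? null : alleles.get(Integer.parseInt(parts[1]));
    return Arrays.asList(a1, a2);
  }

  public static void main(String[] args) {
    List<String> alleles = List.of("C", "T");  // index 0 = REF, index 1 = ALT
    System.out.println(splitPhasedGt(".|1", alleles));  // [null, T] -- first allele missing
    System.out.println(splitPhasedGt("0|1", alleles));  // [C, T]
  }
}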
src/main/java/org/pharmgkb/pharmcat/haplotype/CombinationUtil.java (23 changes: 19 additions & 4 deletions)
@@ -22,10 +22,25 @@ public static Set<String> generatePermutations(List<SampleAllele> alleles) {
Preconditions.checkNotNull(alleles);
Preconditions.checkArgument(!alleles.isEmpty(), "No alleles to generate permutations for");

boolean isHaploid = alleles.stream().allMatch(sa -> sa.getAllele2() == null);
Set<String> rez = generatePermutations(alleles, 0, isHaploid, true, "");
if (!isHaploid) {
rez.addAll(generatePermutations(alleles, 0, false, false, ""));
boolean isS1Blank = true;
boolean isS2Blank = true;
for (SampleAllele sa : alleles) {
if (sa.getAllele1() != null) {
isS1Blank = false;
}
if (sa.getAllele2() != null) {
isS2Blank = false;
}
if (!isS1Blank && !isS2Blank) {
break;
}
}
Set<String> rez = new HashSet<>();
if (!isS1Blank) {
rez.addAll(generatePermutations(alleles, 0, isS2Blank, true, ""));
}
if (!isS2Blank) {
rez.addAll(generatePermutations(alleles, 0, isS1Blank, false, ""));
}
if (rez.isEmpty()) {
throw new IllegalStateException("No permutations generated from " + alleles.size() + " alleles");
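The change above replaces the single "second allele is always null" haploid check with a per-strand scan: permutations are generated for strand 1 only if at least one position has a first allele, and likewise for strand 2. A minimal sketch of that check, using a hypothetical Allele record in place of SampleAllele:

import java.util.List;

public class StrandBlankExample {

  // Hypothetical stand-in for SampleAllele: just the two allele slots.
  record Allele(String allele1, String allele2) {}

  public static void main(String[] args) {
    // Phased sample where every first allele is missing (".|T", ".|G").
    List<Allele> alleles = List.of(new Allele(null, "T"), new Allele(null, "G"));

    boolean s1Blank = alleles.stream().allMatch(a -> a.allele1() == null);
    boolean s2Blank = alleles.stream().allMatch(a -> a.allele2() == null);

    // Generate permutations only for strands that carry data; the old code
    // could only treat the second strand as absent.
    System.out.println("build strand 1 permutations: " + (!s1Blank));  // false
    System.out.println("build strand 2 permutations: " + (!s2Blank));  // true
  }
}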
src/main/java/org/pharmgkb/pharmcat/haplotype/DpydHapB3Matcher.java (143 changes: 99 additions & 44 deletions)
@@ -70,59 +70,71 @@ public DpydHapB3Matcher(Env env, SortedMap<String, SampleAllele> alleleMap, bool
m_isMissingHapB3 = false;
if (hapB3IntronSample != null) {
List<String> intronicHapB3 = callHapB3(hapB3Allele, hapB3IntronLocus, hapB3IntronSample);
long numIntronicStrands = intronicHapB3.stream().filter(a -> !a.equals(".")).count();

if (hapB3ExonSample == null) {
m_hapB3IntronCall = intronicHapB3;
} else {
// we have both intron and exon variants
List<String> exonicHapB3 = callHapB3(hapB3Allele, hapB3ExonLocus, hapB3ExonSample);
long numExonicStrands = exonicHapB3.stream().filter(a -> !a.equals(".")).count();

if (intronicHapB3.isEmpty()) {
if (numIntronicStrands == 0) {
m_hapB3Call = exonicHapB3;
if (!exonicHapB3.isEmpty()) {
if (numExonicStrands != 0) {
m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC);
}
} else if (intronicHapB3.size() == 1) {
// ignore this case
} else if (intronicHapB3.size() == 2) {
if (exonicHapB3.isEmpty()) {
} else if (numIntronicStrands == 1) {
if (numExonicStrands == 0) {
m_hapB3IntronCall = intronicHapB3;
} else if (exonicHapB3.size() == 1) {
// ignore this case
} else if (exonicHapB3.size() == 2) {
m_hapB3IntronCall = new ArrayList<>();
m_hapB3Call = new ArrayList<>();
} else {
// ignore if not phased
if (isEffectivelyPhased) {
handlePhasedCall(env, intronicHapB3.get(0), exonicHapB3.get(0));
handlePhasedCall(env, intronicHapB3.get(1), exonicHapB3.get(1));
handlePhasedCall(env, intronicHapB3, exonicHapB3, 0);
handlePhasedCall(env, intronicHapB3, exonicHapB3, 1);
}
}
} else if (numIntronicStrands == 2) {
if (numExonicStrands == 0) {
m_hapB3IntronCall = intronicHapB3;
} else {
if (isEffectivelyPhased) {
handlePhasedCall(env, intronicHapB3, exonicHapB3, 0);
handlePhasedCall(env, intronicHapB3, exonicHapB3, 1);
} else {
long numIntrons = intronicHapB3.stream().filter(c -> c.equals("1")).count();
long numExons = exonicHapB3.stream().filter(c -> c.equals("1")).count();

if (numIntrons == numExons) {
for (int x = 0; x < numIntrons; x += 1) {
m_hapB3Call.add("1");
}
} else if (numIntrons > numExons) {
// 2 intron, 1 exon
// 2 intron, 0 exon
// 1 intron, 0 exon
m_hapB3IntronCall.add("1");
if (numExons == 1) {
m_hapB3Call.add("1");
} else if (numIntrons == 2) {
if (numExonicStrands == 1) {
// ignore this case
} else if (numExonicStrands == 2) {
m_hapB3IntronCall = new ArrayList<>();
m_hapB3Call = new ArrayList<>();
long numIntronic = intronicHapB3.stream().filter(c -> c.equals("1")).count();
long numExonic = exonicHapB3.stream().filter(c -> c.equals("1")).count();

if (numIntronic == numExonic) {
for (int x = 0; x < numIntronic; x += 1) {
m_hapB3Call.add("1");
}
} else if (numIntronic > numExonic) {
// 2 intron, 1 exon
// 2 intron, 0 exon
// 1 intron, 0 exon
m_hapB3IntronCall.add("1");
}
} else {
// intronic call trumps exonic call
if (numIntrons == 1) {
// 2 exons, 1 intron - call ref/hapB3
m_hapB3Call.add("1");
m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC);
if (numExonic == 1) {
m_hapB3Call.add("1");
} else if (numIntronic == 2) {
m_hapB3IntronCall.add("1");
}
} else {
// 1 exon, 0 intron - call reference
// 2 exon, 0 intron - call reference
m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC);
// intronic call trumps exonic call
if (numIntronic == 1) {
// 2 exons, 1 intron - call ref/hapB3
m_hapB3Call.add("1");
m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC);
} else {
// 1 exon, 0 intron - call reference
// 2 exon, 0 intron - call reference
m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC);
}
}
}
}
@@ -131,7 +143,8 @@ public DpydHapB3Matcher(Env env, SortedMap<String, SampleAllele> alleleMap, bool
}
} else {
m_hapB3Call = callHapB3(hapB3Allele, hapB3ExonLocus, hapB3ExonSample);
if (m_hapB3Call.size() == 2) {
long numAlleles = m_hapB3Call.stream().filter(a -> !a.equals(".")).count();
if (numAlleles == 2) {
m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_EXONIC_ONLY);
}
}
@@ -146,22 +159,53 @@ public DpydHapB3Matcher(Env env, SortedMap<String, SampleAllele> alleleMap, bool
m_isHapB3Present = m_numHapB3Called > 0;
}

private void handlePhasedCall(Env env, String intronicCall, String exonicCall) {

private void handlePhasedCall(Env env, List<String> intronicCalls, List<String> exonicCalls, int idx) {

String intronicCall = ".";
if (intronicCalls.size() > idx) {
intronicCall = intronicCalls.get(idx);
}
String exonicCall = ".";
if (exonicCalls.size() > idx) {
exonicCall = exonicCalls.get(idx);
}

if (m_hapB3IntronCall == null) {
m_hapB3IntronCall = new ArrayList<>();
}
if (m_hapB3Call == null) {
m_hapB3Call = new ArrayList<>();
}

// intronic call trumps exonic call
if (intronicCall.equals("0")) {
m_hapB3IntronCall.add("0");
m_hapB3Call.add("0");
if (!exonicCall.equals("0")) {
m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC);
}
} else {
} else if (intronicCall.equals("1")) {
if (exonicCall.equals("0")) {
m_hapB3IntronCall.add("1");
m_hapB3Call.add("0");
} else {
m_hapB3IntronCall.add("0");
m_hapB3Call.add("1");
}
} else {
// intronic call is "."
m_hapB3IntronCall.add(".");
if (exonicCall.equals("1")) {
m_hapB3Call.add("1");
} else {
// if intronic is "0" or "."
m_hapB3Call.add(".");
}
if (!exonicCall.equals("0")) {
m_warning = env.getMessage(MessageHelper.MSG_DPYD_HAPB3_INTRONIC_MISMATCH_EXONIC);
}

}
}

@@ -171,14 +215,25 @@ private void handlePhasedCall(Env env, String intronicCall, String exonicCall) {
private List<String> callHapB3(NamedAllele hapB3Allele, VariantLocus locus, SampleAllele sampleAllele) {
List<String> rez = new ArrayList<>();
String exonAllele = Objects.requireNonNull(hapB3Allele.getAllele(locus));
rez.add(exonAllele.equals(sampleAllele.getAllele1()) ? "1" : "0");
int count = 0;
if (sampleAllele.getAllele1() == null) {
if (sampleAllele.isPhased()) {
// we only care if this is phased
rez.add(".");
}
} else {
rez.add(exonAllele.equals(sampleAllele.getAllele1()) ? "1" : "0");
count += 1;
}
if (sampleAllele.getAllele2() != null) {
rez.add(exonAllele.equals(sampleAllele.getAllele2()) ? "1" : "0");
} else {
count += 1;
}
if (count < 2) {
// this warning can get overwritten!
// but we don't really care
m_warning = new MessageAnnotation(MessageAnnotation.TYPE_NOTE, "warn.alleleCount",
"Only found 1 allele for " + locus.getRsid());
"Only found " + count + " allele for " + locus.getRsid());
}
return rez;
}
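The matcher now counts called strands (entries other than ".") instead of relying on list sizes, and handlePhasedCall takes the full call lists plus a strand index so a missing call on either side can be handled per strand. A simplified sketch of the per-strand HapB3 call, under assumptions and with stand-in types rather than the real SampleAllele/VariantLocus API:

import java.util.ArrayList;
import java.util.List;

public class HapB3CallExample {

  // Returns "1"/"0" per called strand; for phased data a missing first allele
  // becomes "." so strand positions stay aligned for downstream matching.
  static List<String> callStrands(String hapB3Allele, String allele1, String allele2, boolean phased) {
    List<String> calls = new ArrayList<>();
    if (allele1 == null) {
      if (phased) {
        calls.add(".");
      }
    } else {
      calls.add(hapB3Allele.equals(allele1) ? "1" : "0");
    }
    if (allele2 != null) {
      calls.add(hapB3Allele.equals(allele2) ? "1" : "0");
    }
    return calls;
  }

  public static void main(String[] args) {
    // Phased sample ".|T" at a HapB3 position whose HapB3 allele is "T".
    List<String> calls = callStrands("T", null, "T", true);
    System.out.println(calls);  // [., 1]
    // Count only the called strands, as the matcher now does, not the list size.
    long numCalled = calls.stream().filter(c -> !c.equals(".")).count();
    System.out.println(numCalled);  // 1
  }
}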
src/main/java/org/pharmgkb/pharmcat/haplotype/MatchData.java (29 changes: 17 additions & 12 deletions)
@@ -1,17 +1,7 @@
package org.pharmgkb.pharmcat.haplotype;

import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.*;
import java.util.stream.Collectors;
import com.google.common.base.Preconditions;
import com.google.gson.annotations.Expose;
@@ -114,7 +104,7 @@ public MatchData(String sampleId, String gene, SortedMap<String, SampleAllele> a
}
}
}
m_isHaploid = m_sampleMap.values().stream().allMatch(sa -> sa.getAllele2() == null);
m_isHaploid = areSampleAllelesHaploid(m_sampleMap.values());
m_isPhased = m_sampleMap.values().stream().allMatch(SampleAllele::isPhased);
m_isHomozygous = m_isHaploid ||
m_sampleMap.values().stream().allMatch(SampleAllele::isHomozygous);
@@ -376,4 +366,19 @@ protected SortedSet<HaplotypeMatch> comparePermutations() {
public String toString() {
return m_gene + " match data for " + m_sampleId;
}


private static boolean areSampleAllelesHaploid(Collection<SampleAllele> sampleAlleles) {
int s1 = 0;
int s2 = 0;
for (SampleAllele a : sampleAlleles) {
if (a.getAllele1() == null) {
s1 += 1;
}
if (a.getAllele2() == null) {
s2 += 1;
}
}
return s1 == sampleAlleles.size() || s2 == sampleAlleles.size();
}
}
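areSampleAllelesHaploid treats a sample as haploid only when the same strand is missing at every position, rather than assuming the missing strand is always the second one. A small sketch of that rule, again with a hypothetical stand-in record:

import java.util.List;

public class HaploidCheckExample {

  // Hypothetical stand-in for SampleAllele, as in the earlier sketch.
  record Allele(String allele1, String allele2) {}

  // A sample is haploid only if one entire strand is blank at every position.
  static boolean isHaploid(List<Allele> sample) {
    int missing1 = 0;
    int missing2 = 0;
    for (Allele a : sample) {
      if (a.allele1() == null) missing1 += 1;
      if (a.allele2() == null) missing2 += 1;
    }
    return missing1 == sample.size() || missing2 == sample.size();
  }

  public static void main(String[] args) {
    // ".|T" at one position but "C|T" at another: strand 1 is only partially
    // missing, so the sample is still treated as diploid.
    List<Allele> sample = List.of(new Allele(null, "T"), new Allele("C", "T"));
    System.out.println(isHaploid(sample));  // false
  }
}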