Skip to content

Commit

Permalink
minor changes and comments
Browse files — browse the repository at this point in the history
  • Loading branch information
ahaessly committed Jan 21, 2020
1 parent 9386b3a commit 94dd92a
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,10 @@
import htsjdk.variant.variantcontext.VariantContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.tools.walkers.annotator.UniqueAltReadCount;
import org.broadinstitute.hellbender.utils.IndexRange;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;

// This filter checks for the case in which PCR-duplicates with unique UMIs (which we assume is caused by false adapter priming)
// amplify the erroneous signal for an alternate allele.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,19 @@ public ErrorProbabilities(final List<Mutect2Filter> filters, final VariantContex
}
LinkedHashMap<ErrorType, List<List<Double>>> probabilitiesByAllelesForEachFilter = alleleProbabilitiesByFilter.entrySet().stream().collect(
groupingBy(entry -> entry.getKey().errorType(), LinkedHashMap::new, mapping(entry -> entry.getValue(), toList())));
// convert the data so we have a list of probabilities by allele instead of filter
probabilitiesByAllelesForEachFilter.replaceAll((k, v) -> ErrorProbabilities.transpose(v));

// For each error type, get the max probability for each allele
probabilitiesByTypeAndAllele = probabilitiesByAllelesForEachFilter.entrySet().stream().collect(toMap(
Map.Entry::getKey,
entry -> entry.getValue().stream().map(alleleList -> alleleList.stream().max(Double::compare).orElse(0.0)).collect(Collectors.toList()),
(a,b) -> a, HashMap::new));


// treat errors of different types as independent
// transpose the lists of allele probabilities, so it is now a list per allele that contains the prob for each type
// combine allele-wise
combinedErrorProbabilitiesByAllele = transpose(probabilitiesByTypeAndAllele.values().stream().collect(toList()))
.stream().map(
alleleProbabilities -> alleleProbabilities.stream().map(p -> 1.0 - p).reduce(1.0, (a, b) -> a * b)).collect(Collectors.toList());
Expand All @@ -56,6 +60,8 @@ public ErrorProbabilities(final List<Mutect2Filter> filters, final VariantContex
public List<Double> getNonSomaticProbabilities() { return probabilitiesByTypeAndAllele.get(ErrorType.NON_SOMATIC); }
public Map<Mutect2Filter, List<Double>> getProbabilitiesByFilter() { return alleleProbabilitiesByFilter; }

// helper functions for the few operations that still differ depending on whether the filter
// is per variant or allele
public Map<Mutect2Filter, List<Double>> getProbabilitiesForAlleleFilters() {
return getPartitionedProbabilitiesByFilter(false);
}
Expand All @@ -76,7 +82,11 @@ private Map<Mutect2Filter, List<Double>> getPartitionedProbabilitiesByFilter(boo

// TODO would this be useful in a util class somewhere?
private static <T> List<List<T>> transpose(List<List<T>> list) {
// all lists need to be the same size
final int N = list.stream().mapToInt(l -> l.size()).max().orElse(-1);
if (list.stream().anyMatch(l -> l.size() != N)) {

}
List<Iterator<T>> iterList = list.stream().map(it->it.iterator()).collect(toList());
return IntStream.range(0, N)
.mapToObj(n -> iterList.stream()
Expand Down

0 comments on commit 94dd92a

Please sign in to comment.