Fixed bugs and simplified AlleleLikelihoods evidence-to-index cache #6593

@@ -10,7 +10,6 @@
 import org.broadinstitute.hellbender.utils.MathUtils;
 import org.broadinstitute.hellbender.utils.Utils;
 import org.broadinstitute.hellbender.utils.downsampling.AlleleBiasedDownsamplingUtils;
-import org.broadinstitute.hellbender.utils.pileup.PileupElement;

 import java.util.*;
 import java.util.function.Function;

@@ -77,6 +76,8 @@ private double getInformativeThreshold() {
      *
      * <p>In order to save CPU time the indices contained in this array (not the array itself) is
      * lazily initialized by invoking {@link #evidenceIndexBySampleIndex(int)}.</p>
+     *
+     * <p>Whenever a sample's evidence list is modified this cache must either be updated or invalidated by {@link #invalidateEvidenceToIndexCache(int)}.</p>
      */
     protected final List<Object2IntMap<EVIDENCE>> evidenceIndexBySampleIndex;

@@ -737,9 +738,10 @@ public void addEvidence(final Map<String,List<EVIDENCE>> evidenceBySample, final
                 continue;
             }

-            final int initialNumberOfEvidences = evidenceBySampleIndex.get(sampleIndex).size();
-            final List<EVIDENCE> newlyAddedEvidence = appendEvidence(newSampleEvidence, sampleIndex);
-            extendsLikelihoodArrays(initialLikelihood, sampleIndex, initialNumberOfEvidences, initialNumberOfEvidences + newlyAddedEvidence.size());
+            final int oldEvidenceCount = evidenceBySampleIndex.get(sampleIndex).size();
+            appendEvidence(newSampleEvidence, sampleIndex);
+            final int newEvidenceCount = evidenceBySampleIndex.get(sampleIndex).size();
+            extendsLikelihoodArrays(initialLikelihood, sampleIndex, oldEvidenceCount, newEvidenceCount);
         }
     }

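The change here measures the number of newly added evidences as the difference between the sample's evidence list size before and after the append, which stays correct even when appendEvidence skips duplicates. A minimal, self-contained illustration of that pattern (the class and helper names below are hypothetical, not from the PR):

    import java.util.ArrayList;
    import java.util.LinkedHashSet;
    import java.util.List;
    import java.util.Set;

    public class SizeDeltaDemo {
        // Appends only elements not already present; duplicates are silently skipped.
        static <T> void appendUnique(final List<T> target, final Set<T> seen, final List<T> newElements) {
            for (final T element : newElements) {
                if (seen.add(element)) {
                    target.add(element);
                }
            }
        }

        public static void main(String[] args) {
            final List<String> evidence = new ArrayList<>(List.of("read1", "read2"));
            final Set<String> seen = new LinkedHashSet<>(evidence);

            final int oldCount = evidence.size();
            appendUnique(evidence, seen, List.of("read2", "read3")); // "read2" is a duplicate
            final int newCount = evidence.size();

            // The size difference reports what was actually added (1), not what was requested (2).
            System.out.println(newCount - oldCount);
        }
    }
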
@@ -758,46 +760,24 @@ private void extendsLikelihoodArrays(final double initialLikelihood, final int s
         }
     }

-    // Append the new evidence reference into the structure per-sample.
-    private List<EVIDENCE> appendEvidence(final List<EVIDENCE> newSampleEvidence, final int sampleIndex) {
+    // Append the new evidence reference into the structure per-sample, returning the count of evidence actually added (duplicates are not added)
+    // NOTE: the evidence-to-index cache is updated in place and not invalidated via {@link #invalidateEvidenceToIndexCache(int)} because adding new evidence
+    // to the cache, as opposed to removing evidence, is just a matter of appending entries
+    private void appendEvidence(final List<EVIDENCE> newSampleEvidence, final int sampleIndex) {

         final List<EVIDENCE> sampleEvidence = evidenceBySampleIndex.get(sampleIndex);
         final Object2IntMap<EVIDENCE> sampleEvidenceIndex = evidenceIndexBySampleIndex(sampleIndex);

-        // actually-added will have the list of evidence added at the end of this method.
-        // this won't include those that were already in the table.
-        // being optimistic we assume that there is no repeats in the input new evidence so we set it to
-        // the input list but if we found something we then start a new list.
-        List<EVIDENCE> actuallyAdded = newSampleEvidence;
-
-        int i, nextIndex = sampleEvidence.size();
-        final int stop = newSampleEvidence.size();
-        for (i = 0; i < stop; i++) {
-            final EVIDENCE newEvidence = newSampleEvidence.get(i);
-            final int previousValue = sampleEvidenceIndex.put(newEvidence, nextIndex);
+        for (final EVIDENCE newEvidence : newSampleEvidence) {
+            final int previousValue = sampleEvidenceIndex.put(newEvidence, sampleEvidence.size());
             if (previousValue == MISSING_INDEX) {
-                nextIndex++;
                 sampleEvidence.add(newEvidence);
             } else {
-                actuallyAdded = new ArrayList<>(newSampleEvidence.subList(0, i));
-                i++; // skip the repeated element.
-                break;
-            }
-        }
-        // second for below only use if we encounter some evidence that is already in the table:
-        for (; i < stop; i++) {
-            final EVIDENCE newEvidence = newSampleEvidence.get(i);
-            final int previousValue = sampleEvidenceIndex.put(newEvidence, nextIndex);
-            if (previousValue == MISSING_INDEX) {
-                nextIndex++;
-                sampleEvidence.add(newEvidence);
-                actuallyAdded.add(newEvidence);
-            } else {
                 sampleEvidenceIndex.put(newEvidence, previousValue); // revert
             }
         }

         numberOfEvidences[sampleIndex] = sampleEvidence.size();
-        return actuallyAdded;
     }

     /**

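The rewritten appendEvidence leans on fastutil's Object2IntMap: after defaultReturnValue(MISSING_INDEX), a single put both claims the next index and reports, via its return value, whether the evidence was already cached. A standalone sketch of that idiom under assumed names (the constant, the string evidence, and the class below are illustrative only, not the class's actual fields):

    import it.unimi.dsi.fastutil.objects.Object2IntMap;
    import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;

    import java.util.ArrayList;
    import java.util.List;

    public class EvidenceIndexSketch {
        private static final int MISSING_INDEX = -1;   // sentinel for "not in the cache"

        public static void main(String[] args) {
            final List<String> evidence = new ArrayList<>(List.of("readA", "readB"));
            final Object2IntMap<String> evidenceToIndex = new Object2IntOpenHashMap<>(evidence.size());
            evidenceToIndex.defaultReturnValue(MISSING_INDEX);
            for (int i = 0; i < evidence.size(); i++) {
                evidenceToIndex.put(evidence.get(i), i);
            }

            // put returns the previous value, or MISSING_INDEX if the key was absent,
            // so one call both records the candidate index and detects duplicates.
            for (final String newEvidence : List.of("readB", "readC")) {
                final int previous = evidenceToIndex.put(newEvidence, evidence.size());
                if (previous == MISSING_INDEX) {
                    evidence.add(newEvidence);                   // genuinely new: its index is the size before the add
                } else {
                    evidenceToIndex.put(newEvidence, previous);  // duplicate: restore its original index
                }
            }

            System.out.println(evidence);                        // [readA, readB, readC]
            System.out.println(evidenceToIndex.getInt("readC")); // 2
        }
    }

The revert put in the sketch restores a duplicate's original index after the speculative put overwrote it, which is why appending never needs to invalidate the whole cache.
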
@@ -1129,60 +1109,41 @@ private void removeEvidence(final int sampleIndex, final Collection<EVIDENCE> ev
         removeEvidenceByIndex(sampleIndex, indexesToRemove);
     }

+    // remove evidence and unset the {@code evidenceIndexBySampleIndex} cache for this sample
     // assumes that evidencesToRemove is sorted and without duplicates.
     private void removeEvidenceByIndex(final int sampleIndex, final int[] evidencesToRemove) {
-        if (evidencesToRemove.length == 0) {
+        final int numToRemove = evidencesToRemove.length;
+        if (numToRemove == 0) {
             return;
-        } else {
-            final Object2IntMap<EVIDENCE> evidenceIndex = evidenceIndexBySampleIndex(sampleIndex);
-            final int oldEvidenceCount = numberOfEvidences[sampleIndex];
-            final int newEvidenceCount = oldEvidenceCount - evidencesToRemove.length;
-            if (newEvidenceCount < 0) {
-                throw new IllegalStateException("attempt to remove non-existent evidence or repeated evidence");
-            } else if (newEvidenceCount == 0) { // taking in consideration the assumption we simply remove
-                                                // all evidence.
-                evidenceIndex.clear();
-                numberOfEvidences[sampleIndex] = 0;
-            }
         }
+
+        final int oldEvidenceCount = numberOfEvidences[sampleIndex];
+        final int newEvidenceCount = oldEvidenceCount - evidencesToRemove.length;

+        // update the list of evidence and evidence count
+        final List<EVIDENCE> oldEvidence = evidenceBySampleIndex.get(sampleIndex);
+        final List<EVIDENCE> newEvidence = new ArrayList<>(newEvidenceCount);
+        for (int n = 0, numRemoved = 0; n < oldEvidenceCount; n++) {
Review comment: You can be a bit more efficient by putting the next index to skip "on-deck":
Reply: That's pretty neat but I think I'll keep the loop simpler as-is.
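The snippet that followed this comment is not preserved in this capture; the following is only a guess at the kind of bookkeeping the phrase "on-deck" suggests, with the next index to skip held in a local variable and advanced as it is consumed (hypothetical, not the reviewer's code):

    import java.util.ArrayList;
    import java.util.List;

    public class OnDeckRemovalSketch {
        // Removes the (sorted, duplicate-free) indices from the list in a single pass,
        // keeping the next index to skip "on deck" instead of re-testing numRemoved every iteration.
        static <T> List<T> removeByIndex(final List<T> old, final int[] indicesToRemove) {
            final List<T> kept = new ArrayList<>(old.size() - indicesToRemove.length);
            int numRemoved = 0;
            int onDeck = indicesToRemove.length == 0 ? old.size() : indicesToRemove[0];  // next index to skip
            for (int n = 0; n < old.size(); n++) {
                if (n == onDeck) {
                    numRemoved++;
                    onDeck = numRemoved < indicesToRemove.length ? indicesToRemove[numRemoved] : old.size();
                } else {
                    kept.add(old.get(n));
                }
            }
            return kept;
        }

        public static void main(String[] args) {
            // prints [a, c, e]
            System.out.println(removeByIndex(new ArrayList<>(List.of("a", "b", "c", "d", "e")), new int[]{1, 3}));
        }
    }
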
+            if (numRemoved < numToRemove && n == evidencesToRemove[numRemoved]) {
+                numRemoved++;
+            } else {
-                final List<EVIDENCE> evidences = evidenceBySampleIndex.get(sampleIndex);
-                final double[][] values = valuesBySampleIndex[sampleIndex];
+                newEvidence.add(oldEvidence.get(n));

-                int nextIndexToRemove = evidencesToRemove[0];
-                if (nextIndexToRemove < 0) {
-                    throw new IllegalStateException("invalid input index array as it contains negatives");
-                }
-                evidenceIndex.remove(evidences.get(nextIndexToRemove));
-                for (int etrIndex = 1, to = nextIndexToRemove, from = to + 1; to < newEvidenceCount; etrIndex++, from++) {
-                    if (etrIndex < evidencesToRemove.length) {
-                        nextIndexToRemove = evidencesToRemove[etrIndex];
-                        if (nextIndexToRemove < from) {
-                            throw new IllegalStateException("invalid input index array contains indexes out of order");
-                        } else if (nextIndexToRemove >= oldEvidenceCount) {
-                            throw new IllegalStateException("invalid input index array contains indexes out of order");
-                        }
-                        evidenceIndex.remove(evidences.get(nextIndexToRemove));
-                    } else {
-                        nextIndexToRemove = oldEvidenceCount;
-                    }
-                    for (; from < nextIndexToRemove; from++) {
-                        final EVIDENCE evidence = evidences.get(from);
-                        evidences.set(to, evidence);
-                        evidenceIndex.put(evidence, to++);
-                    }
-                }
-                Utils.truncate(evidences, newEvidenceCount);
-                // now we do the likelihood values, we save ourselves all the checks:
-                for (final double[] alleleValues : values) {
-                    for (int etrIndex = 1, to = evidencesToRemove[0], from = to + 1; to < newEvidenceCount; from++, etrIndex++) {
-                        nextIndexToRemove = etrIndex < evidencesToRemove.length ? evidencesToRemove[etrIndex] : oldEvidenceCount;
-                        for (; from < nextIndexToRemove; from++) {
-                            alleleValues[to++] = alleleValues[from];
-                        }
-                    }
-                }
+                // update the likelihoods arrays in place
+                for (final double[] alleleValues : valuesBySampleIndex[sampleIndex]) {
+                    alleleValues[n - numRemoved] = alleleValues[n];
                 }
-            numberOfEvidences[sampleIndex] = newEvidenceCount;
             }
         }
+        evidenceBySampleIndex.set(sampleIndex, newEvidence);
+        numberOfEvidences[sampleIndex] = newEvidenceCount;
+
+        invalidateEvidenceToIndexCache(sampleIndex);
     }

+    // The evidenceToIndex map becomes invalid when the evidence list is modified, for example by deleting evidence
+    // When adding evidence it is simple enough to add new entries to the map, but we must be careful to do so.
+    private void invalidateEvidenceToIndexCache(final int sampleIndex) {
+        evidenceIndexBySampleIndex.set(sampleIndex, null);
+    }

     /**

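Taken together, removeEvidenceByIndex and the lazy getter in the next hunk form an invalidate-and-rebuild cache: removal nulls out the per-sample map and the next lookup repopulates it from the surviving evidence list. A self-contained sketch of that round trip, with made-up names standing in for the class's fields:

    import it.unimi.dsi.fastutil.objects.Object2IntMap;
    import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;

    import java.util.ArrayList;
    import java.util.List;

    public class InvalidateAndRebuildSketch {
        private static final int MISSING_INDEX = -1;

        private final List<String> evidence = new ArrayList<>(List.of("readA", "readB", "readC"));
        private Object2IntMap<String> evidenceToIndex = null;   // lazily built; null means "invalid"

        int indexOf(final String e) {
            if (evidenceToIndex == null) {                      // rebuild on demand, as in fillEvidenceToIndexCache
                final Object2IntMap<String> index = new Object2IntOpenHashMap<>(evidence.size());
                index.defaultReturnValue(MISSING_INDEX);
                for (int r = 0; r < evidence.size(); r++) {
                    index.put(evidence.get(r), r);
                }
                evidenceToIndex = index;
            }
            return evidenceToIndex.getInt(e);
        }

        void remove(final String e) {
            evidence.remove(e);
            evidenceToIndex = null;                             // invalidate rather than patch the stale indices
        }

        public static void main(String[] args) {
            final InvalidateAndRebuildSketch sketch = new InvalidateAndRebuildSketch();
            System.out.println(sketch.indexOf("readC"));        // 2
            sketch.remove("readB");
            System.out.println(sketch.indexOf("readC"));        // 1 after the rebuild
            System.out.println(sketch.indexOf("readB"));        // -1 (MISSING_INDEX)
        }
    }
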
@@ -1215,21 +1176,21 @@ public void filterPoorlyModeledEvidence(final ToDoubleFunction<EVIDENCE> log10Mi


     private Object2IntMap<EVIDENCE> evidenceIndexBySampleIndex(final int sampleIndex) {
-        if (evidenceIndexBySampleIndex.get(sampleIndex) == null) {
-            final List<EVIDENCE> sampleEvidence = evidenceBySampleIndex.get(sampleIndex);
-            final int sampleEvidenceCount = sampleEvidence.size();
-            final Object2IntMap<EVIDENCE> index = new Object2IntOpenHashMap<>(sampleEvidenceCount);
-            index.defaultReturnValue(MISSING_INDEX);
-            evidenceIndexBySampleIndex.set(sampleIndex, index);
-            for (int r = 0; r < sampleEvidenceCount; r++) {
-                index.put(sampleEvidence.get(r), r);
-            }
-            return index;
-        } else {
-            return evidenceIndexBySampleIndex.get(sampleIndex);
-        }
+        final Object2IntMap<EVIDENCE> cached = evidenceIndexBySampleIndex.get(sampleIndex);
+        return cached == null ? fillEvidenceToIndexCache(sampleIndex) : cached;
     }

+    private Object2IntMap<EVIDENCE> fillEvidenceToIndexCache(int sampleIndex) {
+        final List<EVIDENCE> sampleEvidence = evidenceBySampleIndex.get(sampleIndex);
+        final int sampleEvidenceCount = sampleEvidence.size();
+        final Object2IntMap<EVIDENCE> index = new Object2IntOpenHashMap<>(sampleEvidenceCount);
+        index.defaultReturnValue(MISSING_INDEX);
+        for (int r = 0; r < sampleEvidenceCount; r++) {
Review comment: what about using a …
Reply: I like the …
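The thread above is cut off in this capture, so the actual suggestion is unknown. Shown purely as an illustration, and not as what was proposed, one common alternative to an indexed fill loop is IntStream.range:

    import it.unimi.dsi.fastutil.objects.Object2IntMap;
    import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;

    import java.util.List;
    import java.util.stream.IntStream;

    public class StreamFillSketch {
        private static final int MISSING_INDEX = -1;

        public static void main(String[] args) {
            final List<String> sampleEvidence = List.of("readA", "readB", "readC");
            final Object2IntMap<String> index = new Object2IntOpenHashMap<>(sampleEvidence.size());
            index.defaultReturnValue(MISSING_INDEX);

            // the same mapping as the indexed loop, written with IntStream.range
            IntStream.range(0, sampleEvidence.size()).forEach(r -> index.put(sampleEvidence.get(r), r));

            System.out.println(index.getInt("readB")); // 1
        }
    }
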
+            index.put(sampleEvidence.get(r), r);
+        }
+        evidenceIndexBySampleIndex.set(sampleIndex, index);
+        return index;
+    }

     /**
      * Implements a likelihood matrix per sample given its index.

Review comment: nextIndex contains the value you need here. Perhaps it would be better to call this variable currentSize.
Reply: I agree, but now that you mention it I think it's even better to drop nextIndex entirely and replace it with sampleEvidence.size().
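This exchange refers to the nextIndex counter in the old appendEvidence loop above. A toy comparison of the two bookkeeping styles discussed, a manually advanced counter versus simply reading the list's size, with hypothetical names throughout:

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class NextIndexVsSizeSketch {
        public static void main(String[] args) {
            final List<String> withCounter = new ArrayList<>();
            final Map<String, Integer> indexA = new HashMap<>();
            int nextIndex = withCounter.size();               // the counter the review suggests renaming to currentSize
            for (final String e : List.of("readA", "readB")) {
                indexA.put(e, nextIndex++);
                withCounter.add(e);
            }

            final List<String> withSize = new ArrayList<>();
            final Map<String, Integer> indexB = new HashMap<>();
            for (final String e : List.of("readA", "readB")) {
                indexB.put(e, withSize.size());               // the list's size is already the next index
                withSize.add(e);
            }

            System.out.println(indexA.equals(indexB));        // true: both assign readA -> 0, readB -> 1
        }
    }
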