Skip to content

Commit

Permalink
Merge pull request #735 from deeptools/pca_fixes
Browse files Browse the repository at this point in the history
Bug fix for hicPCA with new test data and test case. Fix #719 #716 #7
  • Loading branch information
joachimwolff authored Jul 12, 2021
2 parents 0d8855d + af768fe commit f5e94ed
Show file tree
Hide file tree
Showing 6 changed files with 2,474 additions and 3 deletions.
17 changes: 14 additions & 3 deletions hicexplorer/hicPCA.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def correlateEigenvectorWithGeneTrack(pMatrix, pEigenvector, pGeneTrack):
density. If the correlation is negative, the eigenvector values are
multiplied with -1.
'''

log.debug('correlate eigenvector!')
file_h = opener(pGeneTrack)
bed = ReadBed(file_h)

Expand All @@ -154,10 +154,21 @@ def correlateEigenvectorWithGeneTrack(pMatrix, pEigenvector, pGeneTrack):
chromosome_name = interval.chromosome
if chromosome_name not in chromosome_list:
continue
if interval.start > pMatrix.get_chromosome_sizes()[chromosome_name]:
log.warning('Your chromosome sizes do not match the chromosome sizes of the extraTrack data!')
log.warning('Your chromosome {}; Size {}. ExtraTrack data {} {} {}'.format(chromosome_name, pMatrix.get_chromosome_sizes()[chromosome_name], interval.chromosome,
interval.start, interval.end))
log.warning('Please create your interaction matrix with a chromosome size file! However, if the sizes are intended and it is accepted that certain regions are not part of the correlation, you can ignore this message.')
continue
# in which bin of the Hi-C matrix is the given gene?
bin_id = pMatrix.getRegionBinRange(interval.chromosome,
interval.start, interval.end)

if bin_id is None:
log.warning('Your chromosome sizes do not match the chromosome sizes of the extraTrack data!')
log.warning('Your chromosome {}; Size {}. ExtraTrack data {} {} {}'.format(chromosome_name, pMatrix.get_chromosome_sizes()[chromosome_name], interval.chromosome,
interval.start, interval.end))
log.warning('Please create your interaction matrix with a chromosome size file! However, if the sizes are intended and it is accepted that certain regions are not part of the correlation, you can ignore this message.')
continue
# add +1 for one gene occurrence in this bin
gene_occurrence[bin_id[1]] += 1

Expand All @@ -180,7 +191,7 @@ def correlateEigenvectorWithGeneTrack(pMatrix, pEigenvector, pGeneTrack):
gene_occurrence_per_chr[chromosome])
if _correlation[0] < 0:
eigenvector[bin_id[0]:bin_id[1]] = np.negative(eigenvector[bin_id[0]:bin_id[1]])

# log.debug('correlated to {}!'.format(_correlation[0]))
return np.array(pEigenvector).transpose()


Expand Down
23 changes: 23 additions & 0 deletions hicexplorer/test/general/test_hicPCA.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,3 +327,26 @@ def test_pca_bedgraph_lieberman_histoneMark_track():

os.unlink(pca1.name)
os.unlink(pca2.name)


def test_pca_extratrack_extends_chromosomesize():
pca1 = NamedTemporaryFile(suffix='.bw', delete=False)
pca2 = NamedTemporaryFile(suffix='.bw', delete=False)

pca1.close()
pca2.close()
matrix = ROOT + "hicPCA/mm9_reduced_chr1.cool"
extra_track = ROOT + 'hicPCA/mm9_genes_sorted.bed12'

args = "--matrix {} --outputFileName {} {} -f bigwig -noe 2 --method lieberman \
--extraTrack {}"\
.format(matrix, pca1.name, pca2.name, extra_track).split()
# hicPCA.main(args)
compute(hicPCA.main, args, 5)
chrom_list = ['chr1']
assert are_files_equal_bigwig(ROOT + "hicPCA/pca_reduced_mm9_1.bw",
pca1.name, chrom_list)
assert are_files_equal_bigwig(ROOT + "hicPCA/pca_reduced_mm9_2.bw",
pca2.name, chrom_list)
os.unlink(pca1.name)
os.unlink(pca2.name)
Loading

0 comments on commit f5e94ed

Please sign in to comment.