Skip to content

Commit

Permalink
Implementing a feature to return random principal components. User re…
Browse files Browse the repository at this point in the history
…quest #669
  • Loading branch information
joachimwolff committed Jul 12, 2021
1 parent f5e94ed commit 8d4415f
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 25 deletions.
33 changes: 20 additions & 13 deletions hicexplorer/hicPCA.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ def parse_arguments():

parserOpt = parser.add_argument_group('Optional arguments')

parserOpt.add_argument('--numberOfEigenvectors', '-noe',
help='The number of eigenvectors that the PCA '
'should compute'
parserOpt.add_argument('--whichEigenvectors', '-we',
help='The list of eigenvectors that the PCA '
'should compute e.g. 1 2 5 will return the first, second and fifth eigenvector.'
' (Default: %(default)s).',
default=2,
type=int,
default='1 2',
nargs='+',
required=False)

parserOpt.add_argument('--format', '-f',
Expand Down Expand Up @@ -239,12 +239,12 @@ def correlateEigenvectorWithHistonMarkTrack(pEigenvector, bwTrack, chromosome,

def main(args=None):
args = parse_arguments().parse_args(args)
if int(args.numberOfEigenvectors) != len(args.outputFileName):
if len(args.whichEigenvectors) != len(args.outputFileName):
log.error("Number of output file names and number of eigenvectors"
" does not match. Please"
"provide the name of each file.\nFiles: {}\nNumber of "
"eigenvectors: {}".format(args.outputFileName,
args.numberOfEigenvectors))
len(args.whichEigenvectors)))
exit(1)

ma = hm.hiCMatrix(args.matrix)
Expand Down Expand Up @@ -303,21 +303,27 @@ def main(args=None):
corrmatrix = convertNansToZeros(csr_matrix(corrmatrix)).todense()
corrmatrix = convertInfsToZeros(csr_matrix(corrmatrix)).todense()
evals, eigs = linalg.eig(corrmatrix)
k = args.numberOfEigenvectors
# k = len(rgs.numberOfEigenvectors

chrom, start, end, _ = zip(*ma.cut_intervals[chr_range[0]:chr_range[1]])

chrom_list += chrom
start_list += start
end_list += end
eigenvectors_correlate = None
for id in args.whichEigenvectors:
if eigenvectors_correlate is None:
eigenvectors_correlate = eigs[:, int(id) - 1:int(id)]
else:
eigenvectors_correlate = np.hstack((eigenvectors_correlate, eigs[:, int(id) - 1:int(id)]))

if args.extraTrack and (args.extraTrack.endswith('.bw') or args.extraTrack.endswith('.bigwig')):
assert(len(end) == len(start))
correlateEigenvectorWithHistonMarkTrack(eigs[:, :k].transpose(),
correlateEigenvectorWithHistonMarkTrack(eigenvectors_correlate.transpose(),
bwTrack, chrname, start,
end, args.extraTrack,
args.histonMarkType)

vecs_list += eigs[:, :k].tolist()
vecs_list += eigenvectors_correlate.tolist()

if args.pearsonMatrix:
file_type = 'cool'
Expand Down Expand Up @@ -350,11 +356,12 @@ def main(args=None):

if args.format == 'bedgraph':
for idx, outfile in enumerate(args.outputFileName):

assert(len(vecs_list) == len(chrom_list))

with open(outfile, 'w') as fh:
for i, value in enumerate(vecs_list):
if len(value) == args.numberOfEigenvectors:
if len(value) == len(args.whichEigenvectors):
if isinstance(value[idx], np.complex):
value[idx] = value[idx].real
fh.write("{}\t{}\t{}\t{:.12f}\n".format(toString(chrom_list[i]), start_list[i], end_list[i], value[idx]))
Expand Down Expand Up @@ -388,7 +395,7 @@ def main(args=None):
# create entry lists
for i, value in enumerate(vecs_list):
# it can happen that some 'value' is having less dimensions than it should
if len(value) == args.numberOfEigenvectors:
if len(value) == len(args.whichEigenvectors):
if isinstance(value[idx], np.complex):
value[idx] = value[idx].real
values.append(value[idx])
Expand Down
24 changes: 12 additions & 12 deletions hicexplorer/test/general/test_hicPCA.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,10 @@ def test_pca_bedgraph_lieberman():
pca1.close()
pca2.close()
matrix = ROOT + "small_test_matrix_50kb_res.h5"
args = "--matrix {} --outputFileName {} {} -f bedgraph -noe 2 --method lieberman"\
args = "--matrix {} --outputFileName {} {} -f bedgraph --whichEigenvectors 1 2 --method lieberman"\
.format(matrix, pca1.name, pca2.name).split()
# hicPCA.main(args)
compute(hicPCA.main, args, 5)
hicPCA.main(args)
# compute(hicPCA.main, args, 5)
assert are_files_equal(ROOT + "hicPCA/pca1.bedgraph", pca1.name)
assert are_files_equal(ROOT + "hicPCA/pca2.bedgraph", pca2.name)

Expand All @@ -92,7 +92,7 @@ def test_pca_bedgraph_lieberman_ignore_masked_bins():
pca1.close()
pca2.close()
matrix = ROOT + "small_test_matrix_50kb_res.h5"
args = "--matrix {} --outputFileName {} {} -f bedgraph -noe 2 \
args = "--matrix {} --outputFileName {} {} -f bedgraph --whichEigenvectors 1 2 \
--ignoreMaskedBins --method lieberman".format(matrix, pca1.name, pca2.name).split()
# hicPCA.main(args)
compute(hicPCA.main, args, 5)
Expand All @@ -111,7 +111,7 @@ def test_pca_bigwig_lieberman():
pca1.close()
pca2.close()
matrix = ROOT + "small_test_matrix_50kb_res.h5"
args = "--matrix {} --outputFileName {} {} -f bigwig -noe 2 --method lieberman"\
args = "--matrix {} --outputFileName {} {} -f bigwig --whichEigenvectors 1 2 --method lieberman"\
.format(matrix, pca1.name, pca2.name).split()
# hicPCA.main(args)
compute(hicPCA.main, args, 5)
Expand All @@ -137,7 +137,7 @@ def test_pca_bedgraph_lieberman_gene_density():
matrix = ROOT + "small_test_matrix.h5"
gene_track = ROOT + 'dm3_genes.bed.gz'
chromosomes = 'chrX chrXHet'
args = "--matrix {} --outputFileName {} {} -f bedgraph -noe 2 --method lieberman \
args = "--matrix {} --outputFileName {} {} -f bedgraph --whichEigenvectors 1 2 --method lieberman \
--extraTrack {} --chromosomes {}"\
.format(matrix, pca1.name, pca2.name, gene_track, chromosomes).split()
# hicPCA.main(args)
Expand All @@ -159,7 +159,7 @@ def test_pca_bigwig_lieberman_gene_density():
matrix = ROOT + "small_test_matrix.h5"
gene_track = ROOT + 'dm3_genes.bed.gz'
chromosomes = 'chrX chrXHet'
args = "--matrix {} --outputFileName {} {} -f bigwig -noe 2 --method lieberman \
args = "--matrix {} --outputFileName {} {} -f bigwig --whichEigenvectors 1 2 --method lieberman \
--extraTrack {} --chromosomes {}"\
.format(matrix, pca1.name, pca2.name, gene_track, chromosomes).split()
# hicPCA.main(args)
Expand Down Expand Up @@ -187,7 +187,7 @@ def test_pca_bigwig_lieberman_gene_density_intermediate_matrices():
matrix = ROOT + "small_test_matrix.h5"
gene_track = ROOT + 'dm3_genes.bed.gz'
chromosomes = 'chrX chrXHet'
args = "--matrix {} --outputFileName {} {} -f bigwig -noe 2 --method lieberman \
args = "--matrix {} --outputFileName {} {} -f bigwig --whichEigenvectors 1 2 --method lieberman \
--extraTrack {} --chromosomes {} --pearsonMatrix {} --obsexpMatrix {}"\
.format(matrix, pca1.name, pca2.name, gene_track, chromosomes,
pearson_matrix.name, obs_exp_matrix.name).split()
Expand Down Expand Up @@ -235,7 +235,7 @@ def test_pca_bigwig_gene_density_intermediate_matrices_norm():
matrix = ROOT + "small_test_matrix.h5"
gene_track = ROOT + 'dm3_genes.bed.gz'
chromosomes = 'chrX chrXHet'
args = "--matrix {} --outputFileName {} {} -f bigwig -noe 2 \
args = "--matrix {} --outputFileName {} {} -f bigwig --whichEigenvectors 1 2 \
--extraTrack {} --chromosomes {} --pearsonMatrix {} --obsexpMatrix {} \
--method dist_norm --ligation_factor"\
.format(matrix, pca1.name, pca2.name, gene_track, chromosomes,
Expand Down Expand Up @@ -291,7 +291,7 @@ def test_pca_bigwig_lieberman_histoneMark_track():
matrix = ROOT + "small_test_matrix.h5"
extra_track = ROOT + 'bigwig_chrx_2e6_5e6.bw'
chromosomes = 'chrX '
args = "--matrix {} --outputFileName {} {} -f bigwig -noe 2 --method lieberman \
args = "--matrix {} --outputFileName {} {} -f bigwig --whichEigenvectors 1 2 --method lieberman \
--extraTrack {} --chromosomes {}"\
.format(matrix, pca1.name, pca2.name, extra_track, chromosomes).split()
# hicPCA.main(args)
Expand All @@ -315,7 +315,7 @@ def test_pca_bedgraph_lieberman_histoneMark_track():
matrix = ROOT + "small_test_matrix.h5"
extra_track = ROOT + 'bigwig_chrx_2e6_5e6.bw'
chromosomes = 'chrX '
args = "--matrix {} --outputFileName {} {} -f bedgraph -noe 2 --method lieberman \
args = "--matrix {} --outputFileName {} {} -f bedgraph --whichEigenvectors 1 2 --method lieberman \
--extraTrack {} --chromosomes {}"\
.format(matrix, pca1.name, pca2.name, extra_track, chromosomes).split()
# hicPCA.main(args)
Expand All @@ -338,7 +338,7 @@ def test_pca_extratrack_extends_chromosomesize():
matrix = ROOT + "hicPCA/mm9_reduced_chr1.cool"
extra_track = ROOT + 'hicPCA/mm9_genes_sorted.bed12'

args = "--matrix {} --outputFileName {} {} -f bigwig -noe 2 --method lieberman \
args = "--matrix {} --outputFileName {} {} -f bigwig --whichEigenvectors 1 2 --method lieberman \
--extraTrack {}"\
.format(matrix, pca1.name, pca2.name, extra_track).split()
# hicPCA.main(args)
Expand Down

0 comments on commit 8d4415f

Please sign in to comment.