-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathh5py_util.py
152 lines (129 loc) · 4.43 KB
/
h5py_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import h5py
import numpy as np
from multiprocessing.sharedctypes import RawArray
import ctypes
def add_major_minor(all_majorminor, h5mm):
for chrom, chrom_mm in all_majorminor.iteritems():
h5chrom = h5mm.create_group(chrom)
for bam, bam_mm in chrom_mm.iteritems():
bam_dat = np.array(bam_mm, dtype = 'S1')
h5chrom.create_dataset(bam, dtype = 'S1', shape = bam_dat.shape, data = bam_dat)
def get_major_minor(h5in):
mm = {}
for chrom in h5in['major_minor'].keys():
h5_chrom_mm = h5in['major_minor'][chrom]
mm[chrom] = {}
for bam in h5_chrom_mm.keys():
h5_bam_mm = h5_chrom_mm[bam]
t_h5_bam_mm = h5_bam_mm[:,:].copy()
mm[chrom][bam] = t_h5_bam_mm
return mm
empty_locobs = lambda: (
(np.empty((0, 5), dtype = np.uint32), np.empty((0, 5), dtype = np.uint32)),
(np.empty((0, 5), dtype = np.uint32), np.empty((0, 5), dtype = np.uint32)))
'''
def get_locus_locobs(h5lo_locus):
f1shape = h5lo_locus['f1'].shape
f2shape = h5lo_locus['f2'].shape
r1shape = h5lo_locus['r1'].shape
r2shape = h5lo_locus['r2'].shape
loc_obs = ((
get_raw_uint32_array(np.prod(f1shape)),
get_raw_uint32_array(np.prod(f2shape))
),
(
get_raw_uint32_array(np.prod(r1shape)),
get_raw_uint32_array(np.prod(r2shape))
))
x = memoryview(loc_obs[0][0])
y = h5lo_locus['f1'][:,:].reshape(np.prod(f1shape))
x[:] = y
loc_obs[0][0].shape = f1shape
x = memoryview(loc_obs[0][1])
y = h5lo_locus['f2'][:,:].reshape(np.prod(f2shape))
x[:] = y
loc_obs[0][1].shape = f2shape
x = memoryview(loc_obs[1][0])
y = h5lo_locus['r1'][:,:].reshape(np.prod(r1shape))
x[:] = y
loc_obs[1][0].shape = r1shape
x = memoryview(loc_obs[1][1])
y = h5lo_locus['r2'][:,:].reshape(np.prod(r2shape))
x[:] = y
loc_obs[1][1].shape = r2shape
return loc_obs
'''
'''
def get_locus_locobs(h5lo_locus):
f1shape = h5lo_locus['f1'].shape
f2shape = h5lo_locus['f2'].shape
r1shape = h5lo_locus['r1'].shape
r2shape = h5lo_locus['r2'].shape
loc_obs = ((
h5lo_locus['f1'][:,:],
h5lo_locus['f2'][:,:]
),
(
h5lo_locus['r1'][:,:],
h5lo_locus['r2'][:,:]
))
return loc_obs
'''
def get_locus_locobs(h5lo_locus):
loc_obs = ((
h5lo_locus['f1'],
h5lo_locus['f2']
),
(
h5lo_locus['r1'],
h5lo_locus['r2']
))
return loc_obs
def get_raw_uint32_array(shape):
n_el = np.prod(shape)
arr = np.frombuffer(RawArray(ctypes.c_uint32, n_el), dtype = np.uint32).reshape(shape)
return arr
def get_locobs(h5in, mm):
lo = {}
for chrom, h5lo_chrom in h5in['locus_observations'].iteritems():
chrom_mm = mm[chrom]
lo[chrom] = {}
bams = h5lo_chrom.keys()
for bam in bams:
h5lo_bam = h5lo_chrom[bam]
print '# loading locus observations for {} ({} of {})'.format(bam, bams.index(bam)+1, len(bams))
bam_mm = chrom_mm[bam]
seqlen = bam_mm.shape[0]
bam_lo = []
for i in xrange(0, seqlen):
loc_str = str(i)
if loc_str in h5lo_bam:
loc_obs = get_locus_locobs(h5lo_bam[loc_str])
bam_lo.append(loc_obs)
else:
bam_lo.append(None)
assert len(bam_lo) == seqlen
lo[chrom][bam] = bam_lo
return lo
def get_covariate_matrices(h5in, mm):
cm = {}
for chrom, h5cm_chrom in h5in['covariate_matrices'].iteritems():
chrom_mm = mm[chrom]
cm[chrom] = {}
bams = h5cm_chrom.keys()
for bam in bams:
bam_mm = chrom_mm[bam]
seqlen = bam_mm.shape[0]
h5cm_bam = h5cm_chrom[bam]
print '# loading covariate matrices for {} ({} of {})'.format(bam, bams.index(bam)+1, len(bams))
bam_cm = []
for i in xrange(0, seqlen):
loc_str = str(i)
if loc_str in h5cm_bam.keys():
loc_cm = h5cm_bam[loc_str]
bam_cm.append(loc_cm)
else:
bam_cm.append([])
assert len(bam_cm) == seqlen
cm[chrom][bam] = bam_cm
return cm