Skip to content

Commit

Permalink
fix for #158 (kde) and #149
Browse files Browse the repository at this point in the history
  • Loading branch information
HadrienG committed May 12, 2020
1 parent f0f3fe2 commit 95f5992
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 18 deletions.
4 changes: 3 additions & 1 deletion iss/error_models/kde.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class KDErrorModel(ErrorModel):
- the substitution for each nucleotide at each position (for R1 and R2)
- the insertion and deletion rates for each position (for R1 and R2)
"""

def __init__(self, npz_path):
super().__init__()
self.npz_path = npz_path
Expand Down Expand Up @@ -74,7 +75,8 @@ def gen_phred_scores(self, cdfs, orientation):
mean = self.mean_reverse

norm_mean = mean / sum(mean)
quality_bin = np.searchsorted(norm_mean, np.random.rand())
# quality_bin = np.searchsorted(norm_mean, np.random.rand())
quality_bin = np.random.choice(range(len(norm_mean)), p=norm_mean)
# downgrades index out of bound (ex rand is 1, last bin in searchsorted
# is 0.95) to best quality bin
if quality_bin == 4:
Expand Down
40 changes: 23 additions & 17 deletions iss/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,21 +62,30 @@ def reads(record, ErrorModel, n_pairs, cpu_number, output, seed,
# except ValueError as e:
# logger.error('Skipping this record: %s' % record.id)
# return
forward, reverse = simulate_read(record, ErrorModel, i, cpu_number)
if gc_bias:
stiched_seq = forward.seq + reverse.seq
gc_content = GC(stiched_seq)
if 40 < gc_content < 60:
read_tuple_list.append((forward, reverse))
i += 1
elif np.random.rand() < 0.90:
try:
forward, reverse = simulate_read(record, ErrorModel, i, cpu_number)
except AssertionError as e:
logger.warning(
'%s shorter than read length for this ErrorModel' % record.id)
logger.warning(
'Skipping %s. You will have less reads than specified'
% record.id)
break
else:
if gc_bias:
stiched_seq = forward.seq + reverse.seq
gc_content = GC(stiched_seq)
if 40 < gc_content < 60:
read_tuple_list.append((forward, reverse))
i += 1
elif np.random.rand() < 0.90:
read_tuple_list.append((forward, reverse))
i += 1
else:
continue
else:
read_tuple_list.append((forward, reverse))
i += 1
else:
continue
else:
read_tuple_list.append((forward, reverse))
i += 1

temp_file_name = output + '.iss.tmp.%s.%s' % (record.id, cpu_number)
to_fastq(read_tuple_list, temp_file_name)
Expand Down Expand Up @@ -112,10 +121,7 @@ def simulate_read(record, ErrorModel, i, cpu_number):
forward_start = random.randrange(
0, len(record.seq) - (2 * read_length + insert_size))
except AssertionError as e:
logger.error(
'%s shorter than read length for this ErrorModel:%s'
% (e, record.id))
sys.exit(1)
raise
except ValueError as e:
logger.debug(
'%s shorter than template length for this ErrorModel:%s'
Expand Down

0 comments on commit 95f5992

Please sign in to comment.