@@ -28,18 +28,20 @@ def main():
28
28
print_header (res_classes , bla_classes )
29
29
30
30
for contigs in args .assemblies :
31
- hits_dict = resblast_one_assembly (contigs , gene_info , args .qrdr , args .trunc , args .seqs ,
32
- args .mincov , args .minident )
31
+ hits_dict = resblast_one_assembly (contigs , gene_info , args .qrdr , args .trunc , args .omp ,
32
+ args .seqs , args . mincov , args .minident )
33
33
print_results (contigs , res_classes , bla_classes , hits_dict )
34
34
35
35
36
- def resblast_one_assembly (contigs , gene_info , qrdr , trunc , seqs , mincov , minident ):
37
- build_blast_databases (seqs , qrdr , trunc )
36
+ def resblast_one_assembly (contigs , gene_info , qrdr , trunc , omp , seqs , mincov , minident ):
37
+ build_blast_databases (seqs , qrdr , trunc , omp )
38
38
hits_dict = blast_against_all (seqs , mincov , minident , contigs , gene_info )
39
39
if qrdr :
40
40
check_for_qrdr_mutations (hits_dict , contigs , qrdr )
41
41
if trunc :
42
- check_for_gene_truncations (hits_dict , contigs , trunc )
42
+ check_for_mgrb_pmrb_gene_truncations (hits_dict , contigs , trunc )
43
+ if omp :
44
+ check_omp_genes (hits_dict , contigs , omp )
43
45
return hits_dict
44
46
45
47
@@ -63,6 +65,9 @@ def parse_arguments():
63
65
additional_screening_args .add_argument ('-r' , '--trunc' , type = str ,
64
66
help = 'MgrB and PmrB genes for which truncation can '
65
67
'cause colistin resistance' )
68
+ additional_screening_args .add_argument ('-o' , '--omp' , type = str ,
69
+ help = 'OmpK genes for which truncation/mutation can '
70
+ 'cause carbapenem resistance' )
66
71
67
72
settings_args = parser .add_argument_group ('Settings' )
68
73
settings_args .add_argument ('-m' , '--minident' , type = float , default = 90.0 ,
@@ -77,7 +82,7 @@ def parse_arguments():
77
82
return parser .parse_args ()
78
83
79
84
80
- def build_blast_databases (seqs , qrdr , trunc ):
85
+ def build_blast_databases (seqs , qrdr , trunc , omp ):
81
86
if not os .path .exists (seqs + '.nin' ):
82
87
with open (os .devnull , 'w' ) as devnull :
83
88
subprocess .check_call ('makeblastdb -dbtype nucl -in ' + seqs ,
@@ -92,6 +97,11 @@ def build_blast_databases(seqs, qrdr, trunc):
92
97
with open (os .devnull , 'w' ) as devnull :
93
98
subprocess .check_call ('makeblastdb -dbtype prot -in ' + trunc ,
94
99
stdout = devnull , shell = True )
100
+ if omp :
101
+ if not os .path .exists (omp + '.pin' ):
102
+ with open (os .devnull , 'w' ) as devnull :
103
+ subprocess .check_call ('makeblastdb -dbtype prot -in ' + omp ,
104
+ stdout = devnull , shell = True )
95
105
96
106
97
107
def read_class_file (res_class_file ):
@@ -223,7 +233,7 @@ def check_for_exact_aa_match(seqs, gene_nucl_seq):
223
233
224
234
def blastx_results_as_xml_tree (database , query ):
225
235
blastx_cmd = 'blastx -db ' + database + ' -query ' + query + ' -query_gencode 11' + \
226
- ' -outfmt 5 -ungapped - comp_based_stats F -culling_limit 1 -max_hsps 1 -seg no'
236
+ ' -outfmt 5 -comp_based_stats F -culling_limit 1 -max_hsps 1 -seg no'
227
237
process = subprocess .Popen (blastx_cmd , stdout = subprocess .PIPE , stderr = None , shell = True )
228
238
blast_output = process .communicate ()[0 ]
229
239
if not isinstance (blast_output , str ):
@@ -232,11 +242,12 @@ def blastx_results_as_xml_tree(database, query):
232
242
233
243
234
244
def check_for_qrdr_mutations (hits_dict , contigs , qrdr ):
235
- qrdr_loci = {'GyrA' : [(83 , 'S' ), (87 , 'D' )], 'ParC' : [(80 , 'S' ), (84 , 'E' )]}
245
+ qrdr_loci = {'GyrA' : [(83 , 'S' ), (87 , 'D' )],
246
+ 'ParC' : [(80 , 'S' ), (84 , 'E' )]}
236
247
237
248
# key = (locus, pos), value = allele,
238
249
# if found in a simple hit starting at position 1 of the protein seq
239
- complete_hits , incomplete_hits = {}, {}
250
+ complete_hits , incomplete_hits = collections . defaultdict ( list ), collections . defaultdict ( list )
240
251
241
252
root = blastx_results_as_xml_tree (qrdr , contigs )
242
253
for query in root [8 ]:
@@ -250,29 +261,22 @@ def check_for_qrdr_mutations(hits_dict, contigs, qrdr):
250
261
hsp_qseq = hsp [14 ].text
251
262
hsp_hseq = hsp [15 ].text
252
263
253
- for ( pos , wt ) in qrdr_loci [gene_id ]:
254
- if hsp_hit_to >= pos and ( hsp_gaps == 0 ) and ( hsp_hit_from == 1 ) :
264
+ for pos , wt in qrdr_loci [gene_id ]:
265
+ if hsp_hit_to >= pos and hsp_gaps == 0 and hsp_hit_from == 1 :
255
266
# simple alignment
256
- if (gene_id , pos ) in complete_hits :
257
- complete_hits [(gene_id , pos )].append (hsp_qseq [pos - 1 ])
258
- else :
259
- complete_hits [(gene_id , pos )] = [hsp_qseq [pos - 1 ]]
267
+ complete_hits [(gene_id , pos )].append (hsp_qseq [pos - 1 ])
260
268
else :
261
269
# not a simple alignment, need to align query and hit and extract loci
262
270
# manually
263
- if (pos >= hsp_hit_from ) and (pos <= hsp_hit_to ) and \
264
- (hsp_hit_eval <= 0.00001 ):
271
+ if hsp_hit_from <= pos <= hsp_hit_to and hsp_hit_eval <= 0.00001 :
265
272
# locus is within aligned area, set evalue to filter out the junk
266
273
# alignments
267
274
pos_in_aln = get_gapped_position (hsp_hseq , pos - hsp_hit_from + 1 )
268
- if (gene_id , pos ) in incomplete_hits :
269
- incomplete_hits [(gene_id , pos )].append (hsp_qseq [pos_in_aln - 1 ])
270
- else :
271
- incomplete_hits [(gene_id , pos )] = [hsp_qseq [pos_in_aln - 1 ]]
275
+ incomplete_hits [(gene_id , pos )].append (hsp_qseq [pos_in_aln - 1 ])
272
276
snps = []
273
277
274
278
for locus in qrdr_loci :
275
- for ( pos , wt ) in qrdr_loci [locus ]:
279
+ for pos , wt in qrdr_loci [locus ]:
276
280
if (locus , pos ) in complete_hits :
277
281
if complete_hits [(locus , pos )][0 ] != wt :
278
282
snps .append (locus + '-' + str (pos ) +
@@ -283,12 +287,10 @@ def check_for_qrdr_mutations(hits_dict, contigs, qrdr):
283
287
snps .append (locus + '-' + str (pos ) +
284
288
incomplete_hits [(locus , pos )][0 ]) # record SNP at this site
285
289
if snps :
286
- if 'Flq' not in hits_dict :
287
- hits_dict ['Flq' ] = []
288
290
hits_dict ['Flq' ] += snps
289
291
290
292
291
- def check_for_gene_truncations (hits_dict , contigs , trunc ):
293
+ def check_for_mgrb_pmrb_gene_truncations (hits_dict , contigs , trunc ):
292
294
best_mgrb_cov , best_pmrb_cov = 0.0 , 0.0
293
295
294
296
root = blastx_results_as_xml_tree (trunc , contigs )
@@ -297,12 +299,10 @@ def check_for_gene_truncations(hits_dict, contigs, trunc):
297
299
gene_id = hit [2 ].text
298
300
assert gene_id == 'MgrB' or gene_id == 'PmrB'
299
301
gene_len = int (hit [4 ].text )
300
-
301
302
for hsp in hit [5 ]:
302
303
hsp_qseq = hsp [14 ].text
303
304
hit_length = max (len (x ) for x in hsp_qseq .split ('*' ))
304
305
coverage = 100.0 * float (hit_length ) / gene_len
305
-
306
306
if gene_id == 'MgrB' and coverage > best_mgrb_cov :
307
307
best_mgrb_cov = coverage
308
308
elif gene_id == 'PmrB' and coverage > best_pmrb_cov :
@@ -320,6 +320,59 @@ def check_for_gene_truncations(hits_dict, contigs, trunc):
320
320
hits_dict ['Col' ] += truncations
321
321
322
322
323
+ def check_omp_genes (hits_dict , contigs , omp ):
324
+ check_for_omp_gene_truncations (hits_dict , contigs , omp )
325
+ check_for_ompk36_mutations (hits_dict , contigs , omp )
326
+
327
+
328
+ def check_for_omp_gene_truncations (hits_dict , contigs , omp ):
329
+ best_ompk35_cov , best_ompk36_cov = 0.0 , 0.0
330
+
331
+ root = blastx_results_as_xml_tree (omp , contigs )
332
+ for query in root [8 ]:
333
+ for hit in query [4 ]:
334
+ gene_id = hit [2 ].text
335
+ gene_len = int (hit [4 ].text )
336
+ for hsp in hit [5 ]:
337
+ hsp_qseq = hsp [14 ].text
338
+ hit_length = max (len (x ) for x in hsp_qseq .split ('*' ))
339
+ coverage = 100.0 * float (hit_length ) / gene_len
340
+
341
+ if gene_id == 'OmpK35' and coverage > best_ompk35_cov :
342
+ best_ompk35_cov = coverage
343
+ elif (gene_id == 'OmpK36' or gene_id == 'OmpK36GD' or
344
+ gene_id == 'OmpK36TD' ) and coverage > best_ompk36_cov :
345
+ best_ompk36_cov = coverage
346
+
347
+ truncations = []
348
+ if best_ompk35_cov < 90.0 :
349
+ truncations .append ('OmpK35-' + ('%.0f' % best_ompk35_cov ) + '%' )
350
+ if best_ompk36_cov < 90.0 :
351
+ truncations .append ('OmpK36-' + ('%.0f' % best_ompk36_cov ) + '%' )
352
+
353
+ if truncations :
354
+ if 'Bla_Carb' not in hits_dict :
355
+ hits_dict ['Bla_Carb' ] = []
356
+ hits_dict ['Bla_Carb' ] += truncations
357
+
358
+
359
+ def check_for_ompk36_mutations (hits_dict , contigs , omp ):
360
+ root = blastx_results_as_xml_tree (omp , contigs )
361
+ for query in root [8 ]:
362
+ for hit in query [4 ]:
363
+ gene_id = hit [2 ].text
364
+ gene_len = int (hit [4 ].text )
365
+ for hsp in hit [5 ]:
366
+ hsp_qseq = hsp [14 ].text
367
+ hit_length = max (len (x ) for x in hsp_qseq .split ('*' ))
368
+ coverage = 100.0 * float (hit_length ) / gene_len
369
+ if coverage >= 90.0 :
370
+ if gene_id == 'OmpK36GD' and 'GDGDTY' in hsp_qseq :
371
+ hits_dict ['Bla_Carb' ].append ('OmpK36GD' )
372
+ if gene_id == 'OmpK36TD' and 'GDTDTY' in hsp_qseq :
373
+ hits_dict ['Bla_Carb' ].append ('OmpK36TD' )
374
+
375
+
323
376
def get_strain_name (full_path ):
324
377
filename = os .path .split (full_path )[1 ]
325
378
if filename .endswith ('_temp_decompress.fasta' ):
0 commit comments