Skip to content

Commit b512888

Browse files
committed
Change assembly_Ns to ambiguous_bases
1 parent 5a908ab commit b512888

File tree

5 files changed: 42 additions and 14 deletions.

kleborate/contig_stats.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616

1717

1818
def get_contig_stat_results(contigs):
19-
contig_count, n50, longest_contig, contains_n = get_contig_stats(contigs)
19+
contig_count, n50, longest_contig, ambiguous_bases = get_contig_stats(contigs)
2020
return {'contig_count': str(contig_count),
2121
'N50': str(n50),
2222
'largest_contig': str(longest_contig),
23-
'assembly_Ns': contains_n}
23+
'ambiguous_bases': ambiguous_bases}
2424

2525

2626
def get_contig_stats(assembly):
@@ -29,11 +29,17 @@ def get_contig_stats(assembly):
2929
"""
3030
fasta = load_fasta(assembly)
3131

32-
contains_n = 'no'
32+
characters = set()
3333
for _, seq in fasta:
34-
if 'N' in seq:
35-
contains_n = 'yes'
36-
break
34+
characters |= set(b for b in seq)
35+
characters.discard('A')
36+
characters.discard('C')
37+
characters.discard('G')
38+
characters.discard('T')
39+
if characters:
40+
ambiguous_bases = 'yes'
41+
else:
42+
ambiguous_bases = 'no'
3743

3844
contig_lengths = sorted([len(x[1]) for x in fasta])
3945
if not contig_lengths:
@@ -51,4 +57,4 @@ def get_contig_stats(assembly):
5157
else:
5258
n50 = 0
5359

54-
return len(contig_lengths), n50, longest, contains_n
60+
return len(contig_lengths), n50, longest, ambiguous_bases

kleborate/kleborate.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,8 @@ def get_output_headers(args, data_folder):
185185
stdout_header = ['strain', 'species']
186186
full_header = ['strain', 'species', 'species_match']
187187
stdout_header += ['ST', 'virulence_score']
188-
full_header += ['contig_count', 'N50', 'largest_contig', 'assembly_Ns', 'ST', 'virulence_score']
188+
full_header += ['contig_count', 'N50', 'largest_contig', 'ambiguous_bases', 'ST',
189+
'virulence_score']
189190

190191
if args.resistance:
191192
stdout_header.append('resistance_score')

test/sequences/contig_stats_3.fasta

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
>1
2+
agggggggaagggtttcgta
3+
>2
4+
attaggccgtggagccttcctgccgggttcgaaaagtagt
5+
>3
6+
aaatgtaatgcttgtgtttttgggctaagtgctaccattaacaggcgtcagcggatcggcttagttagttgccttggtccaacccaaattttctttaggcggcccgaagactccaactctgctgcataccccgttgtcagctatcatgtcactaaaaatgcacctataagacagtactagaaccacacagacatgacagg

test/sequences/contig_stats_4.fasta

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
>1
2+
AGGGGGGGAAGGGTTTCGTA
3+
>2
4+
ATTAGGCCGTGGAGCCTTCCTGCCGGGTTCGAAAAGTAGT
5+
>3
6+
AAATGTAATGCTTGTGTTTTTGGGCTAAGTGCTACCATTAACAGGCGTCAGCGGATCGGCTTAGTTAGTTGCCTTGGTCCAACCCAAATTTTCTTTAGGCGGCCCGAAGACTCCAACTCTGCTGCATACCCCGTTGTCAGCTATCATGTCACTAAAAATGCACCTATAAGACAGTACTAGAADCACACAGACATGACAGG

test/test_contig_stats.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,19 @@ def test_longest_2(self):
4343
_, _, longest_contig, _ = get_contig_stats('test/sequences/contig_stats_2.fasta')
4444
self.assertEqual(longest_contig, 200)
4545

46-
def test_n_1(self):
47-
_, _, _, contains_n = get_contig_stats('test/sequences/contig_stats_1.fasta')
48-
self.assertEqual(contains_n, 'no')
46+
def test_ambiguous_bases_1(self):
47+
_, _, _, ambiguous_bases = get_contig_stats('test/sequences/contig_stats_1.fasta')
48+
self.assertEqual(ambiguous_bases, 'no')
49+
50+
def test_ambiguous_bases_2(self):
51+
_, _, _, ambiguous_bases = get_contig_stats('test/sequences/contig_stats_2.fasta')
52+
self.assertEqual(ambiguous_bases, 'yes')
53+
54+
def test_ambiguous_bases_3(self):
55+
_, _, _, ambiguous_bases = get_contig_stats('test/sequences/contig_stats_3.fasta')
56+
self.assertEqual(ambiguous_bases, 'no')
57+
58+
def test_ambiguous_bases_4(self):
59+
_, _, _, ambiguous_bases = get_contig_stats('test/sequences/contig_stats_4.fasta')
60+
self.assertEqual(ambiguous_bases, 'yes')
4961

50-
def test_n_2(self):
51-
_, _, _, contains_n = get_contig_stats('test/sequences/contig_stats_2.fasta')
52-
self.assertEqual(contains_n, 'yes')

0 commit comments

Comments
 (0)