Skip to content

Commit

Permalink
Merge pull request #155 from prihoda/master
Browse files Browse the repository at this point in the history
Support zero-length sequences, fixes #93. Release new version 0.5.6 to PyPI.
  • Loading branch information
mdshw5 authored Nov 22, 2019
2 parents f0f5d7b + bb9b311 commit f1668d2
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 11 deletions.
25 changes: 14 additions & 11 deletions pyfaidx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

dna_bases = re.compile(r'([ACTGNactgnYRWSKMDVHBXyrwskmdvhbx]+)')

__version__ = '0.5.5.2'
__version__ = '0.5.6'


class KeyFunctionError(ValueError):
Expand Down Expand Up @@ -123,7 +123,7 @@ def __getitem__(self, n):
>chr1
AC
"""
if self.start is None or self.end is None:
if self.start is None or self.end is None or len(self.seq) == 0:
correction_factor = 0
elif len(
self.seq
Expand Down Expand Up @@ -461,7 +461,7 @@ def read_fai(self):
rname, rlen, offset, lenc, lenb = line.split('\t')
rlen, offset, lenc, lenb = map(int,
(rlen, offset, lenc, lenb))
newlines = int(ceil(rlen / lenc) * (lenb - lenc))
newlines = int(ceil(rlen / lenc) * (lenb - lenc)) if lenc else 0
bend = offset + newlines + rlen
rec = IndexRecord(rlen, offset, lenc, lenb, bend,
prev_bend)
Expand Down Expand Up @@ -508,8 +508,8 @@ def build_index(self):
rname = None # reference sequence name
offset = 0 # binary offset of end of current line
rlen = 0 # reference character length
blen = None # binary line length (includes newline)
clen = None # character line length
blen = 0 # binary line length (includes newline)
clen = 0 # character line length
bad_lines = [] # lines > || < than blen
thisoffset = offset
valid_entry = False
Expand All @@ -535,9 +535,9 @@ def build_index(self):
"Inconsistent line found in >{0} at "
"line {1:n}.".format(
rname, bad_lines[0][0] + 1))
blen = None
blen = 0
rlen = 0
clen = None
clen = 0
bad_lines = []
try: # must catch empty deflines (actually these might be okay: https://github.com/samtools/htslib/pull/258)
rname = line.rstrip('\n\r')[1:].split()[
Expand Down Expand Up @@ -648,8 +648,8 @@ def from_file(self, rname, start, end, internals=False):

# Calculate offset (https://github.com/samtools/htslib/blob/20238f354894775ed22156cdd077bc0d544fa933/faidx.c#L398)
newlines_before = int(
(start0 - 1) / i.lenc * (i.lenb - i.lenc)) if start0 > 0 else 0
newlines_to_end = int(end / i.lenc * (i.lenb - i.lenc))
(start0 - 1) / i.lenc * (i.lenb - i.lenc)) if start0 > 0 and i.lenc else 0
newlines_to_end = int(end / i.lenc * (i.lenb - i.lenc)) if i.lenc else 0
newlines_inside = newlines_to_end - newlines_before
seq_blen = newlines_inside + seq_len
bstart = i.offset + newlines_before + start0
Expand All @@ -669,12 +669,15 @@ def from_file(self, rname, start, end, internals=False):
else:
self.file.seek(bstart)

# If the requested sequence exceeds len(FastaRecord), return as much as possible
if bstart + seq_blen > i.bend and not self.strict_bounds:
seq_blen = i.bend - bstart

# Otherwise it should be safe to read the sequence
if seq_blen > 0:
seq = self.file.read(seq_blen).decode()
elif seq_blen <= 0 and not self.strict_bounds:
# If the requested sequence length is zero or negative, use an empty string, which will be padded with default_seq.
# This was changed to support #155 with strict_bounds=True.
elif seq_blen <= 0:
seq = ''

if not internals:
Expand Down
47 changes: 47 additions & 0 deletions tests/test_feature_bounds_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,53 @@
path = os.path.dirname(__file__)
os.chdir(path)

class TestFeatureZeroLength:
    """Tests for handling zero-length entries, added in #155.

    Every test runs against a small FASTA fixture in which records ``B`` and
    ``C`` consist of a header line only, with no sequence lines.
    """

    def setUp(self):
        """Write the fixture FASTA; the last line has no trailing newline."""
        fixture_lines = ['>A', 'ATCG', '>B', '>C', '>D', 'GTA', 'GC']
        with open('data/zero_length.fasta', 'w') as fasta:
            fasta.write('\n'.join(fixture_lines))

    def tearDown(self):
        """Remove the fixture and its index, tolerating files that are absent.

        Each path is removed independently so that a missing file neither
        raises a second error on top of the real test failure nor prevents
        the other file from being cleaned up.
        """
        for leftover in ('data/zero_length.fasta',
                         'data/zero_length.fasta.fai'):
            try:
                os.remove(leftover)
            except EnvironmentError:
                # The index may be missing if the test failed before it was
                # written; cleanup should not mask that failure.
                pass

    def test_index_zero_length(self):
        """Opening a FASTA that contains zero-length records must not raise."""
        Fasta('data/zero_length.fasta')

    def test_fetch_zero_length(self):
        """Fetching a record that has no sequence lines yields an empty string."""
        fasta = Fasta('data/zero_length.fasta')
        assert str(fasta["B"]) == ''

class TestZeroLengthSequenceSubRange(TestCase):
    """Zero-width slices (``[100:100]``) taken with ``strict_bounds=True``."""

    def setUp(self):
        """No per-test fixture is needed."""

    def tearDown(self):
        """Delete the index of ``data/genes.fasta`` if it is still on disk."""
        try:
            os.remove('data/genes.fasta.fai')
        except EnvironmentError:
            # some tests may delete this file
            pass

    def test_as_raw_zero_length_subsequence(self):
        """With ``as_raw=True`` the empty slice compares equal to ``''``."""
        genes = Fasta('data/genes.fasta', as_raw=True, strict_bounds=True)
        record = genes['gi|557361099|gb|KF435150.1|']
        assert record[100:100] == ''

    def test_zero_length_subsequence(self):
        """Without ``as_raw`` the empty slice exposes an empty ``seq``."""
        genes = Fasta('data/genes.fasta', strict_bounds=True)
        empty_slice = genes['gi|557361099|gb|KF435150.1|'][100:100]
        assert empty_slice.seq == ''

class TestFeatureBoundsCheck:
def setUp(self):
pass
Expand Down

0 comments on commit f1668d2

Please sign in to comment.