From 6d7e2af1072bef8a4af1f743065aab380498052f Mon Sep 17 00:00:00 2001 From: inksink Date: Wed, 4 Apr 2018 12:25:55 +0800 Subject: [PATCH] Fix bug with changing `f._current_pos` when calling `f.readline()` (#182) --- smart_open/s3.py | 6 ++---- smart_open/tests/test_s3.py | 7 ++++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/smart_open/s3.py b/smart_open/s3.py index 0c6df410..ae401896 100644 --- a/smart_open/s3.py +++ b/smart_open/s3.py @@ -232,12 +232,10 @@ def readline(self, limit=-1): # if self._line_terminator in self._buffer: next_newline = self._buffer.index(self._line_terminator) - the_line.write(self._buffer[:next_newline + 1]) - self._buffer = self._buffer[next_newline + 1:] + the_line.write(self._read_from_buffer(next_newline + 1)) break else: - the_line.write(self._buffer) - self._buffer = b'' + the_line.write(self._read_from_buffer(len(self._buffer))) self._fill_buffer(self._buffer_size) return the_line.getvalue() diff --git a/smart_open/tests/test_s3.py b/smart_open/tests/test_s3.py index 7330e17b..3dd20945 100644 --- a/smart_open/tests/test_s3.py +++ b/smart_open/tests/test_s3.py @@ -191,8 +191,13 @@ def test_readline(self): content = b'englishman\nin\nnew\nyork\n' create_bucket_and_key(contents=content) - with smart_open.s3.BufferedInputBase(BUCKET_NAME, KEY_NAME) as fin: + with smart_open.s3.SeekableBufferedInputBase(BUCKET_NAME, KEY_NAME) as fin: + fin.readline() + self.assertEqual(fin.tell(), content.index(b'\n')+1) + + fin.seek(0) actual = list(fin) + self.assertEqual(fin.tell(), len(content)) expected = [b'englishman\n', b'in\n', b'new\n', b'york\n'] self.assertEqual(expected, actual)