Skip to content

Commit

Permalink
Add support for rt, rt+, wt, wt+, at, at+ modes (#342)
Browse files Browse the repository at this point in the history
* Update README.rst

* Update README.rst

* Update README.rst

* Implement seek over HTTPS

* Add 'rt' mode in API and test this mode.

* Add test api_rt_plus

* Add new mode for function 'open'.

* Revert "Merge remote-tracking branch 'remotes/upstream/master'"

This reverts commit ebeab51, reversing
changes made to 0f17449.

* Add tests for append modes:
- a
- a+
- at
- at+

* Update smart_open/tests/test_smart_open.py

Co-Authored-By: Michael Penkov <[email protected]>

* Add tempfile in tests.

* Add new mode function 'open'

* API test names updated.

* Delete "delete mode" in temp file

Co-Authored-By: Michael Penkov <[email protected]>

* Update smart_open/tests/test_smart_open.py

Co-Authored-By: Michael Penkov <[email protected]>

* Update smart_open/tests/test_smart_open.py

Co-Authored-By: Michael Penkov <[email protected]>

* Update test_read_str_from_bytes_api_a_plus
- read text

* Update API tests: open a text stream when comparing text.

* Update tests api: update docstrings.

* remove unused variable buffer

* refactor unit tests

move tests that touch the file system to a separate fixture

* rename buffer to buf, avoid keyword as variable name

* add explicit rt and wt tests

* minor unit test improvements
  • Loading branch information
interpolatio authored and mpenkov committed Sep 10, 2019
1 parent 6501328 commit ed62400
Show file tree
Hide file tree
Showing 2 changed files with 125 additions and 29 deletions.
5 changes: 4 additions & 1 deletion smart_open/smart_open_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,11 @@ def open(
#
try:
binary_mode = {'r': 'rb', 'r+': 'rb+',
'rt': 'rb', 'rt+': 'rb+',
'w': 'wb', 'w+': 'wb+',
'a': 'ab', 'a+': 'ab+'}[mode]
'wt': 'wb', "wt+": 'wb+',
'a': 'ab', 'a+': 'ab+',
'at': 'ab', 'at+': 'ab+'}[mode]
except KeyError:
binary_mode = mode
binary, filename = _open_binary_stream(uri, binary_mode, transport_params)
Expand Down
149 changes: 121 additions & 28 deletions smart_open/tests/test_smart_open.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,10 +263,10 @@ def test_http_pass(self):
def _test_compressed_http(self, suffix, query):
"""Can open <suffix> via http?"""
raw_data = b'Hello World Compressed.' * 10000
buffer = make_buffer(name='data' + suffix)
with smart_open.smart_open(buffer, 'wb') as outfile:
buf = make_buffer(name='data' + suffix)
with smart_open.smart_open(buf, 'wb') as outfile:
outfile.write(raw_data)
compressed_data = buffer.getvalue()
compressed_data = buf.getvalue()
# check that the string was actually compressed
self.assertNotEqual(compressed_data, raw_data)

Expand Down Expand Up @@ -296,13 +296,14 @@ def test_http_bz2_query(self):
self._test_compressed_http(".bz2", True)


def make_buffer(cls=six.BytesIO, initial_value=None, name=None):
def make_buffer(cls=six.BytesIO, initial_value=None, name=None, noclose=False):
"""
Construct a new in-memory file object aka "buffer".
Construct a new in-memory file object aka "buf".
:param cls: Class of the file object. Meaningful values are BytesIO and StringIO.
:param initial_value: Passed directly to the constructor, this is the content of the returned buffer.
:param name: Associated file path. Not assigned if is None (default).
:param noclose: If True, disables the .close function.
:return: Instance of `cls`.
"""
buf = cls(initial_value) if initial_value else cls()
Expand All @@ -311,71 +312,163 @@ def make_buffer(cls=six.BytesIO, initial_value=None, name=None):
if six.PY2:
buf.__enter__ = lambda: buf
buf.__exit__ = lambda exc_type, exc_val, exc_tb: None
if noclose:
buf.close = lambda: None
return buf


class RealFileSystemTests(unittest.TestCase):
    """Tests that touch the file system via temporary files.

    Every test starts from a freshly created temporary file that holds
    SAMPLE_BYTES; the file is removed again when the test finishes.
    """

    def setUp(self):
        # delete=False: the file must outlive this handle so that each test
        # can reopen it by name; tearDown is responsible for removing it.
        with tempfile.NamedTemporaryFile(prefix='test', delete=False) as tmp:
            tmp.write(SAMPLE_BYTES)
            self.temp_file = tmp.name

    def tearDown(self):
        os.unlink(self.temp_file)

    def _store(self, mode, payload):
        """Open the temporary file in ``mode`` and write ``payload`` to it."""
        with smart_open.smart_open(self.temp_file, mode) as sink:
            sink.write(payload)

    def _load(self, mode):
        """Open the temporary file in ``mode`` and return everything read."""
        with smart_open.smart_open(self.temp_file, mode) as source:
            return source.read()

    def test_rt(self):
        contents = self._load('rt')
        self.assertEqual(contents, SAMPLE_TEXT)

    def test_wt(self):
        # The fixture file already holds the sample content, so write
        # something different.
        replacement = 'nippon budokan'
        self._store('wt', replacement)
        self.assertEqual(self._load('rt'), replacement)

    def test_ab(self):
        self._store('ab', SAMPLE_BYTES)
        self.assertEqual(self._load('rb'), SAMPLE_BYTES * 2)

    def test_aplus(self):
        self._store('a+', SAMPLE_TEXT)
        self.assertEqual(self._load('rt'), SAMPLE_TEXT * 2)

    def test_at(self):
        self._store('at', SAMPLE_TEXT)
        self.assertEqual(self._load('rt'), SAMPLE_TEXT * 2)

    def test_atplus(self):
        self._store('at+', SAMPLE_TEXT)
        self.assertEqual(self._load('rt'), SAMPLE_TEXT * 2)


class SmartOpenFileObjTest(unittest.TestCase):
"""
Test passing raw file objects.
"""

def test_read_bytes(self):
    """Can we read bytes from a byte stream?"""
    # Only the post-rename ``buf`` lines are kept; the older ``buffer``
    # spelling shadowed the Python 2 builtin of the same name.
    buf = make_buffer(initial_value=SAMPLE_BYTES)
    with smart_open.smart_open(buf, 'rb') as sf:
        data = sf.read()
    self.assertEqual(data, SAMPLE_BYTES)

def test_write_bytes(self):
    """Can we write bytes to a byte stream?"""
    buf = make_buffer()
    with smart_open.smart_open(buf, 'wb') as sf:
        sf.write(SAMPLE_BYTES)
        # NOTE(review): checked while the stream is still open, because this
        # buffer is created without noclose=True and is presumably closed
        # when the block exits -- confirm against the repository.
        self.assertEqual(buf.getvalue(), SAMPLE_BYTES)

@unittest.skipIf(six.PY2, "Python 2 does not differentiate between str and bytes")
def test_read_text_stream_fails(self):
    """Attempts to read directly from a text stream should fail.

    This is because smart_open.open expects a byte stream as input.
    If you have a text stream, there's no point passing it to smart_open:
    you can read from it directly.
    """
    buf = make_buffer(six.StringIO, initial_value=SAMPLE_TEXT)
    with smart_open.smart_open(buf, 'r') as sf:
        self.assertRaises(TypeError, sf.read)  # we expect binary mode

@unittest.skipIf(six.PY2, "Python 2 does not differentiate between str and bytes")
def test_write_text_stream_fails(self):
    """Attempts to write directly to a text stream should fail."""
    buf = make_buffer(six.StringIO)
    with smart_open.smart_open(buf, 'w') as sf:
        self.assertRaises(TypeError, sf.write, SAMPLE_TEXT)  # we expect binary mode

def test_read_text_from_bytestream(self):
    """Can we read strings from a byte stream?"""
    buf = make_buffer(initial_value=SAMPLE_BYTES)
    with smart_open.smart_open(buf, 'r') as sf:
        data = sf.read()
    self.assertEqual(data, SAMPLE_TEXT)

def test_read_text_from_bytestream_rt(self):
    """Reading a byte stream opened with mode 'rt' should yield SAMPLE_TEXT."""
    source = make_buffer(initial_value=SAMPLE_BYTES)
    with smart_open.smart_open(source, 'rt') as reader:
        text = reader.read()
    self.assertEqual(text, SAMPLE_TEXT)

def test_read_text_from_bytestream_rtplus(self):
    """Reading a byte stream opened with mode 'rt+' should yield SAMPLE_TEXT."""
    buf = make_buffer(initial_value=SAMPLE_BYTES)
    with smart_open.smart_open(buf, 'rt+') as sf:
        data = sf.read()
    self.assertEqual(data, SAMPLE_TEXT)

def test_write_text_to_bytestream(self):
    """Can we write strings to a byte stream?"""
    sink = make_buffer(noclose=True)
    with smart_open.smart_open(sink, 'w') as writer:
        writer.write(SAMPLE_TEXT)

    # noclose=True disables the buffer's close(), so its contents are still
    # available for inspection at this point.
    written = sink.getvalue()
    self.assertEqual(written, SAMPLE_BYTES)

def test_write_text_to_bytestream_wt(self):
    """Can we write strings to a byte stream opened with mode 'wt'?"""
    sink = make_buffer(noclose=True)
    with smart_open.smart_open(sink, 'wt') as writer:
        writer.write(SAMPLE_TEXT)

    # noclose=True disables the buffer's close(), so its contents are still
    # available for inspection at this point.
    written = sink.getvalue()
    self.assertEqual(written, SAMPLE_BYTES)

def test_write_text_to_bytestream_wtplus(self):
    """Can we write strings to a byte stream opened with mode 'wt+'?"""
    # noclose=True disables the buffer's close(), so getvalue() can still be
    # called once the text stream has been closed.
    buf = make_buffer(noclose=True)
    with smart_open.smart_open(buf, 'wt+') as sf:
        sf.write(SAMPLE_TEXT)

    self.assertEqual(buf.getvalue(), SAMPLE_BYTES)

def test_name_read(self):
    """Can we use the "name" attribute to decompress on the fly?"""
    expected = SAMPLE_BYTES * 1000
    buf = make_buffer(initial_value=bz2.compress(expected), name='data.bz2')
    with smart_open.smart_open(buf, 'rb') as sf:
        actual = sf.read()
    # Compare against the original payload.  Reusing a single ``data`` name
    # for both values turned this check into assertEqual(data, data), which
    # can never fail.
    self.assertEqual(actual, expected)

def test_name_write(self):
    """Can we use the "name" attribute to compress on the fly?"""
    data = SAMPLE_BYTES * 1000
    buf = make_buffer(name='data.bz2')
    with smart_open.smart_open(buf, 'wb') as sf:
        sf.write(data)
    # NOTE(review): checked after the block exits, on the assumption that the
    # compressor only emits its final bytes on close and that closing it
    # leaves ``buf`` itself open -- confirm against the repository.
    self.assertEqual(bz2.decompress(buf.getvalue()), data)

def test_open_side_effect(self):
"""
Expand Down

0 comments on commit ed62400

Please sign in to comment.