Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for rt, rt+, wt, wt+, at, at+ methods #342

Merged
merged 27 commits into from
Sep 10, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
aa3cdc3
Update README.rst
interpolatio Jun 22, 2019
b869206
Update README.rst
interpolatio Jun 22, 2019
63b5f17
Update README.rst
interpolatio Jul 9, 2019
ebeab51
Merge remote-tracking branch 'remotes/upstream/master'
Jul 21, 2019
c5b6090
Implement seek over HTTPS
Jul 21, 2019
e93605f
Merge branch 'rare'
Jul 27, 2019
4e60f45
Add 'rt' mode in API and test this mode.
Jul 28, 2019
b87e729
Add test api_rt_plus
Jul 31, 2019
0e84917
Add new mode for function 'open'.
Jul 31, 2019
0ebe207
Revert "Merge remote-tracking branch 'remotes/upstream/master'"
Aug 1, 2019
e62af3e
Add test for binary mode:
Aug 1, 2019
51ffc65
Update smart_open/tests/test_smart_open.py
interpolatio Aug 4, 2019
2a5ca61
Add tempfile in tests.
Aug 4, 2019
46a4642
Add new mode function 'open'
Aug 7, 2019
5346272
API test names updated.
Aug 9, 2019
8a8624a
Delete "delete mode" in temp file
interpolatio Aug 17, 2019
fbca313
Update smart_open/tests/test_smart_open.py
interpolatio Aug 17, 2019
878efc6
Update smart_open/tests/test_smart_open.py
interpolatio Aug 17, 2019
b0362ed
Update test_read_str_from_bytes_api_a_plus
Aug 20, 2019
8c2100a
Merge remote-tracking branch 'origin/python_api' into python_api
Aug 20, 2019
ad7a8c3
Update tests api: when the text is compared, the text stream opens.
Aug 20, 2019
35fc206
Update tests api: update docstrings.
Aug 20, 2019
ca9e891
remove unused variable buffer
mpenkov Aug 27, 2019
3f29fb0
refactor unit tests
mpenkov Aug 27, 2019
721fbd1
rename buffer to buf, avoid keyword as variable name
mpenkov Aug 27, 2019
133a094
add explicit rt and wt tests
mpenkov Aug 27, 2019
33373d3
minor unit test improvements
mpenkov Aug 27, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion smart_open/smart_open_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,11 @@ def open(
#
try:
binary_mode = {'r': 'rb', 'r+': 'rb+',
'rt': 'rb', 'rt+': 'rb+',
mpenkov marked this conversation as resolved.
Show resolved Hide resolved
'w': 'wb', 'w+': 'wb+',
'a': 'ab', 'a+': 'ab+'}[mode]
'wt': 'wb', "wt+": 'wb+',
'a': 'ab', 'a+': 'ab+',
'at': 'ab', 'at+': 'ab+'}[mode]
except KeyError:
binary_mode = mode
binary, filename = _open_binary_stream(uri, binary_mode, transport_params)
Expand Down
149 changes: 121 additions & 28 deletions smart_open/tests/test_smart_open.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,10 +263,10 @@ def test_http_pass(self):
def _test_compressed_http(self, suffix, query):
"""Can open <suffix> via http?"""
raw_data = b'Hello World Compressed.' * 10000
buffer = make_buffer(name='data' + suffix)
with smart_open.smart_open(buffer, 'wb') as outfile:
buf = make_buffer(name='data' + suffix)
with smart_open.smart_open(buf, 'wb') as outfile:
outfile.write(raw_data)
compressed_data = buffer.getvalue()
compressed_data = buf.getvalue()
# check that the string was actually compressed
self.assertNotEqual(compressed_data, raw_data)

Expand Down Expand Up @@ -296,13 +296,14 @@ def test_http_bz2_query(self):
self._test_compressed_http(".bz2", True)


def make_buffer(cls=six.BytesIO, initial_value=None, name=None):
def make_buffer(cls=six.BytesIO, initial_value=None, name=None, noclose=False):
"""
Construct a new in-memory file object aka "buffer".
Construct a new in-memory file object aka "buf".

:param cls: Class of the file object. Meaningful values are BytesIO and StringIO.
:param initial_value: Passed directly to the constructor, this is the content of the returned buffer.
:param name: Associated file path. Not assigned if is None (default).
:param noclose: If True, disables the .close function.
:return: Instance of `cls`.
"""
buf = cls(initial_value) if initial_value else cls()
Expand All @@ -311,71 +312,163 @@ def make_buffer(cls=six.BytesIO, initial_value=None, name=None):
if six.PY2:
buf.__enter__ = lambda: buf
buf.__exit__ = lambda exc_type, exc_val, exc_tb: None
if noclose:
buf.close = lambda: None
return buf


class RealFileSystemTests(unittest.TestCase):
    """Round-trip tests that go through a real temporary file on disk.

    Every test starts from a fresh temporary file pre-filled with
    SAMPLE_BYTES; the file is removed again once the test has finished.
    """

    def setUp(self):
        # delete=False keeps the file on disk after the handle is closed, so
        # the individual tests can reopen it by name.
        with tempfile.NamedTemporaryFile(prefix='test', delete=False) as seed:
            seed.write(SAMPLE_BYTES)
            self.temp_file = seed.name

    def tearDown(self):
        os.unlink(self.temp_file)

    def _write(self, mode, payload):
        """Open the temporary file in ``mode`` and write ``payload`` to it."""
        with smart_open.smart_open(self.temp_file, mode) as writer:
            writer.write(payload)

    def _read(self, mode):
        """Open the temporary file in ``mode`` and return its whole content."""
        with smart_open.smart_open(self.temp_file, mode) as reader:
            return reader.read()

    def test_rt(self):
        """Reading in 'rt' mode is expected to return SAMPLE_TEXT."""
        self.assertEqual(self._read('rt'), SAMPLE_TEXT)

    def test_wt(self):
        """Text written in 'wt' mode replaces the content and reads back."""
        #
        # The file already contains SAMPLE_TEXT, so write something different.
        #
        replacement = 'nippon budokan'
        self._write('wt', replacement)
        self.assertEqual(self._read('rt'), replacement)

    def test_ab(self):
        """Bytes written in 'ab' mode are appended to the existing content."""
        self._write('ab', SAMPLE_BYTES)
        self.assertEqual(self._read('rb'), SAMPLE_BYTES * 2)

    def test_aplus(self):
        """Text written in 'a+' mode is appended to the existing content."""
        self._write('a+', SAMPLE_TEXT)
        self.assertEqual(self._read('rt'), SAMPLE_TEXT * 2)

    def test_at(self):
        """Text written in 'at' mode is appended to the existing content."""
        self._write('at', SAMPLE_TEXT)
        self.assertEqual(self._read('rt'), SAMPLE_TEXT * 2)

    def test_atplus(self):
        """Text written in 'at+' mode is appended to the existing content."""
        self._write('at+', SAMPLE_TEXT)
        self.assertEqual(self._read('rt'), SAMPLE_TEXT * 2)


class SmartOpenFileObjTest(unittest.TestCase):
"""
Test passing raw file objects.
"""

def test_read_bytes(self):
    """Reading in 'rb' mode from a byte stream returns the bytes it holds."""
    source = make_buffer(initial_value=SAMPLE_BYTES)
    with smart_open.smart_open(source, 'rb') as stream:
        contents = stream.read()
    self.assertEqual(contents, SAMPLE_BYTES)

def test_write_bytes(self):
    """Bytes written in 'wb' mode end up in the underlying byte stream."""
    sink = make_buffer()
    with smart_open.smart_open(sink, 'wb') as stream:
        stream.write(SAMPLE_BYTES)
        # Inspect the buffer while it is still open: getvalue() is not
        # available on a closed in-memory buffer.
        self.assertEqual(sink.getvalue(), SAMPLE_BYTES)

@unittest.skipIf(six.PY2, "Python 2 does not differentiate between str and bytes")
def test_read_text_stream_fails(self):
    """Reading through smart_open from a text stream must raise TypeError.

    smart_open.open expects a byte stream as its input.  A caller that
    already holds a text stream gains nothing by passing it to smart_open
    and can simply read from it directly.
    """
    text_source = make_buffer(six.StringIO, initial_value=SAMPLE_TEXT)
    with smart_open.smart_open(text_source, 'r') as stream:
        # The underlying stream is expected to be binary.
        self.assertRaises(TypeError, stream.read)

@unittest.skipIf(six.PY2, "Python 2 does not differentiate between str and bytes")
def test_write_text_stream_fails(self):
    """Writing through smart_open to a text stream must raise TypeError."""
    text_sink = make_buffer(six.StringIO)
    with smart_open.smart_open(text_sink, 'w') as stream:
        # The underlying stream is expected to be binary.
        self.assertRaises(TypeError, stream.write, SAMPLE_TEXT)

def test_read_str_from_bytes(self):
"""Can we read strings from a byte stream?"""
buffer = make_buffer(initial_value=SAMPLE_BYTES)
with smart_open.smart_open(buffer, 'r') as sf:
def test_read_text_from_bytestream(self):
    """Reading a byte stream in 'r' mode is expected to return SAMPLE_TEXT."""
    source = make_buffer(initial_value=SAMPLE_BYTES)
    with smart_open.smart_open(source, 'r') as stream:
        contents = stream.read()
    self.assertEqual(contents, SAMPLE_TEXT)

def test_read_text_from_bytestream_rt(self):
    """Reading a byte stream in 'rt' mode is expected to return SAMPLE_TEXT."""
    source = make_buffer(initial_value=SAMPLE_BYTES)
    with smart_open.smart_open(source, 'rt') as stream:
        contents = stream.read()
    self.assertEqual(contents, SAMPLE_TEXT)

def test_write_str_to_bytes(self):
def test_read_text_from_bytestream_rtplus(self):
    """Reading a byte stream in 'rt+' mode is expected to return SAMPLE_TEXT."""
    source = make_buffer(initial_value=SAMPLE_BYTES)
    with smart_open.smart_open(source, 'rt+') as stream:
        contents = stream.read()
    self.assertEqual(contents, SAMPLE_TEXT)

def test_write_text_to_bytestream(self):
    """Text written in 'w' mode should reach the byte stream as SAMPLE_BYTES."""
    # noclose=True turns the buffer's close() into a no-op; presumably this
    # keeps getvalue() usable after the with-block has finished.
    sink = make_buffer(noclose=True)
    with smart_open.smart_open(sink, 'w') as stream:
        stream.write(SAMPLE_TEXT)

    written = sink.getvalue()
    self.assertEqual(written, SAMPLE_BYTES)

def test_write_text_to_bytestream_wt(self):
    """Text written in 'wt' mode should reach the byte stream as SAMPLE_BYTES."""
    # noclose=True turns the buffer's close() into a no-op; presumably this
    # keeps getvalue() usable after the with-block has finished.
    sink = make_buffer(noclose=True)
    with smart_open.smart_open(sink, 'wt') as stream:
        stream.write(SAMPLE_TEXT)

    written = sink.getvalue()
    self.assertEqual(written, SAMPLE_BYTES)

def test_write_text_to_bytestream_wtplus(self):
    """Text written in 'wt+' mode should reach the byte stream as SAMPLE_BYTES."""
    # noclose=True turns the buffer's close() into a no-op; presumably this
    # keeps getvalue() usable after the with-block has finished.
    sink = make_buffer(noclose=True)
    with smart_open.smart_open(sink, 'wt+') as stream:
        stream.write(SAMPLE_TEXT)

    written = sink.getvalue()
    self.assertEqual(written, SAMPLE_BYTES)

def test_name_read(self):
    """Can we use the "name" attribute to decompress on the fly?

    The buffer holds bz2-compressed bytes and is named 'data.bz2'; reading
    it through smart_open is expected to give back the uncompressed bytes.
    """
    expected = SAMPLE_BYTES * 1000
    buf = make_buffer(initial_value=bz2.compress(expected), name='data.bz2')
    with smart_open.smart_open(buf, 'rb') as sf:
        actual = sf.read()
    # Keep the expected and actual values in separate names: rebinding a
    # single variable would turn this into a comparison of a value with
    # itself, which can never fail.
    self.assertEqual(actual, expected)

def test_name_write(self):
    """Writing to a buffer named 'data.bz2' should store bz2-compressed data."""
    payload = SAMPLE_BYTES * 1000
    sink = make_buffer(name='data.bz2')
    with smart_open.smart_open(sink, 'wb') as stream:
        stream.write(payload)
    # Decompress only after the writer has been closed, so that the
    # compressed stream is complete.
    roundtripped = bz2.decompress(sink.getvalue())
    self.assertEqual(roundtripped, payload)

def test_open_side_effect(self):
"""
Expand Down