Skip to content

Commit

Permalink
Fix ResourceWarnings & replace deprecated assertEquals (#140)
Browse files Browse the repository at this point in the history
* Fix inner file-like objects not closed properly so ResourceWarning-s was raised

* Fix DeprecationWarning-s in tests

* rename name file to infile in tests

* Redo slightly strange code as classes
  • Loading branch information
horpto authored and menshikh-iv committed Oct 31, 2017
1 parent 5f3b6fa commit d0a2521
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 36 deletions.
44 changes: 34 additions & 10 deletions smart_open/smart_open_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@
logger.warning("multiprocessing could not be imported and won't be used")
from itertools import imap

if IS_PY2:
from bz2file import BZ2File
else:
from bz2 import BZ2File

import gzip
import smart_open.s3 as smart_open_s3

Expand Down Expand Up @@ -504,15 +509,41 @@ def make_closing(base, **attrs):
This is needed for gzip.GzipFile, bz2.BZ2File etc in older Pythons (<=2.6), which otherwise
raise "AttributeError: GzipFile instance has no attribute '__exit__'".
"""
if not hasattr(base, '__enter__'):
attrs['__enter__'] = lambda self: self

if not hasattr(base, '__exit__'):
attrs['__exit__'] = lambda self, type, value, traceback: self.close()

return type('Closing' + base.__name__, (base, object), attrs)


class ClosingBZ2File(make_closing(BZ2File)):
"""
Implements wrapper for BZ2File that closes file object receieved as argument
"""
def __init__(self, inner_stream, *args, **kwargs):
self.inner_stream = inner_stream
super(ClosingBZ2File, self).__init__(inner_stream, *args, **kwargs)

def close(self):
super(ClosingBZ2File, self).close()
if not self.inner_stream.closed:
self.inner_stream.close()

class ClosingGzipFile(make_closing(gzip.GzipFile)):
"""
Implement wrapper for GzipFile that closes file object receieved from arguments
"""
def close(self):
fileobj = self.fileobj
super(ClosingGzipFile, self).close()
if not fileobj.closed:
fileobj.close()

def compression_wrapper(file_obj, filename, mode):
"""
This function will wrap the file_obj with an appropriate
Expand All @@ -526,16 +557,9 @@ def compression_wrapper(file_obj, filename, mode):
"""
_, ext = os.path.splitext(filename)
if ext == '.bz2':
if IS_PY2:
from bz2file import BZ2File
else:
from bz2 import BZ2File
return make_closing(BZ2File)(file_obj, mode)

return ClosingBZ2File(file_obj, mode)
elif ext == '.gz':
from gzip import GzipFile
return make_closing(GzipFile)(fileobj=file_obj, mode=mode)

return ClosingGzipFile(fileobj=file_obj, mode=mode)
else:
return file_obj

Expand Down
57 changes: 31 additions & 26 deletions smart_open/tests/test_smart_open.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,17 @@ def test_http_pass(self):
"""Does http authentication work correctly"""
responses.add(responses.GET, "http://127.0.0.1/index.html", body='line1\nline2')
_ = smart_open.HttpOpenRead(smart_open.ParseUri("http://127.0.0.1/index.html"), user='me', password='pass')
self.assertEquals(len(responses.calls), 1)
self.assertEqual(len(responses.calls), 1)
actual_request = responses.calls[0].request
self.assert_('Authorization' in actual_request.headers)
self.assert_(actual_request.headers['Authorization'].startswith('Basic '))
self.assertTrue('Authorization' in actual_request.headers)
self.assertTrue(actual_request.headers['Authorization'].startswith('Basic '))

@responses.activate
def test_http_gz(self):
"""Can open gzip via http?"""
fpath = os.path.join(CURR_DIR, 'test_data/crlf_at_1k_boundary.warc.gz')
data = open(fpath, 'rb').read()
with open(fpath, 'rb') as infile:
data = infile.read()

responses.add(responses.GET, "http://127.0.0.1/data.gz",
body=data)
Expand All @@ -146,7 +147,8 @@ def test_http_gz(self):
def test_http_bz2(self):
"""Can open bz2 via http?"""
test_string = b'Hello World Compressed.'
test_file = tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False).name
with tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False) as infile:
test_file = infile.name

with smart_open.smart_open(test_file, 'wb') as outfile:
outfile.write(test_string)
Expand Down Expand Up @@ -182,9 +184,9 @@ def test_read_never_returns_none(self):
fout.write(test_string.encode('utf8'))

r = smart_open.smart_open("s3://mybucket/mykey", "rb")
self.assertEquals(r.read(), test_string.encode("utf-8"))
self.assertEquals(r.read(), b"")
self.assertEquals(r.read(), b"")
self.assertEqual(r.read(), test_string.encode("utf-8"))
self.assertEqual(r.read(), b"")
self.assertEqual(r.read(), b"")

@mock_s3
def test_readline(self):
Expand All @@ -196,7 +198,7 @@ def test_readline(self):
fout.write(test_string)

reader = smart_open.smart_open("s3://mybucket/mykey", "rb")
self.assertEquals(reader.readline(), u"hello žluťoučký world!\n".encode("utf-8"))
self.assertEqual(reader.readline(), u"hello žluťoučký world!\n".encode("utf-8"))

@mock_s3
def test_readline_iter(self):
Expand All @@ -210,9 +212,9 @@ def test_readline_iter(self):
reader = smart_open.smart_open("s3://mybucket/mykey", "rb")

actual_lines = [l.decode("utf-8") for l in reader]
self.assertEquals(2, len(actual_lines))
self.assertEquals(lines[0], actual_lines[0])
self.assertEquals(lines[1], actual_lines[1])
self.assertEqual(2, len(actual_lines))
self.assertEqual(lines[0], actual_lines[0])
self.assertEqual(lines[1], actual_lines[1])

@mock_s3
def test_readline_eof(self):
Expand All @@ -224,9 +226,9 @@ def test_readline_eof(self):

reader = smart_open.smart_open("s3://mybucket/mykey", "rb")

self.assertEquals(reader.readline(), b"")
self.assertEquals(reader.readline(), b"")
self.assertEquals(reader.readline(), b"")
self.assertEqual(reader.readline(), b"")
self.assertEqual(reader.readline(), b"")
self.assertEqual(reader.readline(), b"")

@mock_s3
def test_s3_iter_lines(self):
Expand Down Expand Up @@ -785,20 +787,23 @@ def write_read_assertion(self, test_file):
def test_open_gz(self):
"""Can open gzip?"""
fpath = os.path.join(CURR_DIR, 'test_data/crlf_at_1k_boundary.warc.gz')
data = smart_open.smart_open(fpath).read()
with smart_open.smart_open(fpath) as infile:
data = infile.read()
m = hashlib.md5(data)
assert m.hexdigest() == '18473e60f8c7c98d29d65bf805736a0d', \
'Failed to read gzip'

def test_write_read_gz(self):
"""Can write and read gzip?"""
test_file = tempfile.NamedTemporaryFile('wb', suffix='.gz', delete=False).name
self.write_read_assertion(test_file)
with tempfile.NamedTemporaryFile('wb', suffix='.gz', delete=False) as infile:
test_file_name = infile.name
self.write_read_assertion(test_file_name)

def test_write_read_bz2(self):
"""Can write and read bz2?"""
test_file = tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False).name
self.write_read_assertion(test_file)
with tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False) as infile:
test_file_name = infile.name
self.write_read_assertion(test_file_name)


class MultistreamsBZ2Test(unittest.TestCase):
Expand Down Expand Up @@ -906,11 +911,11 @@ def test_r(self):
key.set_contents_from_string(text.encode("utf-8"))

with smart_open.s3_open_key(key, "r") as fin:
self.assertEquals(fin.read(), u"физкульт-привет!")
self.assertEqual(fin.read(), u"физкульт-привет!")

parsed_uri = smart_open.ParseUri("s3://bucket/key")
with smart_open.s3_open_uri(parsed_uri, "r") as fin:
self.assertEquals(fin.read(), u"физкульт-привет!")
self.assertEqual(fin.read(), u"физкульт-привет!")

def test_bad_mode(self):
"""Bad mode should raise and exception."""
Expand All @@ -930,10 +935,10 @@ def test_rw_encoding(self):
fout.write(text)

with smart_open.s3_open_uri(uri, "r", encoding="koi8-r") as fin:
self.assertEquals(text, fin.read())
self.assertEqual(text, fin.read())

with smart_open.s3_open_uri(uri, "rb") as fin:
self.assertEquals(text.encode("koi8-r"), fin.read())
self.assertEqual(text.encode("koi8-r"), fin.read())

with smart_open.s3_open_uri(uri, "r", encoding="euc-jp") as fin:
self.assertRaises(UnicodeDecodeError, fin.read)
Expand All @@ -958,13 +963,13 @@ def test_rw_gzip(self):
#
with smart_open.s3_open_uri(uri, "rb", ignore_extension=True) as fin:
gz = gzip.GzipFile(fileobj=fin)
self.assertEquals(gz.read().decode("utf-8"), text)
self.assertEqual(gz.read().decode("utf-8"), text)

#
# We should be able to read it back as well.
#
with smart_open.s3_open_uri(uri, "rb") as fin:
self.assertEquals(fin.read().decode("utf-8"), text)
self.assertEqual(fin.read().decode("utf-8"), text)

@mock_s3
def test_gzip_write_mode(self):
Expand Down

0 comments on commit d0a2521

Please sign in to comment.