From d0a25218a22db7529aa8d2504ae05f4eabb2d4b9 Mon Sep 17 00:00:00 2001 From: horpto Date: Tue, 31 Oct 2017 11:32:54 +0500 Subject: [PATCH] Fix ResourceWarnings & replace deprecated assertEquals (#140) * Fix inner file-like objects not closed properly so ResourceWarning-s was raised * Fix DeprecationWarning-s in tests * rename name file to infile in tests * Redo slightly strange code as classes --- smart_open/smart_open_lib.py | 44 +++++++++++++++++----- smart_open/tests/test_smart_open.py | 57 ++++++++++++++++------------- 2 files changed, 65 insertions(+), 36 deletions(-) diff --git a/smart_open/smart_open_lib.py b/smart_open/smart_open_lib.py index 4b36caf1..f075b9d9 100644 --- a/smart_open/smart_open_lib.py +++ b/smart_open/smart_open_lib.py @@ -61,6 +61,11 @@ logger.warning("multiprocessing could not be imported and won't be used") from itertools import imap +if IS_PY2: + from bz2file import BZ2File +else: + from bz2 import BZ2File + import gzip import smart_open.s3 as smart_open_s3 @@ -504,15 +509,41 @@ def make_closing(base, **attrs): This is needed for gzip.GzipFile, bz2.BZ2File etc in older Pythons (<=2.6), which otherwise raise "AttributeError: GzipFile instance has no attribute '__exit__'". - """ if not hasattr(base, '__enter__'): attrs['__enter__'] = lambda self: self + if not hasattr(base, '__exit__'): attrs['__exit__'] = lambda self, type, value, traceback: self.close() + return type('Closing' + base.__name__, (base, object), attrs) +class ClosingBZ2File(make_closing(BZ2File)): + """ + Implements wrapper for BZ2File that closes file object receieved as argument + + """ + def __init__(self, inner_stream, *args, **kwargs): + self.inner_stream = inner_stream + super(ClosingBZ2File, self).__init__(inner_stream, *args, **kwargs) + + def close(self): + super(ClosingBZ2File, self).close() + if not self.inner_stream.closed: + self.inner_stream.close() + +class ClosingGzipFile(make_closing(gzip.GzipFile)): + """ + Implement wrapper for GzipFile that closes file object receieved from arguments + + """ + def close(self): + fileobj = self.fileobj + super(ClosingGzipFile, self).close() + if not fileobj.closed: + fileobj.close() + def compression_wrapper(file_obj, filename, mode): """ This function will wrap the file_obj with an appropriate @@ -526,16 +557,9 @@ def compression_wrapper(file_obj, filename, mode): """ _, ext = os.path.splitext(filename) if ext == '.bz2': - if IS_PY2: - from bz2file import BZ2File - else: - from bz2 import BZ2File - return make_closing(BZ2File)(file_obj, mode) - + return ClosingBZ2File(file_obj, mode) elif ext == '.gz': - from gzip import GzipFile - return make_closing(GzipFile)(fileobj=file_obj, mode=mode) - + return ClosingGzipFile(fileobj=file_obj, mode=mode) else: return file_obj diff --git a/smart_open/tests/test_smart_open.py b/smart_open/tests/test_smart_open.py index b2f6488d..049af376 100644 --- a/smart_open/tests/test_smart_open.py +++ b/smart_open/tests/test_smart_open.py @@ -122,16 +122,17 @@ def test_http_pass(self): """Does http authentication work correctly""" responses.add(responses.GET, "http://127.0.0.1/index.html", body='line1\nline2') _ = smart_open.HttpOpenRead(smart_open.ParseUri("http://127.0.0.1/index.html"), user='me', password='pass') - self.assertEquals(len(responses.calls), 1) + self.assertEqual(len(responses.calls), 1) actual_request = responses.calls[0].request - self.assert_('Authorization' in actual_request.headers) - self.assert_(actual_request.headers['Authorization'].startswith('Basic ')) + self.assertTrue('Authorization' in actual_request.headers) + self.assertTrue(actual_request.headers['Authorization'].startswith('Basic ')) @responses.activate def test_http_gz(self): """Can open gzip via http?""" fpath = os.path.join(CURR_DIR, 'test_data/crlf_at_1k_boundary.warc.gz') - data = open(fpath, 'rb').read() + with open(fpath, 'rb') as infile: + data = infile.read() responses.add(responses.GET, "http://127.0.0.1/data.gz", body=data) @@ -146,7 +147,8 @@ def test_http_gz(self): def test_http_bz2(self): """Can open bz2 via http?""" test_string = b'Hello World Compressed.' - test_file = tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False).name + with tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False) as infile: + test_file = infile.name with smart_open.smart_open(test_file, 'wb') as outfile: outfile.write(test_string) @@ -182,9 +184,9 @@ def test_read_never_returns_none(self): fout.write(test_string.encode('utf8')) r = smart_open.smart_open("s3://mybucket/mykey", "rb") - self.assertEquals(r.read(), test_string.encode("utf-8")) - self.assertEquals(r.read(), b"") - self.assertEquals(r.read(), b"") + self.assertEqual(r.read(), test_string.encode("utf-8")) + self.assertEqual(r.read(), b"") + self.assertEqual(r.read(), b"") @mock_s3 def test_readline(self): @@ -196,7 +198,7 @@ def test_readline(self): fout.write(test_string) reader = smart_open.smart_open("s3://mybucket/mykey", "rb") - self.assertEquals(reader.readline(), u"hello žluťoučký world!\n".encode("utf-8")) + self.assertEqual(reader.readline(), u"hello žluťoučký world!\n".encode("utf-8")) @mock_s3 def test_readline_iter(self): @@ -210,9 +212,9 @@ def test_readline_iter(self): reader = smart_open.smart_open("s3://mybucket/mykey", "rb") actual_lines = [l.decode("utf-8") for l in reader] - self.assertEquals(2, len(actual_lines)) - self.assertEquals(lines[0], actual_lines[0]) - self.assertEquals(lines[1], actual_lines[1]) + self.assertEqual(2, len(actual_lines)) + self.assertEqual(lines[0], actual_lines[0]) + self.assertEqual(lines[1], actual_lines[1]) @mock_s3 def test_readline_eof(self): @@ -224,9 +226,9 @@ def test_readline_eof(self): reader = smart_open.smart_open("s3://mybucket/mykey", "rb") - self.assertEquals(reader.readline(), b"") - self.assertEquals(reader.readline(), b"") - self.assertEquals(reader.readline(), b"") + self.assertEqual(reader.readline(), b"") + self.assertEqual(reader.readline(), b"") + self.assertEqual(reader.readline(), b"") @mock_s3 def test_s3_iter_lines(self): @@ -785,20 +787,23 @@ def write_read_assertion(self, test_file): def test_open_gz(self): """Can open gzip?""" fpath = os.path.join(CURR_DIR, 'test_data/crlf_at_1k_boundary.warc.gz') - data = smart_open.smart_open(fpath).read() + with smart_open.smart_open(fpath) as infile: + data = infile.read() m = hashlib.md5(data) assert m.hexdigest() == '18473e60f8c7c98d29d65bf805736a0d', \ 'Failed to read gzip' def test_write_read_gz(self): """Can write and read gzip?""" - test_file = tempfile.NamedTemporaryFile('wb', suffix='.gz', delete=False).name - self.write_read_assertion(test_file) + with tempfile.NamedTemporaryFile('wb', suffix='.gz', delete=False) as infile: + test_file_name = infile.name + self.write_read_assertion(test_file_name) def test_write_read_bz2(self): """Can write and read bz2?""" - test_file = tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False).name - self.write_read_assertion(test_file) + with tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False) as infile: + test_file_name = infile.name + self.write_read_assertion(test_file_name) class MultistreamsBZ2Test(unittest.TestCase): @@ -906,11 +911,11 @@ def test_r(self): key.set_contents_from_string(text.encode("utf-8")) with smart_open.s3_open_key(key, "r") as fin: - self.assertEquals(fin.read(), u"физкульт-привет!") + self.assertEqual(fin.read(), u"физкульт-привет!") parsed_uri = smart_open.ParseUri("s3://bucket/key") with smart_open.s3_open_uri(parsed_uri, "r") as fin: - self.assertEquals(fin.read(), u"физкульт-привет!") + self.assertEqual(fin.read(), u"физкульт-привет!") def test_bad_mode(self): """Bad mode should raise and exception.""" @@ -930,10 +935,10 @@ def test_rw_encoding(self): fout.write(text) with smart_open.s3_open_uri(uri, "r", encoding="koi8-r") as fin: - self.assertEquals(text, fin.read()) + self.assertEqual(text, fin.read()) with smart_open.s3_open_uri(uri, "rb") as fin: - self.assertEquals(text.encode("koi8-r"), fin.read()) + self.assertEqual(text.encode("koi8-r"), fin.read()) with smart_open.s3_open_uri(uri, "r", encoding="euc-jp") as fin: self.assertRaises(UnicodeDecodeError, fin.read) @@ -958,13 +963,13 @@ def test_rw_gzip(self): # with smart_open.s3_open_uri(uri, "rb", ignore_extension=True) as fin: gz = gzip.GzipFile(fileobj=fin) - self.assertEquals(gz.read().decode("utf-8"), text) + self.assertEqual(gz.read().decode("utf-8"), text) # # We should be able to read it back as well. # with smart_open.s3_open_uri(uri, "rb") as fin: - self.assertEquals(fin.read().decode("utf-8"), text) + self.assertEqual(fin.read().decode("utf-8"), text) @mock_s3 def test_gzip_write_mode(self):