Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix warnings #140

Merged
merged 4 commits into from
Oct 31, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 34 additions & 10 deletions smart_open/smart_open_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@
logger.warning("multiprocessing could not be imported and won't be used")
from itertools import imap

if IS_PY2:
from bz2file import BZ2File
else:
from bz2 import BZ2File

import gzip
import smart_open.s3 as smart_open_s3

Expand Down Expand Up @@ -504,15 +509,41 @@ def make_closing(base, **attrs):

This is needed for gzip.GzipFile, bz2.BZ2File etc in older Pythons (<=2.6), which otherwise
raise "AttributeError: GzipFile instance has no attribute '__exit__'".

"""
if not hasattr(base, '__enter__'):
attrs['__enter__'] = lambda self: self

if not hasattr(base, '__exit__'):
attrs['__exit__'] = lambda self, type, value, traceback: self.close()

return type('Closing' + base.__name__, (base, object), attrs)


class ClosingBZ2File(make_closing(BZ2File)):
"""
Implements wrapper for BZ2File that closes file object receieved as argument

"""
def __init__(self, inner_stream, *args, **kwargs):
self.inner_stream = inner_stream
super(ClosingBZ2File, self).__init__(inner_stream, *args, **kwargs)

def close(self):
super(ClosingBZ2File, self).close()
if not self.inner_stream.closed:
self.inner_stream.close()

class ClosingGzipFile(make_closing(gzip.GzipFile)):
"""
Implement wrapper for GzipFile that closes file object receieved from arguments

"""
def close(self):
fileobj = self.fileobj
super(ClosingGzipFile, self).close()
if not fileobj.closed:
fileobj.close()

def compression_wrapper(file_obj, filename, mode):
"""
This function will wrap the file_obj with an appropriate
Expand All @@ -526,16 +557,9 @@ def compression_wrapper(file_obj, filename, mode):
"""
_, ext = os.path.splitext(filename)
if ext == '.bz2':
if IS_PY2:
from bz2file import BZ2File
else:
from bz2 import BZ2File
return make_closing(BZ2File)(file_obj, mode)

return ClosingBZ2File(file_obj, mode)
elif ext == '.gz':
from gzip import GzipFile
return make_closing(GzipFile)(fileobj=file_obj, mode=mode)

return ClosingGzipFile(fileobj=file_obj, mode=mode)
else:
return file_obj

Expand Down
57 changes: 31 additions & 26 deletions smart_open/tests/test_smart_open.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,16 +122,17 @@ def test_http_pass(self):
"""Does http authentication work correctly"""
responses.add(responses.GET, "http://127.0.0.1/index.html", body='line1\nline2')
_ = smart_open.HttpOpenRead(smart_open.ParseUri("http://127.0.0.1/index.html"), user='me', password='pass')
self.assertEquals(len(responses.calls), 1)
self.assertEqual(len(responses.calls), 1)
actual_request = responses.calls[0].request
self.assert_('Authorization' in actual_request.headers)
self.assert_(actual_request.headers['Authorization'].startswith('Basic '))
self.assertTrue('Authorization' in actual_request.headers)
self.assertTrue(actual_request.headers['Authorization'].startswith('Basic '))

@responses.activate
def test_http_gz(self):
"""Can open gzip via http?"""
fpath = os.path.join(CURR_DIR, 'test_data/crlf_at_1k_boundary.warc.gz')
data = open(fpath, 'rb').read()
with open(fpath, 'rb') as infile:
data = infile.read()

responses.add(responses.GET, "http://127.0.0.1/data.gz",
body=data)
Expand All @@ -146,7 +147,8 @@ def test_http_gz(self):
def test_http_bz2(self):
"""Can open bz2 via http?"""
test_string = b'Hello World Compressed.'
test_file = tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False).name
with tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False) as infile:
test_file = infile.name

with smart_open.smart_open(test_file, 'wb') as outfile:
outfile.write(test_string)
Expand Down Expand Up @@ -182,9 +184,9 @@ def test_read_never_returns_none(self):
fout.write(test_string.encode('utf8'))

r = smart_open.smart_open("s3://mybucket/mykey", "rb")
self.assertEquals(r.read(), test_string.encode("utf-8"))
self.assertEquals(r.read(), b"")
self.assertEquals(r.read(), b"")
self.assertEqual(r.read(), test_string.encode("utf-8"))
self.assertEqual(r.read(), b"")
self.assertEqual(r.read(), b"")

@mock_s3
def test_readline(self):
Expand All @@ -196,7 +198,7 @@ def test_readline(self):
fout.write(test_string)

reader = smart_open.smart_open("s3://mybucket/mykey", "rb")
self.assertEquals(reader.readline(), u"hello žluťoučký world!\n".encode("utf-8"))
self.assertEqual(reader.readline(), u"hello žluťoučký world!\n".encode("utf-8"))

@mock_s3
def test_readline_iter(self):
Expand All @@ -210,9 +212,9 @@ def test_readline_iter(self):
reader = smart_open.smart_open("s3://mybucket/mykey", "rb")

actual_lines = [l.decode("utf-8") for l in reader]
self.assertEquals(2, len(actual_lines))
self.assertEquals(lines[0], actual_lines[0])
self.assertEquals(lines[1], actual_lines[1])
self.assertEqual(2, len(actual_lines))
self.assertEqual(lines[0], actual_lines[0])
self.assertEqual(lines[1], actual_lines[1])

@mock_s3
def test_readline_eof(self):
Expand All @@ -224,9 +226,9 @@ def test_readline_eof(self):

reader = smart_open.smart_open("s3://mybucket/mykey", "rb")

self.assertEquals(reader.readline(), b"")
self.assertEquals(reader.readline(), b"")
self.assertEquals(reader.readline(), b"")
self.assertEqual(reader.readline(), b"")
self.assertEqual(reader.readline(), b"")
self.assertEqual(reader.readline(), b"")

@mock_s3
def test_s3_iter_lines(self):
Expand Down Expand Up @@ -785,20 +787,23 @@ def write_read_assertion(self, test_file):
def test_open_gz(self):
"""Can open gzip?"""
fpath = os.path.join(CURR_DIR, 'test_data/crlf_at_1k_boundary.warc.gz')
data = smart_open.smart_open(fpath).read()
with smart_open.smart_open(fpath) as infile:
data = infile.read()
m = hashlib.md5(data)
assert m.hexdigest() == '18473e60f8c7c98d29d65bf805736a0d', \
'Failed to read gzip'

def test_write_read_gz(self):
"""Can write and read gzip?"""
test_file = tempfile.NamedTemporaryFile('wb', suffix='.gz', delete=False).name
self.write_read_assertion(test_file)
with tempfile.NamedTemporaryFile('wb', suffix='.gz', delete=False) as infile:
test_file_name = infile.name
self.write_read_assertion(test_file_name)

def test_write_read_bz2(self):
"""Can write and read bz2?"""
test_file = tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False).name
self.write_read_assertion(test_file)
with tempfile.NamedTemporaryFile('wb', suffix='.bz2', delete=False) as infile:
test_file_name = infile.name
self.write_read_assertion(test_file_name)


class MultistreamsBZ2Test(unittest.TestCase):
Expand Down Expand Up @@ -906,11 +911,11 @@ def test_r(self):
key.set_contents_from_string(text.encode("utf-8"))

with smart_open.s3_open_key(key, "r") as fin:
self.assertEquals(fin.read(), u"физкульт-привет!")
self.assertEqual(fin.read(), u"физкульт-привет!")

parsed_uri = smart_open.ParseUri("s3://bucket/key")
with smart_open.s3_open_uri(parsed_uri, "r") as fin:
self.assertEquals(fin.read(), u"физкульт-привет!")
self.assertEqual(fin.read(), u"физкульт-привет!")

def test_bad_mode(self):
"""Bad mode should raise and exception."""
Expand All @@ -930,10 +935,10 @@ def test_rw_encoding(self):
fout.write(text)

with smart_open.s3_open_uri(uri, "r", encoding="koi8-r") as fin:
self.assertEquals(text, fin.read())
self.assertEqual(text, fin.read())

with smart_open.s3_open_uri(uri, "rb") as fin:
self.assertEquals(text.encode("koi8-r"), fin.read())
self.assertEqual(text.encode("koi8-r"), fin.read())

with smart_open.s3_open_uri(uri, "r", encoding="euc-jp") as fin:
self.assertRaises(UnicodeDecodeError, fin.read)
Expand All @@ -958,13 +963,13 @@ def test_rw_gzip(self):
#
with smart_open.s3_open_uri(uri, "rb", ignore_extension=True) as fin:
gz = gzip.GzipFile(fileobj=fin)
self.assertEquals(gz.read().decode("utf-8"), text)
self.assertEqual(gz.read().decode("utf-8"), text)

#
# We should be able to read it back as well.
#
with smart_open.s3_open_uri(uri, "rb") as fin:
self.assertEquals(fin.read().decode("utf-8"), text)
self.assertEqual(fin.read().decode("utf-8"), text)

@mock_s3
def test_gzip_write_mode(self):
Expand Down