Skip to content

Commit

Permalink
Reproduce and fix unexpected exception when processing a zero-byte file (#65)
Browse files Browse the repository at this point in the history
Replace the unexpected exception with an expected one that carries a clearer error message from Pandas.
  • Loading branch information
vinceatbluelabs authored May 23, 2020
1 parent ff786a8 commit 525d9dd
Show file tree
Hide file tree
Showing 6 changed files with 50 additions and 22 deletions.
2 changes: 1 addition & 1 deletion metrics/coverage_high_water_mark
Original file line number Diff line number Diff line change
@@ -1 +1 @@
93.9900
94.0200
2 changes: 1 addition & 1 deletion metrics/mypy_high_water_mark
Original file line number Diff line number Diff line change
@@ -1 +1 @@
92.1000
92.1100
2 changes: 2 additions & 0 deletions records_mover/records/delimited/sniff.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ def sniff_hints(fileobj: IO[bytes],
record_terminator_hint,
final_encoding_hint,
compression_hint)
else:
python_inferred_hints = {}

#
# Pandas can both validate that we chose correctly by parsing
Expand Down
60 changes: 40 additions & 20 deletions tests/unit/records/test_hints.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,14 @@ def test_sniff_hints(self):
initial_hints = config['initial_hints']

with open(csv_filename, 'rb') as fileobj:
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
try:
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
except Exception as e:
if str(e) != config.get('raises'):
raise

def test_sniff_hints_gzipped_preinformed(self):
for basename in self.sample_file_basenames():
Expand All @@ -54,10 +58,14 @@ def test_sniff_hints_gzipped_preinformed(self):
with open(csv_filename, 'rb') as uncompressed_fileobj:
gzipped_data = gzip.compress(uncompressed_fileobj.read())
fileobj = io.BytesIO(gzipped_data)
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
try:
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
except Exception as e:
if str(e) != config.get('raises'):
raise

def test_sniff_hints_gzipped_sniffed(self):
for basename in self.sample_file_basenames():
Expand All @@ -72,10 +80,14 @@ def test_sniff_hints_gzipped_sniffed(self):
with open(csv_filename, 'rb') as uncompressed_fileobj:
gzipped_data = gzip.compress(uncompressed_fileobj.read())
fileobj = io.BytesIO(gzipped_data)
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
try:
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
except Exception as e:
if str(e) != config.get('raises'):
raise

def test_sniff_hints_bzipped_preinformed(self):
for basename in self.sample_file_basenames():
Expand All @@ -91,10 +103,14 @@ def test_sniff_hints_bzipped_preinformed(self):
with open(csv_filename, 'rb') as uncompressed_fileobj:
gzipped_data = bz2.compress(uncompressed_fileobj.read())
fileobj = io.BytesIO(gzipped_data)
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
try:
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
except Exception as e:
if str(e) != config.get('raises'):
raise

def test_sniff_hints_bzipped_sniffed(self):
for basename in self.sample_file_basenames():
Expand All @@ -109,10 +125,14 @@ def test_sniff_hints_bzipped_sniffed(self):
with open(csv_filename, 'rb') as uncompressed_fileobj:
gzipped_data = bz2.compress(uncompressed_fileobj.read())
fileobj = io.BytesIO(gzipped_data)
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
try:
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
except Exception as e:
if str(e) != config.get('raises'):
raise

@patch('records_mover.records.delimited.sniff.csv')
@patch('records_mover.records.delimited.sniff.stream_csv')
Expand Down
Empty file.
6 changes: 6 additions & 0 deletions tests/unit/resources/hint_sniffing/delimited-zero-bytes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"required": {},
"raises": "No columns to parse from file",
"initial_hints": {},
"notes": "Whatever it figures out is fine so long as it doesn't give an internal error"
}

0 comments on commit 525d9dd

Please sign in to comment.