Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reproduce, fix unexpected exception when processing zero-byte file #65

Merged
merged 1 commit into from
May 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion metrics/coverage_high_water_mark
Original file line number Diff line number Diff line change
@@ -1 +1 @@
93.9900
94.0200
2 changes: 1 addition & 1 deletion metrics/mypy_high_water_mark
Original file line number Diff line number Diff line change
@@ -1 +1 @@
92.1000
92.1100
2 changes: 2 additions & 0 deletions records_mover/records/delimited/sniff.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,8 @@ def sniff_hints(fileobj: IO[bytes],
record_terminator_hint,
final_encoding_hint,
compression_hint)
else:
python_inferred_hints = {}

#
# Pandas can both validate that we chose correctly by parsing
Expand Down
60 changes: 40 additions & 20 deletions tests/unit/records/test_hints.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,14 @@ def test_sniff_hints(self):
initial_hints = config['initial_hints']

with open(csv_filename, 'rb') as fileobj:
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
try:
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
except Exception as e:
if str(e) != config.get('raises'):
raise

def test_sniff_hints_gzipped_preinformed(self):
for basename in self.sample_file_basenames():
Expand All @@ -54,10 +58,14 @@ def test_sniff_hints_gzipped_preinformed(self):
with open(csv_filename, 'rb') as uncompressed_fileobj:
gzipped_data = gzip.compress(uncompressed_fileobj.read())
fileobj = io.BytesIO(gzipped_data)
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
try:
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
except Exception as e:
if str(e) != config.get('raises'):
raise

def test_sniff_hints_gzipped_sniffed(self):
for basename in self.sample_file_basenames():
Expand All @@ -72,10 +80,14 @@ def test_sniff_hints_gzipped_sniffed(self):
with open(csv_filename, 'rb') as uncompressed_fileobj:
gzipped_data = gzip.compress(uncompressed_fileobj.read())
fileobj = io.BytesIO(gzipped_data)
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
try:
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
except Exception as e:
if str(e) != config.get('raises'):
raise

def test_sniff_hints_bzipped_preinformed(self):
for basename in self.sample_file_basenames():
Expand All @@ -91,10 +103,14 @@ def test_sniff_hints_bzipped_preinformed(self):
with open(csv_filename, 'rb') as uncompressed_fileobj:
gzipped_data = bz2.compress(uncompressed_fileobj.read())
fileobj = io.BytesIO(gzipped_data)
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
try:
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
except Exception as e:
if str(e) != config.get('raises'):
raise

def test_sniff_hints_bzipped_sniffed(self):
for basename in self.sample_file_basenames():
Expand All @@ -109,10 +125,14 @@ def test_sniff_hints_bzipped_sniffed(self):
with open(csv_filename, 'rb') as uncompressed_fileobj:
gzipped_data = bz2.compress(uncompressed_fileobj.read())
fileobj = io.BytesIO(gzipped_data)
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
try:
hints = sniff_hints(fileobj, initial_hints=initial_hints)
self.assertTrue(set(required_hints.items()).issubset(set(hints.items())),
f"Expected at least these hints while reading {basename}: "
f"{required_hints}, found these hints: {hints}")
except Exception as e:
if str(e) != config.get('raises'):
raise

@patch('records_mover.records.delimited.sniff.csv')
@patch('records_mover.records.delimited.sniff.stream_csv')
Expand Down
Empty file.
6 changes: 6 additions & 0 deletions tests/unit/resources/hint_sniffing/delimited-zero-bytes.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"required": {},
"raises": "No columns to parse from file",
"initial_hints": {},
"notes": "Whatever it figures out is fine so long as it doesn't give an internal error"
}