Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop overly-complected Christmas tree tests #145

Merged
merged 4 commits into from
Dec 9, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 0 additions & 29 deletions tests/component/db/vertica/test_vertica_import_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
from records_mover.records.load_plan import RecordsLoadPlan
from records_mover.records.processing_instructions import ProcessingInstructions
from records_mover.db.vertica.records_import_options import vertica_import_options
from ...records.format_hints import christmas_tree_format_1_hints
from records_mover.records.delimited.utils import logger as driver_logger
import unittest
from mock import call, patch
Expand Down Expand Up @@ -87,34 +86,6 @@ def test_vertica_format_permissive(self):
self.assertDictEqual(options, expected_options)
self.assertEqual(unhandled_hints, set())

def test_christmas_tree_format_1_permissive(self):
vertica_format = DelimitedRecordsFormat(variant='dumb', hints=christmas_tree_format_1_hints)
processing_instructions = ProcessingInstructions(fail_if_cant_handle_hint=False)
load_plan = RecordsLoadPlan(processing_instructions=processing_instructions,
records_format=vertica_format)
unhandled_hints = set(load_plan.records_format.hints.keys())
with patch.object(driver_logger, 'warning') as mock_warning:
options = vertica_import_options(unhandled_hints, load_plan)
expected_options = {
'abort_on_error': True,
'delimiter': '\x01',
'enforcelength': True,
'error_tolerance': False,
'escape_as': '\\',
'load_method': 'AUTO',
'no_commit': False,
'null_as': None,
'record_terminator': '\x02',
'rejectmax': 1,
'skip': 1,
'trailing_nullcols': False,
}
self.assertDictEqual(options, expected_options)
self.assertListEqual(mock_warning.mock_calls,
[call("Ignoring hint compression = 'LZO'"),
call("Ignoring hint quoting = 'nonnumeric'")])
self.assertEqual(unhandled_hints, set())
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests try to validate things by a growing collection of eclectic hints which try to test a number of different things simultaneously.

Cool, I guess, but the downside is that debugging them when they fail is really hard and creates cognitive load.

Instead I'd like to test hints more individually - the tests in #144 and #140 are better examples of how I'd like to write these tests going forward.

I've been wanting to kill these for a while, but the forcing function was that I tried to debug these after making clearly safe changes and it gave me too much of a head-ache trying to detangle what was going on.


def test_weird_timeonlyformat(self):
vertica_format = DelimitedRecordsFormat(variant='dumb', hints={
'timeonlyformat': 'something else'
Expand Down
64 changes: 0 additions & 64 deletions tests/component/records/format_hints.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,67 +45,3 @@
'datetimeformattz': 'YYYY-MM-DD HH:MI:SSOF',
'header-row': False,
}

christmas_tree_format_1_hints = {
'field-delimiter': '\001',
'record-terminator': '\002',
'compression': 'LZO',
'quoting': 'nonnumeric',
'quotechar': '"',
'doublequote': False,
'escape': '\\',
'encoding': 'UTF8',
'dateformat': 'YYYY-MM-DD',
'timeonlyformat': 'HH24:MI:SS',
'datetimeformat': 'YYYY-MM-DD HH24:MI:SS',
'datetimeformattz': 'YYYY-MM-DD HH:MI:SSOF',
'header-row': True,
}

christmas_tree_format_2_hints = {
'field-delimiter': '\001',
'record-terminator': '\002',
'compression': 'BZIP',
'quoting': 'all',
'quotechar': '"',
'doublequote': True,
'escape': '@', # not really allowed in the spec, but let's see what happens
'encoding': 'UTF8',
'dateformat': 'MM-DD-YYYY',
'timeonlyformat': 'HH24:MI:SS',
'datetimeformat': 'YYYY-MM-DD HH24:MI:SS',
'datetimeformattz': 'HH:MI:SSOF YYYY-MM-DD', # also not allowed
'header-row': False,
}

christmas_tree_format_3_hints = {
'field-delimiter': '\001',
'record-terminator': '\002',
'compression': 'BZIP',
'quoting': 'some_future_option_not_supported_now',
'quotechar': '"',
'doublequote': True,
'escape': '@', # not really allowed in the spec, but let's see what happens
'encoding': 'UTF8',
'dateformat': 'DD-MM-YYYY',
'timeonlyformat': 'HH24:MI:SS',
'datetimeformat': 'YYYY-MM-DD HH24:MI:SS',
'datetimeformattz': 'HH:MI:SSOF YYYY-MM-DD', # also not allowed
'header-row': False,
}

christmas_tree_format_4_hints = {
'field-delimiter': '\001',
'record-terminator': '\002',
'compression': 'BZIP',
'quoting': 'some_future_option_not_supported_now',
'quotechar': '"',
'doublequote': True,
'escape': '@', # not really allowed in the spec, but let's see what happens
'encoding': 'UTF8',
'dateformat': 'totally_bogus_just_made_this_up',
'timeonlyformat': 'HH24:MI:SS',
'datetimeformat': 'YYYY-MM-DD HH24:MI:SS',
'datetimeformattz': 'HH:MI:SSOF YYYY-MM-DD', # also not allowed
'header-row': False,
}

This file was deleted.

87 changes: 1 addition & 86 deletions tests/unit/db/redshift/test_redshift_db_driver_unload.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from .base_test_redshift_db_driver import BaseTestRedshiftDBDriver
from ...records.format_hints import (bluelabs_format_hints,
christmas_tree_format_1_hints,
christmas_tree_format_2_hints)
from ...records.format_hints import bluelabs_format_hints
from records_mover.records.delimited.utils import logger as driver_logger
from records_mover.records import DelimitedRecordsFormat
from mock import call, patch
Expand Down Expand Up @@ -85,86 +83,3 @@ def test_unload(self,
}
mock_UnloadFromSelect.assert_called_with(**expected_args)
self.assertEqual(456, rows)

@patch('records_mover.db.redshift.unloader.UnloadFromSelect')
@patch('records_mover.db.redshift.unloader.text')
def test_unload_christmas_tree_unsupported_options_with_fast_warns_1(self,
mock_text,
mock_UnloadFromSelect):
mock_text.side_effect = fake_text
self.mock_directory.scheme = 's3'
with patch.object(driver_logger, 'warning') as mock_warning:
self.mock_records_unload_plan.processing_instructions.fail_if_dont_understand = False
self.mock_records_unload_plan.processing_instructions.fail_if_cant_handle_hint = False

self.mock_records_unload_plan.records_format =\
DelimitedRecordsFormat(variant='bluelabs',
hints=christmas_tree_format_1_hints)
self.mock_db_engine.execute.return_value.scalar.return_value = 456
rows = self.redshift_db_driver.unloader().\
unload(schema='myschema',
table='mytable',
unload_plan=self.mock_records_unload_plan,
directory=self.mock_directory)
self.assertCountEqual(mock_warning.mock_calls,
[call("Ignoring hint record-terminator = '\\x02'"),
call("Ignoring hint quoting = 'nonnumeric'"),
call("Ignoring hint header-row = True"),
call("Ignoring hint compression = 'LZO'"),
call("Did not understand these hints: header-row=True")])

expected_args = {
'access_key_id': 'fake_aws_id',
'delimiter': '\x01',
'escape': True,
'manifest': True,
'secret_access_key': 'fake_aws_secret',
'select': ('SELECT * FROM myschema.mytable',),
'session_token': 'fake_aws_token',
'unload_location': 's3://mybucket/myparent/mychild/'
}
mock_UnloadFromSelect.assert_called_with(**expected_args)
self.assertEqual(456, rows)

@patch('records_mover.db.redshift.unloader.UnloadFromSelect')
@patch('records_mover.db.redshift.unloader.text')
def test_unload_christmas_tree_unsupported_options_with_fast_warns_2(self,
mock_text,
mock_UnloadFromSelect):
mock_text.side_effect = fake_text
self.mock_directory.scheme = 's3'
with patch.object(driver_logger, 'warning') as mock_warning:
self.mock_records_unload_plan.processing_instructions.fail_if_dont_understand = False
self.mock_records_unload_plan.processing_instructions.fail_if_cant_handle_hint = False
self.mock_records_unload_plan.records_format =\
DelimitedRecordsFormat(variant='bluelabs',
hints=christmas_tree_format_2_hints)
self.mock_db_engine.execute.return_value.scalar.return_value = 456
rows = self.redshift_db_driver.unloader().\
unload(schema='myschema',
table='mytable',
unload_plan=self.mock_records_unload_plan,
directory=self.mock_directory)
self.assertListEqual(mock_warning.mock_calls,
[call("Ignoring hint escape = '@'"),
call("Ignoring hint datetimeformattz = 'HH:MI:SSOF YYYY-MM-DD'"),
call("Ignoring hint record-terminator = '\\x02'"),
call("Ignoring hint doublequote = True"),
call("Ignoring hint compression = 'BZIP'"),
call("Ignoring hint datetimeformattz = "
"'YYYY-MM-DD HH24:MI:SSOF'"),
call("Ignoring hint dateformat = 'MM-DD-YYYY'")])

expected_args = {
'access_key_id': 'fake_aws_id',
'escape': True,
'add_quotes': True,
'delimiter': '\x01',
'manifest': True,
'secret_access_key': 'fake_aws_secret',
'select': ('SELECT * FROM myschema.mytable',),
'session_token': 'fake_aws_token',
'unload_location': 's3://mybucket/myparent/mychild/'
}
mock_UnloadFromSelect.assert_called_with(**expected_args)
self.assertEqual(456, rows)
16 changes: 16 additions & 0 deletions tests/unit/records/schema/field/test_field.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,3 +242,19 @@ def test_cast_series_type_time_timedelta_entries(self):
series = pd.Series(data)
new_series = field.cast_series_type(series)
self.assertEqual(new_series[0], '01:23:45')

# def test_cast_series_type_time_timedelta_entries_zeroed(self):
# mock_name = Mock(name='name')
# mock_field_type = 'time'
# mock_constraints = Mock(name='constraints')
# mock_statistics = Mock(name='statistics')
# mock_representations = Mock(name='representations')
# field = RecordsSchemaField(name=mock_name,
# field_type=mock_field_type,
# constraints=mock_constraints,
# statistics=mock_statistics,
# representations=mock_representations)
# data = np.array([pd.Timedelta(hours=0, minutes=0, seconds=0)])
# series = pd.Series(data)
# new_series = field.cast_series_type(series)
# self.assertEqual(new_series[0], '00:00:00')
Loading