diff --git a/metrics/coverage_high_water_mark b/metrics/coverage_high_water_mark index dbe7c664b..aaad226da 100644 --- a/metrics/coverage_high_water_mark +++ b/metrics/coverage_high_water_mark @@ -1 +1 @@ -93.4400 \ No newline at end of file +93.4800 \ No newline at end of file diff --git a/records_mover/db/redshift/records_unload.py b/records_mover/db/redshift/records_unload.py index 7b5088a78..e7518c84b 100644 --- a/records_mover/db/redshift/records_unload.py +++ b/records_mover/db/redshift/records_unload.py @@ -64,10 +64,12 @@ def redshift_unload_options(unhandled_hints: Set[str], if hints.encoding != 'UTF8': cant_handle_hint(fail_if_cant_handle_hint, 'encoding', hints) quiet_remove(unhandled_hints, 'encoding') - if hints.datetimeformattz != 'YYYY-MM-DD HH:MI:SSOF': + if hints.datetimeformattz not in ['YYYY-MM-DD HH:MI:SSOF', + 'YYYY-MM-DD HH24:MI:SSOF']: cant_handle_hint(fail_if_cant_handle_hint, 'datetimeformattz', hints) quiet_remove(unhandled_hints, 'datetimeformattz') - if hints.datetimeformat != 'YYYY-MM-DD HH24:MI:SS': + if hints.datetimeformat not in ['YYYY-MM-DD HH24:MI:SS', + 'YYYY-MM-DD HH:MI:SS']: cant_handle_hint(fail_if_cant_handle_hint, 'datetimeformat', hints) quiet_remove(unhandled_hints, 'datetimeformat') if hints.dateformat != 'YYYY-MM-DD': diff --git a/tests/component/db/redshift/__init__.py b/tests/component/db/redshift/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/component/db/redshift/test_records_copy.py b/tests/component/db/redshift/test_records_copy.py new file mode 100644 index 000000000..33635cdc0 --- /dev/null +++ b/tests/component/db/redshift/test_records_copy.py @@ -0,0 +1,163 @@ +import unittest +from records_mover.db.redshift.records_copy import redshift_copy_options +from records_mover.records import DelimitedRecordsFormat, AvroRecordsFormat +from sqlalchemy_redshift.commands import Encoding, Format +from ...records.datetime_cases import ( + DATE_CASES, DATETIMETZ_CASES, DATETIME_CASES, TIMEONLY_CASES +) + + +class TestRecordsCopy(unittest.TestCase): + def test_redshift_copy_options_encodings(self): + tests = { + 'UTF16': Encoding.utf16, + 'UTF16LE': Encoding.utf16le, + 'UTF16BE': Encoding.utf16be + } + for hint_spelling, redshift_sqlalchemy_spelling in tests.items(): + + records_format =\ + DelimitedRecordsFormat(variant='bluelabs', + hints={ + 'encoding': hint_spelling + }) + unhandled_hints = set(records_format.hints.keys()) + out = redshift_copy_options(unhandled_hints, + records_format, + fail_if_cant_handle_hint=True, + fail_if_row_invalid=True, + max_failure_rows=0) + self.assertIs(out['encoding'], redshift_sqlalchemy_spelling) + + def test_redshift_copy_options_avro(self): + records_format = AvroRecordsFormat() + unhandled_hints = set() + out = redshift_copy_options(unhandled_hints, + records_format, + fail_if_cant_handle_hint=True, + fail_if_row_invalid=True, + max_failure_rows=0) + self.assertIs(out['format'], Format.avro) + + def test_redshift_copy_options_dateformat(self): + # The records spec's date/time formats are based on Redshift's + # spec originally, so it's expected that everything here would + # be accepted as-is, but please double-check with Redshift's + # docs as new test cases are added + accept_as_is = { + 'YYYY-MM-DD': True, + 'MM-DD-YYYY': True, + 'DD-MM-YYYY': True, + 'MM/DD/YY': True, + 'DD/MM/YY': True, + 'DD-MM-YY': True, + } + for dateformat in DATE_CASES: + records_format =\ + DelimitedRecordsFormat(variant='bluelabs', + hints={ + 'dateformat': dateformat + }) + unhandled_hints = set(records_format.hints.keys()) + out = redshift_copy_options(unhandled_hints, + records_format, + fail_if_cant_handle_hint=True, + fail_if_row_invalid=True, + max_failure_rows=0) + if accept_as_is[dateformat]: + self.assertIs(out['date_format'], dateformat) + else: + self.fail('define what to expect here') + + def test_redshift_copy_options_datetimeformattz(self): + # Redshift's time_format doesn't support separate + # configuration for datetimeformat vs datetimeformattz, but + # the 'auto' flag seems to work with specific things (see + # tests run in records_copy.py). + # + # Please verify new formats have a test run + # and documented in records_copy.py before putting an entry in + # here. + expectations = { + 'YYYY-MM-DD HH:MI:SSOF': 'auto', + 'YYYY-MM-DD HH:MI:SS': 'YYYY-MM-DD HH:MI:SS', + 'YYYY-MM-DD HH24:MI:SSOF': 'auto', + 'MM/DD/YY HH24:MIOF': 'auto', + 'MM/DD/YY HH24:MI': 'MM/DD/YY HH24:MI', + } + for datetimeformattz in DATETIMETZ_CASES: + hints = { + 'datetimeformattz': datetimeformattz, + 'datetimeformat': datetimeformattz.replace('OF', '') + } + records_format =\ + DelimitedRecordsFormat(variant='bluelabs', + hints=hints) + unhandled_hints = set(records_format.hints.keys()) + out = redshift_copy_options(unhandled_hints, + records_format, + fail_if_cant_handle_hint=True, + fail_if_row_invalid=True, + max_failure_rows=0) + self.assertEquals(out['time_format'], expectations[datetimeformattz]) + + def test_redshift_copy_options_datetimeformat(self): + # Redshift's time_format doesn't support separate + # configuration for datetimeformat vs datetimeformattz, but + # the 'auto' flag seems to work with specific things (see + # tests run in records_copy.py). + # + # Please verify new formats have a test run + # and documented in records_copy.py before putting an entry in + # here. + for datetimeformat in DATETIME_CASES: + hints = { + 'datetimeformattz': f"{datetimeformat}OF", + 'datetimeformat': datetimeformat, + } + records_format =\ + DelimitedRecordsFormat(variant='bluelabs', + hints=hints) + unhandled_hints = set(records_format.hints.keys()) + out = redshift_copy_options(unhandled_hints, + records_format, + fail_if_cant_handle_hint=True, + fail_if_row_invalid=True, + max_failure_rows=0) + self.assertEqual(out['time_format'], 'auto') + for datetimeformat in DATETIME_CASES: + hints = { + 'datetimeformattz': datetimeformat, + 'datetimeformat': datetimeformat, + } + records_format =\ + DelimitedRecordsFormat(variant='bluelabs', + hints=hints) + unhandled_hints = set(records_format.hints.keys()) + out = redshift_copy_options(unhandled_hints, + records_format, + fail_if_cant_handle_hint=True, + fail_if_row_invalid=True, + max_failure_rows=0) + self.assertEqual(out['time_format'], datetimeformat) + + def test_redshift_copy_options_timeonlyformat(self): + # Redshift didn't used to have a time only format, so Records + # Mover doesn't yet support anything here and just treats + # these as strings. Nothing should raise. + # + # https://github.com/bluelabsio/records-mover/issues/141 + for timeonlyformat in TIMEONLY_CASES: + hints = { + 'timeonlyformat': timeonlyformat, + } + records_format =\ + DelimitedRecordsFormat(variant='bluelabs', + hints=hints) + unhandled_hints = set(records_format.hints.keys()) + redshift_copy_options(unhandled_hints, + records_format, + fail_if_cant_handle_hint=True, + fail_if_row_invalid=True, + max_failure_rows=0) + # hurray, we didn't raise! diff --git a/tests/component/db/redshift/test_records_unload.py b/tests/component/db/redshift/test_records_unload.py new file mode 100644 index 000000000..2368efa4d --- /dev/null +++ b/tests/component/db/redshift/test_records_unload.py @@ -0,0 +1,119 @@ +import unittest +from typing import Set +from records_mover.db.redshift.records_unload import redshift_unload_options +from records_mover.records import DelimitedRecordsFormat +from ...records.datetime_cases import ( + DATE_CASES, DATETIMETZ_CASES, DATETIME_CASES, TIMEONLY_CASES +) + + +class TestRecordsUnload(unittest.TestCase): + def test_redshift_unload_options_datetimeformattz(self): + # Redshift offers no options and only unloads YYYY-MM-DD + # HH:MI:SSOF, so we should reject everything else. Double + # check with the docs just in case, though--maybe that's + # changed! + expected_failures = { + 'YYYY-MM-DD HH:MI:SS', + 'MM/DD/YY HH24:MI', + } + for datetimeformattz in DATETIMETZ_CASES: + hints = { + 'datetimeformattz': datetimeformattz, + } + records_format =\ + DelimitedRecordsFormat(variant='bluelabs', + hints=hints) + unhandled_hints = set(records_format.hints.keys()) + try: + redshift_unload_options(unhandled_hints, + records_format, + fail_if_cant_handle_hint=True) + except NotImplementedError: + if datetimeformattz in expected_failures: + continue + else: + raise + self.assertNotIn(datetimeformattz, expected_failures) + + def test_redshift_unload_options_datetimeformat(self): + # Redshift offers no options and only unloads YYYY-MM-DD + # HH:MI:SS, so we should reject everything else. Double + # check with the docs just in case, though--maybe that's + # changed! + expected_failures = { + 'YYYY-MM-DD HH12:MI AM', + 'MM/DD/YY HH24:MI', + } + for datetimeformat in DATETIME_CASES: + hints = { + 'datetimeformat': datetimeformat, + } + records_format =\ + DelimitedRecordsFormat(variant='bluelabs', + hints=hints) + unhandled_hints = set(records_format.hints.keys()) + try: + redshift_unload_options(unhandled_hints, + records_format, + fail_if_cant_handle_hint=True) + except NotImplementedError: + if datetimeformat in expected_failures: + continue + else: + raise + self.assertNotIn(datetimeformat, expected_failures) + + def test_redshift_unload_options_timeonlyformat(self): + # Redshift didn't used to have a time only format, so Records + # Mover doesn't yet support anything here and just treats + # these as strings. Nothing should raise. + expected_failures: Set[str] = set() + for timeonlyformat in TIMEONLY_CASES: + hints = { + 'timeonlyformat': timeonlyformat, + } + records_format =\ + DelimitedRecordsFormat(variant='bluelabs', + hints=hints) + unhandled_hints = set(records_format.hints.keys()) + try: + redshift_unload_options(unhandled_hints, + records_format, + fail_if_cant_handle_hint=True) + except NotImplementedError: + if timeonlyformat in expected_failures: + continue + else: + raise + self.assertNotIn(timeonlyformat, expected_failures) + + def test_redshift_unload_options_dateformat(self): + # Redshift offers no options and only unloads YYYY-MM-DD, so + # we should reject everything else. Double check with the + # docs just in case, though--maybe that's changed! + expected_failures = { + 'MM-DD-YYYY', + 'DD-MM-YYYY', + 'MM/DD/YY', + 'DD/MM/YY', + 'DD-MM-YY', + } + for dateformat in DATE_CASES: + hints = { + 'dateformat': dateformat, + } + records_format =\ + DelimitedRecordsFormat(variant='bluelabs', + hints=hints) + unhandled_hints = set(records_format.hints.keys()) + try: + redshift_unload_options(unhandled_hints, + records_format, + fail_if_cant_handle_hint=True) + except NotImplementedError: + if dateformat in expected_failures: + continue + else: + raise + self.assertNotIn(dateformat, expected_failures) diff --git a/tests/unit/db/redshift/test_records_copy.py b/tests/unit/db/redshift/test_records_copy.py deleted file mode 100644 index c49a6c2ec..000000000 --- a/tests/unit/db/redshift/test_records_copy.py +++ /dev/null @@ -1,37 +0,0 @@ -import unittest -from records_mover.db.redshift.records_copy import redshift_copy_options -from records_mover.records import DelimitedRecordsFormat, AvroRecordsFormat -from sqlalchemy_redshift.commands import Encoding, Format - - -class TestRecordsCopy(unittest.TestCase): - def test_redshift_copy_options_encodings(self): - tests = { - 'UTF16': Encoding.utf16, - 'UTF16LE': Encoding.utf16le, - 'UTF16BE': Encoding.utf16be - } - for hint_spelling, redshift_sqlalchemy_spelling in tests.items(): - - records_format =\ - DelimitedRecordsFormat(variant='bluelabs', - hints={ - 'encoding': hint_spelling - }) - unhandled_hints = set(records_format.hints.keys()) - out = redshift_copy_options(unhandled_hints, - records_format, - fail_if_cant_handle_hint=True, - fail_if_row_invalid=True, - max_failure_rows=0) - self.assertIs(out['encoding'], redshift_sqlalchemy_spelling) - - def test_redshift_copy_options_avro(self): - records_format = AvroRecordsFormat() - unhandled_hints = set() - out = redshift_copy_options(unhandled_hints, - records_format, - fail_if_cant_handle_hint=True, - fail_if_row_invalid=True, - max_failure_rows=0) - self.assertIs(out['format'], Format.avro)