Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve date/time hint handling for Redshift based on better testing #149

Merged
merged 6 commits into from
Dec 10, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion metrics/coverage_high_water_mark
Original file line number Diff line number Diff line change
@@ -1 +1 @@
93.3500
93.4000
6 changes: 4 additions & 2 deletions records_mover/db/redshift/records_unload.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,12 @@ def redshift_unload_options(unhandled_hints: Set[str],
if hints.encoding != 'UTF8':
cant_handle_hint(fail_if_cant_handle_hint, 'encoding', hints)
quiet_remove(unhandled_hints, 'encoding')
if hints.datetimeformattz != 'YYYY-MM-DD HH:MI:SSOF':
if hints.datetimeformattz not in ['YYYY-MM-DD HH:MI:SSOF',
'YYYY-MM-DD HH24:MI:SSOF']:
cant_handle_hint(fail_if_cant_handle_hint, 'datetimeformattz', hints)
quiet_remove(unhandled_hints, 'datetimeformattz')
if hints.datetimeformat != 'YYYY-MM-DD HH24:MI:SS':
if hints.datetimeformat not in ['YYYY-MM-DD HH24:MI:SS',
'YYYY-MM-DD HH:MI:SS']:
cant_handle_hint(fail_if_cant_handle_hint, 'datetimeformat', hints)
quiet_remove(unhandled_hints, 'datetimeformat')
if hints.dateformat != 'YYYY-MM-DD':
Expand Down
Empty file.
163 changes: 163 additions & 0 deletions tests/component/db/redshift/test_records_copy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import unittest
from records_mover.db.redshift.records_copy import redshift_copy_options
from records_mover.records import DelimitedRecordsFormat, AvroRecordsFormat
from sqlalchemy_redshift.commands import Encoding, Format
from ...records.datetime_cases import (
DATE_CASES, DATETIMETZ_CASES, DATETIME_CASES, TIMEONLY_CASES
)


class TestRecordsCopy(unittest.TestCase):
def test_redshift_copy_options_encodings(self):
tests = {
'UTF16': Encoding.utf16,
'UTF16LE': Encoding.utf16le,
'UTF16BE': Encoding.utf16be
}
for hint_spelling, redshift_sqlalchemy_spelling in tests.items():

records_format =\
DelimitedRecordsFormat(variant='bluelabs',
hints={
'encoding': hint_spelling
})
unhandled_hints = set(records_format.hints.keys())
out = redshift_copy_options(unhandled_hints,
records_format,
fail_if_cant_handle_hint=True,
fail_if_row_invalid=True,
max_failure_rows=0)
self.assertIs(out['encoding'], redshift_sqlalchemy_spelling)

def test_redshift_copy_options_avro(self):
records_format = AvroRecordsFormat()
unhandled_hints = set()
out = redshift_copy_options(unhandled_hints,
records_format,
fail_if_cant_handle_hint=True,
fail_if_row_invalid=True,
max_failure_rows=0)
self.assertIs(out['format'], Format.avro)
Copy link
Contributor Author

@vinceatbluelabs vinceatbluelabs Dec 9, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The above was hanging out in tests/unit; I promoted it to tests/component and added the code below.


def test_redshift_copy_options_dateformat(self):
# The records spec's date/time formats are based on Redshift's
# spec originally, so it's expected that everything here would
# be accepted as-is, but please double-check with Redshift's
# docs as new test cases are added
accept_as_is = {
'YYYY-MM-DD': True,
'MM-DD-YYYY': True,
'DD-MM-YYYY': True,
'MM/DD/YY': True,
'DD/MM/YY': True,
'DD-MM-YY': True,
}
for dateformat in DATE_CASES:
records_format =\
DelimitedRecordsFormat(variant='bluelabs',
hints={
'dateformat': dateformat
})
unhandled_hints = set(records_format.hints.keys())
out = redshift_copy_options(unhandled_hints,
records_format,
fail_if_cant_handle_hint=True,
fail_if_row_invalid=True,
max_failure_rows=0)
if accept_as_is[dateformat]:
self.assertIs(out['date_format'], dateformat)
else:
self.fail('define what to expect here')

def test_redshift_copy_options_datetimeformattz(self):
# Redshift's time_format doesn't support separate
# configuration for datetimeformat vs datetimeformattz, but
# the 'auto' flag seems to work with specific things (see
# tests run in records_copy.py).
#
# Please verify new formats have a test run
# and documented in records_copy.py before putting an entry in
# here.
expectations = {
'YYYY-MM-DD HH:MI:SSOF': 'auto',
'YYYY-MM-DD HH:MI:SS': 'YYYY-MM-DD HH:MI:SS',
'YYYY-MM-DD HH24:MI:SSOF': 'auto',
'MM/DD/YY HH24:MIOF': 'auto',
'MM/DD/YY HH24:MI': 'MM/DD/YY HH24:MI',
}
for datetimeformattz in DATETIMETZ_CASES:
hints = {
'datetimeformattz': datetimeformattz,
'datetimeformat': datetimeformattz.replace('OF', '')
}
records_format =\
DelimitedRecordsFormat(variant='bluelabs',
hints=hints)
unhandled_hints = set(records_format.hints.keys())
out = redshift_copy_options(unhandled_hints,
records_format,
fail_if_cant_handle_hint=True,
fail_if_row_invalid=True,
max_failure_rows=0)
self.assertEquals(out['time_format'], expectations[datetimeformattz])

def test_redshift_copy_options_datetimeformat(self):
# Redshift's time_format doesn't support separate
# configuration for datetimeformat vs datetimeformattz, but
# the 'auto' flag seems to work with specific things (see
# tests run in records_copy.py).
#
# Please verify new formats have a test run
# and documented in records_copy.py before putting an entry in
# here.
for datetimeformat in DATETIME_CASES:
hints = {
'datetimeformattz': f"{datetimeformat}OF",
'datetimeformat': datetimeformat,
}
records_format =\
DelimitedRecordsFormat(variant='bluelabs',
hints=hints)
unhandled_hints = set(records_format.hints.keys())
out = redshift_copy_options(unhandled_hints,
records_format,
fail_if_cant_handle_hint=True,
fail_if_row_invalid=True,
max_failure_rows=0)
self.assertEqual(out['time_format'], 'auto')
for datetimeformat in DATETIME_CASES:
hints = {
'datetimeformattz': datetimeformat,
'datetimeformat': datetimeformat,
}
records_format =\
DelimitedRecordsFormat(variant='bluelabs',
hints=hints)
unhandled_hints = set(records_format.hints.keys())
out = redshift_copy_options(unhandled_hints,
records_format,
fail_if_cant_handle_hint=True,
fail_if_row_invalid=True,
max_failure_rows=0)
self.assertEqual(out['time_format'], datetimeformat)

def test_redshift_copy_options_timeonlyformat(self):
# Redshift didn't used to have a time only format, so Records
# Mover doesn't yet support anything here and just treats
# these as strings. Nothing should raise.
#
# https://github.com/bluelabsio/records-mover/issues/141
for timeonlyformat in TIMEONLY_CASES:
hints = {
'timeonlyformat': timeonlyformat,
}
records_format =\
DelimitedRecordsFormat(variant='bluelabs',
hints=hints)
unhandled_hints = set(records_format.hints.keys())
redshift_copy_options(unhandled_hints,
records_format,
fail_if_cant_handle_hint=True,
fail_if_row_invalid=True,
max_failure_rows=0)
# hurray, we didn't raise!
119 changes: 119 additions & 0 deletions tests/component/db/redshift/test_records_unload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import unittest
from typing import Set
from records_mover.db.redshift.records_unload import redshift_unload_options
from records_mover.records import DelimitedRecordsFormat
from ...records.datetime_cases import (
DATE_CASES, DATETIMETZ_CASES, DATETIME_CASES, TIMEONLY_CASES
)


class TestRecordsUnload(unittest.TestCase):
def test_redshift_unload_options_datetimeformattz(self):
# Redshift offers no options and only unloads YYYY-MM-DD
# HH:MI:SSOF, so we should reject everything else. Double
# check with the docs just in case, though--maybe that's
# changed!
expected_failures = {
'YYYY-MM-DD HH:MI:SS',
'MM/DD/YY HH24:MI',
}
for datetimeformattz in DATETIMETZ_CASES:
hints = {
'datetimeformattz': datetimeformattz,
}
records_format =\
DelimitedRecordsFormat(variant='bluelabs',
hints=hints)
unhandled_hints = set(records_format.hints.keys())
try:
redshift_unload_options(unhandled_hints,
records_format,
fail_if_cant_handle_hint=True)
except NotImplementedError:
if datetimeformattz in expected_failures:
continue
else:
raise
self.assertNotIn(datetimeformattz, expected_failures)

def test_redshift_unload_options_datetimeformat(self):
# Redshift offers no options and only unloads YYYY-MM-DD
# HH:MI:SS, so we should reject everything else. Double
# check with the docs just in case, though--maybe that's
# changed!
expected_failures = {
'YYYY-MM-DD HH12:MI AM',
'MM/DD/YY HH24:MI',
}
for datetimeformat in DATETIME_CASES:
hints = {
'datetimeformat': datetimeformat,
}
records_format =\
DelimitedRecordsFormat(variant='bluelabs',
hints=hints)
unhandled_hints = set(records_format.hints.keys())
try:
redshift_unload_options(unhandled_hints,
records_format,
fail_if_cant_handle_hint=True)
except NotImplementedError:
if datetimeformat in expected_failures:
continue
else:
raise
self.assertNotIn(datetimeformat, expected_failures)

def test_redshift_unload_options_timeonlyformat(self):
# Redshift didn't used to have a time only format, so Records
# Mover doesn't yet support anything here and just treats
# these as strings. Nothing should raise.
expected_failures: Set[str] = set()
for timeonlyformat in TIMEONLY_CASES:
hints = {
'timeonlyformat': timeonlyformat,
}
records_format =\
DelimitedRecordsFormat(variant='bluelabs',
hints=hints)
unhandled_hints = set(records_format.hints.keys())
try:
redshift_unload_options(unhandled_hints,
records_format,
fail_if_cant_handle_hint=True)
except NotImplementedError:
if timeonlyformat in expected_failures:
continue
else:
raise
self.assertNotIn(timeonlyformat, expected_failures)

def test_redshift_unload_options_dateformat(self):
# Redshift offers no options and only unloads YYYY-MM-DD, so
# we should reject everything else. Double check with the
# docs just in case, though--maybe that's changed!
expected_failures = {
'MM-DD-YYYY',
'DD-MM-YYYY',
'MM/DD/YY',
'DD/MM/YY',
'DD-MM-YY',
}
for dateformat in DATE_CASES:
hints = {
'dateformat': dateformat,
}
records_format =\
DelimitedRecordsFormat(variant='bluelabs',
hints=hints)
unhandled_hints = set(records_format.hints.keys())
try:
redshift_unload_options(unhandled_hints,
records_format,
fail_if_cant_handle_hint=True)
except NotImplementedError:
if dateformat in expected_failures:
continue
else:
raise
self.assertNotIn(dateformat, expected_failures)
37 changes: 0 additions & 37 deletions tests/unit/db/redshift/test_records_copy.py

This file was deleted.