
Convert Asana tickets to GitHub issues (#96)
vinceatbluelabs authored Jul 7, 2020
1 parent 0baaf5a commit f5828e9
Showing 17 changed files with 29 additions and 29 deletions.
2 changes: 1 addition & 1 deletion records_mover/db/mysql/load_options.py
@@ -248,7 +248,7 @@ def mysql_load_options(unhandled_hints: Set[str],
# to a compatible locale. This is true for a number of the
# database drivers; the backlog item to address is here:
#
# https://app.asana.com/0/1128138765527694/1173779659264666
# https://github.com/bluelabsio/records-mover/issues/75
#
# To address, we'll want to look into "set trade_date" per
# https://stackoverflow.com/questions/44171283/load-data-local-infile-with-sqlalchemy-and-pymysql
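The workaround the comment gestures at is MySQL's `SET` clause in `LOAD DATA`, which routes a raw CSV column through a user variable and `STR_TO_DATE()` so the parse no longer depends on the server locale. A minimal sketch, assuming a hypothetical `trades` table with a `trade_date` column, and LOCAL INFILE enabled on both client and server:

```python
import pymysql

# Hypothetical table/column/file names; LOCAL INFILE must be allowed
# client-side (local_infile=True) and server-side (local_infile=1).
conn = pymysql.connect(host='localhost', user='user', password='pass',
                       database='db', local_infile=True)
with conn.cursor() as cursor:
    cursor.execute("""
        LOAD DATA LOCAL INFILE 'trades.csv'
        INTO TABLE trades
        FIELDS TERMINATED BY ','
        (id, @raw_date)
        SET trade_date = STR_TO_DATE(@raw_date, '%d/%m/%Y')
    """)
conn.commit()
```
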
2 changes: 1 addition & 1 deletion records_mover/records/delimited/conversions.py
@@ -38,7 +38,7 @@
# This is a Union because the date formats currently don't really
# account for MM/DD time.
#
# https://app.asana.com/0/1128138765527694/1173779659264666
# https://github.com/bluelabsio/records-mover/issues/75
#
python_date_format_from_hints: Dict[Union[HintDateFormat, Literal['DD/MM/YY']], str] = {
'YYYY-MM-DD': '%Y-%m-%d',
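The ambiguity that issue #75 tracks is easy to demonstrate: a two-digit day/month string parses successfully under both the MM/DD and DD/MM strftime patterns, so a format-hint table alone cannot disambiguate. A small illustration (the DD/MM pattern here is an assumption, mirroring the `'DD/MM/YY'` literal above):

```python
from datetime import datetime

ambiguous = '02/03/20'
as_mm_dd = datetime.strptime(ambiguous, '%m/%d/%y')  # 2020-02-03
as_dd_mm = datetime.strptime(ambiguous, '%d/%m/%y')  # 2020-03-02
assert as_mm_dd != as_dd_mm  # both parses succeed, with different results
```
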
2 changes: 1 addition & 1 deletion records_mover/records/delimited/sniff.py
@@ -190,7 +190,7 @@ def sniff_compression_hint(fileobj: IO[bytes]) -> HintCompression:
def sniff_hints_from_fileobjs(fileobjs: List[IO[bytes]],
initial_hints: PartialRecordsHints) -> PartialRecordsHints:
if len(fileobjs) != 1:
# https://app.asana.com/0/53283930106309/1131698268455054
# https://github.com/bluelabsio/records-mover/issues/84
raise NotImplementedError('Cannot currently sniff hints from multiple '
'files--please provide hints')
fileobj = fileobjs[0]
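Hint sniffing of the sort `sniff_compression_hint()` performs can be done from a file object's magic bytes alone. A minimal standalone sketch; the returned hint names are an assumption here, chosen to resemble records-spec compression values:

```python
from typing import IO, Optional

def sniff_compression(fileobj: IO[bytes]) -> Optional[str]:
    # Read just enough bytes to identify common container formats,
    # then rewind so downstream readers see the whole stream.
    magic = fileobj.read(3)
    fileobj.seek(0)
    if magic[:2] == b'\x1f\x8b':   # gzip magic number
        return 'GZIP'
    if magic == b'BZh':            # bzip2 magic number
        return 'BZ2'
    return None                    # assume uncompressed
```
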
2 changes: 1 addition & 1 deletion records_mover/records/pandas/to_csv_options.py
@@ -84,7 +84,7 @@ def pandas_to_csv_options(records_format: DelimitedRecordsFormat,
# '2000-01-02 12:34:56.789012'
# >>>
#
# https://app.asana.com/0/1128138765527694/1159958019131681
# https://github.com/bluelabsio/records-mover/issues/95
pandas_options['date_format'] = '%Y-%m-%d %H:%M:%S.%f'
else:
pandas_options['date_format'] = '%Y-%m-%d %H:%M:%S.%f%z'
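The `%f` vs `%f%z` split matters because `DataFrame.to_csv` applies a single `date_format` string to every datetime column, and `%z` only renders something useful for tz-aware values. A quick sketch of both branches:

```python
import pandas as pd

naive = pd.DataFrame({'ts': [pd.Timestamp('2000-01-02 12:34:56.789012')]})
aware = pd.DataFrame({'ts': [pd.Timestamp('2000-01-02 12:34:56.789012',
                                          tz='UTC')]})

print(naive.to_csv(index=False, date_format='%Y-%m-%d %H:%M:%S.%f'))
# 2000-01-02 12:34:56.789012
print(aware.to_csv(index=False, date_format='%Y-%m-%d %H:%M:%S.%f%z'))
# 2000-01-02 12:34:56.789012+0000
```
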
2 changes: 1 addition & 1 deletion records_mover/records/schema/field/numpy.py
@@ -19,7 +19,7 @@ def details_from_numpy_dtype(dtype: numpy.dtype,
if has_tz:
# See: 'Represent pandas datetime64 with timezone in records schema'
#
# https://app.asana.com/0/53283930106309/1132706099772565
# https://github.com/bluelabsio/records-mover/issues/89
field_type = 'datetimetz'
else:
field_type = 'datetime'
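The `has_tz` branch distinguishes pandas' tz-aware extension dtype from plain `datetime64[ns]`. A hedged sketch of that distinction, independent of records-mover's own helpers:

```python
import pandas as pd

naive = pd.Series(pd.to_datetime(['2020-07-07 12:00:00']))
aware = naive.dt.tz_localize('US/Eastern')

# Plain datetime64[ns] carries no zone; DatetimeTZDtype does.
assert not isinstance(naive.dtype, pd.DatetimeTZDtype)  # -> 'datetime'
assert isinstance(aware.dtype, pd.DatetimeTZDtype)      # -> 'datetimetz'
assert str(aware.dtype) == 'datetime64[ns, US/Eastern]'
```
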
4 changes: 2 additions & 2 deletions records_mover/records/schema/field/sqlalchemy.py
@@ -36,7 +36,7 @@ def field_from_sqlalchemy_column(column: Column,
# constraints out of database tables, so set this to null,
# which means "we don't know":
#
# https://app.asana.com/0/1128138765527694/1131416227825120
# https://github.com/bluelabsio/records-mover/issues/90
unique = None

if isinstance(type_, sqlalchemy.sql.sqltypes.Integer):
@@ -114,7 +114,7 @@ def field_from_sqlalchemy_column(column: Column,
# We don't currently gather statistics from databases - which
# can bite us when exporting from BigQuery, for instance:
#
# https://app.asana.com/0/53283930106309/1131698268455053
# https://github.com/bluelabsio/records-mover/issues/91
statistics = None

return RecordsSchemaField(name=name,
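The type checks above run against SQLAlchemy's generic type hierarchy, so they behave the same across dialects. A small sketch of what `isinstance` sees for a couple of column definitions (column names are hypothetical):

```python
import sqlalchemy
from sqlalchemy import Column, Integer, Numeric

int_col = Column('count', Integer)
num_col = Column('price', Numeric(precision=12, scale=2))

assert isinstance(int_col.type, sqlalchemy.sql.sqltypes.Integer)
assert isinstance(num_col.type, sqlalchemy.sql.sqltypes.Numeric)
# Numeric defaults to returning decimal.Decimal values:
assert num_col.type.asdecimal
```
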
4 changes: 2 additions & 2 deletions records_mover/records/schema/schema/__init__.py
@@ -109,7 +109,7 @@ def from_fileobjs(fileobjs: List[IO[bytes]],
from records_mover.pandas import purge_unnamed_unused_columns

if len(fileobjs) != 1:
# https://app.asana.com/0/53283930106309/1131698268455054
# https://github.com/bluelabsio/records-mover/issues/84
raise NotImplementedError('Cannot currently sniff schema from multiple '
'files--please provide explicit schema JSON')
fileobj = fileobjs[0]
@@ -171,7 +171,7 @@ def assign_dataframe_names(self,
index_name = field_names[0]
# RecordsMover supports only single indexes for the moment.
#
# https://app.asana.com/0/1128138765527694/1161071033650873
# https://github.com/bluelabsio/records-mover/issues/92
index_mapping = {0: index_name}

# remove first name, which applies to index
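The single-index mapping amounts to assigning the first field name to the DataFrame's index and the remainder to its columns. A rough standalone equivalent, with hypothetical names:

```python
import pandas as pd

df = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
field_names = ['row_id', 'alpha', 'beta']  # hypothetical schema field names

# First field name labels the (single) index; the rest label the columns.
df.index = df.index.rename(field_names[0])
df.columns = field_names[1:]
```
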
4 changes: 2 additions & 2 deletions records_mover/records/sources/dataframes.py
@@ -86,7 +86,7 @@ def serialize_dfs(self,
# initial_records_schema was based on only the
# first chunk.
#
# https://app.asana.com/0/1128138765527694/1161074649542801
# https://github.com/bluelabsio/records-mover/issues/93
logger.warning("Only checking first chunk for type inference")
i = i + 1

@@ -156,7 +156,7 @@ def save_df(df: 'DataFrame', output_filename: str) -> None:
def save_df(df: 'DataFrame', output_filename: str) -> None:
logger.info(f"Writing Parquet file to {output_filename}")
# Note that this doesn't specify partitioning as of yet -
# https://app.asana.com/0/1128138765527694/1126615025514407
# https://github.com/bluelabsio/records-mover/issues/94
df.to_parquet(fname=output_filename,
engine='pyarrow',
index=self.include_index,
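When partitioning is eventually wanted (issue #94), pandas' pyarrow engine already exposes it through `partition_cols`, which writes a directory tree rather than a single file. A hedged sketch with a hypothetical partition column (note also that `fname=` in the hunk above is the pre-1.0 pandas keyword, later renamed `path`):

```python
import pandas as pd

df = pd.DataFrame({'year': [2019, 2020], 'value': [1.0, 2.0]})

# partition_cols makes pyarrow write out/year=2019/..., out/year=2020/...
df.to_parquet('out', engine='pyarrow', index=False, partition_cols=['year'])
```
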
6 changes: 3 additions & 3 deletions records_mover/records/targets/spectrum.py
@@ -106,11 +106,11 @@ def post_load_hook(self, num_rows_loaded: Optional[int]) -> None:
columns = [f.to_sqlalchemy_column(self.driver) for f in records_schema.fields]
for column in columns:
if isinstance(column.type, sqlalchemy.sql.sqltypes.Numeric) and column.type.asdecimal:
# https://app.asana.com/0/1128138765527694/1139365728890234
# https://github.com/bluelabsio/records-mover/issues/85
raise NotImplementedError("Teach me how to write a NUMERIC to Redshift Spectrum "
f"(column name: {column})")
if isinstance(column.type, sqlalchemy.sql.sqltypes.DateTime) and column.type.timezone:
# https://app.asana.com/0/53283930106309/1136051640283464
# https://github.com/bluelabsio/records-mover/issues/86
raise NotImplementedError("Teach me how to write a datetimetz to Redshift Spectrum "
f"({column})")

@@ -129,7 +129,7 @@ def post_load_hook(self, num_rows_loaded: Optional[int]) -> None:
#
# https://docs.aws.amazon.com/redshift/latest/dg/r_ALTER_TABLE_external-table.html
#
# https://app.asana.com/0/53283930106309/1140172717234001
# https://github.com/bluelabsio/records-mover/issues/87
pass
storage_clause = "STORED AS PARQUET\n"
location_clause = f"LOCATION '{self.output_loc.url}_manifest'\n"
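The clauses being assembled here end up in a Redshift Spectrum `CREATE EXTERNAL TABLE` statement. A rough sketch of how such DDL comes together from the pieces shown; the schema, table, columns, and S3 URL are hypothetical:

```python
schema_name = 'spectrum'                 # hypothetical external schema
table_name = 'events'
columns_sql = 'id INTEGER, ts TIMESTAMP'
output_url = 's3://my-bucket/events/'

storage_clause = "STORED AS PARQUET\n"
location_clause = f"LOCATION '{output_url}_manifest'\n"
ddl = (f"CREATE EXTERNAL TABLE {schema_name}.{table_name} ({columns_sql})\n"
       f"{storage_clause}"
       f"{location_clause}")
```
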
2 changes: 1 addition & 1 deletion records_mover/records/targets/table/target.py
@@ -57,7 +57,7 @@ def __init__(self,
# advertise what format we prefer to be given for mover paths
# that don't yet support full records negotiation.
#
# https://app.asana.com/0/1128138765527694/1130105834872106
# https://github.com/bluelabsio/records-mover/issues/88
self.records_format = next(iter(self.known_supported_records_formats()), None)

def move_from_records_directory(self,
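`next(iter(seq), None)` is simply a safe "first element or None" read: with an empty supported-formats list, the target advertises no preference rather than raising `StopIteration`. A tiny sketch with made-up format names:

```python
def preferred_format(known_formats):
    # First supported format if any, else None ("no preference").
    return next(iter(known_formats), None)

assert preferred_format(['delimited', 'parquet']) == 'delimited'
assert preferred_format([]) is None
```
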
2 changes: 1 addition & 1 deletion tests/integration/records/directory_validator.py
@@ -76,7 +76,7 @@ def validate_records_schema(self) -> None:
# inference to determine if the data in question will
# fit into it.
#
# https://app.asana.com/0/1128138765527694/1166526213569051
# https://github.com/bluelabsio/records-mover/issues/82
# https://stackoverflow.com/questions/31761047/what-difference-between-the-date-time-datetime-and-timestamp-types/56138746
'mysql': [
'integer', 'string', 'string', 'string',
6 changes: 3 additions & 3 deletions tests/integration/records/expected_column_types.py
@@ -75,7 +75,7 @@
# databases like Redshift. We could collect and use
# statistics to get a better read on this:
#
# https://app.asana.com/0/1128138765527694/1152727792219521
# https://github.com/bluelabsio/records-mover/issues/79
#
#
#
@@ -87,8 +87,8 @@
# types from numeric statistics it pulls from the source
# tables.
#
# https://app.asana.com/0/1128138765527694/1152727792219523
# https://app.asana.com/0/1128138765527694/1152727792219521
# https://github.com/bluelabsio/records-mover/issues/78
# https://github.com/bluelabsio/records-mover/issues/79
#
#
#
6 changes: 3 additions & 3 deletions tests/integration/records/single_db/test_records_load.py
@@ -11,8 +11,8 @@
class RecordsLoadIntegrationTest(BaseRecordsIntegrationTest):
def load_and_verify(self, format_type, variant, hints={}, broken=False, sourcefn=None):
if self.engine.name == 'bigquery' and variant == 'csv':
# https://app.asana.com/0/53283930106309/1130065227225218
# https://app.asana.com/0/53283930106309/1130065227225219
# https://github.com/bluelabsio/records-mover/issues/80
# https://github.com/bluelabsio/records-mover/issues/81
logger.warning("This test won't pass until we can use hints to "
"infer date/time/etc columns, or we use records schema "
"from target to set types on the source, so skipping.")
@@ -118,7 +118,7 @@ def sourcefn(filename, records_format, records_schema):
# CSV type inference is not smart enough to identify the
# date/time columns as anything but strings yet.
#
# https://app.asana.com/0/1128138765527694/1130065227225218
# https://github.com/bluelabsio/records-mover/issues/80
#
# Once that's fixed, we can stop passing in a records schema
# here when we have a header row for the names and let the
2 changes: 1 addition & 1 deletion tests/integration/resources/README.md
@@ -46,7 +46,7 @@ timezones in timestamps are expressed.
times, just datetimes, so dates and times come out in
default Pandas __str__() format.

See details on [Pandas limitations](https://app.asana.com/0/53283930106309/1133167860965681)
See details on [Pandas limitations](https://github.com/bluelabsio/records-mover/issues/83)

### utc

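That "default Pandas `__str__()` format" is simply what `str()` yields for a `Timestamp`, e.g.:

```python
import pandas as pd

str(pd.Timestamp('1970-01-01 12:33:53.000001'))
# '1970-01-01 12:33:53.000001'
```
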
8 changes: 4 additions & 4 deletions tests/unit/records/pandas/test_prep_for_csv.py
@@ -39,11 +39,11 @@ def test_prep_df_for_csv_output_no_include_index(self):
# instance, specifying how it's turned into a CSV is not
# currently part of the records spec:
#
# https://app.asana.com/0/1128138765527694/1169941483931186
# https://github.com/bluelabsio/records-mover/issues/76
#
# In addition, Vertica suffers from a driver limitation:
#
# https://app.asana.com/0/search/1169941483931185/1126315736470782
# https://github.com/bluelabsio/records-mover/issues/77
#
# 'timetz': [
# us_eastern.localize(pd.Timestamp(year=1970, month=1, day=1,
@@ -95,11 +95,11 @@ def test_prep_df_for_csv_output_include_index(self):
# instance, specifying how it's turned into a CSV is not
# currently part of the records spec:
#
# https://app.asana.com/0/1128138765527694/1169941483931186
# https://github.com/bluelabsio/records-mover/issues/76
#
# In addition, Vertica suffers from a driver limitation:
#
# https://app.asana.com/0/search/1169941483931185/1126315736470782
# https://github.com/bluelabsio/records-mover/issues/77
#
# 'timetz': [
# us_eastern.localize(pd.Timestamp(year=1970, month=1, day=1,
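The commented-out `timetz` fixtures rely on `pytz`'s `localize()`, which attaches a zone to a naive `Timestamp` without shifting the wall-clock value. A minimal runnable version of that construction:

```python
import pandas as pd
import pytz

us_eastern = pytz.timezone('US/Eastern')
ts = us_eastern.localize(pd.Timestamp(year=1970, month=1, day=1,
                                      hour=12, minute=33, second=53))
print(ts)  # 1970-01-01 12:33:53-05:00: wall clock preserved, zone attached
```
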
2 changes: 1 addition & 1 deletion tests/unit/records/test_pandas_read_csv_options.py
@@ -57,7 +57,7 @@ def test_pandas_read_csv_options_bluelabs(self):
self.assertEqual(expected, actual)
self.assertFalse(unhandled_hints)

# MM-DD not yet fully supported - see https://app.asana.com/0/1128138765527694/1173779659264666
# MM-DD not yet fully supported - see https://github.com/bluelabsio/records-mover/issues/75
#
# def test_pandas_read_csv_options_bleulabs(self):
# expected = {
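Full DD-MM support in `read_csv` would presumably hinge on pandas' `dayfirst` flag, which flips how ambiguous day/month strings are parsed:

```python
import io
import pandas as pd

csv = io.StringIO('d\n02/03/99\n')
df = pd.read_csv(csv, parse_dates=['d'], dayfirst=True)
print(df['d'][0])  # 1999-03-02: '02/03' read as day 2, month 3
```
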
@@ -63,7 +63,7 @@ class TestPandasToCsvOptionsDateformats(unittest.TestCase):
# self.assertFalse(unhandled_hints)

# DD-MM format not yet fully supported - see
# https://app.asana.com/0/1128138765527694/1173779659264666
# https://github.com/bluelabsio/records-mover/issues/75
#
# def test_pandas_dateformat_DD_MM_YYYY_no_tz(self):
# expected = {
