From f5828e9ad0fba94b7dc2a2ce4049edb7b9e3efa9 Mon Sep 17 00:00:00 2001
From: vinceatbluelabs
Date: Tue, 7 Jul 2020 12:17:17 -0400
Subject: [PATCH] Convert Asana tickets to GitHub issues (#96)

---
 records_mover/db/mysql/load_options.py | 2 +-
 records_mover/records/delimited/conversions.py | 2 +-
 records_mover/records/delimited/sniff.py | 2 +-
 records_mover/records/pandas/to_csv_options.py | 2 +-
 records_mover/records/schema/field/numpy.py | 2 +-
 records_mover/records/schema/field/sqlalchemy.py | 4 ++--
 records_mover/records/schema/schema/__init__.py | 4 ++--
 records_mover/records/sources/dataframes.py | 4 ++--
 records_mover/records/targets/spectrum.py | 6 +++---
 records_mover/records/targets/table/target.py | 2 +-
 tests/integration/records/directory_validator.py | 2 +-
 tests/integration/records/expected_column_types.py | 6 +++---
 tests/integration/records/single_db/test_records_load.py | 6 +++---
 tests/integration/resources/README.md | 2 +-
 tests/unit/records/pandas/test_prep_for_csv.py | 8 ++++----
 tests/unit/records/test_pandas_read_csv_options.py | 2 +-
 .../records/test_pandas_to_csv_options_dateformats.py | 2 +-
 17 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/records_mover/db/mysql/load_options.py b/records_mover/db/mysql/load_options.py
index 611a52d40..c62800512 100644
--- a/records_mover/db/mysql/load_options.py
+++ b/records_mover/db/mysql/load_options.py
@@ -248,7 +248,7 @@ def mysql_load_options(unhandled_hints: Set[str],
     # to a compatible locale. This is true for a number of the
     # database drivers; the backlog item to address is here:
     #
-    # https://app.asana.com/0/1128138765527694/1173779659264666
+    # https://github.com/bluelabsio/records-mover/issues/75
     #
     # To address, we'll want to look into "set trade_date" per
     # https://stackoverflow.com/questions/44171283/load-data-local-infile-with-sqlalchemy-and-pymysql
diff --git a/records_mover/records/delimited/conversions.py b/records_mover/records/delimited/conversions.py
index 0d9d7245b..6c8ac04eb 100644
--- a/records_mover/records/delimited/conversions.py
+++ b/records_mover/records/delimited/conversions.py
@@ -38,7 +38,7 @@
 # This is a Union because the date formats currently don't really
 # account for MM/DD time.
 #
-# https://app.asana.com/0/1128138765527694/1173779659264666
+# https://github.com/bluelabsio/records-mover/issues/75
 #
 python_date_format_from_hints: Dict[Union[HintDateFormat, Literal['DD/MM/YY']], str] = {
     'YYYY-MM-DD': '%Y-%m-%d',
diff --git a/records_mover/records/delimited/sniff.py b/records_mover/records/delimited/sniff.py
index 030dcf047..1075e3ea4 100644
--- a/records_mover/records/delimited/sniff.py
+++ b/records_mover/records/delimited/sniff.py
@@ -190,7 +190,7 @@ def sniff_compression_hint(fileobj: IO[bytes]) -> HintCompression:
 def sniff_hints_from_fileobjs(fileobjs: List[IO[bytes]],
                               initial_hints: PartialRecordsHints) -> PartialRecordsHints:
     if len(fileobjs) != 1:
-        # https://app.asana.com/0/53283930106309/1131698268455054
+        # https://github.com/bluelabsio/records-mover/issues/84
         raise NotImplementedError('Cannot currently sniff hints from mulitple '
                                   'files--please provide hints')
     fileobj = fileobjs[0]
diff --git a/records_mover/records/pandas/to_csv_options.py b/records_mover/records/pandas/to_csv_options.py
index 1b9b070ad..a553cbde1 100644
--- a/records_mover/records/pandas/to_csv_options.py
+++ b/records_mover/records/pandas/to_csv_options.py
@@ -84,7 +84,7 @@ def pandas_to_csv_options(records_format: DelimitedRecordsFormat,
         # '2000-01-02 12:34:56.789012'
         # >>>
         #
-        # https://app.asana.com/0/1128138765527694/1159958019131681
+        # https://github.com/bluelabsio/records-mover/issues/95
         pandas_options['date_format'] = '%Y-%m-%d %H:%M:%S.%f'
     else:
         pandas_options['date_format'] = '%Y-%m-%d %H:%M:%S.%f%z'
diff --git a/records_mover/records/schema/field/numpy.py b/records_mover/records/schema/field/numpy.py
index 767ec38ef..eba8a193a 100644
--- a/records_mover/records/schema/field/numpy.py
+++ b/records_mover/records/schema/field/numpy.py
@@ -19,7 +19,7 @@ def details_from_numpy_dtype(dtype: numpy.dtype,
     if has_tz:
         # See: 'Represent pandas datetime64 with timezone in records schema'
         #
-        # https://app.asana.com/0/53283930106309/1132706099772565
+        # https://github.com/bluelabsio/records-mover/issues/89
         field_type = 'datetimetz'
     else:
         field_type = 'datetime'
diff --git a/records_mover/records/schema/field/sqlalchemy.py b/records_mover/records/schema/field/sqlalchemy.py
index 1961c4b0c..c2d4e7e36 100644
--- a/records_mover/records/schema/field/sqlalchemy.py
+++ b/records_mover/records/schema/field/sqlalchemy.py
@@ -36,7 +36,7 @@ def field_from_sqlalchemy_column(column: Column,
     # constraints out of database tables, so set this to null,
     # which means "we don't know":
     #
-    # https://app.asana.com/0/1128138765527694/1131416227825120
+    # https://github.com/bluelabsio/records-mover/issues/90
     unique = None
 
     if isinstance(type_, sqlalchemy.sql.sqltypes.Integer):
@@ -114,7 +114,7 @@ def field_from_sqlalchemy_column(column: Column,
     # We don't currently gather statistics from databases - which
     # can bite us when exporting from BigQuery, for instance:
     #
-    # https://app.asana.com/0/53283930106309/1131698268455053
+    # https://github.com/bluelabsio/records-mover/issues/91
     statistics = None
 
     return RecordsSchemaField(name=name,
diff --git a/records_mover/records/schema/schema/__init__.py b/records_mover/records/schema/schema/__init__.py
index ef8d0bb4f..7b787fb20 100644
--- a/records_mover/records/schema/schema/__init__.py
+++ b/records_mover/records/schema/schema/__init__.py
@@ -109,7 +109,7 @@ def from_fileobjs(fileobjs: List[IO[bytes]],
         from records_mover.pandas import purge_unnamed_unused_columns
 
         if len(fileobjs) != 1:
-            # https://app.asana.com/0/53283930106309/1131698268455054
+            # https://github.com/bluelabsio/records-mover/issues/84
             raise NotImplementedError('Cannot currently sniff schema from mulitple '
                                       'files--please provide explicit schema JSON')
         fileobj = fileobjs[0]
@@ -171,7 +171,7 @@ def assign_dataframe_names(self,
         index_name = field_names[0]
         # RecordsMover supports only single indexes for the moment.
         #
-        # https://app.asana.com/0/1128138765527694/1161071033650873
+        # https://github.com/bluelabsio/records-mover/issues/92
         index_mapping = {0: index_name}
 
         # remove first name, which applies to index
diff --git a/records_mover/records/sources/dataframes.py b/records_mover/records/sources/dataframes.py
index 231c996d7..6f772f26f 100644
--- a/records_mover/records/sources/dataframes.py
+++ b/records_mover/records/sources/dataframes.py
@@ -86,7 +86,7 @@ def serialize_dfs(self,
                 # initial_records_schema was based on only the
                 # first chunk.
                 #
-                # https://app.asana.com/0/1128138765527694/1161074649542801
+                # https://github.com/bluelabsio/records-mover/issues/93
                 logger.warning("Only checking first chunk for type inference")
             i = i + 1
 
@@ -156,7 +156,7 @@ def save_df(df: 'DataFrame', output_filename: str) -> None:
             def save_df(df: 'DataFrame', output_filename: str) -> None:
                 logger.info(f"Writing Parquet file to {output_filename}")
                 # Note that this doesn't specify partitioning as of yet -
-                # https://app.asana.com/0/1128138765527694/1126615025514407
+                # https://github.com/bluelabsio/records-mover/issues/94
                 df.to_parquet(fname=output_filename,
                               engine='pyarrow',
                               index=self.include_index,
diff --git a/records_mover/records/targets/spectrum.py b/records_mover/records/targets/spectrum.py
index dc286dcae..21ea8f09c 100644
--- a/records_mover/records/targets/spectrum.py
+++ b/records_mover/records/targets/spectrum.py
@@ -106,11 +106,11 @@ def post_load_hook(self, num_rows_loaded: Optional[int]) -> None:
         columns = [f.to_sqlalchemy_column(self.driver) for f in records_schema.fields]
         for column in columns:
             if isinstance(column.type, sqlalchemy.sql.sqltypes.Numeric) and column.type.asdecimal:
-                # https://app.asana.com/0/1128138765527694/1139365728890234
+                # https://github.com/bluelabsio/records-mover/issues/85
                 raise NotImplementedError("Teach me how to write a NUMERIC to Redshift Spectrum "
                                           f"(column name: {column})")
             if isinstance(column.type, sqlalchemy.sql.sqltypes.DateTime) and column.type.timezone:
-                # https://app.asana.com/0/53283930106309/1136051640283464
+                # https://github.com/bluelabsio/records-mover/issues/86
                 raise NotImplementedError("Teach me how to write a datetimetz to Redshift Spectrum "
                                           f"({column})")
 
@@ -129,7 +129,7 @@ def post_load_hook(self, num_rows_loaded: Optional[int]) -> None:
             #
             # https://docs.aws.amazon.com/redshift/latest/dg/r_ALTER_TABLE_external-table.html
             #
-            # https://app.asana.com/0/53283930106309/1140172717234001
+            # https://github.com/bluelabsio/records-mover/issues/87
             pass
         storage_clause = "STORED AS PARQUET\n"
         location_clause = f"LOCATION '{self.output_loc.url}_manifest'\n"
diff --git a/records_mover/records/targets/table/target.py b/records_mover/records/targets/table/target.py
index 020871f23..9b73251df 100644
--- a/records_mover/records/targets/table/target.py
+++ b/records_mover/records/targets/table/target.py
@@ -57,7 +57,7 @@ def __init__(self,
         # advertise what format we prefer to be given for mover paths
         # that don't yet support full records negotiation.
         #
-        # https://app.asana.com/0/1128138765527694/1130105834872106
+        # https://github.com/bluelabsio/records-mover/issues/88
         self.records_format = next(iter(self.known_supported_records_formats()), None)
 
     def move_from_records_directory(self,
diff --git a/tests/integration/records/directory_validator.py b/tests/integration/records/directory_validator.py
index 63763ce9a..e5d6f822f 100644
--- a/tests/integration/records/directory_validator.py
+++ b/tests/integration/records/directory_validator.py
@@ -76,7 +76,7 @@ def validate_records_schema(self) -> None:
             # inference to determine if the data in question will
             # fit into it.
             #
-            # https://app.asana.com/0/1128138765527694/1166526213569051
+            # https://github.com/bluelabsio/records-mover/issues/82
             # https://stackoverflow.com/questions/31761047/what-difference-between-the-date-time-datetime-and-timestamp-types/56138746
             'mysql': [
                 'integer', 'string', 'string', 'string',
diff --git a/tests/integration/records/expected_column_types.py b/tests/integration/records/expected_column_types.py
index 615283006..520f26643 100644
--- a/tests/integration/records/expected_column_types.py
+++ b/tests/integration/records/expected_column_types.py
@@ -75,7 +75,7 @@
     # databases like Redshift. We could collect and use
     # statistics to get a better read on this:
     #
-    # https://app.asana.com/0/1128138765527694/1152727792219521
+    # https://github.com/bluelabsio/records-mover/issues/79
     #
     #
     #
@@ -87,8 +87,8 @@
     # types from numeric statistics it pulls from the source
     # tables.
     #
-    # https://app.asana.com/0/1128138765527694/1152727792219523
-    # https://app.asana.com/0/1128138765527694/1152727792219521
+    # https://github.com/bluelabsio/records-mover/issues/78
+    # https://github.com/bluelabsio/records-mover/issues/79
     #
     #
     #
diff --git a/tests/integration/records/single_db/test_records_load.py b/tests/integration/records/single_db/test_records_load.py
index 59826c330..2e5fe4ef0 100644
--- a/tests/integration/records/single_db/test_records_load.py
+++ b/tests/integration/records/single_db/test_records_load.py
@@ -11,8 +11,8 @@ class RecordsLoadIntegrationTest(BaseRecordsIntegrationTest):
 
     def load_and_verify(self, format_type, variant, hints={}, broken=False, sourcefn=None):
         if self.engine.name == 'bigquery' and variant == 'csv':
-            # https://app.asana.com/0/53283930106309/1130065227225218
-            # https://app.asana.com/0/53283930106309/1130065227225219
+            # https://github.com/bluelabsio/records-mover/issues/80
+            # https://github.com/bluelabsio/records-mover/issues/81
             logger.warning("This test won't pass until we can use hints to "
                            "infer date/time/etc columns, or we use records schema "
                            "from target to set types on the source, so skipping.")
@@ -118,7 +118,7 @@ def sourcefn(filename, records_format, records_schema):
         # CSV type inference is not smart enough to identify the
         # date/time columns as anything but strings yet.
         #
-        # https://app.asana.com/0/1128138765527694/1130065227225218
+        # https://github.com/bluelabsio/records-mover/issues/80
        #
         # Once that's fixed, we can stop passing in a records schema
         # here when we have a header row for the names and let the
diff --git a/tests/integration/resources/README.md b/tests/integration/resources/README.md
index 8ccd8425b..fbb89f02c 100644
--- a/tests/integration/resources/README.md
+++ b/tests/integration/resources/README.md
@@ -46,7 +46,7 @@ timezones in timestamps are expressed.
   times, just datetimes, so dates and times come out in default
   Pandas __str__() format.
 
+  See details on [Pandas limitations](https://github.com/bluelabsio/records-mover/issues/83)
 
 ### utc
 
diff --git a/tests/unit/records/pandas/test_prep_for_csv.py b/tests/unit/records/pandas/test_prep_for_csv.py
index 6d53f2100..0154cd266 100644
--- a/tests/unit/records/pandas/test_prep_for_csv.py
+++ b/tests/unit/records/pandas/test_prep_for_csv.py
@@ -39,11 +39,11 @@ def test_prep_df_for_csv_output_no_include_index(self):
         # instance, specifying how it's turned into a CSV is not
         # currently part of the records spec:
         #
-        # https://app.asana.com/0/1128138765527694/1169941483931186
+        # https://github.com/bluelabsio/records-mover/issues/76
         #
         # In addition, Vertica suffers from a driver limitation:
         #
-        # https://app.asana.com/0/search/1169941483931185/1126315736470782
+        # https://github.com/bluelabsio/records-mover/issues/77
         #
         # 'timetz': [
         #     us_eastern.localize(pd.Timestamp(year=1970, month=1, day=1,
@@ -95,11 +95,11 @@ def test_prep_df_for_csv_output_include_index(self):
         # instance, specifying how it's turned into a CSV is not
         # currently part of the records spec:
         #
-        # https://app.asana.com/0/1128138765527694/1169941483931186
+        # https://github.com/bluelabsio/records-mover/issues/76
         #
         # In addition, Vertica suffers from a driver limitation:
         #
-        # https://app.asana.com/0/search/1169941483931185/1126315736470782
+        # https://github.com/bluelabsio/records-mover/issues/77
         #
         # 'timetz': [
         #     us_eastern.localize(pd.Timestamp(year=1970, month=1, day=1,
diff --git a/tests/unit/records/test_pandas_read_csv_options.py b/tests/unit/records/test_pandas_read_csv_options.py
index 2e26b4786..d633bcd4a 100644
--- a/tests/unit/records/test_pandas_read_csv_options.py
+++ b/tests/unit/records/test_pandas_read_csv_options.py
@@ -57,7 +57,7 @@ def test_pandas_read_csv_options_bluelabs(self):
         self.assertEqual(expected, actual)
         self.assertFalse(unhandled_hints)
 
-    # MM-DD not yet fully supported - see https://app.asana.com/0/1128138765527694/1173779659264666
+    # MM-DD not yet fully supported - see https://github.com/bluelabsio/records-mover/issues/75
     #
     # def test_pandas_read_csv_options_bleulabs(self):
     #     expected = {
diff --git a/tests/unit/records/test_pandas_to_csv_options_dateformats.py b/tests/unit/records/test_pandas_to_csv_options_dateformats.py
index 62a88de1c..888a4cd1e 100644
--- a/tests/unit/records/test_pandas_to_csv_options_dateformats.py
+++ b/tests/unit/records/test_pandas_to_csv_options_dateformats.py
@@ -63,7 +63,7 @@ class TestPandasToCsvOptionsDateformats(unittest.TestCase):
     #         self.assertFalse(unhandled_hints)
 
     # DD-MM format not yet fully supported - see
-    # https://app.asana.com/0/1128138765527694/1173779659264666
+    # https://github.com/bluelabsio/records-mover/issues/75
     #
     # def test_pandas_dateformat_DD_MM_YYYY_no_tz(self):
     #     expected = {
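A note for reviewers on the issue-75 references above: conversions.py maps
records-spec dateformat hint strings onto Python strptime/strftime patterns,
and the MM/DD-vs-DD/MM ambiguity is exactly why the hint has to be carried
around. A minimal standalone sketch of the idea -- the reduced mapping and
the parse_date helper are illustrative only, not records-mover's actual API;
just the 'YYYY-MM-DD' entry appears verbatim in the diff:

    from datetime import datetime
    from typing import Dict

    # Records-spec hint string -> Python date format string.
    python_date_format_from_hints: Dict[str, str] = {
        'YYYY-MM-DD': '%Y-%m-%d',
        'MM/DD/YY': '%m/%d/%y',
        'DD/MM/YY': '%d/%m/%y',  # same digits as MM/DD/YY; only the hint disambiguates
    }

    def parse_date(value: str, dateformat_hint: str) -> datetime:
        # Illustrative helper: parse a date string per its dateformat hint.
        return datetime.strptime(value, python_date_format_from_hints[dateformat_hint])

    # '01/02/70' is ambiguous: the hint decides whether it is Jan 2 or Feb 1.
    assert parse_date('01/02/70', 'MM/DD/YY') == datetime(1970, 1, 2)
    assert parse_date('01/02/70', 'DD/MM/YY') == datetime(1970, 2, 1)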
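Similarly, the to_csv_options.py hunk (issue 95) relies on pandas' date_format
argument to control how datetimes are serialized to CSV. A small demonstration
of the behavior the in-code comment describes (the DataFrame here is made up;
assumes a reasonably recent pandas):

    import io

    import pandas as pd

    df = pd.DataFrame({'ts': [pd.Timestamp(2000, 1, 2, 12, 34, 56, 789012)]})
    buf = io.StringIO()
    # Timezone-naive datetimes get the '%f' microsecond suffix but no '%z'.
    df.to_csv(buf, index=False, date_format='%Y-%m-%d %H:%M:%S.%f')
    print(buf.getvalue())
    # ts
    # 2000-01-02 12:34:56.789012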
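Finally, the guarded NotImplementedError pattern in the spectrum.py hunks is
plain SQLAlchemy type introspection. A condensed, self-contained version of
the same check (the function name and example Column are hypothetical):

    import sqlalchemy

    def check_spectrum_compatible(column: sqlalchemy.Column) -> None:
        # Refuse column types this target can't yet serialize.
        if isinstance(column.type, sqlalchemy.sql.sqltypes.Numeric) and column.type.asdecimal:
            raise NotImplementedError("Teach me how to write a NUMERIC to Redshift Spectrum "
                                      f"(column name: {column})")
        if isinstance(column.type, sqlalchemy.sql.sqltypes.DateTime) and column.type.timezone:
            raise NotImplementedError("Teach me how to write a datetimetz to Redshift Spectrum "
                                      f"({column})")

    try:
        check_spectrum_compatible(sqlalchemy.Column('ts', sqlalchemy.DateTime(timezone=True)))
    except NotImplementedError as e:
        print(e)  # Teach me how to write a datetimetz to Redshift Spectrum (ts)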