diff --git a/.gitignore b/.gitignore index 8b47d45a9..433ed28c2 100644 --- a/.gitignore +++ b/.gitignore @@ -73,3 +73,6 @@ target/ # PyCharm .idea/ + +# VSCode +.vscode/settings.json diff --git a/metrics/coverage_high_water_mark b/metrics/coverage_high_water_mark index 704394b24..18639d11d 100644 --- a/metrics/coverage_high_water_mark +++ b/metrics/coverage_high_water_mark @@ -1 +1 @@ -93.5400 +93.5000 diff --git a/metrics/mypy_high_water_mark b/metrics/mypy_high_water_mark index cf303fc4e..ede1f5cca 100644 --- a/metrics/mypy_high_water_mark +++ b/metrics/mypy_high_water_mark @@ -1 +1 @@ -92.3700 \ No newline at end of file +92.3500 \ No newline at end of file diff --git a/records_mover/records/sources/table.py b/records_mover/records/sources/table.py index 56934383c..9dd7ca889 100644 --- a/records_mover/records/sources/table.py +++ b/records_mover/records/sources/table.py @@ -77,9 +77,12 @@ def to_dataframes_source(self, db = self.driver.db records_schema = self.pull_records_schema() - columns = db.dialect.get_columns(db, - self.table_name, - schema=self.schema_name) + if isinstance(db, Engine): + connection = db.connect() + columns = db.dialect.get_columns(connection, self.table_name, schema=self.schema_name) + connection.close() + else: + columns = db.dialect.get_columns(db, self.table_name, schema=self.schema_name) num_columns = len(columns) if num_columns == 0: diff --git a/setup.py b/setup.py index 5ad3f9426..66d0f95ab 100755 --- a/setup.py +++ b/setup.py @@ -144,7 +144,7 @@ def initialize_options(self) -> None: ] db_dependencies = [ - 'sqlalchemy>=1.3.18,<1.4', + 'sqlalchemy>=1.4,<2.0', ] smart_open_dependencies = [ diff --git a/tests/integration/records/expected_column_types.py b/tests/integration/records/expected_column_types.py index 982007bea..12eb9db45 100644 --- a/tests/integration/records/expected_column_types.py +++ b/tests/integration/records/expected_column_types.py @@ -9,32 +9,32 @@ 'redshift': [ 'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)', 'VARCHAR(111)', 'DATE', 'VARCHAR(8)', - 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMPTZ' + 'TIMESTAMP', 'TIMESTAMPTZ' ], 'postgresql': [ 'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', - 'VARCHAR(3)', 'VARCHAR(111)', 'DATE', 'TIME WITHOUT TIME ZONE', - 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMP WITH TIME ZONE' + 'VARCHAR(3)', 'VARCHAR(111)', 'DATE', 'TIME', + 'TIMESTAMP', 'TIMESTAMP' ], 'bigquery': [ 'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)', 'VARCHAR(111)', 'DATE', 'TIME', 'DATETIME', 'TIMESTAMP' ], 'mysql': [ - 'INTEGER(11)', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)', - 'VARCHAR(111)', 'DATE', 'TIME', 'DATETIME(6)', 'DATETIME(6)' + 'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)', + 'VARCHAR(111)', 'DATE', 'TIME', 'DATETIME', 'DATETIME' ], } expected_df_loaded_database_column_types = { 'postgresql': [ 'BIGINT', 'VARCHAR(12)', 'VARCHAR(12)', 'VARCHAR(4)', 'VARCHAR(4)', - 'VARCHAR(12)', 'VARCHAR(444)', 'DATE', 'TIME WITHOUT TIME ZONE', - 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMP WITH TIME ZONE' + 'VARCHAR(12)', 'VARCHAR(444)', 'DATE', 'TIME', + 'TIMESTAMP', 'TIMESTAMP' ], 'mysql': [ - 'BIGINT(20)', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)', - 'VARCHAR(111)', 'DATE', 'TIME', 'DATETIME(6)', 'DATETIME(6)' + 'BIGINT', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)', + 'VARCHAR(111)', 'DATE', 'TIME', 'DATETIME', 'DATETIME' ], 'vertica': [ 'INTEGER', 'VARCHAR(12)', 'VARCHAR(12)', 'VARCHAR(4)', 'VARCHAR(4)', @@ -44,7 +44,7 @@ 'redshift': [ 'BIGINT', 'VARCHAR(12)', 'VARCHAR(12)', 'VARCHAR(4)', 'VARCHAR(4)', 'VARCHAR(12)', 'VARCHAR(444)', 'DATE', 'VARCHAR(8)', - 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMPTZ' + 'TIMESTAMP', 'TIMESTAMPTZ' ], 'bigquery': [ 'INTEGER', 'VARCHAR(12)', 'VARCHAR(12)', 'VARCHAR(4)', 'VARCHAR(4)', @@ -116,8 +116,8 @@ # date/time/timestamp/timestamptz all get turned into strings. ('postgresql', 'postgresql'): [ 'INTEGER', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', - 'VARCHAR(256)', 'VARCHAR(256)', 'DATE', 'TIME WITHOUT TIME ZONE', - 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMP WITH TIME ZONE' + 'VARCHAR(256)', 'VARCHAR(256)', 'DATE', 'TIME', + 'TIMESTAMP', 'TIMESTAMP' ], ('postgresql', 'vertica'): [ 'INTEGER', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', @@ -127,7 +127,7 @@ ('postgresql', 'redshift'): [ 'INTEGER', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'DATE', 'VARCHAR(8)', - 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMPTZ' + 'TIMESTAMP', 'TIMESTAMPTZ' ], ('postgresql', 'bigquery'): [ 'INTEGER', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', @@ -141,7 +141,7 @@ ('redshift', 'postgresql'): [ 'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)', 'VARCHAR(111)', 'DATE', 'VARCHAR(8)', - 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMP WITH TIME ZONE' + 'TIMESTAMP', 'TIMESTAMP' ], ('bigquery', 'redshift'): [ 'BIGINT', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', @@ -154,8 +154,8 @@ ], ('bigquery', 'postgresql'): [ 'BIGINT', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', - 'VARCHAR(256)', 'VARCHAR(256)', 'DATE', 'TIME WITHOUT TIME ZONE', - 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMP WITH TIME ZONE' + 'VARCHAR(256)', 'VARCHAR(256)', 'DATE', 'TIME', + 'TIMESTAMP', 'TIMESTAMP' ], ('bigquery', 'vertica'): [ 'INTEGER', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', @@ -170,37 +170,37 @@ 'VARCHAR(444)', 'DATE', 'TIME', 'DATETIME', 'DATETIME', ], ('redshift', 'mysql'): [ - 'INTEGER(11)', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)', - 'VARCHAR(111)', 'DATE', 'VARCHAR(8)', 'DATETIME(6)', 'DATETIME(6)' + 'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)', + 'VARCHAR(111)', 'DATE', 'VARCHAR(8)', 'DATETIME', 'DATETIME' ], ('postgresql', 'mysql'): [ - 'INTEGER(11)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', + 'INTEGER', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', - 'VARCHAR(256)', 'DATE', 'TIME', 'DATETIME(6)', 'DATETIME(6)' + 'VARCHAR(256)', 'DATE', 'TIME', 'DATETIME', 'DATETIME' ], ('bigquery', 'mysql'): [ - 'BIGINT(20)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', + 'BIGINT', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', 'VARCHAR(256)', - 'VARCHAR(256)', 'DATE', 'TIME', 'DATETIME(6)', 'DATETIME(6)' + 'VARCHAR(256)', 'DATE', 'TIME', 'DATETIME', 'DATETIME' ], ('mysql', 'postgresql'): [ 'INTEGER', 'VARCHAR(12)', 'VARCHAR(12)', 'VARCHAR(4)', 'VARCHAR(4)', 'VARCHAR(12)', - 'VARCHAR(444)', 'DATE', 'TIME WITHOUT TIME ZONE', 'TIMESTAMP WITHOUT TIME ZONE', - 'TIMESTAMP WITHOUT TIME ZONE' + 'VARCHAR(444)', 'DATE', 'TIME', 'TIMESTAMP', + 'TIMESTAMP' ], ('mysql', 'redshift'): [ 'INTEGER', 'VARCHAR(12)', 'VARCHAR(12)', 'VARCHAR(4)', 'VARCHAR(4)', 'VARCHAR(12)', - 'VARCHAR(444)', 'DATE', 'VARCHAR(8)', 'TIMESTAMP WITHOUT TIME ZONE', - 'TIMESTAMP WITHOUT TIME ZONE' + 'VARCHAR(444)', 'DATE', 'VARCHAR(8)', 'TIMESTAMP', + 'TIMESTAMP' ], ('vertica', 'postgresql'): [ 'BIGINT', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', - 'VARCHAR(3)', 'VARCHAR(111)', 'DATE', 'TIME WITHOUT TIME ZONE', - 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMP WITH TIME ZONE' + 'VARCHAR(3)', 'VARCHAR(111)', 'DATE', 'TIME', + 'TIMESTAMP', 'TIMESTAMP' ], ('vertica', 'redshift'): [ 'BIGINT', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)', 'VARCHAR(111)', 'DATE', 'VARCHAR(8)', - 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMPTZ' + 'TIMESTAMP', 'TIMESTAMPTZ' ], } diff --git a/tests/integration/records/single_db/numeric_expectations.py b/tests/integration/records/single_db/numeric_expectations.py index 779167b62..1bf77afb3 100644 --- a/tests/integration/records/single_db/numeric_expectations.py +++ b/tests/integration/records/single_db/numeric_expectations.py @@ -120,11 +120,11 @@ 'uint64': 'NUMERIC(20, 0)', 'float16': 'REAL', 'float32': 'REAL', - 'float64': 'DOUBLE PRECISION', - 'float128': 'DOUBLE PRECISION', # Redshift doesn't support >float64 + 'float64': 'DOUBLE_PRECISION', + 'float128': 'DOUBLE_PRECISION', # Redshift doesn't support >float64 'fixed_6_2': 'NUMERIC(6, 2)', 'fixed_38_9': 'NUMERIC(38, 9)', - 'fixed_100_4': 'DOUBLE PRECISION' # Redshift doesn't support fixed precision > 38 + 'fixed_100_4': 'DOUBLE_PRECISION' # Redshift doesn't support fixed precision > 38 }, 'vertica': { 'int8': 'INTEGER', @@ -180,8 +180,8 @@ 'uint64': 'NUMERIC(20, 0)', 'float16': 'REAL', 'float32': 'REAL', - 'float64': 'DOUBLE PRECISION', - 'float128': 'DOUBLE PRECISION', # Postgres doesn't support >float64 + 'float64': 'DOUBLE_PRECISION', + 'float128': 'DOUBLE_PRECISION', # Postgres doesn't support >float64 'fixed_6_2': 'NUMERIC(6, 2)', 'fixed_38_9': 'NUMERIC(38, 9)', 'fixed_100_4': 'NUMERIC(100, 4)', @@ -191,15 +191,15 @@ # relevant and records-mover just uses the defaults which end up # as the below. 'mysql': { - 'int8': 'TINYINT(4)', - 'int16': 'SMALLINT(6)', - 'int32': 'INTEGER(11)', - 'int64': 'BIGINT(20)', - 'ubyte': 'TINYINT(3) UNSIGNED', - 'uint8': 'TINYINT(3) UNSIGNED', - 'uint16': 'SMALLINT(5) UNSIGNED', - 'uint32': 'INTEGER(10) UNSIGNED', - 'uint64': 'BIGINT(20) UNSIGNED', + 'int8': 'TINYINT', + 'int16': 'SMALLINT', + 'int32': 'INTEGER', + 'int64': 'BIGINT', + 'ubyte': 'TINYINT', + 'uint8': 'TINYINT', + 'uint16': 'SMALLINT', + 'uint32': 'INTEGER', + 'uint64': 'BIGINT', 'float16': 'FLOAT', 'float32': 'FLOAT', 'float64': 'DOUBLE', diff --git a/tests/integration/records/single_db/test_records_numeric.py b/tests/integration/records/single_db/test_records_numeric.py index 28a3ede9c..63b9656e2 100644 --- a/tests/integration/records/single_db/test_records_numeric.py +++ b/tests/integration/records/single_db/test_records_numeric.py @@ -10,6 +10,7 @@ RecordsSchema, DelimitedRecordsFormat, ProcessingInstructions ) from ..records_numeric_database_fixture import RecordsNumericDatabaseFixture +from sqlalchemy.engine import Engine logger = logging.getLogger(__name__) @@ -67,8 +68,17 @@ def test_numeric_schema_fields_created(self) -> None: self.validate_records_schema(tempdir) def validate_table(self): - columns = self.engine.dialect.get_columns(self.engine, self.table_name, - schema=self.schema_name) + if isinstance(self.engine, Engine): + connection = self.engine.connect() + columns = self.engine.dialect.get_columns(connection, + self.table_name, + schema=self.schema_name) + connection.close() + else: + columns = self.engine.dialect.get_columns(self.engine, + self.table_name, + schema=self.schema_name) + # Note that Redshift doesn't support TIME type: # https://docs.aws.amazon.com/redshift/latest/dg/r_Datetime_types.html actual_column_types = { diff --git a/tests/integration/records/table_validator.py b/tests/integration/records/table_validator.py index d6ebd6077..a6a6f52d5 100644 --- a/tests/integration/records/table_validator.py +++ b/tests/integration/records/table_validator.py @@ -62,9 +62,17 @@ def validate(self, self.validate_data_values(schema_name, table_name) def validate_data_types(self, schema_name: str, table_name: str) -> None: - columns = self.target_db_engine.dialect.get_columns(self.target_db_engine, - table_name, - schema=schema_name) + if isinstance(self.target_db_engine, Engine): + connection = self.target_db_engine.connect() + columns = self.target_db_engine.dialect.get_columns(connection, + table_name, + schema=schema_name) + connection.close() + else: + columns = self.target_db_engine.dialect.get_columns(self.target_db_engine, + table_name, + schema=schema_name) + expected_column_names = [ 'num', 'numstr', 'str', 'comma', 'doublequote', 'quotecommaquote', 'newlinestr', 'date', 'time', 'timestamp', 'timestamptz'