diff --git a/.circleci/config.yml b/.circleci/config.yml
index 4a0147a4d..cf7e93efb 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -604,16 +604,16 @@ workflows:
           tags:
             only: /v\d+\.\d+\.\d+(-[\w]+)?/
     - integration_test:
-        name: bigquery-no-gcs-itest
-        extras: '[bigquery,itest]'
-        python_version: "3.9"
-        db_name: bltoolsdevbq-bq_itest
-        include_gcs_scratch_bucket: false
-        requires:
-          - redshift-s3-itest
-        filters:
-          tags:
-            only: /v\d+\.\d+\.\d+(-[\w]+)?/
+        name: bigquery-no-gcs-itest
+        extras: '[bigquery,itest]'
+        python_version: "3.9"
+        db_name: bltoolsdevbq-bq_itest
+        requires:
+          - redshift-s3-itest
+        include_gcs_scratch_bucket: false
+        filters:
+          tags:
+            only: /v\d+\.\d+\.\d+(-[\w]+)?/
     - integration_test:
         name: bigquery-gcs-itest
         extras: '[bigquery,itest]'
diff --git a/metrics/bigfiles_high_water_mark b/metrics/bigfiles_high_water_mark
index 91d0f8a3f..7fdcfb8c0 100644
--- a/metrics/bigfiles_high_water_mark
+++ b/metrics/bigfiles_high_water_mark
@@ -1 +1 @@
-1138
+1131
diff --git a/metrics/coverage_high_water_mark b/metrics/coverage_high_water_mark
index d1a64d483..8fee528a7 100644
--- a/metrics/coverage_high_water_mark
+++ b/metrics/coverage_high_water_mark
@@ -1 +1 @@
-93.0000
+93.6400
diff --git a/metrics/mypy_high_water_mark b/metrics/mypy_high_water_mark
index b25e8ece4..a83cfd41b 100644
--- a/metrics/mypy_high_water_mark
+++ b/metrics/mypy_high_water_mark
@@ -1 +1 @@
-92.2900
+92.3400
\ No newline at end of file
diff --git a/records_mover/airflow/hooks/google_cloud_credentials_hook.py b/records_mover/airflow/hooks/google_cloud_credentials_hook.py
index 3ab5629c6..2901e6618 100644
--- a/records_mover/airflow/hooks/google_cloud_credentials_hook.py
+++ b/records_mover/airflow/hooks/google_cloud_credentials_hook.py
@@ -1,15 +1,15 @@
-from airflow.contrib.hooks.gcp_api_base_hook import GoogleCloudBaseHook
+from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
 from typing import Iterable, Optional, TYPE_CHECKING

 if TYPE_CHECKING:
     # see the 'gsheets' extras_require option in setup.py - needed for this!
     import google.auth.credentials  # noqa

-class GoogleCloudCredentialsHook(GoogleCloudBaseHook):
+class GoogleCloudCredentialsHook(GoogleBaseHook):
     def get_conn(self) -> 'google.auth.credentials.Credentials':
         return self._get_credentials()

-    def scopes(self) -> Iterable[str]:
+    def scopes(self) -> Iterable[str]:  # type: ignore
         scope: Optional[str] = self._get_field('scope', None)
         scopes: Iterable[str]
         if scope is not None:
diff --git a/records_mover/airflow/hooks/records_hook.py b/records_mover/airflow/hooks/records_hook.py
index 87cdd0c53..729989087 100644
--- a/records_mover/airflow/hooks/records_hook.py
+++ b/records_mover/airflow/hooks/records_hook.py
@@ -5,7 +5,7 @@
 from records_mover.db.factory import db_driver
 from records_mover.db import DBDriver
 from records_mover.url.resolver import UrlResolver
-from airflow.contrib.hooks.aws_hook import AwsHook
+from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
 from typing import Optional, Union, List, TYPE_CHECKING
 import sqlalchemy

@@ -14,7 +14,7 @@
     from airflow.hooks import BaseHook
 except ImportError:
     # Required for Airflow 2.0
-    from airflow.hooks.base_hook import BaseHook  # type: ignore
+    from airflow.hooks.base import BaseHook  # type: ignore

 if TYPE_CHECKING:
     from boto3.session import ListObjectsResponseContentType, S3ClientTypeStub  # noqa
@@ -41,7 +41,7 @@ def __init__(self,

     def _get_boto3_session(self) -> boto3.session.Session:
         if not self._boto3_session:
-            self._boto3_session = AwsHook(self.aws_conn_id).get_session()
+            self._boto3_session = AwsBaseHook(self.aws_conn_id).get_session()
         return self._boto3_session

     @property
diff --git a/records_mover/airflow/hooks/sqlalchemy_db_hook.py b/records_mover/airflow/hooks/sqlalchemy_db_hook.py
index 8892301f8..cbc2df665 100644
--- a/records_mover/airflow/hooks/sqlalchemy_db_hook.py
+++ b/records_mover/airflow/hooks/sqlalchemy_db_hook.py
@@ -1,12 +1,6 @@
 import sqlalchemy as sa
 from records_mover.db import create_sqlalchemy_url
-
-try:
-    # Works with Airflow 1
-    from airflow.hooks import BaseHook
-except ImportError:
-    # Required for Airflow 2.0
-    from airflow.hooks.base_hook import BaseHook  # type: ignore
+from airflow.hooks.base import BaseHook


 class SqlAlchemyDbHook(BaseHook):
diff --git a/records_mover/creds/creds_via_airflow.py b/records_mover/creds/creds_via_airflow.py
index 15c203251..e5a5acd03 100644
--- a/records_mover/creds/creds_via_airflow.py
+++ b/records_mover/creds/creds_via_airflow.py
@@ -14,12 +14,12 @@ class CredsViaAirflow(BaseCreds):

     def boto3_session(self, aws_creds_name: str) -> 'boto3.session.Session':
-        from airflow.contrib.hooks.aws_hook import AwsHook
-        aws_hook = AwsHook(aws_creds_name)
+        from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook
+        aws_hook = AwsBaseHook(aws_creds_name)
         return aws_hook.get_session()

     def db_facts(self, db_creds_name: str) -> DBFacts:
-        from airflow.hooks import BaseHook
+        from airflow.hooks.base import BaseHook
         conn = BaseHook.get_connection(db_creds_name)
         out: DBFacts = {}
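The four hook files above are the core of the Airflow 1 → 2 migration: `airflow.contrib.*` hooks and `airflow.hooks.base_hook` are gone in Airflow 2, replaced by provider packages and `airflow.hooks.base`. A minimal usage sketch of the new import surface, assuming the provider distributions added to setup.py below are installed (`'aws_default'` and `'my_db'` are placeholder connection IDs, not names from this repo):

```python
from airflow.hooks.base import BaseHook
from airflow.providers.amazon.aws.hooks.base_aws import AwsBaseHook

# AwsBaseHook wraps an Airflow connection and hands back a boto3 session,
# which is how RecordsHook._get_boto3_session() uses it above.
session = AwsBaseHook('aws_default').get_session()
s3 = session.client('s3')

# BaseHook.get_connection() is the generic lookup CredsViaAirflow.db_facts() builds on.
conn = BaseHook.get_connection('my_db')
print(conn.host, conn.port, conn.login)
```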
diff --git a/setup.cfg b/setup.cfg
index a620527bf..f2501a27c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -31,6 +31,7 @@ max-complexity = 15
 [mypy]
 mypy_path = types/stubs
 warn_unused_ignores = True
+disable_error_code = annotation-unchecked

 [mypy-alembic.*]
 ignore_missing_imports = True
@@ -84,4 +85,4 @@ ignore_missing_imports = True
 ignore_missing_imports = True

 [mypy-airflow.hooks.*]
-ignore_missing_imports = True
\ No newline at end of file
+ignore_missing_imports = True
diff --git a/setup.py b/setup.py
index 9f377100b..5fa82cd69 100755
--- a/setup.py
+++ b/setup.py
@@ -137,20 +137,13 @@ def initialize_options(self) -> None:
 )

 airflow_dependencies = [
-    # Minimum version here is needed to avoid syntax error in setup.py
-    # in 1.10.0
-    'apache-airflow>=1.10.1,<2'
+    'apache-airflow>=2',
+    'apache-airflow-providers-amazon',
+    'apache-airflow-providers-google',
 ]

 db_dependencies = [
-    # Lower bound (>=1.3.18) is to improve package resolution performance
-    #
-    # Upper bound (<1.4) is to avoid 1.4 which has breaking changes and is
-    # incompatible with python-bigquery-sqlalchemy per
-    # https://github.com/googleapis/python-bigquery-sqlalchemy/issues/83
-    # Can lift this once records-mover itself is compatible and
-    # other packages have appropriate restrictions in place.
-    'sqlalchemy>=1.3.18,<1.4',
+    'sqlalchemy>=1.3.18',
 ]

 smart_open_dependencies = [
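The dependency changes raise the Airflow floor to 2.x (pulling in the two provider distributions the hooks now import from) and drop the `<1.4` SQLAlchemy cap that had been held in place for python-bigquery-sqlalchemy compatibility. A sketch of what the new floors imply at runtime — the guard itself is illustrative, not part of records-mover, and assumes the separately installable `packaging` distribution:

```python
# Illustrative environment check mirroring the new floors in setup.py.
import airflow
import sqlalchemy
from packaging.version import Version  # pip install packaging

assert Version(airflow.__version__) >= Version("2.0.0")
assert Version(sqlalchemy.__version__) >= Version("1.3.18")
```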
diff --git a/tests/component/records/schema/field/test_dtype.py b/tests/component/records/schema/field/test_dtype.py
index 7d880a23b..609491589 100644
--- a/tests/component/records/schema/field/test_dtype.py
+++ b/tests/component/records/schema/field/test_dtype.py
@@ -6,6 +6,7 @@
 )
 import numpy as np
 import pandas as pd
+import pytest


 def with_nullable(nullable: bool, fn):
@@ -31,29 +32,30 @@ def check_dtype(field_type, constraints, expectation):
     assert out.dtype == expectation


-def test_to_pandas_dtype_integer_no_nullable():
+class Test_to_pandas_dtype_integer_no_nullable:
     expectations = {
-        (-100, 100): np.int8,
-        (0, 240): np.uint8,
-        (-10000, 10000): np.int16,
-        (500, 40000): np.uint16,
-        (-200000000, 200000000): np.int32,
-        (25, 4000000000): np.uint32,
-        (-9000000000000000000, 2000000000): np.int64,
-        (25, 10000000000000000000): np.uint64,
+        (-100, 100): pd.Int8Dtype(),
+        (0, 240): pd.UInt8Dtype(),
+        (-10000, 10000): pd.Int16Dtype(),
+        (500, 40000): pd.UInt16Dtype(),
+        (-200000000, 200000000): pd.Int32Dtype(),
+        (25, 4000000000): pd.UInt32Dtype(),
+        (-9000000000000000000, 2000000000): pd.Int64Dtype(),
+        (25, 10000000000000000000): pd.UInt64Dtype(),
         (25, 1000000000000000000000000000): np.float128,
-        (None, None): np.int64,
+        (None, None): pd.Int64Dtype(),
     }
-    for (min_, max_), expected_pandas_type in expectations.items():
+
+    @pytest.mark.parametrize("expectation", expectations.items())
+    def test_to_pandas_dtype_integer_no_nullable(self, expectation):
+        (min_, max_), expected_pandas_type = expectation
         constraints = RecordsSchemaFieldIntegerConstraints(
             required=True, unique=None, min_=min_, max_=max_
         )
-        yield with_nullable(
-            False, check_dtype
-        ), "integer", constraints, expected_pandas_type
+        with_nullable(False, check_dtype("integer", constraints, expected_pandas_type))


-def test_to_pandas_dtype_integer_nullable():
+class Test_to_pandas_dtype_integer_nullable:
     expectations = {
         (-100, 100): pd.Int8Dtype(),
         (0, 240): pd.UInt8Dtype(),
@@ -66,16 +68,17 @@
         (25, 1000000000000000000000000000): np.float128,
         (None, None): pd.Int64Dtype(),
     }
-    for (min_, max_), expected_pandas_type in expectations.items():
+
+    @pytest.mark.parametrize("expectation", expectations.items())
+    def test_to_pandas_dtype_integer_nullable(self, expectation):
+        (min_, max_), expected_pandas_type = expectation
         constraints = RecordsSchemaFieldIntegerConstraints(
             required=True, unique=None, min_=min_, max_=max_
         )
-        yield with_nullable(
-            True, check_dtype
-        ), "integer", constraints, expected_pandas_type
+        with_nullable(True, check_dtype("integer", constraints, expected_pandas_type))


-def test_to_pandas_dtype_decimal_float():
+class Test_to_pandas_dtype_decimal_float():
     expectations = {
         (8, 4): np.float16,
         (20, 10): np.float32,
@@ -84,10 +87,10 @@
         (500, 250): np.float128,
         (None, None): np.float64,
     }
-    for (
-        fp_total_bits,
-        fp_significand_bits,
-    ), expected_pandas_type in expectations.items():
+
+    @pytest.mark.parametrize("expectation", expectations.items())
+    def test_to_pandas_dtype_decimal_float(self, expectation):
+        (fp_total_bits, fp_significand_bits), expected_pandas_type = expectation
         constraints = RecordsSchemaFieldDecimalConstraints(
             required=False,
             unique=None,
@@ -96,10 +99,10 @@
             fp_total_bits=fp_total_bits,
             fp_significand_bits=fp_significand_bits,
         )
-        yield check_dtype, "decimal", constraints, expected_pandas_type
+        check_dtype("decimal", constraints, expected_pandas_type)


-def test_to_pandas_dtype_misc():
+class Test_to_pandas_dtype_misc():
     expectations = {
         "boolean": np.bool_,
         "string": np.object_,
@@ -108,8 +111,11 @@
         "datetimetz": "datetime64[ns, UTC]",
         "time": np.object_,
     }
-    for field_type, expected_pandas_type in expectations.items():
-        yield check_dtype, field_type, None, expected_pandas_type
+
+    @pytest.mark.parametrize("expectation", expectations.items())
+    def test_to_pandas_dtype_misc(self, expectation):
+        field_type, expected_pandas_type = expectation
+        check_dtype(field_type, None, expected_pandas_type)


 def test_to_pandas_dtype_fixed_precision_():
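The test_dtype.py rewrite replaces nose-style yield tests, which pytest 4+ no longer collects, with parametrized methods: each expectations table stays as a class attribute and `@pytest.mark.parametrize` fans its `.items()` out into individual test cases. A self-contained sketch of the same pattern (names here are illustrative, not from the repo):

```python
import pytest


class TestSquare:
    # Each (input, expected) pair becomes its own collected test case,
    # so one failing combination no longer aborts the rest of the table.
    expectations = {
        2: 4,
        -3: 9,
        0: 0,
    }

    @pytest.mark.parametrize("expectation", expectations.items())
    def test_square(self, expectation):
        value, expected = expectation
        assert value ** 2 == expected
```

Parametrizing over `.items()` keeps the diff small; splitting the pair into separate argnames or passing `ids=` would give more readable test IDs at the cost of a larger rewrite.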
diff --git a/tests/integration/records/expected_column_types.py b/tests/integration/records/expected_column_types.py
index c3a2070db..ba23ffbed 100644
--- a/tests/integration/records/expected_column_types.py
+++ b/tests/integration/records/expected_column_types.py
@@ -9,12 +9,11 @@
     'redshift': [
         'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)',
         'VARCHAR(3)', 'VARCHAR(111)', 'DATE', 'VARCHAR(8)',
-        'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMPTZ'
+        'TIMESTAMP', 'TIMESTAMPTZ'
     ],
     'postgresql': [
         'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)',
-        'VARCHAR(3)', 'VARCHAR(111)', 'DATE', 'TIME WITHOUT TIME ZONE',
-        'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMP WITH TIME ZONE'
+        'VARCHAR(3)', 'VARCHAR(111)', 'DATE', 'TIME', 'TIMESTAMP', 'TIMESTAMP'
     ],
     'bigquery': [
         'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)',
@@ -28,9 +27,8 @@
 expected_df_loaded_database_column_types = {
     'postgresql': [
-        'BIGINT', 'VARCHAR(12)', 'VARCHAR(12)', 'VARCHAR(4)', 'VARCHAR(4)',
-        'VARCHAR(12)', 'VARCHAR(444)', 'DATE', 'TIME WITHOUT TIME ZONE',
-        'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMP WITH TIME ZONE'
+        'BIGINT', 'VARCHAR(12)', 'VARCHAR(12)', 'VARCHAR(4)', 'VARCHAR(4)', 'VARCHAR(12)',
+        'VARCHAR(444)', 'DATE', 'TIME', 'TIMESTAMP', 'TIMESTAMP'
     ],
     'mysql': [
         'BIGINT(20)', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)',
@@ -44,7 +42,7 @@
     'redshift': [
         'BIGINT', 'VARCHAR(12)', 'VARCHAR(12)', 'VARCHAR(4)', 'VARCHAR(4)',
         'VARCHAR(12)', 'VARCHAR(444)', 'DATE', 'VARCHAR(8)',
-        'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMPTZ'
+        'TIMESTAMP', 'TIMESTAMPTZ'
     ],
     'bigquery': [
         'INTEGER', 'VARCHAR(12)', 'VARCHAR(12)', 'VARCHAR(4)', 'VARCHAR(4)',
diff --git a/tests/integration/records/single_db/numeric_expectations.py b/tests/integration/records/single_db/numeric_expectations.py
index 779167b62..a2403d055 100644
--- a/tests/integration/records/single_db/numeric_expectations.py
+++ b/tests/integration/records/single_db/numeric_expectations.py
@@ -120,11 +120,11 @@
         'uint64': 'NUMERIC(20, 0)',
         'float16': 'REAL',
         'float32': 'REAL',
-        'float64': 'DOUBLE PRECISION',
-        'float128': 'DOUBLE PRECISION',  # Redshift doesn't support >float64
+        'float64': 'DOUBLE_PRECISION',
+        'float128': 'DOUBLE_PRECISION',  # Redshift doesn't support >float64
         'fixed_6_2': 'NUMERIC(6, 2)',
         'fixed_38_9': 'NUMERIC(38, 9)',
-        'fixed_100_4': 'DOUBLE PRECISION'  # Redshift doesn't support fixed precision > 38
+        'fixed_100_4': 'DOUBLE_PRECISION'  # Redshift doesn't support fixed precision > 38
     },
     'vertica': {
         'int8': 'INTEGER',
@@ -180,8 +180,8 @@
         'uint64': 'NUMERIC(20, 0)',
         'float16': 'REAL',
         'float32': 'REAL',
-        'float64': 'DOUBLE PRECISION',
-        'float128': 'DOUBLE PRECISION',  # Postgres doesn't support >float64
+        'float64': 'DOUBLE_PRECISION',
+        'float128': 'DOUBLE_PRECISION',  # Postgres doesn't support >float64
         'fixed_6_2': 'NUMERIC(6, 2)',
         'fixed_38_9': 'NUMERIC(38, 9)',
         'fixed_100_4': 'NUMERIC(100, 4)',
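The `'DOUBLE PRECISION'` → `'DOUBLE_PRECISION'` edits are not a SQL change: `DOUBLE_PRECISION` is the SQLAlchemy type *class* name, while `DOUBLE PRECISION` is the DDL it compiles to, and after the SQLAlchemy unpinning these expectation tables evidently compare against the former. A small sketch of the distinction, assuming the tests derive the string from the reflected type object:

```python
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import DOUBLE_PRECISION

col_type = DOUBLE_PRECISION()

# The type class name, as repr()-based expectation strings see it:
print(type(col_type).__name__)                          # DOUBLE_PRECISION

# The compiled DDL, as it would land in CREATE TABLE:
print(col_type.compile(dialect=postgresql.dialect()))   # DOUBLE PRECISION
```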
diff --git a/tests/unit/airflow/hooks/test_records_hook.py b/tests/unit/airflow/hooks/test_records_hook.py
index 00d398700..c2c6e720e 100644
--- a/tests/unit/airflow/hooks/test_records_hook.py
+++ b/tests/unit/airflow/hooks/test_records_hook.py
@@ -12,9 +12,9 @@ def setUp(self):

     @patch('records_mover.airflow.hooks.records_hook.UrlResolver')
     @patch('records_mover.airflow.hooks.records_hook.Records')
-    @patch('records_mover.airflow.hooks.records_hook.AwsHook')
+    @patch('records_mover.airflow.hooks.records_hook.AwsBaseHook')
     def test_get_conn(self,
-                      mock_AwsHook,
+                      mock_AwsBaseHook,
                       mock_Records,
                       mock_UrlResolver):
         conn = self.records_hook.get_conn()
@@ -24,11 +24,11 @@

     @patch('records_mover.airflow.hooks.records_hook.UrlResolver')
     @patch('records_mover.airflow.hooks.records_hook.Records')
-    @patch('records_mover.airflow.hooks.records_hook.AwsHook')
+    @patch('records_mover.airflow.hooks.records_hook.AwsBaseHook')
     @patch('records_mover.airflow.hooks.records_hook.db_driver')
     def test_get_conn_invalid_s3_url(self,
                                      mock_db_driver,
-                                     mock_AwsHook,
+                                     mock_AwsBaseHook,
                                      mock_Records,
                                      mock_UrlResolver):
         records_hook = RecordsHook(s3_temp_base_url='foo',
diff --git a/tests/unit/airflow/test_google_cloud_credentials_hook.py b/tests/unit/airflow/test_google_cloud_credentials_hook.py
index 4bf012ce4..dc0c1d8c4 100644
--- a/tests/unit/airflow/test_google_cloud_credentials_hook.py
+++ b/tests/unit/airflow/test_google_cloud_credentials_hook.py
@@ -1,4 +1,4 @@
-from airflow.contrib.hooks.gcp_api_base_hook import GoogleCloudBaseHook
+from airflow.providers.google.common.hooks.base_google import GoogleBaseHook
 from records_mover.airflow.hooks.google_cloud_credentials_hook import GoogleCloudCredentialsHook
 from mock import Mock
 import unittest
@@ -7,7 +7,7 @@ class TestGoogleCloudCredentialsHook(unittest.TestCase):
     def test_get_conn(self):
         mock_init = Mock('__init__')
-        GoogleCloudBaseHook.__init__ = mock_init
+        GoogleBaseHook.__init__ = mock_init
         mock_init.return_value = None
         hook = GoogleCloudCredentialsHook()
         mock_get_credentials = Mock('get_credentials')
diff --git a/tests/unit/airflow/test_records_hook.py b/tests/unit/airflow/test_records_hook.py
index 9c2b07ca4..7a124b943 100644
--- a/tests/unit/airflow/test_records_hook.py
+++ b/tests/unit/airflow/test_records_hook.py
@@ -4,11 +4,11 @@


 class TestRecordsHook(unittest.TestCase):
-    @patch('records_mover.airflow.hooks.records_hook.AwsHook')
+    @patch('records_mover.airflow.hooks.records_hook.AwsBaseHook')
     def test_validate_and_prepare_target_directory(self,
-                                                   mock_AwsHook):
+                                                   mock_AwsBaseHook):
         target_url = 's3://bluelabs-fake-bucket'
-        mock_boto3_session = mock_AwsHook.return_value.get_session.return_value
+        mock_boto3_session = mock_AwsBaseHook.return_value.get_session.return_value
         mock_s3 = mock_boto3_session.client.return_value
         mock_s3.list_objects_v2.return_value.get.return_value =\
             [{
diff --git a/tests/unit/creds/test_creds_via_airflow.py b/tests/unit/creds/test_creds_via_airflow.py
index bbca3bd90..e36dbb464 100644
--- a/tests/unit/creds/test_creds_via_airflow.py
+++ b/tests/unit/creds/test_creds_via_airflow.py
@@ -11,14 +11,14 @@ def setUp(self):
                                                  default_aws_creds_name=None,
                                                  default_gcp_creds_name=None)

-    @patch('airflow.contrib.hooks.aws_hook.AwsHook')
-    def test_boto3_session(self, mock_AwsHook):
+    @patch('airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook')
+    def test_boto3_session(self, mock_AwsBaseHook):
         mock_aws_creds_name = Mock(name='aws_creds_name')
         out = self.creds_via_airflow.boto3_session(mock_aws_creds_name)
-        mock_AwsHook.assert_called_with(mock_aws_creds_name)
-        self.assertEqual(mock_AwsHook.return_value.get_session.return_value, out)
+        mock_AwsBaseHook.assert_called_with(mock_aws_creds_name)
+        self.assertEqual(mock_AwsBaseHook.return_value.get_session.return_value, out)

-    @patch('airflow.hooks.BaseHook')
+    @patch('airflow.hooks.base.BaseHook')
     def test_db_facts_normcoredb(self, mock_BaseHook):
         mock_db_creds_name = Mock(name='db_creds_name')
         mock_conn = mock_BaseHook.get_connection.return_value
@@ -37,7 +37,7 @@ def test_db_facts_normcoredb(self, mock_BaseHook):
         }
         self.assertEqual(expected_db_facts, out)

-    @patch('airflow.hooks.BaseHook')
+    @patch('airflow.hooks.base.BaseHook')
     def test_db_facts_bigquery_serviceaccount(self, mock_BaseHook):
         mock_db_creds_name = Mock(name='db_creds_name')
         mock_conn = mock_BaseHook.get_connection.return_value
diff --git a/tests/unit/test_records_hook.py b/tests/unit/test_records_hook.py
index b98affed7..5339067e1 100644
--- a/tests/unit/test_records_hook.py
+++ b/tests/unit/test_records_hook.py
@@ -12,9 +12,9 @@ class TestRecordsHook(unittest.TestCase):
     @patch('records_mover.airflow.hooks.records_hook.Records')
     @patch('records_mover.airflow.hooks.records_hook.db_driver')
     @patch('records_mover.airflow.hooks.records_hook.UrlResolver')
-    @patch('records_mover.airflow.hooks.records_hook.AwsHook')
+    @patch('records_mover.airflow.hooks.records_hook.AwsBaseHook')
     def test_get_conn(self,
-                      mock_AwsHook,
+                      mock_AwsBaseHook,
                       mock_UrlResolver,
                       mock_db_driver,
                       mock_Records):
diff --git a/types/stubs/airflow/contrib/hooks/aws_hook/__init__.pyi b/types/stubs/airflow/contrib/hooks/aws_hook/__init__.pyi
index 972293a02..df7004904 100644
--- a/types/stubs/airflow/contrib/hooks/aws_hook/__init__.pyi
+++ b/types/stubs/airflow/contrib/hooks/aws_hook/__init__.pyi
@@ -1,10 +1,10 @@
 import boto3
 from typing import Optional
-from airflow.hooks import BaseHook
+from airflow.hooks.base import BaseHook


 # https://github.com/apache/airflow/blob/master/airflow/contrib/hooks/aws_hook.py
-class AwsHook(BaseHook):
+class AwsBaseHook(BaseHook):
     def __init__(self, conn_id: str):
         ...
diff --git a/types/stubs/airflow/contrib/hooks/gcp_api_base_hook/__init__.pyi b/types/stubs/airflow/contrib/hooks/gcp_api_base_hook/__init__.pyi
index 4fd919ecb..11818e5d4 100644
--- a/types/stubs/airflow/contrib/hooks/gcp_api_base_hook/__init__.pyi
+++ b/types/stubs/airflow/contrib/hooks/gcp_api_base_hook/__init__.pyi
@@ -4,7 +4,7 @@ if TYPE_CHECKING:
     import google.auth.credentials  # noqa


-class GoogleCloudBaseHook:
+class GoogleBaseHook:
     def __init__(self, gcp_conn_id: str):
         ...
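One detail in the test updates above is that the patch target tracks where the name is *looked up*, not where it is defined. records_hook.py imports `AwsBaseHook` at module level, so its tests patch `records_mover.airflow.hooks.records_hook.AwsBaseHook`; creds_via_airflow.py imports inside the method body, so its test patches the defining module `airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook`. A minimal sketch of the distinction (`mymodule` is hypothetical):

```python
from unittest.mock import patch

# Case 1: `from airflow.providers... import AwsBaseHook` at the top of
# mymodule.py binds the name into mymodule's namespace, so that binding
# is what must be patched.
with patch('mymodule.AwsBaseHook') as mock_hook:
    mock_hook.return_value.get_session.return_value = 'fake-session'

# Case 2: an import inside the function under test resolves at call time
# against the defining module, so the patch goes there instead.
with patch('airflow.providers.amazon.aws.hooks.base_aws.AwsBaseHook') as mock_hook:
    mock_hook.return_value.get_session.return_value = 'fake-session'
```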
diff --git a/types/stubs/logging/config.pyi b/types/stubs/logging/config.pyi
index ce7efc77f..809da3b65 100644
--- a/types/stubs/logging/config.pyi
+++ b/types/stubs/logging/config.pyi
@@ -28,7 +28,7 @@ class ConvertingDict(dict, ConvertingMixin):

 class ConvertingList(list, ConvertingMixin):
     def __getitem__(self, key: Any): ...
-    def pop(self, idx: int = ...): ...
+    def pop(self, idx: int = ...): ...  # type: ignore[override] # noqa: F821

 class ConvertingTuple(tuple, ConvertingMixin):
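The added `# type: ignore[override]` is the narrow form of the suppression, silencing only that error code. The complaint here is likely that typing the index parameter as plain `int` is stricter than the `SupportsIndex` that recent typeshed's `list.pop` accepts, which mypy treats as an unsafe, contravariance-violating override. A hypothetical illustration of the same error class:

```python
from typing import Any


class LoggingList(list):
    # Without the narrow ignore, mypy reports error code [override] here:
    # list.pop accepts any SupportsIndex, and narrowing that parameter to
    # int breaks the rule that overrides may only widen parameter types.
    def pop(self, idx: int = -1) -> Any:  # type: ignore[override]
        print(f"popping index {idx}")
        return super().pop(idx)
```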