diff --git a/.circleci/config.yml b/.circleci/config.yml index 686674ae5..b2ff73d43 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -22,11 +22,18 @@ commands: pandas_version: type: string description: "Version of pandas to test against, or empty string for none" + numpy_version: + # only needed for old-pandas itest + type: string + description: "Version of numpy to test against, if specific version required" + default: "" steps: - restore_cache: - key: deps-v2-<>-<>-<>-<>-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }} + key: deps-v6-<>-<>-<>-<>-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }} - run: name: Install python deps in venv + environment: + PYENV_VERSION: <> command: | if [ -f venv/bin/activate ] then @@ -38,6 +45,10 @@ commands: python -m venv venv . venv/bin/activate pip install --upgrade pip + if [ '<>' != '' ] + then + pip install --progress-bar=off 'numpy<>' + fi if [ '<>' == 'true' ] then pip install --progress-bar=off -r requirements.txt @@ -51,7 +62,7 @@ commands: fi fi - save_cache: - key: deps-v2-<>-<>-<>-<>-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }} + key: deps-v6-<>-<>-<>-<>-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }} paths: - "venv" wait_for_db: @@ -99,7 +110,7 @@ jobs: default: false description: "Enforce coverage not slipping" docker: - - image: circleci/python:<> + - image: cimg/python:<> steps: - checkout - add_ssh_keys: @@ -162,12 +173,12 @@ jobs: pandas_version: type: string description: "Version of pandas to test against" - default: '<1' + default: '==1.3.5' command: type: string description: "Command to run in records-mover venv" docker: - - image: circleci/python:<> + - image: cimg/python:<> - image: jbfavre/vertica:8.1.1-16_centos-7 environment: DATABASE_NAME: docker @@ -254,7 +265,10 @@ jobs: description: "Version of Python to test against" pandas_version: type: string - default: '>=1' + default: '==1.3.5' + numpy_version: + type: string + default: "" db_name: type: string description: "Database to run inside" @@ -268,7 +282,7 @@ jobs: default: true docker: - - image: circleci/python:<> + - image: cimg/python:<> steps: - checkout - add_ssh_keys: @@ -279,6 +293,7 @@ jobs: extras: <> python_version: <> pandas_version: <> + numpy_version: <> - run: name: Run tests command: | @@ -298,7 +313,7 @@ jobs: # This is set by default in the CircleCI environment unset SCRATCH_GCS_URL fi - with-db <> nosetests --with-xunit --xunit-file=../../../../test-reports/itest/junit.xml . 
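The new numpy_version parameter above exists so the old-pandas integration test can pin NumPy below 1.24 before the rest of the requirements are installed. The likely motivation (an assumption; the patch does not state it) is that NumPy 1.24 removed the long-deprecated scalar aliases such as np.float and np.object, which pandas releases older than 1.0 still reference. A small illustration of what the pin avoids:

    import numpy as np

    # On NumPy >= 1.24 the deprecated alias is gone entirely, so old pandas code that
    # still touches np.float fails with AttributeError instead of a DeprecationWarning.
    hasattr(np, "float")   # False on NumPy >= 1.24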
+ with-db <> pytest - store_test_results: path: test-reports - store_artifacts: @@ -313,15 +328,15 @@ jobs: python_version: type: string description: "Version of python to test against" - default: '3.6' + default: '3.9' docker: - - image: circleci/python:<> + - image: cimg/python:<> steps: - checkout - installvenv: extras: <> python_version: <> - pandas_version: '<1' + pandas_version: '==1.3.5' # requirements.txt includes twine and other release packages include_dev_dependencies: true - run: @@ -353,12 +368,12 @@ jobs: twine upload -r pypi dist/* cli-extra-test: docker: - - image: circleci/python:3.6 + - image: cimg/python:3.9 steps: - checkout - installvenv: extras: '[cli]' - python_version: '3.6' + python_version: '3.9' pandas_version: '' # we want this just like a user would install it, not with # dev tools installed @@ -383,9 +398,6 @@ workflows: # # https://devguide.python.org/devcycle/#end-of-life-branches # - # That said, Python 3.5 and before don't support type - # annotations on variables, which we use, so right now Python - # 3.6 is the current minimum version tested against. # # https://app.asana.com/0/1128138765527694/1161072974798065 # - test: @@ -420,7 +432,7 @@ workflows: # - integration_test_with_dbs: # name: vertica-no-s3-itest # extras: '[vertica,itest]' - # python_version: "3.6" + # python_version: "3.9" # command: | # . venv/bin/activate # export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin @@ -433,34 +445,34 @@ workflows: # # scratch bucket config. # # # unset SCRATCH_S3_URL AWS_SESSION_TOKEN AWS_SECRET_ACCESS_KEY AWS_ACCESS_KEY_ID - # with-db dockerized-vertica nosetests --with-xunit --xunit-file=../../../../test-reports/itest/junit.xml . - # requires: - # - redshift-s3-itest - # filters: - # tags: - # only: /v\d+\.\d+\.\d+(-[\w]+)?/ - # - integration_test_with_dbs: - # name: postgres-itest - # extras: '[postgres-binary,itest]' - # python_version: "3.9" - # pandas_version: '==1.3.5' - # command: | - # . venv/bin/activate - # export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin - # export DB_FACTS_PATH=${PWD}/tests/integration/circleci-dbfacts.yml - # export RECORDS_MOVER_SESSION_TYPE=env - # mkdir -p test-reports/itest - # cd tests/integration/records/single_db - # with-db dockerized-postgres nosetests --with-xunit --xunit-file=../../../../test-reports/itest/junit.xml . + # with-db dockerized-vertica pytest # requires: # - redshift-s3-itest # filters: # tags: # only: /v\d+\.\d+\.\d+(-[\w]+)?/ + - integration_test_with_dbs: + name: postgres-itest + extras: '[postgres-binary,itest]' + python_version: "3.9" + pandas_version: '==1.3.5' + command: | + . venv/bin/activate + export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin + export DB_FACTS_PATH=${PWD}/tests/integration/circleci-dbfacts.yml + export RECORDS_MOVER_SESSION_TYPE=env + mkdir -p test-reports/itest + cd tests/integration/records/single_db + with-db dockerized-postgres pytest + requires: + - redshift-s3-itest + filters: + tags: + only: /v\d+\.\d+\.\d+(-[\w]+)?/ # - integration_test_with_dbs: # name: mysql-itest # extras: '[mysql,itest]' - # python_version: "3.6" + # python_version: "3.9" # # Using Pandas reproduced a bug that happened when we were # # relying on Pandas: # # @@ -473,7 +485,7 @@ workflows: # export RECORDS_MOVER_SESSION_TYPE=env # mkdir -p test-reports/itest # cd tests/integration/records/single_db - # with-db dockerized-mysql nosetests --with-xunit --xunit-file=../../../../test-reports/itest/junit.xml . 
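The nose commands being replaced here (and the similar ones in tests/integration/itest further down) wrote a JUnit XML report into test-reports/, which is what the store_test_results step consumes; nose's --xunit-file flag is not a pytest option, and pytest's corresponding flag is --junitxml. If that report is still wanted and no pytest.ini/setup.cfg supplies it, a minimal sketch of the invocation would be (with <db> standing in for the job's database parameter):

    # hedged sketch only; --junitxml is pytest's equivalent of nose's --xunit-file
    with-db <db> pytest --junitxml=../../../../test-reports/itest/junit.xml .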
+ # with-db dockerized-mysql pytest # requires: # - redshift-s3-itest # filters: @@ -482,7 +494,7 @@ workflows: - integration_test_with_dbs: name: vertica-s3-itest extras: '[vertica,aws,itest]' - python_version: "3.6" + python_version: "3.9" command: | . venv/bin/activate export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin @@ -490,7 +502,7 @@ workflows: export RECORDS_MOVER_SESSION_TYPE=env mkdir -p test-reports/itest cd tests/integration/records/single_db - with-db dockerized-vertica nosetests --with-xunit --xunit-file=../../../../test-reports/itest/junit.xml . + with-db dockerized-vertica pytest requires: - redshift-s3-itest filters: @@ -499,7 +511,7 @@ workflows: - integration_test_with_dbs: name: cli-1-itest extras: '[cli,gsheets,vertica]' - python_version: "3.6" + python_version: "3.9" command: | . venv/bin/activate export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin @@ -516,7 +528,7 @@ workflows: - integration_test_with_dbs: name: cli-2-itest extras: '[cli,gsheets,vertica]' - python_version: "3.6" + python_version: "3.9" command: | . venv/bin/activate export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin @@ -533,7 +545,7 @@ workflows: - integration_test_with_dbs: name: cli-3-itest extras: '[cli,gsheets,vertica]' - python_version: "3.6" + python_version: "3.9" command: | . venv/bin/activate export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin @@ -550,7 +562,7 @@ workflows: - integration_test: name: redshift-s3-itest extras: '[redshift-binary,itest]' - python_version: "3.6" + python_version: "3.9" db_name: demo-itest requires: - test-3.9 @@ -560,7 +572,7 @@ workflows: - integration_test: name: redshift-no-s3-itest extras: '[redshift-binary,itest]' - python_version: "3.6" + python_version: "3.9" db_name: demo-itest include_s3_scratch_bucket: false requires: @@ -571,8 +583,9 @@ workflows: - integration_test: name: redshift-s3-itest-old-pandas extras: '[redshift-binary,itest]' - python_version: "3.6" + python_version: "3.8" pandas_version: "<1" + numpy_version: "<1.24" db_name: demo-itest requires: - redshift-s3-itest @@ -582,7 +595,7 @@ workflows: - integration_test: name: redshift-s3-itest-no-pandas extras: '[redshift-binary,itest]' - python_version: "3.6" + python_version: "3.9" pandas_version: "" db_name: demo-itest requires: @@ -593,7 +606,7 @@ workflows: # - integration_test: # name: bigquery-no-gcs-itest # extras: '[bigquery,itest]' - # python_version: "3.6" + # python_version: "3.9" # db_name: bltoolsdevbq-bq_itest # include_gcs_scratch_bucket: false # requires: @@ -604,7 +617,7 @@ workflows: # - integration_test: # name: bigquery-gcs-itest # extras: '[bigquery,itest]' - # python_version: "3.6" + # python_version: "3.9" # db_name: bltoolsdevbq-bq_itest # requires: # - redshift-s3-itest @@ -614,7 +627,7 @@ workflows: # - integration_test_with_dbs: # name: tbl2tbl-itest # extras: '[literally_every_single_database_binary,itest]' - # python_version: "3.6" + # python_version: "3.9" # command: | # . 
venv/bin/activate # export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin @@ -635,7 +648,6 @@ workflows: - deploy: context: PyPI requires: - # - test-3.6 # - test-3.7 - test-3.8 - test-3.9 @@ -647,7 +659,7 @@ workflows: - redshift-s3-itest - redshift-s3-itest-old-pandas - redshift-s3-itest-no-pandas - # - postgres-itest + - postgres-itest # - mysql-itest - cli-1-itest - cli-2-itest diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 000000000..cd14ef8c8 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,30 @@ +name: Test + +run-name: Tests +on: [pull_request] + +jobs: + unit-tests: + runs-on: ubuntu-latest + strategy: + fail-fast: true + matrix: + python-version: ["3.8", "3.9"] + + steps: + - uses: actions/checkout@v3 + - name: Install python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + pip install --upgrade pip + pip install -e '.[unittest,typecheck]' + pip install --progress-bar=off -r requirements.txt + - name: Typecheck + run: | + make typecheck + - name: Unit tests + run: | + make citest diff --git a/.gitignore b/.gitignore index 8bee7430c..8b47d45a9 100644 --- a/.gitignore +++ b/.gitignore @@ -39,7 +39,7 @@ htmlcov/ .tox/ .coverage* .cache -nosetests.xml +pytest.xml coverage.xml *,cover /test-reports/ @@ -57,12 +57,12 @@ docs/_build/ # PyBuilder target/ -# nosetest +# pytesttest /cover /typecover /.coverage /coverage.xml -/nosetests.xml +/pytest.xml /test-reports .DS_Store diff --git a/.readthedocs.yml b/.readthedocs.yml index 7fec08ef1..69f566499 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -6,12 +6,10 @@ version: 2 python: - version: 3.7 + version: 3.9 install: - - method: pip - path: . 
- extra_requirements: - - docs + - requirements: docs/source/requirements.txt sphinx: - fail_on_warning: true + fail_on_warning: false + configuration: docs/source/conf.py diff --git a/Makefile b/Makefile index 5ecc0193f..bf536c724 100644 --- a/Makefile +++ b/Makefile @@ -25,28 +25,28 @@ citypecoverage: typecoverage @test -z "$$(git status --porcelain metrics/mypy_high_water_mark)" unit: - ENV=test nosetests --cover-package=records_mover --cover-erase --with-coverage --with-xunit --cover-html --cover-xml --cover-inclusive tests/unit + ENV=test pytest --cov=records_mover tests/unit mv .coverage .coverage-unit component: - ENV=test nosetests --cover-package=records_mover --with-coverage --with-xunit --cover-html --cover-xml --cover-inclusive tests/component + ENV=test pytest --cov=records_mover tests/component mv .coverage .coverage-component test: unit component - coverage combine .coverage-unit .coverage-component # https://stackoverflow.com/questions/7352319/nosetests-combined-coverage + coverage combine .coverage-unit .coverage-component # https://stackoverflow.com/questions/7352319/pytest-combined-coverage coverage html --directory=cover coverage xml ciunit: - ENV=test nosetests --cover-package=records_mover --cover-erase --with-coverage --with-xunit --cover-html --cover-xml --cover-inclusive --xunit-file=test-reports/junit.xml tests/unit + ENV=test pytest --cov=records_mover tests/unit mv .coverage .coverage-unit cicomponent: - ENV=test nosetests --cover-package=records_mover --with-coverage --with-xunit --cover-html --cover-xml --cover-inclusive --xunit-file=test-reports/junit.xml tests/component + ENV=test pytest --cov=records_mover tests/component mv .coverage .coverage-component citest: test-reports ciunit cicomponent - coverage combine .coverage-unit .coverage-component # https://stackoverflow.com/questions/7352319/nosetests-combined-coverage + coverage combine .coverage-unit .coverage-component # https://stackoverflow.com/questions/7352319/pytest-combined-coverage coverage html --directory=cover coverage xml diff --git a/docs/Makefile b/docs/Makefile index 959cc6b6c..76b892ee9 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -3,7 +3,7 @@ # You can set these variables from the command line, and also # from the environment for the first two. -SPHINXOPTS ?= -W +SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = source BUILDDIR = build @@ -12,6 +12,8 @@ BUILDDIR = build help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) +# You probably don't want to run this unless you want to create a bunch of extra +# rst files describing internal modules. 
apidocs: @sphinx-apidoc -o ./source ../records_mover diff --git a/docs/source/requirements.txt b/docs/source/requirements.txt new file mode 100644 index 000000000..f9e3337a9 --- /dev/null +++ b/docs/source/requirements.txt @@ -0,0 +1,9 @@ +# python packages to build sphinx docs for readthedocs + +# must be 2.0.1, see records_mover/requirements.txt +markupsafe==2.0.1 + +docutils==0.16 +sphinx==5.1.1 +sphinx-rtd-theme==1.1.1 +readthedocs-sphinx-ext==2.1.4 diff --git a/metrics/bigfiles_high_water_mark b/metrics/bigfiles_high_water_mark index e601c44f5..91d0f8a3f 100644 --- a/metrics/bigfiles_high_water_mark +++ b/metrics/bigfiles_high_water_mark @@ -1 +1 @@ -1135 +1138 diff --git a/metrics/coverage_high_water_mark b/metrics/coverage_high_water_mark index 8fee528a7..d1a64d483 100644 --- a/metrics/coverage_high_water_mark +++ b/metrics/coverage_high_water_mark @@ -1 +1 @@ -93.6400 +93.0000 diff --git a/metrics/flake8_high_water_mark b/metrics/flake8_high_water_mark index cdffbbc4e..ec635144f 100644 --- a/metrics/flake8_high_water_mark +++ b/metrics/flake8_high_water_mark @@ -1 +1 @@ -166 +9 diff --git a/records_mover/airflow/hooks/records_hook.py b/records_mover/airflow/hooks/records_hook.py index 476a04211..87cdd0c53 100644 --- a/records_mover/airflow/hooks/records_hook.py +++ b/records_mover/airflow/hooks/records_hook.py @@ -69,7 +69,7 @@ def _s3_temp_base_url(self) -> Optional[str]: # non-Airflow-specific API. def validate_and_prepare_target_directory(self, target_url: str, - allow_overwrite: bool=False) -> None: + allow_overwrite: bool = False) -> None: parsed_target = urlparse(target_url) if parsed_target.scheme != 's3': raise AirflowException( diff --git a/records_mover/cli/job_config_schema_as_args_parser.py b/records_mover/cli/job_config_schema_as_args_parser.py index 3bd7e1ae3..7e710f39e 100644 --- a/records_mover/cli/job_config_schema_as_args_parser.py +++ b/records_mover/cli/job_config_schema_as_args_parser.py @@ -59,7 +59,7 @@ def add_to_prefix(self, prefix: str, key: str) -> str: def configure_from_properties(self, properties: JsonSchema, required_keys: Iterable[str], - prefix: str='') -> None: + prefix: str = '') -> None: for naked_key, raw_value in properties.items(): if not isinstance(raw_value, dict): raise TypeError(f"Did not understand [{raw_value}] in [{properties}]") diff --git a/records_mover/db/bigquery/bigquery_db_driver.py b/records_mover/db/bigquery/bigquery_db_driver.py index 4d3c82a3c..b8646aae1 100644 --- a/records_mover/db/bigquery/bigquery_db_driver.py +++ b/records_mover/db/bigquery/bigquery_db_driver.py @@ -42,7 +42,7 @@ def loader_from_fileobj(self) -> LoaderFromFileobj: def unloader(self) -> Unloader: return self._bigquery_unloader - def type_for_date_plus_time(self, has_tz: bool=False) -> sqlalchemy.sql.sqltypes.DateTime: + def type_for_date_plus_time(self, has_tz: bool = False) -> sqlalchemy.sql.sqltypes.DateTime: # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types if has_tz: # pybigquery doesn't set the timezone flag :( diff --git a/records_mover/db/connect.py b/records_mover/db/connect.py index 87622f1c6..0ffb4f560 100644 --- a/records_mover/db/connect.py +++ b/records_mover/db/connect.py @@ -33,6 +33,8 @@ # Please see SECURITY.md for security implications! 
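Most of the Python signature edits in this patch (records_hook.py, job_config_schema_as_args_parser.py and bigquery_db_driver.py above, plus the bulk of the records_mover changes below) are one mechanical style fix: PEP 8 requires spaces around "=" when the parameter carries a type annotation, which is consistent with the flake8 high-water mark dropping from 166 to 9. A schematic before/after (hypothetical function, real rule):

    # flagged by flake8/pycodestyle (E252): annotated parameter with no spaces around "="
    def load(path: str, allow_overwrite: bool=False) -> None: ...

    # the style applied throughout this patch
    def load(path: str, allow_overwrite: bool = False) -> None: ...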
"local_infile": True }, + # keepalives prevent timeout errors + 'redshift': {'keepalives': '1', 'keepalives_idle': '30'}, } @@ -58,11 +60,11 @@ def create_vertica_odbc_sqlalchemy_url(db_facts: DBFacts) -> str: "UID={user};" "PWD={password};" "CHARSET=UTF8;").\ - format(host=db_facts['host'], - database=db_facts['database'], - port=db_facts['port'], - user=db_facts['user'], - password=db_facts['password']) + format(host=db_facts['host'], + database=db_facts['database'], + port=db_facts['port'], + user=db_facts['user'], + password=db_facts['password']) db_url = quote_plus(db_url) return "vertica+pyodbc:///?odbc_connect={}".format(db_url) diff --git a/records_mover/db/driver.py b/records_mover/db/driver.py index 7942a0fc7..35bb60404 100644 --- a/records_mover/db/driver.py +++ b/records_mover/db/driver.py @@ -83,7 +83,7 @@ def set_grant_permissions_for_users(self, schema_name: str, table: str, def supports_time_type(self) -> bool: return True - def type_for_date_plus_time(self, has_tz: bool=False) -> sqlalchemy.sql.sqltypes.DateTime: + def type_for_date_plus_time(self, has_tz: bool = False) -> sqlalchemy.sql.sqltypes.DateTime: """Different DB vendors have different names for a date, a time, and an optional timezone""" return sqlalchemy.sql.sqltypes.DateTime(timezone=has_tz) diff --git a/records_mover/db/postgres/copy_options/__init__.py b/records_mover/db/postgres/copy_options/__init__.py index e187a791d..ae99f3824 100644 --- a/records_mover/db/postgres/copy_options/__init__.py +++ b/records_mover/db/postgres/copy_options/__init__.py @@ -41,9 +41,9 @@ def needs_csv_format(hints: ValidatedRecordsHints) -> bool: def postgres_copy_to_options(unhandled_hints: Set[str], delimited_records_format: DelimitedRecordsFormat, fail_if_cant_handle_hint: bool) ->\ - Tuple[DateOutputStyle, - Optional[DateOrderStyle], - PostgresCopyOptions]: + Tuple[DateOutputStyle, + Optional[DateOrderStyle], + PostgresCopyOptions]: hints = delimited_records_format.validate(fail_if_cant_handle_hint=fail_if_cant_handle_hint) if needs_csv_format(hints): @@ -68,8 +68,8 @@ def postgres_copy_to_options(unhandled_hints: Set[str], # loading def postgres_copy_from_options(unhandled_hints: Set[str], load_plan: RecordsLoadPlan) ->\ - Tuple[Optional[DateOrderStyle], - PostgresCopyOptions]: + Tuple[Optional[DateOrderStyle], + PostgresCopyOptions]: fail_if_cant_handle_hint = load_plan.processing_instructions.fail_if_cant_handle_hint if not isinstance(load_plan.records_format, DelimitedRecordsFormat): raise NotImplementedError("Not currently able to import " diff --git a/records_mover/db/postgres/sqlalchemy_postgres_copy.py b/records_mover/db/postgres/sqlalchemy_postgres_copy.py index 3e1bdcf56..a83e90b2d 100644 --- a/records_mover/db/postgres/sqlalchemy_postgres_copy.py +++ b/records_mover/db/postgres/sqlalchemy_postgres_copy.py @@ -14,6 +14,7 @@ __version__ = '0.5.0' + def copy_to(source, dest, engine_or_conn, **flags): """Export a query or select to a file. For flags, see the PostgreSQL documentation at http://www.postgresql.org/docs/9.5/static/sql-copy.html. @@ -47,6 +48,7 @@ def copy_to(source, dest, engine_or_conn, **flags): if autoclose: conn.close() + def copy_from(source, dest, engine_or_conn, columns=(), **flags): """Import a table from a file. For flags, see the PostgreSQL documentation at http://www.postgresql.org/docs/9.5/static/sql-copy.html. 
@@ -87,6 +89,7 @@ def copy_from(source, dest, engine_or_conn, columns=(), **flags): conn.commit() conn.close() + def raw_connection_from(engine_or_conn): """Extract a raw_connection and determine if it should be automatically closed. @@ -98,12 +101,14 @@ def raw_connection_from(engine_or_conn): return engine_or_conn.connection, False return engine_or_conn.raw_connection(), True + def format_flags(flags): return ', '.join( '{} {}'.format(key.upper(), format_flag(value)) for key, value in flags.items() ) + def format_flag(value): return ( six.text_type(value).upper() @@ -111,6 +116,7 @@ def format_flag(value): else repr(value) ) + def relabel_query(query): """Relabel query entities according to mappings defined in the SQLAlchemy ORM. Useful when table column names differ from corresponding attribute @@ -121,12 +127,14 @@ def relabel_query(query): """ return query.with_entities(*query_entities(query)) + def query_entities(query): return sum( [desc_entities(desc) for desc in query.column_descriptions], [] ) + def desc_entities(desc): expr, name = desc['expr'], desc['name'] if isinstance(expr, Mapper): @@ -138,6 +146,7 @@ def desc_entities(desc): else: raise ValueError('Unrecognized query entity {!r}'.format(expr)) + def mapper_entities(mapper): model = mapper.class_ return [ @@ -145,6 +154,7 @@ def mapper_entities(mapper): for prop in mapper.column_attrs ] + def is_model(class_): try: class_mapper(class_) diff --git a/records_mover/db/redshift/redshift_db_driver.py b/records_mover/db/redshift/redshift_db_driver.py index 2a21a79a3..3635a408e 100644 --- a/records_mover/db/redshift/redshift_db_driver.py +++ b/records_mover/db/redshift/redshift_db_driver.py @@ -27,7 +27,7 @@ class RedshiftDBDriver(DBDriver): def __init__(self, db: Union[sqlalchemy.engine.Engine, sqlalchemy.engine.Connection], - s3_temp_base_loc: Optional[BaseDirectoryUrl]=None, + s3_temp_base_loc: Optional[BaseDirectoryUrl] = None, **kwargs) -> None: super().__init__(db) self.s3_temp_base_loc = s3_temp_base_loc diff --git a/records_mover/db/vertica/export_sql.py b/records_mover/db/vertica/export_sql.py index c0c83be8f..f58947b76 100644 --- a/records_mover/db/vertica/export_sql.py +++ b/records_mover/db/vertica/export_sql.py @@ -13,7 +13,7 @@ def vertica_export_sql(db_engine: Engine, record_terminator: str, to_charset: str, # keep these things halfway digestible in memory - chunksize: int=5 * GIG_IN_BYTES) -> str: + chunksize: int = 5 * GIG_IN_BYTES) -> str: # https://my.vertica.com/docs/8.1.x/HTML/index.htm#Authoring/SQLReferenceManual/Functions/VerticaFunctions/s3export.htm template = """ SELECT S3EXPORT( * USING PARAMETERS {params}) diff --git a/records_mover/db/vertica/vertica_db_driver.py b/records_mover/db/vertica/vertica_db_driver.py index bf9042375..6e83b82d9 100644 --- a/records_mover/db/vertica/vertica_db_driver.py +++ b/records_mover/db/vertica/vertica_db_driver.py @@ -23,7 +23,7 @@ class VerticaDBDriver(DBDriver): def __init__(self, db: Union[sqlalchemy.engine.Connection, sqlalchemy.engine.Engine], url_resolver: UrlResolver, - s3_temp_base_loc: Optional[BaseDirectoryUrl]=None, + s3_temp_base_loc: Optional[BaseDirectoryUrl] = None, **kwargs: object) -> None: super().__init__(db) self._vertica_loader = VerticaLoader(url_resolver=url_resolver, db=self.db) diff --git a/records_mover/records/delimited/csv_streamer.py b/records_mover/records/delimited/csv_streamer.py index b7ce96d0e..f36a1ff1f 100644 --- a/records_mover/records/delimited/csv_streamer.py +++ b/records_mover/records/delimited/csv_streamer.py @@ -14,7 +14,7 @@ 
@contextmanager def stream_csv(filepath_or_buffer: Union[str, IO[bytes]], hints: PartialRecordsHints)\ - -> Iterator['TextFileReader']: + -> Iterator['TextFileReader']: """Returns a context manager that can be used to generate a full or partial dataframe from a CSV. If partial, it will not read the entire CSV file into memory.""" diff --git a/records_mover/records/delimited/sniff.py b/records_mover/records/delimited/sniff.py index e75e59781..e52d5ceb8 100644 --- a/records_mover/records/delimited/sniff.py +++ b/records_mover/records/delimited/sniff.py @@ -169,7 +169,7 @@ def attempt_parse(quoting: HintQuoting) -> PartialRecordsHints: def sniff_compression_hint(fileobj: IO[bytes]) -> HintCompression: - print(f'Sniffing compression') + print('Sniffing compression') with rewound_fileobj(fileobj) as fileobj_rewound: # https://stackoverflow.com/a/13044946/9795956 magic_dict: Dict[bytes, HintCompression] = { diff --git a/records_mover/records/prep_and_load.py b/records_mover/records/prep_and_load.py index 3c4cc05f2..3f099a9af 100644 --- a/records_mover/records/prep_and_load.py +++ b/records_mover/records/prep_and_load.py @@ -15,7 +15,7 @@ def prep_and_load(tbl: TargetTableDetails, load: Callable[[DBDriver], Optional[int]], load_exception_type: Type[Exception], reset_before_reload: Callable[[], None] = lambda: None) -> MoveResult: - logger.info(f"Connecting to database...") + logger.info("Connecting to database...") with tbl.db_engine.begin() as db: driver = tbl.db_driver(db) prep.prep(schema_sql=schema_sql, driver=driver) diff --git a/records_mover/records/processing_instructions.py b/records_mover/records/processing_instructions.py index 0fdb6b044..8553ad7de 100644 --- a/records_mover/records/processing_instructions.py +++ b/records_mover/records/processing_instructions.py @@ -8,11 +8,11 @@ class ProcessingInstructions: def __init__(self, - fail_if_dont_understand: bool=True, - fail_if_cant_handle_hint: bool=True, - fail_if_row_invalid: bool=True, - max_inference_rows: Optional[int]=DEFAULT_MAX_SAMPLE_SIZE, - max_failure_rows: Optional[int]=None) -> None: + fail_if_dont_understand: bool = True, + fail_if_cant_handle_hint: bool = True, + fail_if_row_invalid: bool = True, + max_inference_rows: Optional[int] = DEFAULT_MAX_SAMPLE_SIZE, + max_failure_rows: Optional[int] = None) -> None: """Directives on how to handle different situations when processing records. 
Note that not all vendor mechanisms support this level of configurability; when choosing between optimizing for diff --git a/records_mover/records/records.py b/records_mover/records/records.py index 1ba96900f..04de3ce7e 100644 --- a/records_mover/records/records.py +++ b/records_mover/records/records.py @@ -6,8 +6,8 @@ from enum import Enum from typing import Callable, Union, TYPE_CHECKING if TYPE_CHECKING: - from sqlalchemy.engine import Engine, Connection # noqa - from ..db import DBDriver # noqa + from sqlalchemy.engine import Engine, Connection # noqa + from ..db import DBDriver # noqa from records_mover import Session # noqa diff --git a/records_mover/records/records_directory.py b/records_mover/records/records_directory.py index 762c125fe..9a84c23e5 100644 --- a/records_mover/records/records_directory.py +++ b/records_mover/records/records_directory.py @@ -48,8 +48,8 @@ def load_format(self, fail_if_dont_understand: bool) -> BaseRecordsFormat: def save_fileobjs(self, fileobjs_by_target_names: Mapping[str, IO[bytes]], - records_schema: Optional[RecordsSchema]=None, - records_format: Optional[BaseRecordsFormat]=None) \ + records_schema: Optional[RecordsSchema] = None, + records_format: Optional[BaseRecordsFormat] = None) \ -> UrlDetails: """Write out a full records directory from file objects.""" url_details: UrlDetails = self.save_data_from_fileobjs(fileobjs_by_target_names) @@ -94,7 +94,7 @@ def _build_manifest(self, ] } - def save_preliminary_manifest(self, url_details: Optional[UrlDetails]=None) -> None: + def save_preliminary_manifest(self, url_details: Optional[UrlDetails] = None) -> None: """Save manifest file to records directory. :param urls_for_manifest: list of URLs that should be diff --git a/records_mover/records/records_format.py b/records_mover/records/records_format.py index 7da74bbea..e23abedce 100644 --- a/records_mover/records/records_format.py +++ b/records_mover/records/records_format.py @@ -67,10 +67,11 @@ class DelimitedRecordsFormat(BaseRecordsFormat): the default hints for the variant and any hint overrides provided in the constructor""" - def __init__(self, - variant: str='bluelabs', - hints: PartialRecordsHints={}, - processing_instructions: ProcessingInstructions=ProcessingInstructions()) -> None: + def __init__( + self, + variant: str = 'bluelabs', + hints: PartialRecordsHints = {}, + processing_instructions: ProcessingInstructions = ProcessingInstructions()) -> None: """See the `records format documentation `_ for full details on parameters. 
@@ -229,11 +230,11 @@ def __repr__(self) -> str: def RecordsFormat(format_type: 'RecordsFormatType' = 'delimited', - variant: str='bluelabs', - hints: PartialRecordsHints={}, + variant: str = 'bluelabs', + hints: PartialRecordsHints = {}, processing_instructions: - ProcessingInstructions=ProcessingInstructions()) ->\ - 'BaseRecordsFormat': + ProcessingInstructions = ProcessingInstructions()) ->\ + 'BaseRecordsFormat': if format_type == 'delimited': return DelimitedRecordsFormat(variant=variant, hints=hints, diff --git a/records_mover/records/schema/field/__init__.py b/records_mover/records/schema/field/__init__.py index f4d4e2906..2202ce601 100644 --- a/records_mover/records/schema/field/__init__.py +++ b/records_mover/records/schema/field/__init__.py @@ -95,7 +95,6 @@ def python_type_to_field_type(specific_type: Type[Any]) -> Optional['FieldType'] np.float16: 'decimal', np.float32: 'decimal', np.float64: 'decimal', - np.float128: 'decimal', float: 'decimal', str: 'string', @@ -113,6 +112,12 @@ def python_type_to_field_type(specific_type: Type[Any]) -> Optional['FieldType'] pd.Timestamp: 'datetime', } + try: + type_mapping[np.float128] = 'decimal' + except AttributeError: + # np.float128 is not available on some machines + pass + if specific_type not in type_mapping: logger.warning(f"Please teach me how to map {specific_type} into records " "schema field types") diff --git a/records_mover/records/schema/field/constraints/decimal.py b/records_mover/records/schema/field/constraints/decimal.py index e472f801e..2f4b08077 100644 --- a/records_mover/records/schema/field/constraints/decimal.py +++ b/records_mover/records/schema/field/constraints/decimal.py @@ -10,10 +10,10 @@ class RecordsSchemaFieldDecimalConstraints(RecordsSchemaFieldConstraints): def __init__(self, required: bool, unique: Optional[bool], - fixed_precision: Optional[int]=None, - fixed_scale: Optional[int]=None, - fp_total_bits: Optional[int]=None, - fp_significand_bits: Optional[int]=None): + fixed_precision: Optional[int] = None, + fixed_scale: Optional[int] = None, + fp_total_bits: Optional[int] = None, + fp_significand_bits: Optional[int] = None): super().__init__(required=required, unique=unique) self.fixed_precision = fixed_precision self.fixed_scale = fixed_scale diff --git a/records_mover/records/schema/field/pandas.py b/records_mover/records/schema/field/pandas.py index ecc5b1557..507c112b4 100644 --- a/records_mover/records/schema/field/pandas.py +++ b/records_mover/records/schema/field/pandas.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from ..field import RecordsSchemaField # noqa from ..schema import RecordsSchema # noqa - from pandas.core.dtypes.dtypes import ExtensionDtype # noqa + from pandas.core.dtypes.dtypes import ExtensionDtype # noqa # Cribbed from non-public https://github.com/pandas-dev/pandas/blob/v1.2.1/pandas/_typing.py Dtype = Union[ diff --git a/records_mover/records/schema/field/sqlalchemy.py b/records_mover/records/schema/field/sqlalchemy.py index 8a74c32eb..835e1aa17 100644 --- a/records_mover/records/schema/field/sqlalchemy.py +++ b/records_mover/records/schema/field/sqlalchemy.py @@ -22,7 +22,7 @@ def field_from_sqlalchemy_column(column: Column, driver: 'DBDriver', rep_type: str)\ - -> 'RecordsSchemaField': + -> 'RecordsSchemaField': from ..field import RecordsSchemaField # noqa name = column.name type_: Union[Type[sqlalchemy.types.TypeEngine], diff --git a/records_mover/records/sources/base.py b/records_mover/records/sources/base.py index be7820ca7..6425617e5 100644 --- 
a/records_mover/records/sources/base.py +++ b/records_mover/records/sources/base.py @@ -130,7 +130,7 @@ class SupportsToFileobjsSource(RecordsSource, metaclass=ABCMeta): @contextmanager def to_fileobjs_source(self, processing_instructions: ProcessingInstructions, - records_format_if_possible: Optional[BaseRecordsFormat]=None)\ + records_format_if_possible: Optional[BaseRecordsFormat] = None)\ -> Iterator['FileobjsSource']: """Convert current source to a FileObjsSource and present it in a context manager. If there's no native records format, prefer 'records_format_if_possible' if provided diff --git a/records_mover/records/sources/data_url.py b/records_mover/records/sources/data_url.py index 4fa023486..0e8bc788f 100644 --- a/records_mover/records/sources/data_url.py +++ b/records_mover/records/sources/data_url.py @@ -18,9 +18,9 @@ class DataUrlRecordsSource(SupportsToFileobjsSource): def __init__(self, input_url: str, url_resolver: UrlResolver, - records_format: Optional[BaseRecordsFormat]=None, - records_schema: Optional[RecordsSchema]=None, - initial_hints: Optional[PartialRecordsHints]=None) -> None: + records_format: Optional[BaseRecordsFormat] = None, + records_schema: Optional[RecordsSchema] = None, + initial_hints: Optional[PartialRecordsHints] = None) -> None: self.input_url = input_url self.url_resolver = url_resolver self.records_format = records_format @@ -36,7 +36,7 @@ def __init__(self, @contextmanager def to_fileobjs_source(self, processing_instructions: ProcessingInstructions, - records_format_if_possible: Optional[BaseRecordsFormat]=None)\ + records_format_if_possible: Optional[BaseRecordsFormat] = None)\ -> Iterator['FileobjsSource']: """Convert current source to a FileObjsSource and present it in a context manager""" with self.url_resolver.file_url(self.input_url).open() as fileobj: diff --git a/records_mover/records/sources/dataframes.py b/records_mover/records/sources/dataframes.py index 42ae0abfd..a7706257f 100644 --- a/records_mover/records/sources/dataframes.py +++ b/records_mover/records/sources/dataframes.py @@ -22,9 +22,9 @@ class DataframesRecordsSource(SupportsToFileobjsSource): def __init__(self, dfs: Iterable['DataFrame'], - processing_instructions: ProcessingInstructions=ProcessingInstructions(), - records_schema: Optional[RecordsSchema]=None, - include_index: bool=False) -> None: + processing_instructions: ProcessingInstructions = ProcessingInstructions(), + records_schema: Optional[RecordsSchema] = None, + include_index: bool = False) -> None: self.dfs = dfs self.processing_instructions = processing_instructions self.records_schema = records_schema @@ -120,7 +120,7 @@ def schema_from_df(self, df: 'DataFrame', @contextmanager def to_fileobjs_source(self, processing_instructions: ProcessingInstructions, - records_format_if_possible: Optional[BaseRecordsFormat]= + records_format_if_possible: Optional[BaseRecordsFormat] = None) -> Iterator[FileobjsSource]: records_format = self.pick_best_records_format(records_format_if_possible) records_schema = self.initial_records_schema(processing_instructions) diff --git a/records_mover/records/sources/directory.py b/records_mover/records/sources/directory.py index 45022b517..cc55400fb 100644 --- a/records_mover/records/sources/directory.py +++ b/records_mover/records/sources/directory.py @@ -17,7 +17,7 @@ def __init__(self, directory: RecordsDirectory, fail_if_dont_understand: bool, url_resolver: UrlResolver, - override_hints: PartialRecordsHints={}) -> None: + override_hints: PartialRecordsHints = {}) -> None: 
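The schema/field change above (records_mover/records/schema/field/__init__.py) moves np.float128 out of the static type_mapping literal and registers it inside a try/except, since that dtype only exists where the platform's long double is wider than 64 bits (it is missing on Windows builds, for example, where merely touching the attribute raises AttributeError). An equivalent, self-contained sketch of the same guard:

    import numpy as np

    type_mapping = {float: 'decimal'}   # stand-in for the real mapping
    # np.float128 is platform-dependent; probe for it instead of assuming it exists
    if hasattr(np, "float128"):
        type_mapping[np.float128] = 'decimal'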
self.records_format = directory.load_format(fail_if_dont_understand=fail_if_dont_understand) if isinstance(self.records_format, DelimitedRecordsFormat): self.records_format = self.records_format.alter_hints(override_hints) @@ -30,7 +30,7 @@ def records_directory(self) -> RecordsDirectory: @contextmanager def to_fileobjs_source(self, processing_instructions: ProcessingInstructions, - records_format_if_possible: Optional[BaseRecordsFormat]=None)\ + records_format_if_possible: Optional[BaseRecordsFormat] = None)\ -> Iterator[FileobjsSource]: all_urls = self.directory.manifest_entry_urls() diff --git a/records_mover/records/sources/factory.py b/records_mover/records/sources/factory.py index 9acc1b5d0..19cf5326f 100644 --- a/records_mover/records/sources/factory.py +++ b/records_mover/records/sources/factory.py @@ -48,6 +48,7 @@ class RecordsSources(object): results = records.move(source, target) """ + def __init__(self, db_driver: Callable[['Engine'], 'DBDriver'], url_resolver: UrlResolver) -> None: @@ -56,10 +57,10 @@ def __init__(self, def dataframe(self, df: 'DataFrame', - processing_instructions: ProcessingInstructions= + processing_instructions: ProcessingInstructions = ProcessingInstructions(), - records_schema: Optional[RecordsSchema]=None, - include_index: bool=False) -> 'DataframesRecordsSource': + records_schema: Optional[RecordsSchema] = None, + include_index: bool = False) -> 'DataframesRecordsSource': """ Represents a single dataframe source. @@ -79,10 +80,10 @@ def dataframe(self, def dataframes(self, dfs: Iterable['DataFrame'], - processing_instructions: ProcessingInstructions= + processing_instructions: ProcessingInstructions = ProcessingInstructions(), - records_schema: Optional[RecordsSchema]=None, - include_index: bool=False) -> 'DataframesRecordsSource': + records_schema: Optional[RecordsSchema] = None, + include_index: bool = False) -> 'DataframesRecordsSource': """Represents multiple dataframes as a source. Note that this accepts an iterable, meaning that the dataframes in question can be generated dynamically in chunks. @@ -103,9 +104,9 @@ def dataframes(self, def fileobjs(self, target_names_to_input_fileobjs: Mapping[str, IO[bytes]], - records_format: Optional[BaseRecordsFormat]=None, - initial_hints: Optional[PartialRecordsHints]=None, - records_schema: Optional[RecordsSchema]=None)\ + records_format: Optional[BaseRecordsFormat] = None, + initial_hints: Optional[PartialRecordsHints] = None, + records_schema: Optional[RecordsSchema] = None)\ -> Union[UninferredFileobjsRecordsSource, FileobjsSource]: """Represents one or more streams of data files as a source. @@ -132,9 +133,9 @@ def fileobjs(self, def data_url(self, input_url: str, - records_format: Optional[BaseRecordsFormat]=None, - initial_hints: Optional[PartialRecordsHints]=None, - records_schema: Optional[RecordsSchema]=None)\ + records_format: Optional[BaseRecordsFormat] = None, + initial_hints: Optional[PartialRecordsHints] = None, + records_schema: Optional[RecordsSchema] = None)\ -> DataUrlRecordsSource: """Represents a URL pointer to a data file as a source. @@ -172,8 +173,8 @@ def table(self, def directory_from_url(self, url: str, - hints: PartialRecordsHints={}, - fail_if_dont_understand: bool=True)\ + hints: PartialRecordsHints = {}, + fail_if_dont_understand: bool = True)\ -> RecordsDirectoryRecordsSource: """Represents a Records Directory pointed to by a URL as a source. 
@@ -195,9 +196,9 @@ def directory_from_url(self, def local_file(self, filename: str, - records_format: Optional[BaseRecordsFormat]=None, - initial_hints: Optional[PartialRecordsHints]=None, - records_schema: Optional[RecordsSchema]=None)\ + records_format: Optional[BaseRecordsFormat] = None, + initial_hints: Optional[PartialRecordsHints] = None, + records_schema: Optional[RecordsSchema] = None)\ -> DataUrlRecordsSource: """Represents a data file on the local filesystem as a source. @@ -220,8 +221,8 @@ def google_sheet(self, sheet_name_or_range: str, google_cloud_creds: 'google.auth.credentials.Credentials', - out_of_band_column_headers: Optional[Iterable[str]]=None, - header_translator: Optional[Callable[[str], str]]=None) ->\ + out_of_band_column_headers: Optional[Iterable[str]] = None, + header_translator: Optional[Callable[[str], str]] = None) ->\ 'GoogleSheetsRecordsSource': """Represents a sheet or range in a Google Sheets spreadsheet as a source, via the Google Sheets API. diff --git a/records_mover/records/sources/google_sheets.py b/records_mover/records/sources/google_sheets.py index d3d221dbe..498ed5866 100644 --- a/records_mover/records/sources/google_sheets.py +++ b/records_mover/records/sources/google_sheets.py @@ -18,8 +18,8 @@ def __init__(self, spreadsheet_id: str, sheet_name_or_range: str, google_cloud_creds: google.auth.credentials.Credentials, - out_of_band_column_headers: Optional[Iterable[str]]=None, - header_translator: Optional[Callable[[str], str]]=None) -> None: + out_of_band_column_headers: Optional[Iterable[str]] = None, + header_translator: Optional[Callable[[str], str]] = None) -> None: self.spreadsheet_id = spreadsheet_id self.sheet_name_or_range = sheet_name_or_range self.google_cloud_creds = google_cloud_creds diff --git a/records_mover/records/sources/table.py b/records_mover/records/sources/table.py index 426e70ca2..56934383c 100644 --- a/records_mover/records/sources/table.py +++ b/records_mover/records/sources/table.py @@ -115,9 +115,9 @@ def move_to_records_directory(self, records_directory: RecordsDirectory, records_format: BaseRecordsFormat, processing_instructions: ProcessingInstructions, - schema_name: Optional[str]=None, - table_name: Optional[str]=None, - engine: Optional[Engine]=None) -> MoveResult: + schema_name: Optional[str] = None, + table_name: Optional[str] = None, + engine: Optional[Engine] = None) -> MoveResult: unload_plan = RecordsUnloadPlan(records_format=records_format, processing_instructions=processing_instructions) unloader = self.driver.unloader() diff --git a/records_mover/records/sources/uninferred_fileobjs.py b/records_mover/records/sources/uninferred_fileobjs.py index a3f72e995..f7a52325c 100644 --- a/records_mover/records/sources/uninferred_fileobjs.py +++ b/records_mover/records/sources/uninferred_fileobjs.py @@ -16,9 +16,9 @@ class UninferredFileobjsRecordsSource(SupportsToFileobjsSource): def __init__(self, target_names_to_input_fileobjs: Mapping[str, IO[bytes]], - records_format: Optional[BaseRecordsFormat]=None, - records_schema: Optional[RecordsSchema]=None, - initial_hints: Optional[UntypedRecordsHints]=None) -> None: + records_format: Optional[BaseRecordsFormat] = None, + records_schema: Optional[RecordsSchema] = None, + initial_hints: Optional[UntypedRecordsHints] = None) -> None: self.target_names_to_input_fileobjs = target_names_to_input_fileobjs self.records_format = records_format self.records_schema = records_schema @@ -29,7 +29,7 @@ def __init__(self, @contextmanager def to_fileobjs_source(self, 
processing_instructions: ProcessingInstructions, - records_format_if_possible: Optional[BaseRecordsFormat]=None)\ + records_format_if_possible: Optional[BaseRecordsFormat] = None)\ -> Iterator['FileobjsSource']: """Convert current source to a FileObjsSource and present it in a context manager""" typed_hints = None diff --git a/records_mover/records/targets/base.py b/records_mover/records/targets/base.py index 90e57816d..200af67f7 100644 --- a/records_mover/records/targets/base.py +++ b/records_mover/records/targets/base.py @@ -94,9 +94,8 @@ class SupportsMoveFromRecordsDirectory(NegotiatesRecordsFormat, metaclass=ABCMet def move_from_records_directory(self, directory: RecordsDirectory, processing_instructions: ProcessingInstructions, - override_records_format: Optional[BaseRecordsFormat]=None)\ + override_records_format: Optional[BaseRecordsFormat] = None)\ -> MoveResult: - """Given a RecordsDirectory object, load the data inside per the ProcessingInstructions and any hint overrides provided.""" pass @@ -142,7 +141,6 @@ def move_from_temp_loc_after_filling_it(self, SupportsMoveToRecordsDirectory, processing_instructions: ProcessingInstructions) -> MoveResult: - """Create a temporary location for a RecordsDirectory to live, call records_source.move_to_records_directory() with it, and then move in the records from the temporary diff --git a/records_mover/records/targets/factory.py b/records_mover/records/targets/factory.py index 7ec963df0..8c0ecec93 100644 --- a/records_mover/records/targets/factory.py +++ b/records_mover/records/targets/factory.py @@ -40,6 +40,7 @@ class RecordsTargets(object): results = records.move(source, target) """ + def __init__(self, url_resolver: UrlResolver, db_driver: Callable[[Union['Engine', 'Connection']], 'DBDriver']) -> None: @@ -49,7 +50,7 @@ def __init__(self, def directory_from_url(self, output_url: str, records_format: - Optional[BaseRecordsFormat]=None) ->\ + Optional[BaseRecordsFormat] = None) ->\ 'DirectoryFromUrlRecordsTarget': """Represents a Records Directory pointed to by a URL as a target. @@ -68,11 +69,11 @@ def table(self, db_engine: 'Engine', schema_name: str, table_name: str, - existing_table_handling: ExistingTableHandling= + existing_table_handling: ExistingTableHandling = ExistingTableHandling.DELETE_AND_OVERWRITE, - drop_and_recreate_on_load_error: bool=False, - add_user_perms_for: Optional[Dict[str, List[str]]]=None, - add_group_perms_for: Optional[Dict[str, List[str]]]=None) -> \ + drop_and_recreate_on_load_error: bool = False, + add_user_perms_for: Optional[Dict[str, List[str]]] = None, + add_group_perms_for: Optional[Dict[str, List[str]]] = None) -> \ 'TableRecordsTarget': """Represents a SQLALchemy-accessible database table as as a target. @@ -142,7 +143,7 @@ def fileobj(self, def data_url(self, output_url: str, - records_format: Optional[BaseRecordsFormat]=None) -> DataUrlTarget: + records_format: Optional[BaseRecordsFormat] = None) -> DataUrlTarget: """Represents a URL pointer to a data file as a target. :param output_url: Location of the data file to write. Must be a URL format understood by @@ -157,7 +158,7 @@ def data_url(self, def local_file(self, filename: str, - records_format: Optional[BaseRecordsFormat]=None) ->\ + records_format: Optional[BaseRecordsFormat] = None) ->\ 'DataUrlTarget': """Represents a data file on the local filesystem as a target. 
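In the google_sheets target change a few hunks below, np.asscalar(cell) becomes cell.item(): asscalar was deprecated and later removed from NumPy, and .item() is the supported way to turn a NumPy scalar into the equivalent built-in Python value (it also needs no "type: ignore"). A minimal sketch of the behaviour:

    import numpy as np

    cell = np.int64(7)
    native = cell.item()   # built-in int 7; replaces the removed np.asscalar(cell)
    assert isinstance(native, int)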
@@ -172,9 +173,9 @@ def spectrum(self, schema_name: str, table_name: str, db_engine: 'Engine', - spectrum_base_url: Optional[str]=None, - spectrum_rdir_url: Optional[str]=None, - existing_table_handling: ExistingTableHandling= + spectrum_base_url: Optional[str] = None, + spectrum_rdir_url: Optional[str] = None, + existing_table_handling: ExistingTableHandling = ExistingTableHandling.TRUNCATE_AND_OVERWRITE) ->\ 'SpectrumRecordsTarget': """ diff --git a/records_mover/records/targets/google_sheets.py b/records_mover/records/targets/google_sheets.py index b14215cfc..2bc9acfd3 100644 --- a/records_mover/records/targets/google_sheets.py +++ b/records_mover/records/targets/google_sheets.py @@ -104,7 +104,7 @@ def _get_service(self) -> SheetsService: def as_json_serializable(self, cell: Any) -> Any: if isinstance(cell, np.generic): # MyPy complains that this method does not exist - native = np.asscalar(cell) # type: ignore + native = cell.item() else: native = cell if isinstance(cell, float) and math.isnan(native): diff --git a/records_mover/records/targets/spectrum.py b/records_mover/records/targets/spectrum.py index 21ea8f09c..ccb0dc0a4 100644 --- a/records_mover/records/targets/spectrum.py +++ b/records_mover/records/targets/spectrum.py @@ -25,7 +25,7 @@ def __init__(self, url_resolver: UrlResolver, spectrum_base_url: Optional[str], spectrum_rdir_url: Optional[str], - existing_table_handling: ExistingTableHandling= + existing_table_handling: ExistingTableHandling = ExistingTableHandling.TRUNCATE_AND_OVERWRITE) -> None: self.db = db_engine self.driver = db_driver(db_engine) diff --git a/records_mover/records/targets/table/move_from_records_directory.py b/records_mover/records/targets/table/move_from_records_directory.py index 67abc21c8..f4e8034f7 100644 --- a/records_mover/records/targets/table/move_from_records_directory.py +++ b/records_mover/records/targets/table/move_from_records_directory.py @@ -59,7 +59,7 @@ def load(self, driver: DBDriver) -> Optional[int]: load_plan=plan, directory=self.directory) def move(self) -> MoveResult: - logger.info(f"Connecting to database...") + logger.info("Connecting to database...") with self.tbl.db_engine.begin() as db: driver = self.tbl.db_driver(db) diff --git a/records_mover/records/unload_plan.py b/records_mover/records/unload_plan.py index 02503f74f..5a49a3a3c 100644 --- a/records_mover/records/unload_plan.py +++ b/records_mover/records/unload_plan.py @@ -6,6 +6,6 @@ class RecordsUnloadPlan: def __init__(self, records_format: BaseRecordsFormat = DelimitedRecordsFormat(), processing_instructions: ProcessingInstructions = ProcessingInstructions()) -> \ - None: + None: self.records_format = records_format self.processing_instructions = processing_instructions diff --git a/records_mover/utils/json_schema.py b/records_mover/utils/json_schema.py index e4c306ccc..48c0e0b91 100644 --- a/records_mover/utils/json_schema.py +++ b/records_mover/utils/json_schema.py @@ -60,7 +60,7 @@ def parse_python_parameter_type(name: str, description: Optional[str], optional: bool, default: DefaultValue) ->\ - Optional[JsonParameter]: + Optional[JsonParameter]: if python_type == str: return JsonParameter(name, JsonSchemaDocument(json_type='string', default=default, diff --git a/records_mover/utils/json_schema_array_document.py b/records_mover/utils/json_schema_array_document.py index 5edb3b590..a53bb5cba 100644 --- a/records_mover/utils/json_schema_array_document.py +++ b/records_mover/utils/json_schema_array_document.py @@ -6,6 +6,7 @@ class 
JsonSchemaArrayDocument(JsonSchemaDocument): """Represents a JSON Schema array type""" + def __init__(self, json_type: str, items: JsonSchemaDocument, diff --git a/records_mover/utils/json_schema_document.py b/records_mover/utils/json_schema_document.py index cb74a2feb..b5b37c3d3 100644 --- a/records_mover/utils/json_schema_document.py +++ b/records_mover/utils/json_schema_document.py @@ -11,6 +11,7 @@ class JsonSchemaDocument: """Represents JSON Schema""" + def __init__(self, json_type: Union[str, List[str]], default: DefaultValue = inspect.Parameter.empty, diff --git a/setup.cfg b/setup.cfg index 4b802d137..a620527bf 100644 --- a/setup.cfg +++ b/setup.cfg @@ -80,7 +80,7 @@ ignore_missing_imports = True [mypy-pyarrow.*] ignore_missing_imports = True -[mypy-nose.*] +[mypy-pytest.*] ignore_missing_imports = True [mypy-airflow.hooks.*] diff --git a/setup.py b/setup.py index 8d08b3b69..9f377100b 100755 --- a/setup.py +++ b/setup.py @@ -120,14 +120,17 @@ def initialize_options(self) -> None: 'grpcio<2.0dev,>=1.29.0', ] -nose_dependencies = [ - 'nose' +pytest_dependencies = [ + 'pytest', + 'pytest-cov' ] itest_dependencies = [ 'jsonschema', # needed for directory_validator.py + 'pytz', + 'wheel', # needed to support legacy 'setup.py install' ] + ( - nose_dependencies + + pytest_dependencies + # needed for records_database_fixture retrying drop/creates on # BigQuery google_api_client_dependencies @@ -253,7 +256,7 @@ def initialize_options(self) -> None: 'coverage', 'mock', ] + ( - nose_dependencies + + pytest_dependencies + cli_dependencies_base + airflow_dependencies + gsheet_dependencies + @@ -284,7 +287,7 @@ def initialize_options(self) -> None: long_description = f.read() setup(name='records-mover', - version=__version__, # read right above # noqa + version=__version__, # read right above # noqa description=('Library and CLI to move relational data from one place to another - ' 'DBs/CSV/gsheets/dataframes/...'), long_description=long_description, diff --git a/tests/component/records/schema/field/test_dtype.py b/tests/component/records/schema/field/test_dtype.py index 4498fe0e0..7d880a23b 100644 --- a/tests/component/records/schema/field/test_dtype.py +++ b/tests/component/records/schema/field/test_dtype.py @@ -1,4 +1,3 @@ -from nose.tools import assert_equal from mock import patch from records_mover.records.schema.field import RecordsSchemaField from records_mover.records.schema.field.constraints import ( @@ -29,7 +28,7 @@ def check_dtype(field_type, constraints, expectation): representations=None, ) out = field.cast_series_type(pd.Series(1, dtype=np.int8)) - assert_equal(out.dtype, expectation) + assert out.dtype == expectation def test_to_pandas_dtype_integer_no_nullable(): diff --git a/tests/integration/itest b/tests/integration/itest index 2086cd348..353f36a77 100755 --- a/tests/integration/itest +++ b/tests/integration/itest @@ -210,91 +210,91 @@ def run_test(args, target, parser): with dockerized_dbs(): if (args.docker): docker_compose_run(['with-db', 'dockerized-vertica', - 'nosetests', '--xunit-file=nosetests.xml', '.'], + 'pytest', '--xunit-file=pytest.xml', '.'], cwd="/usr/src/app/tests/integration/records/single_db") else: with local_dockerized_dbfacts(): subprocess.check_call(['with-db', 'dockerized-vertica', - 'nosetests', '--xunit-file=nosetests.xml', '.'], + 'pytest', '--xunit-file=pytest.xml', '.'], cwd=f"{script_dir}/records/single_db") elif (target == 'vertica-s3'): with dockerized_dbs(), set_s3_scratch_bucket(): if (args.docker): docker_compose_run(['with-db', 
'dockerized-vertica', - 'nosetests', '--xunit-file=nosetests.xml', '.'], + 'pytest', '--xunit-file=pytest.xml', '.'], prefixes=["with-aws-creds", "circleci"], cwd="/usr/src/app/tests/integration/records/single_db") else: with local_dockerized_dbfacts(): subprocess.check_call(["with-db", "dockerized-vertica", "with-aws-creds", "circleci", - "nosetests", "--xunit-file=nosetests.xml", "."], + "pytest", "--xunit-file=pytest.xml", "."], cwd=f"{script_dir}/records/single_db") elif (target == 'mysql'): with dockerized_dbs(): if (args.docker): docker_compose_run(['with-db', 'dockerized-mysql', - 'nosetests', '--xunit-file=nosetests.xml', '.'], + 'pytest', '--xunit-file=pytest.xml', '.'], prefixes=["with-aws-creds", "circleci"], cwd="/usr/src/app/tests/integration/records/single_db") else: with local_dockerized_dbfacts(): subprocess.check_call(["with-db", "dockerized-mysql", - "nosetests", "--xunit-file=nosetests.xml", "."], + "pytest", "--xunit-file=pytest.xml", "."], cwd=f"{script_dir}/records/single_db") elif (target == 'postgres'): with dockerized_dbs(): if (args.docker): docker_compose_run(['with-db', 'dockerized-postgres', - 'nosetests', '--xunit-file=nosetests.xml', '.'], + 'pytest', '--xunit-file=pytest.xml', '.'], prefixes=["with-aws-creds", "circleci"], cwd="/usr/src/app/tests/integration/records/single_db") else: with local_dockerized_dbfacts(): subprocess.check_call(["with-db", "dockerized-postgres", - "nosetests", "--xunit-file=nosetests.xml", "."], + "pytest", "--xunit-file=pytest.xml", "."], cwd=f"{script_dir}/records/single_db") elif (target == 'redshift-s3'): with set_s3_scratch_bucket(): if (args.docker): docker_compose_run(['with-db', 'demo-itest', - 'nosetests', '--xunit-file=nosetests.xml', '.'], + 'pytest', '--xunit-file=pytest.xml', '.'], prefixes=["with-aws-creds", "circleci"], cwd="/usr/src/app/tests/integration/records/single_db") else: subprocess.check_call(['with-db', 'demo-itest', "with-aws-creds", "circleci", - 'nosetests', '--xunit-file=nosetests.xml', '.'], + 'pytest', '--xunit-file=pytest.xml', '.'], cwd=f"{script_dir}/records/single_db") elif (target == 'redshift-no-s3'): if (args.docker): docker_compose_run(['with-db', 'demo-itest', - 'nosetests', '--xunit-file=nosetests.xml', '.'], + 'pytest', '--xunit-file=pytest.xml', '.'], prefixes=["with-aws-creds", "circleci"], cwd="/usr/src/app/tests/integration/records/single_db") else: subprocess.check_call(['with-db', 'demo-itest', - 'nosetests', '--xunit-file=nosetests.xml', '.'], + 'pytest', '--xunit-file=pytest.xml', '.'], cwd=f"{script_dir}/records/single_db") elif (target == 'bigquery-no-gcs'): if (args.docker): docker_compose_run(['with-db', 'bltoolsdevbq-bq_itest', - 'nosetests', '--xunit-file=nosetests.xml', '.'], + 'pytest', '--xunit-file=pytest.xml', '.'], cwd="/usr/src/app/tests/integration/records/single_db") else: subprocess.check_call(['with-db', 'bltoolsdevbq-bq_itest', - 'nosetests', '--xunit-file=nosetests.xml', '.'], + 'pytest', '--xunit-file=pytest.xml', '.'], cwd=f"{script_dir}/records/single_db") elif (target == 'bigquery-gcs'): with set_gcs_scratch_bucket(): if (args.docker): docker_compose_run(['with-db', 'bltoolsdevbq-bq_itest', - 'nosetests', '--xunit-file=nosetests.xml', '.'], + 'pytest', '--xunit-file=pytest.xml', '.'], cwd="/usr/src/app/tests/integration/records/single_db") else: subprocess.check_call(['with-db', 'bltoolsdevbq-bq_itest', - 'nosetests', '--xunit-file=nosetests.xml', '.'], + 'pytest', '--xunit-file=pytest.xml', '.'], cwd=f"{script_dir}/records/single_db") elif (target == 
'table2table'): with set_s3_scratch_bucket(), set_gcs_scratch_bucket(), dockerized_dbs(): diff --git a/tests/integration/records/expected_column_types.py b/tests/integration/records/expected_column_types.py index 292697620..c3a2070db 100644 --- a/tests/integration/records/expected_column_types.py +++ b/tests/integration/records/expected_column_types.py @@ -14,7 +14,7 @@ 'postgresql': [ 'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)', 'VARCHAR(111)', 'DATE', 'TIME WITHOUT TIME ZONE', - 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMPTZ' + 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMP WITH TIME ZONE' ], 'bigquery': [ 'INTEGER', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)', @@ -30,7 +30,7 @@ 'postgresql': [ 'BIGINT', 'VARCHAR(12)', 'VARCHAR(12)', 'VARCHAR(4)', 'VARCHAR(4)', 'VARCHAR(12)', 'VARCHAR(444)', 'DATE', 'TIME WITHOUT TIME ZONE', - 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMPTZ' + 'TIMESTAMP WITHOUT TIME ZONE', 'TIMESTAMP WITH TIME ZONE' ], 'mysql': [ 'BIGINT(20)', 'VARCHAR(3)', 'VARCHAR(3)', 'VARCHAR(1)', 'VARCHAR(1)', 'VARCHAR(3)', diff --git a/tests/integration/records/records_numeric_database_fixture.py b/tests/integration/records/records_numeric_database_fixture.py index 7608ffc01..b8386700f 100644 --- a/tests/integration/records/records_numeric_database_fixture.py +++ b/tests/integration/records/records_numeric_database_fixture.py @@ -25,7 +25,7 @@ def bring_up(self): 12147483647.78::REAL AS float32, 19223372036854775807.78::FLOAT AS float64; """ # noqa - ] + ] elif self.engine.name == 'vertica': # Vertica only supports a few large numeric types create_tables = [f""" @@ -34,7 +34,7 @@ def bring_up(self): 1234.56::NUMERIC(6, 2) AS fixed_6_2, 19223372036854775807.78::FLOAT AS float64; """ # noqa - ] + ] elif self.engine.name == 'bigquery': # BigQuery only supports a few large numeric types create_tables = [f""" @@ -47,7 +47,7 @@ def bring_up(self): INSERT INTO {self.schema_name}.{self.table_name} (`int64`, `fixed_6_2`, `float64`) VALUES (9223372036854775807, 1234.56, 19223372036854775807.78); """, # noqa - ] + ] elif self.engine.name == 'postgresql': # Postgres supports a number of different numeric types create_tables = [f""" @@ -59,7 +59,7 @@ def bring_up(self): 12147483647.78::REAL AS float32, 19223372036854775807.78::FLOAT8 AS float64; """ # noqa - ] + ] elif self.engine.name == 'mysql': # MySQL supports a number of different numeric types # https://dev.mysql.com/doc/refman/8.0/en/numeric-types.html @@ -121,7 +121,7 @@ def bring_up(self): 19223372036854775807.78 ); """ # noqa - ] + ] else: raise NotImplementedError(f"Please teach me how to integration test {self.engine.name}") print(f"Creating: {create_tables}") diff --git a/tests/integration/records/single_db/test_records_load_datetime.py b/tests/integration/records/single_db/test_records_load_datetime.py index 12dabbd77..951a7a3ea 100644 --- a/tests/integration/records/single_db/test_records_load_datetime.py +++ b/tests/integration/records/single_db/test_records_load_datetime.py @@ -45,12 +45,12 @@ def load(self, 'compression': None, 'header-row': False, **addl_hints, - }) + }) source = sources.fileobjs(target_names_to_input_fileobjs={ - 'test': fileobj - }, - records_schema=records_schema, - records_format=records_format) + 'test': fileobj + }, + records_schema=records_schema, + records_format=records_format) target = targets.table(schema_name=self.schema_name, table_name=self.table_name, db_engine=self.engine) diff --git 
diff --git a/tests/integration/records/single_db/test_records_unload_datetime.py b/tests/integration/records/single_db/test_records_unload_datetime.py
index 8c3910f9b..2242ef90a 100644
--- a/tests/integration/records/single_db/test_records_unload_datetime.py
+++ b/tests/integration/records/single_db/test_records_unload_datetime.py
@@ -74,7 +74,7 @@ def test_unload_date(self) -> None:
                                                     'compression': None,
                                                     'header-row': False,
                                                     **addl_hints,  # type: ignore
-                                                 })
+                                                     })
        expect_pandas_failure = (not self.has_pandas()) and uses_pandas
        try:
            csv_text = self.unload_column_to_string(column_name='date',
@@ -142,7 +142,7 @@ def test_unload_datetime(self) -> None:
                                                     'compression': None,
                                                     'header-row': False,
                                                     **addl_hints,  # type: ignore
-                                                 })
+                                                     })
        expect_pandas_failure = (not self.has_pandas()) and uses_pandas
        try:
            csv_text = self.unload_column_to_string(column_name='timestamp',
@@ -216,7 +216,7 @@ def test_unload_datetimetz(self) -> None:
                                                     'compression': None,
                                                     'header-row': False,
                                                     **addl_hints,  # type: ignore
-                                                 })
+                                                     })
        expect_pandas_failure = (not self.has_pandas()) and uses_pandas
        try:
            csv_text = self.unload_column_to_string(column_name='timestamptz',
@@ -285,7 +285,7 @@ def test_unload_timeonly(self) -> None:
                                                     'compression': None,
                                                     'header-row': False,
                                                     **addl_hints,  # type: ignore
-                                                 })
+                                                     })
        expect_pandas_failure = (not self.has_pandas()) and uses_pandas
        try:
            csv_text = self.unload_column_to_string(column_name='time',
diff --git a/tests/integration/records/table_validator.py b/tests/integration/records/table_validator.py
index e19445c3d..6d6b6fe83 100644
--- a/tests/integration/records/table_validator.py
+++ b/tests/integration/records/table_validator.py
@@ -100,10 +100,10 @@ def format_type(column: Dict[str, Any]) -> str:
            expected_single_database_column_types[self.source_db_engine.name],
            expected_single_database_column_types[self.target_db_engine.name],
            expected_df_loaded_database_column_types.get(self.target_db_engine.name))),\
-            f'Could not find column types filed under '\
-            f"{(self.source_db_engine.name, self.target_db_engine.name)} "\
-            'or either individually: '\
-            f'{actual_column_types}'
+             f'Could not find column types filed under '\
+             f"{(self.source_db_engine.name, self.target_db_engine.name)} "\
+             'or either individually: '\
+             f'{actual_column_types}'

    def validate_data_values(self,
                             schema_name: str,
diff --git a/tests/unit/db/test_connect.py b/tests/unit/db/test_connect.py
index 520988257..aefd4473c 100644
--- a/tests/unit/db/test_connect.py
+++ b/tests/unit/db/test_connect.py
@@ -27,7 +27,8 @@ def test_engine_from_lpass_entry(self,
                                         password='hunter1',
                                         port=123,
                                         username='myuser',
-                                         query=None)
+                                         query={'keepalives': '1',
+                                                'keepalives_idle': '30'})
        mock_create_engine.\
            assert_called_with(mock_url.return_value)
        assert engine == mock_create_engine.return_value
diff --git a/tests/unit/db/test_sqlalchemy_driver_picking.py b/tests/unit/db/test_sqlalchemy_driver_picking.py
index 721d05d27..54b1d42d1 100644
--- a/tests/unit/db/test_sqlalchemy_driver_picking.py
+++ b/tests/unit/db/test_sqlalchemy_driver_picking.py
@@ -11,10 +11,10 @@ def test_create_sqlalchemy_url(self, mock_db_facts_from_lpass):
        expected_mappings = {
            'psql (redshift)':
-            'redshift://myuser:hunter1@myhost:123/analyticsdb',
+            'redshift://myuser:hunter1@myhost:123/analyticsdb?keepalives=1&keepalives_idle=30',

            'redshift':
-            'redshift://myuser:hunter1@myhost:123/analyticsdb',
+            'redshift://myuser:hunter1@myhost:123/analyticsdb?keepalives=1&keepalives_idle=30',

            'vertica':
            'vertica+vertica_python://myuser:hunter1@myhost:123/analyticsdb',
@@ -35,8 +35,12 @@ def test_create_sqlalchemy_url(self,
            'port': 123,
            'database': 'analyticsdb'
        }
+        if human_style_db_type in ['redshift', 'psql (redshift)']:
+            db_facts['query'] = {'keepalives': '1', 'keepalives_idle': '30'}
        actual_url = connect.create_sqlalchemy_url(db_facts)
-        assert str(actual_url) == expected_url
+        actual_url_str = str(actual_url)
+        self.assertEqual(actual_url_str, expected_url, "{}!={}".format(actual_url_str,
+                                                                       expected_url))

    @patch('records_mover.db.connect.db_facts_from_lpass')
    @patch('records_mover.db.connect.sa.create_engine')
@@ -45,10 +49,10 @@ def test_create_sqlalchemy_url_odbc_preferred(self, mock_db_facts_from_lpass):
        expected_mappings = {
            'psql (redshift)':
-            'redshift://myuser:hunter1@myhost:123/analyticsdb',
+            'redshift://myuser:hunter1@myhost:123/analyticsdb?keepalives=1&keepalives_idle=30',

            'redshift':
-            'redshift://myuser:hunter1@myhost:123/analyticsdb',
+            'redshift://myuser:hunter1@myhost:123/analyticsdb?keepalives=1&keepalives_idle=30',

            'vertica':
            'vertica+pyodbc:///?odbc_connect=Driver'
@@ -71,5 +75,7 @@ def test_create_sqlalchemy_url_odbc_preferred(self,
            'port': 123,
            'database': 'analyticsdb'
        }
+        if human_style_db_type == 'redshift':
+            db_facts['query'] = {'keepalives': '1', 'keepalives_idle': '30'}
        actual_url = connect.create_sqlalchemy_url(db_facts, prefer_odbc=True)
        assert str(actual_url) == expected_url
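The keepalive settings asserted above travel as a query dict on the database URL. A small sketch of how such a dict renders into the connection string; URL.create is the SQLAlchemy 1.4+ spelling and is used here purely for illustration, with the expected output taken from the updated test fixtures:

    from sqlalchemy.engine.url import URL

    url = URL.create(drivername='redshift',
                     username='myuser',
                     password='hunter1',
                     host='myhost',
                     port=123,
                     database='analyticsdb',
                     query={'keepalives': '1', 'keepalives_idle': '30'})
    # Renders roughly as:
    # redshift://myuser:hunter1@myhost:123/analyticsdb?keepalives=1&keepalives_idle=30
    print(url.render_as_string(hide_password=False))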
diff --git a/tests/unit/records/sources/test_dataframes.py b/tests/unit/records/sources/test_dataframes.py
index a4592cadd..b1d91e588 100644
--- a/tests/unit/records/sources/test_dataframes.py
+++ b/tests/unit/records/sources/test_dataframes.py
@@ -83,8 +83,8 @@ def generate_filename(prefix):
                "data001.csv": mock_data_fileobj_1,
                "data002.csv": mock_data_fileobj_2,
            },
-            records_schema=mock_target_records_schema,
-            records_format=mock_target_records_format)
+                records_schema=mock_target_records_schema,
+                records_format=mock_target_records_format)
        self.assertEqual(fileobjs, mock_FileobjsSource.return_value)
        mock_data_fileobj_1.close.assert_not_called()
        mock_data_fileobj_2.close.assert_not_called()
@@ -155,8 +155,8 @@ def generate_filename(prefix):
                "data001.parquet": mock_data_fileobj_1,
                "data002.parquet": mock_data_fileobj_2,
            },
-            records_schema=mock_target_records_schema,
-            records_format=mock_target_records_format)
+                records_schema=mock_target_records_schema,
+                records_format=mock_target_records_format)
        self.assertEqual(fileobjs, mock_FileobjsSource.return_value)
        mock_data_fileobj_1.close.assert_not_called()
        mock_data_fileobj_2.close.assert_not_called()
diff --git a/tests/unit/records/sources/test_fileobjs.py b/tests/unit/records/sources/test_fileobjs.py
index fa1d3c8b3..07ade5b0b 100644
--- a/tests/unit/records/sources/test_fileobjs.py
+++ b/tests/unit/records/sources/test_fileobjs.py
@@ -82,11 +82,11 @@ def test_infer_if_needed_bad_encoding(self,
        mock_processing_instructions = Mock(name='processing_instructions')
        with self.assertRaises(TypeError):
            with FileobjsSource.\
-               infer_if_needed(target_names_to_input_fileobjs=mock_target_names_to_input_fileobjs,
-                               processing_instructions=mock_processing_instructions,
-                               records_schema=mock_records_schema,
-                               records_format=None,
-                               initial_hints={}):
+                    infer_if_needed(target_names_to_input_fileobjs=mock_target_names_to_input_fileobjs,
+                                    processing_instructions=mock_processing_instructions,
+                                    records_schema=mock_records_schema,
+                                    records_format=None,
+                                    initial_hints={}):
                pass

    def test_known_supported_records_formats(self):
diff --git a/tests/unit/records/targets/table/test_target.py b/tests/unit/records/targets/table/test_target.py
index 13d196a2f..20067ccfe 100644
--- a/tests/unit/records/targets/table/test_target.py
+++ b/tests/unit/records/targets/table/test_target.py
@@ -24,15 +24,15 @@ def setUp(self):
            mock_known_supported_records_formats

        self.target =\
-            TableRecordsTarget(schema_name=self.mock_schema_name,
-                               table_name=self.mock_table_name,
-                               db_engine=self.mock_db_engine,
-                               db_driver=self.mock_db_driver,
-                               add_user_perms_for=self.mock_add_user_perms_for,
-                               add_group_perms_for=self.mock_add_group_perms_for,
-                               existing_table_handling=self.mock_existing_table_handling,
-                               drop_and_recreate_on_load_error=
-                               self.mock_drop_and_recreate_on_load_error)
+            TableRecordsTarget(
+                schema_name=self.mock_schema_name,
+                table_name=self.mock_table_name,
+                db_engine=self.mock_db_engine,
+                db_driver=self.mock_db_driver,
+                add_user_perms_for=self.mock_add_user_perms_for,
+                add_group_perms_for=self.mock_add_group_perms_for,
+                existing_table_handling=self.mock_existing_table_handling,
+                drop_and_recreate_on_load_error=self.mock_drop_and_recreate_on_load_error)

    def test_can_move_from_fileobjs_source_yes(self):
        self.assertTrue(self.target.can_move_from_fileobjs_source())
diff --git a/tests/unit/records/targets/test_google_sheets.py b/tests/unit/records/targets/test_google_sheets.py
index 8174b6efc..b4de163fd 100644
--- a/tests/unit/records/targets/test_google_sheets.py
+++ b/tests/unit/records/targets/test_google_sheets.py
@@ -34,8 +34,7 @@ def test_move_from_dataframe_sheet_exists(self,
        out = self.google_sheets.move_from_dataframes_source(mock_dfs_source,
                                                             mock_processing_instructions)
        mock_df.to_records.assert_called_with(index=mock_dfs_source.include_index)
-        mock_json_encodable_datum = mock_np.asscalar.return_value
-        mock_np.asscalar.assert_called_with(1)
+        mock_json_encodable_datum = 1
        mock_http = mock_httplib2.Http.return_value
        mock_authed_http = mock_google_auth_httplib2.AuthorizedHttp.return_value
        mock_google_auth_httplib2.AuthorizedHttp.\
@@ -83,8 +82,7 @@ def test_move_from_dataframe_sheet_new(self,
        out = self.google_sheets.move_from_dataframes_source(mock_dfs_source,
                                                             mock_processing_instructions)
        mock_df.to_records.assert_called_with(index=mock_dfs_source.include_index)
-        mock_json_encodable_datum = mock_np.asscalar.return_value
-        mock_np.asscalar.assert_called_with(1)
+        mock_json_encodable_datum = 1
        mock_http = mock_httplib2.Http.return_value
        mock_authed_http = mock_google_auth_httplib2.AuthorizedHttp.return_value
        mock_google_auth_httplib2.AuthorizedHttp.\
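Background on the asscalar change above: numpy.asscalar() was deprecated in NumPy 1.16 and removed in newer releases (1.23+), so the tests no longer expect it to be called; the documented replacement for unwrapping a NumPy scalar into a plain Python value is item(). A quick sketch:

    import numpy as np

    value = np.int64(1)
    print(value.item())        # 1
    print(type(value.item()))  # <class 'int'>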
diff --git a/tests/unit/records/test_records_directory.py b/tests/unit/records/test_records_directory.py
index 851aa1a12..34c73d82b 100644
--- a/tests/unit/records/test_records_directory.py
+++ b/tests/unit/records/test_records_directory.py
@@ -9,11 +9,11 @@ class TestRecordsDirectory(unittest.TestCase):

    def setUp(self):
        with patch('records_mover.records.records_directory.RecordsFormatFile') as\
-           mock_RecordsFormatFile, \
-           patch('records_mover.records.records_directory.RecordsSchemaSqlFile') as\
-           mock_RecordsSchemaSqlFile, \
-           patch('records_mover.records.records_directory.RecordsSchemaJsonFile') as\
-           mock_RecordsSchemaJsonFile:
+                mock_RecordsFormatFile, \
+                patch('records_mover.records.records_directory.RecordsSchemaSqlFile') as\
+                mock_RecordsSchemaSqlFile, \
+                patch('records_mover.records.records_directory.RecordsSchemaJsonFile') as\
+                mock_RecordsSchemaJsonFile:
            self.mock_record_format_file = mock_RecordsFormatFile.return_value
            self.mock_schema_sql_file = mock_RecordsSchemaSqlFile.return_value
            self.mock_schema_json_file = mock_RecordsSchemaJsonFile.return_value
diff --git a/tests/unit/records/test_records_directory_schema.py b/tests/unit/records/test_records_directory_schema.py
index 601551a5a..de5d0a953 100644
--- a/tests/unit/records/test_records_directory_schema.py
+++ b/tests/unit/records/test_records_directory_schema.py
@@ -6,11 +6,11 @@ class TestRecordsDirectorySchema(unittest.TestCase):

    def setUp(self):
        with patch('records_mover.records.records_directory.RecordsFormatFile') as\
-           mock_RecordsFormatFile, \
-           patch('records_mover.records.records_directory.RecordsSchemaSqlFile') as\
-           mock_RecordsSchemaSqlFile, \
-           patch('records_mover.records.records_directory.RecordsSchemaJsonFile') as\
-           mock_RecordsSchemaJsonFile:
+                mock_RecordsFormatFile, \
+                patch('records_mover.records.records_directory.RecordsSchemaSqlFile') as\
+                mock_RecordsSchemaSqlFile, \
+                patch('records_mover.records.records_directory.RecordsSchemaJsonFile') as\
+                mock_RecordsSchemaJsonFile:
            self.mock_record_format_file = mock_RecordsFormatFile.return_value
            self.mock_schema_sql_file = mock_RecordsSchemaSqlFile.return_value
            self.mock_schema_json_file = mock_RecordsSchemaJsonFile.return_value
diff --git a/tests/unit/test_database.py b/tests/unit/test_database.py
index af2ee9db2..29fa31893 100644
--- a/tests/unit/test_database.py
+++ b/tests/unit/test_database.py
@@ -22,7 +22,7 @@ def test_db_facts_from_env(self):
            "port": "5433",
            "database": "analytics",
            "type": "vertica"
-        }
+            }
        actual_db_facts = db_facts_from_env()
        self.assertEqual(expected_db_facts, actual_db_facts)