
RM-17-fix-flake-8-violations #182

Merged · 38 commits · Dec 17, 2022
274b76e
uncomment postgres-itest
ryantimjohn Dec 12, 2022
794a334
update all 3.6 tests to 3.9
ryantimjohn Dec 14, 2022
db71d18
Updated config.yml
ryantimjohn Dec 14, 2022
9796bb7
add pytz to itest dependencies
ryantimjohn Dec 14, 2022
4b1dc54
Update bigfiles_high_water_mark
ryantimjohn Dec 14, 2022
440d9f8
remove deprecated asscalar
ryantimjohn Dec 14, 2022
cd60d8c
add wheel to itest_dependencies to support legacy
ryantimjohn Dec 14, 2022
e80f1ef
replace asscalar calls
ryantimjohn Dec 14, 2022
febc6b1
Update bigfiles_high_water_mark
ryantimjohn Dec 14, 2022
a31955b
Update flake8_high_water_mark
ryantimjohn Dec 14, 2022
d2a4563
use .item() method
ryantimjohn Dec 15, 2022
4c6b0af
switch to int64 call
ryantimjohn Dec 15, 2022
8233318
switch to generic call, drop with
ryantimjohn Dec 15, 2022
ccf1da0
ndarray to generic
ryantimjohn Dec 15, 2022
83afbf5
back to ndarray
ryantimjohn Dec 15, 2022
8879798
remove this test as no longer makes sense
ryantimjohn Dec 15, 2022
de0425e
change to .item() method call
ryantimjohn Dec 15, 2022
6f15701
use return_value
ryantimjohn Dec 15, 2022
a9543b8
change to 1
ryantimjohn Dec 15, 2022
8ec8e84
remove offending type:ignore
ryantimjohn Dec 15, 2022
b98e9d2
remove offending space
ryantimjohn Dec 15, 2022
83494b1
Updated config.yml
ryantimjohn Dec 15, 2022
36db0a4
Fix postgres integration tests
Brunope Dec 15, 2022
2646859
Merge branch 'master' into RM-3-fix-postgres-integration-tests
ryantimjohn Dec 16, 2022
d964cbc
Merge remote-tracking branch 'origin/RM-3-fix-postgres-integration-te…
ryantimjohn Dec 16, 2022
142366f
fix flake8 violations
ryantimjohn Dec 16, 2022
0eac97c
lower high water mark
ryantimjohn Dec 16, 2022
6a52ad7
flake8 to 11
ryantimjohn Dec 16, 2022
5157898
attempt to fix long line
ryantimjohn Dec 16, 2022
ca6094b
revert
ryantimjohn Dec 16, 2022
83f9b86
revert
ryantimjohn Dec 16, 2022
d82d95b
deindent block
ryantimjohn Dec 16, 2022
41ceb87
fix flake 8 violations
ryantimjohn Dec 16, 2022
585118f
remove f string that didn't have placeholder
ryantimjohn Dec 16, 2022
d702ecd
deindent long line
ryantimjohn Dec 16, 2022
134a09d
remove unused fstring
ryantimjohn Dec 16, 2022
0c81d2c
remove unused fstring
ryantimjohn Dec 16, 2022
7bf9b0e
bump flake8 to 9
ryantimjohn Dec 16, 2022
80 changes: 38 additions & 42 deletions .circleci/config.yml
@@ -162,7 +162,7 @@ jobs:
pandas_version:
type: string
description: "Version of pandas to test against"
default: '<1'
default: '==1.3.5'
command:
type: string
description: "Command to run in records-mover venv"
@@ -254,7 +254,7 @@ jobs:
description: "Version of Python to test against"
pandas_version:
type: string
default: '>=1'
default: '==1.3.5'
db_name:
type: string
description: "Database to run inside"
@@ -313,15 +313,15 @@ jobs:
python_version:
type: string
description: "Version of python to test against"
default: '3.6'
default: '3.9'
docker:
- image: circleci/python:<<parameters.python_version>>
steps:
- checkout
- installvenv:
extras: <<parameters.extras>>
python_version: <<parameters.python_version>>
pandas_version: '<1'
pandas_version: '==1.3.5'
# requirements.txt includes twine and other release packages
include_dev_dependencies: true
- run:
@@ -353,12 +353,12 @@ jobs:
twine upload -r pypi dist/*
cli-extra-test:
docker:
- image: circleci/python:3.6
- image: circleci/python:3.9
steps:
- checkout
- installvenv:
extras: '[cli]'
python_version: '3.6'
python_version: '3.9'
pandas_version: ''
# we want this just like a user would install it, not with
# dev tools installed
@@ -383,9 +383,6 @@ workflows:
#
# https://devguide.python.org/devcycle/#end-of-life-branches
#
# That said, Python 3.5 and before don't support type
# annotations on variables, which we use, so right now Python
# 3.6 is the current minimum version tested against.
#
# https://app.asana.com/0/1128138765527694/1161072974798065
# - test:
@@ -420,7 +417,7 @@ workflows:
# - integration_test_with_dbs:
# name: vertica-no-s3-itest
# extras: '[vertica,itest]'
# python_version: "3.6"
# python_version: "3.9"
# command: |
# . venv/bin/activate
# export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin
@@ -439,28 +436,28 @@
# filters:
# tags:
# only: /v\d+\.\d+\.\d+(-[\w]+)?/
# - integration_test_with_dbs:
# name: postgres-itest
# extras: '[postgres-binary,itest]'
# python_version: "3.9"
# pandas_version: '==1.3.5'
# command: |
# . venv/bin/activate
# export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin
# export DB_FACTS_PATH=${PWD}/tests/integration/circleci-dbfacts.yml
# export RECORDS_MOVER_SESSION_TYPE=env
# mkdir -p test-reports/itest
# cd tests/integration/records/single_db
# with-db dockerized-postgres nosetests --with-xunit --xunit-file=../../../../test-reports/itest/junit.xml .
# requires:
# - redshift-s3-itest
# filters:
# tags:
# only: /v\d+\.\d+\.\d+(-[\w]+)?/
- integration_test_with_dbs:
name: postgres-itest
extras: '[postgres-binary,itest]'
python_version: "3.9"
pandas_version: '==1.3.5'
command: |
. venv/bin/activate
export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin
export DB_FACTS_PATH=${PWD}/tests/integration/circleci-dbfacts.yml
export RECORDS_MOVER_SESSION_TYPE=env
mkdir -p test-reports/itest
cd tests/integration/records/single_db
with-db dockerized-postgres nosetests --with-xunit --xunit-file=../../../../test-reports/itest/junit.xml .
requires:
- redshift-s3-itest
filters:
tags:
only: /v\d+\.\d+\.\d+(-[\w]+)?/
# - integration_test_with_dbs:
# name: mysql-itest
# extras: '[mysql,itest]'
# python_version: "3.6"
# python_version: "3.9"
# # Using Pandas reproduced a bug that happened when we were
# # relying on Pandas:
# #
@@ -482,7 +479,7 @@ workflows:
- integration_test_with_dbs:
name: vertica-s3-itest
extras: '[vertica,aws,itest]'
python_version: "3.6"
python_version: "3.9"
command: |
. venv/bin/activate
export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin
@@ -499,7 +496,7 @@ workflows:
- integration_test_with_dbs:
name: cli-1-itest
extras: '[cli,gsheets,vertica]'
python_version: "3.6"
python_version: "3.9"
command: |
. venv/bin/activate
export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin
@@ -516,7 +513,7 @@ workflows:
- integration_test_with_dbs:
name: cli-2-itest
extras: '[cli,gsheets,vertica]'
python_version: "3.6"
python_version: "3.9"
command: |
. venv/bin/activate
export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin
@@ -533,7 +530,7 @@ workflows:
- integration_test_with_dbs:
name: cli-3-itest
extras: '[cli,gsheets,vertica]'
python_version: "3.6"
python_version: "3.9"
command: |
. venv/bin/activate
export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin
@@ -550,7 +547,7 @@ workflows:
- integration_test:
name: redshift-s3-itest
extras: '[redshift-binary,itest]'
python_version: "3.6"
python_version: "3.9"
db_name: demo-itest
requires:
- test-3.9
@@ -560,7 +557,7 @@ workflows:
- integration_test:
name: redshift-no-s3-itest
extras: '[redshift-binary,itest]'
python_version: "3.6"
python_version: "3.9"
db_name: demo-itest
include_s3_scratch_bucket: false
requires:
@@ -571,7 +568,7 @@ workflows:
- integration_test:
name: redshift-s3-itest-old-pandas
extras: '[redshift-binary,itest]'
python_version: "3.6"
python_version: "3.8"
pandas_version: "<1"
db_name: demo-itest
requires:
@@ -582,7 +579,7 @@ workflows:
- integration_test:
name: redshift-s3-itest-no-pandas
extras: '[redshift-binary,itest]'
python_version: "3.6"
python_version: "3.9"
pandas_version: ""
db_name: demo-itest
requires:
@@ -593,7 +590,7 @@ workflows:
# - integration_test:
# name: bigquery-no-gcs-itest
# extras: '[bigquery,itest]'
# python_version: "3.6"
# python_version: "3.9"
# db_name: bltoolsdevbq-bq_itest
# include_gcs_scratch_bucket: false
# requires:
@@ -604,7 +601,7 @@ workflows:
# - integration_test:
# name: bigquery-gcs-itest
# extras: '[bigquery,itest]'
# python_version: "3.6"
# python_version: "3.9"
# db_name: bltoolsdevbq-bq_itest
# requires:
# - redshift-s3-itest
@@ -614,7 +611,7 @@ workflows:
# - integration_test_with_dbs:
# name: tbl2tbl-itest
# extras: '[literally_every_single_database_binary,itest]'
# python_version: "3.6"
# python_version: "3.9"
# command: |
# . venv/bin/activate
# export PATH=${PATH}:${PWD}/tests/integration/bin:/opt/vertica/bin
@@ -635,7 +632,6 @@ workflows:
- deploy:
context: PyPI
requires:
# - test-3.6
# - test-3.7
- test-3.8
- test-3.9
@@ -647,7 +643,7 @@ workflows:
- redshift-s3-itest
- redshift-s3-itest-old-pandas
- redshift-s3-itest-no-pandas
# - postgres-itest
- postgres-itest
# - mysql-itest
- cli-1-itest
- cli-2-itest
2 changes: 1 addition & 1 deletion metrics/bigfiles_high_water_mark
@@ -1 +1 @@
1135
1137
2 changes: 1 addition & 1 deletion metrics/flake8_high_water_mark
@@ -1 +1 @@
166
9
Collaborator: I don't exactly understand how these are used, but why is this such a large numerical change?

Contributor (author): These "high water marks" record the number of violations of a particular check. In this case we had 166 flake8 violations and brought them down to 9, mostly by running autopep8 and then hand-correcting the remaining errors. The 9 violations that are left are "too high complexity" warnings, which were more than I wanted to take on in a single PR.
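The ratchet mechanism itself isn't shown in this diff, but the idea can be sketched as follows. This is a minimal illustration of how such a check might work; the repository's actual tooling (function names, file handling) may differ:

```python
from pathlib import Path


def violation_count(flake8_output: str) -> int:
    """flake8 prints one violation per line; count the non-empty lines."""
    return sum(1 for line in flake8_output.splitlines() if line.strip())


def ratchet_ok(current_violations: int, mark_file: Path) -> bool:
    """Pass only while the current count stays at or under the recorded mark.

    Lowering the number stored in the mark file (here, 166 -> 9) ratchets
    the allowed ceiling down so regressions fail the build.
    """
    high_water_mark = int(mark_file.read_text().strip())
    return current_violations <= high_water_mark
```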

2 changes: 1 addition & 1 deletion records_mover/airflow/hooks/records_hook.py
@@ -69,7 +69,7 @@ def _s3_temp_base_url(self) -> Optional[str]:
# non-Airflow-specific API.
def validate_and_prepare_target_directory(self,
target_url: str,
allow_overwrite: bool=False) -> None:
allow_overwrite: bool = False) -> None:
parsed_target = urlparse(target_url)
if parsed_target.scheme != 's3':
raise AirflowException(
2 changes: 1 addition & 1 deletion records_mover/cli/job_config_schema_as_args_parser.py
@@ -59,7 +59,7 @@ def add_to_prefix(self, prefix: str, key: str) -> str:
def configure_from_properties(self,
properties: JsonSchema,
required_keys: Iterable[str],
prefix: str='') -> None:
prefix: str = '') -> None:
for naked_key, raw_value in properties.items():
if not isinstance(raw_value, dict):
raise TypeError(f"Did not understand [{raw_value}] in [{properties}]")
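The two hunks above fix the same warning, flake8 E252: when a parameter carries a type annotation, its default value needs spaces around the `=`. A tiny self-contained illustration (the function and parameter names are invented, not from the codebase):

```python
# flake8 E252 fires when an annotated parameter's default lacks spaces
# around "=".
def fetch(url: str, timeout: float = 5.0) -> str:   # OK: spaces around "="
    return url

# def fetch(url: str, timeout: float=5.0) -> str:   # E252: missing whitespace
```

Note that for *unannotated* parameters the rule is the opposite (E251 forbids spaces around `=`), which is why only the annotated signatures changed in this PR.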
2 changes: 1 addition & 1 deletion records_mover/db/bigquery/bigquery_db_driver.py
@@ -42,7 +42,7 @@ def loader_from_fileobj(self) -> LoaderFromFileobj:
def unloader(self) -> Unloader:
return self._bigquery_unloader

def type_for_date_plus_time(self, has_tz: bool=False) -> sqlalchemy.sql.sqltypes.DateTime:
def type_for_date_plus_time(self, has_tz: bool = False) -> sqlalchemy.sql.sqltypes.DateTime:
# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
if has_tz:
# pybigquery doesn't set the timezone flag :(
10 changes: 5 additions & 5 deletions records_mover/db/connect.py
@@ -60,11 +60,11 @@ def create_vertica_odbc_sqlalchemy_url(db_facts: DBFacts) -> str:
"UID={user};"
"PWD={password};"
"CHARSET=UTF8;").\
format(host=db_facts['host'],
database=db_facts['database'],
port=db_facts['port'],
user=db_facts['user'],
password=db_facts['password'])
format(host=db_facts['host'],
database=db_facts['database'],
port=db_facts['port'],
user=db_facts['user'],
password=db_facts['password'])
db_url = quote_plus(db_url)
return "vertica+pyodbc:///?odbc_connect={}".format(db_url)

2 changes: 1 addition & 1 deletion records_mover/db/driver.py
@@ -83,7 +83,7 @@ def set_grant_permissions_for_users(self, schema_name: str, table: str,
def supports_time_type(self) -> bool:
return True

def type_for_date_plus_time(self, has_tz: bool=False) -> sqlalchemy.sql.sqltypes.DateTime:
def type_for_date_plus_time(self, has_tz: bool = False) -> sqlalchemy.sql.sqltypes.DateTime:
Collaborator: From what I've seen so far, the original codebase more commonly uses the first option, with no extra space (my personal preference as well), but it's not a big deal.

Contributor (author): Agreed stylistically, but it's triggering flake8, so 🤷

"""Different DB vendors have different names for a date, a time, and
an optional timezone"""
return sqlalchemy.sql.sqltypes.DateTime(timezone=has_tz)
10 changes: 5 additions & 5 deletions records_mover/db/postgres/copy_options/__init__.py
@@ -41,9 +41,9 @@ def needs_csv_format(hints: ValidatedRecordsHints) -> bool:
def postgres_copy_to_options(unhandled_hints: Set[str],
delimited_records_format: DelimitedRecordsFormat,
fail_if_cant_handle_hint: bool) ->\
Tuple[DateOutputStyle,
Optional[DateOrderStyle],
PostgresCopyOptions]:
Tuple[DateOutputStyle,
Optional[DateOrderStyle],
PostgresCopyOptions]:
hints = delimited_records_format.validate(fail_if_cant_handle_hint=fail_if_cant_handle_hint)

if needs_csv_format(hints):
@@ -68,8 +68,8 @@ def postgres_copy_to_options(unhandled_hints: Set[str],
# loading
def postgres_copy_from_options(unhandled_hints: Set[str],
load_plan: RecordsLoadPlan) ->\
Tuple[Optional[DateOrderStyle],
PostgresCopyOptions]:
Tuple[Optional[DateOrderStyle],
PostgresCopyOptions]:
fail_if_cant_handle_hint = load_plan.processing_instructions.fail_if_cant_handle_hint
if not isinstance(load_plan.records_format, DelimitedRecordsFormat):
raise NotImplementedError("Not currently able to import "
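The hunks above only re-indent the wrapped return annotations: flake8's continuation-line rules (E127/E128) want a continuation either aligned with the opening bracket or given a consistent hanging indent. A sketch of the pattern the diff settles on, with simplified illustrative names and a stubbed body:

```python
from typing import Optional, Set, Tuple


# Continuation lines must align with the opening bracket (first parameter
# line) or use a uniform extra indent; mixing the two trips E128.
def copy_to_options(unhandled_hints: Set[str],
                    fail_if_cant_handle_hint: bool) -> \
        Tuple[str, Optional[str]]:
    # Stub: the real function derives copy options from records hints.
    return ("iso", None)
```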
10 changes: 10 additions & 0 deletions records_mover/db/postgres/sqlalchemy_postgres_copy.py
@@ -14,6 +14,7 @@

__version__ = '0.5.0'


def copy_to(source, dest, engine_or_conn, **flags):
"""Export a query or select to a file. For flags, see the PostgreSQL
documentation at http://www.postgresql.org/docs/9.5/static/sql-copy.html.
@@ -47,6 +48,7 @@ def copy_to(source, dest, engine_or_conn, **flags):
if autoclose:
conn.close()


def copy_from(source, dest, engine_or_conn, columns=(), **flags):
"""Import a table from a file. For flags, see the PostgreSQL documentation
at http://www.postgresql.org/docs/9.5/static/sql-copy.html.
@@ -87,6 +89,7 @@ def copy_from(source, dest, engine_or_conn, columns=(), **flags):
conn.commit()
conn.close()


def raw_connection_from(engine_or_conn):
"""Extract a raw_connection and determine if it should be automatically closed.

@@ -98,19 +101,22 @@ def raw_connection_from(engine_or_conn):
return engine_or_conn.connection, False
return engine_or_conn.raw_connection(), True


def format_flags(flags):
return ', '.join(
'{} {}'.format(key.upper(), format_flag(value))
for key, value in flags.items()
)


def format_flag(value):
return (
six.text_type(value).upper()
if isinstance(value, bool)
else repr(value)
)


def relabel_query(query):
"""Relabel query entities according to mappings defined in the SQLAlchemy
ORM. Useful when table column names differ from corresponding attribute
@@ -121,12 +127,14 @@ def relabel_query(query):
"""
return query.with_entities(*query_entities(query))


def query_entities(query):
return sum(
[desc_entities(desc) for desc in query.column_descriptions],
[]
)


def desc_entities(desc):
expr, name = desc['expr'], desc['name']
if isinstance(expr, Mapper):
@@ -138,13 +146,15 @@ def desc_entities(desc):
else:
raise ValueError('Unrecognized query entity {!r}'.format(expr))


def mapper_entities(mapper):
model = mapper.class_
return [
getattr(model, prop.key).label(prop.key)
for prop in mapper.column_attrs
]


def is_model(class_):
try:
class_mapper(class_)
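Every hunk in this last file makes the same change: flake8 E302 expects two blank lines before each top-level definition, and the diff inserts them between the module-level functions. In miniature:

```python
# flake8 E302: two blank lines must separate top-level definitions;
# one blank line (or none) between these functions would be flagged.
def first() -> int:
    return 1


def second() -> int:
    return 2
```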