From 73bbe0dee21377e6372046577ae43579171aa845 Mon Sep 17 00:00:00 2001 From: Tim Ryan Date: Tue, 9 May 2023 11:44:38 -0600 Subject: [PATCH 01/13] RM-86 error on future and deprecation warnings RM-86 switch to only testing records_mover package RM-86 sqlalchemy RM-86 redshift_sqlalchemy RM-86 try dash in module name RM-86 exempt sqlalchemy redshift RM-86 add explicit callout for google RM-86 add exception for pkg_resources RM-86 update req version to get rid of pip err RM-86 move pytest.ini to main folder RM-86 move config to pytest.ini RM-86 update URL method call RM-86 have fail on all dep and future RM-86 try regex RM-86 add wildcards around recordsmover RM-86 add FutureWarning failure RM-86 add Deprecation fail to config RM-86 Remove SADeprecation Warning RM-86 error on SADeprecationWarning RM-86 update cache location RM-86 simpler warning filter RM-86 try different filtering RM-86 try multiple option flags RM-86 except warnings from outside packages --- .circleci/config.yml | 4 ++-- pytest.ini | 5 +++++ records_mover/db/connect.py | 15 ++++++++------- tests/integration/records/single_db/pytest.ini | 5 +++++ 4 files changed, 20 insertions(+), 9 deletions(-) create mode 100644 pytest.ini create mode 100644 tests/integration/records/single_db/pytest.ini diff --git a/.circleci/config.yml b/.circleci/config.yml index fdc50ca06..91737ca10 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -30,7 +30,7 @@ commands: default: "" steps: - restore_cache: - key: deps-v7-<>-<>-<>-<>-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }} + key: deps-v8-<>-<>-<>-<>-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }} - run: name: Install python deps in venv environment: @@ -63,7 +63,7 @@ commands: fi fi - save_cache: - key: deps-v6-<>-<>-<>-<>-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }} + key: deps-v8-<>-<>-<>-<>-{{ .Branch }}-{{ checksum "requirements.txt" }}-{{ checksum "setup.py" }} paths: - "venv" wait_for_db: diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 000000000..3c6310b05 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,5 @@ +# pytest.ini +[pytest] +filterwarnings = + error::DeprecationWarning:.*records_mover.* + error::FutureWarning:.*records_mover.* \ No newline at end of file diff --git a/records_mover/db/connect.py b/records_mover/db/connect.py index d6cf8df85..81b7729da 100644 --- a/records_mover/db/connect.py +++ b/records_mover/db/connect.py @@ -113,13 +113,14 @@ def create_sqlalchemy_url(db_facts: DBFacts, return create_bigquery_sqlalchemy_url(db_facts) else: - return sa.engine.url.URL(drivername=driver, - username=username, - password=db_facts['password'], - host=db_facts['host'], - port=db_facts['port'], - database=db_facts['database'], - query=query_for_type.get(db_type)) + return sa.engine.url.URL.create( + drivername=driver, + username=username, + password=db_facts['password'], + host=db_facts['host'], + port=db_facts['port'], + database=db_facts['database'], + query=query_for_type.get(db_type)) def engine_from_lpass_entry(lpass_entry_name: str) -> sa.engine.Engine: diff --git a/tests/integration/records/single_db/pytest.ini b/tests/integration/records/single_db/pytest.ini new file mode 100644 index 000000000..3c6310b05 --- /dev/null +++ b/tests/integration/records/single_db/pytest.ini @@ -0,0 +1,5 @@ +# pytest.ini +[pytest] +filterwarnings = + error::DeprecationWarning:.*records_mover.* + error::FutureWarning:.*records_mover.* \ No newline at end of file From 703029ccf337ac50e0360d7242d349562f71d100 Mon Sep 17 00:00:00 2001 From: Tim Ryan Date: Thu, 11 May 2023 23:15:45 -0600 Subject: [PATCH 02/13] RM-86 update error_bad_lines to on_bad_lines --- records_mover/records/pandas/read_csv_options.py | 6 +++--- tests/component/records/test_pandas_read_csv_options.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/records_mover/records/pandas/read_csv_options.py b/records_mover/records/pandas/read_csv_options.py index 805a2a00b..5e4f962bb 100644 --- a/records_mover/records/pandas/read_csv_options.py +++ b/records_mover/records/pandas/read_csv_options.py @@ -630,7 +630,7 @@ def day_first(dateish_format: str) -> bool: # (deprecated, so not supplying) # - # error_bad_lines : bool, default True + # on_bad_lines : bool, default True # # Lines with too many fields (e.g. a csv line with too many # commas) will by default cause an exception to be raised, and no @@ -638,12 +638,12 @@ def day_first(dateish_format: str) -> bool: # will dropped from the DataFrame that is returned. # - pandas_options['error_bad_lines'] = processing_instructions.fail_if_row_invalid + pandas_options['on_bad_lines'] = processing_instructions.fail_if_row_invalid # # warn_bad_lines : bool, default True # - # If error_bad_lines is False, and warn_bad_lines is True, a + # If on_bad_lines is False, and warn_bad_lines is True, a # warning for each “bad line” will be output. # diff --git a/tests/component/records/test_pandas_read_csv_options.py b/tests/component/records/test_pandas_read_csv_options.py index 0103ab5e8..b3d2c459c 100644 --- a/tests/component/records/test_pandas_read_csv_options.py +++ b/tests/component/records/test_pandas_read_csv_options.py @@ -38,7 +38,7 @@ def test_pandas_read_csv_options_bluelabs(self): 'doublequote': False, 'encoding': 'UTF8', 'engine': 'python', - 'error_bad_lines': True, + 'on_bad_lines': True, 'escapechar': '\\', 'header': None, 'prefix': 'untitled_', @@ -67,7 +67,7 @@ def test_pandas_read_csv_options_bluelabs(self): # 'doublequote': False, # 'encoding': 'UTF8', # 'engine': 'python', - # 'error_bad_lines': True, + # 'on_bad_lines': True, # 'escapechar': '\\', # 'header': None, # 'prefix': 'untitled_', @@ -116,7 +116,7 @@ def test_pandas_read_csv_options_csv(self): 'doublequote': True, 'encoding': 'UTF8', 'engine': 'python', - 'error_bad_lines': True, + 'on_bad_lines': True, 'header': 0, 'quotechar': '"', 'quoting': 0, @@ -141,7 +141,7 @@ def test_pandas_read_csv_options_vertica(self): 'delimiter': '\x01', 'doublequote': False, 'engine': 'c', - 'error_bad_lines': True, + 'on_bad_lines': True, 'header': None, 'lineterminator': '\x02', 'prefix': 'untitled_', From c932a8c8230e8cfb48a9df39993aa03a983d35fc Mon Sep 17 00:00:00 2001 From: Tim Ryan Date: Thu, 11 May 2023 23:18:39 -0600 Subject: [PATCH 03/13] RM-86 line_terminator to lineterminator --- records_mover/records/pandas/to_csv_options.py | 2 +- .../test_pandas_to_csv_options_bluelabs.py | 2 +- .../records/test_pandas_to_csv_options_csv.py | 2 +- .../test_pandas_to_csv_options_dateformats.py | 8 ++++---- .../test_pandas_to_csv_options_vertica.py | 2 +- tests/unit/records/targets/test_fileobj.py | 16 ++++++++-------- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/records_mover/records/pandas/to_csv_options.py b/records_mover/records/pandas/to_csv_options.py index 0765f611b..f43063a79 100644 --- a/records_mover/records/pandas/to_csv_options.py +++ b/records_mover/records/pandas/to_csv_options.py @@ -111,7 +111,7 @@ def pandas_to_csv_options(records_format: DelimitedRecordsFormat, pandas_options['sep'] = hints.field_delimiter quiet_remove(unhandled_hints, 'field-delimiter') - pandas_options['line_terminator'] = hints.record_terminator + pandas_options['lineterminator'] = hints.record_terminator quiet_remove(unhandled_hints, 'record-terminator') return pandas_options diff --git a/tests/component/records/test_pandas_to_csv_options_bluelabs.py b/tests/component/records/test_pandas_to_csv_options_bluelabs.py index 328a2a724..cfbd9fb79 100644 --- a/tests/component/records/test_pandas_to_csv_options_bluelabs.py +++ b/tests/component/records/test_pandas_to_csv_options_bluelabs.py @@ -14,7 +14,7 @@ def test_pandas_to_csv_options_bluelabs(self): 'encoding': 'UTF8', 'escapechar': '\\', 'header': False, - 'line_terminator': '\n', + 'lineterminator': '\n', 'quotechar': '"', 'quoting': 3, 'sep': ',', diff --git a/tests/component/records/test_pandas_to_csv_options_csv.py b/tests/component/records/test_pandas_to_csv_options_csv.py index 9a0abf0f2..5218283f6 100644 --- a/tests/component/records/test_pandas_to_csv_options_csv.py +++ b/tests/component/records/test_pandas_to_csv_options_csv.py @@ -13,7 +13,7 @@ def test_pandas_to_csv_options_csv(self): 'doublequote': True, 'encoding': 'UTF8', 'header': True, - 'line_terminator': '\n', + 'lineterminator': '\n', 'quotechar': '"', 'quoting': 0, 'sep': ',' diff --git a/tests/component/records/test_pandas_to_csv_options_dateformats.py b/tests/component/records/test_pandas_to_csv_options_dateformats.py index 888a4cd1e..452076bdf 100644 --- a/tests/component/records/test_pandas_to_csv_options_dateformats.py +++ b/tests/component/records/test_pandas_to_csv_options_dateformats.py @@ -16,7 +16,7 @@ class TestPandasToCsvOptionsDateformats(unittest.TestCase): # 'encoding': 'UTF8', # 'escapechar': '\\', # 'header': False, - # 'line_terminator': '\n', + # 'lineterminator': '\n', # 'quotechar': '"', # 'quoting': 3, # 'sep': ',', @@ -44,7 +44,7 @@ class TestPandasToCsvOptionsDateformats(unittest.TestCase): # 'encoding': 'UTF8', # 'escapechar': '\\', # 'header': False, - # 'line_terminator': '\n', + # 'lineterminator': '\n', # 'quotechar': '"', # 'quoting': 3, # 'sep': ',', @@ -73,7 +73,7 @@ class TestPandasToCsvOptionsDateformats(unittest.TestCase): # 'encoding': 'UTF8', # 'escapechar': '\\', # 'header': False, - # 'line_terminator': '\n', + # 'lineterminator': '\n', # 'quotechar': '"', # 'quoting': 3, # 'sep': ',', @@ -101,7 +101,7 @@ class TestPandasToCsvOptionsDateformats(unittest.TestCase): # 'encoding': 'UTF8', # 'escapechar': '\\', # 'header': False, - # 'line_terminator': '\n', + # 'lineterminator': '\n', # 'quotechar': '"', # 'quoting': 3, # 'sep': ',', diff --git a/tests/component/records/test_pandas_to_csv_options_vertica.py b/tests/component/records/test_pandas_to_csv_options_vertica.py index b28e1ef2a..f45cb341d 100644 --- a/tests/component/records/test_pandas_to_csv_options_vertica.py +++ b/tests/component/records/test_pandas_to_csv_options_vertica.py @@ -12,7 +12,7 @@ def test_pandas_to_csv_options_vertica(self): 'doublequote': False, 'encoding': 'UTF8', 'header': False, - 'line_terminator': '\x02', + 'lineterminator': '\x02', 'quotechar': '"', 'quoting': 3, 'sep': '\x01', diff --git a/tests/unit/records/targets/test_fileobj.py b/tests/unit/records/targets/test_fileobj.py index 71db867a4..4961dd9d8 100644 --- a/tests/unit/records/targets/test_fileobj.py +++ b/tests/unit/records/targets/test_fileobj.py @@ -42,7 +42,7 @@ def test_move_from_dataframe_uncompressed_no_header_row(self, encoding='UTF8', escapechar='\\', header=False, - line_terminator='\n', + lineterminator='\n', quotechar='"', quoting=1, sep=',') @@ -54,7 +54,7 @@ def test_move_from_dataframe_uncompressed_no_header_row(self, encoding='UTF8', escapechar='\\', header=False, - line_terminator='\n', + lineterminator='\n', quotechar='"', quoting=1, sep=',') @@ -96,7 +96,7 @@ def test_move_from_dataframe_uncompressed_with_header_row(self, encoding='UTF8', escapechar='\\', header=True, - line_terminator='\n', + lineterminator='\n', quotechar='"', quoting=1, sep=',') @@ -108,7 +108,7 @@ def test_move_from_dataframe_uncompressed_with_header_row(self, encoding='UTF8', escapechar='\\', header=False, - line_terminator='\n', + lineterminator='\n', quotechar='"', quoting=1, sep=',') @@ -150,7 +150,7 @@ def test_move_from_dataframe_compressed_no_header_row(self, encoding='UTF8', escapechar='\\', header=False, - line_terminator='\n', + lineterminator='\n', quotechar='"', quoting=1, sep=',') @@ -163,7 +163,7 @@ def test_move_from_dataframe_compressed_no_header_row(self, encoding='UTF8', escapechar='\\', header=False, - line_terminator='\n', + lineterminator='\n', quotechar='"', quoting=1, sep=',') @@ -205,7 +205,7 @@ def test_move_from_dataframe_compressed_with_header_row(self, encoding='UTF8', escapechar='\\', header=True, - line_terminator='\n', + lineterminator='\n', quotechar='"', quoting=1, sep=',') @@ -218,7 +218,7 @@ def test_move_from_dataframe_compressed_with_header_row(self, encoding='UTF8', escapechar='\\', header=False, - line_terminator='\n', + lineterminator='\n', quotechar='"', quoting=1, sep=',') From df075fa3be7b4fd45c2183ca60aba712a4953c57 Mon Sep 17 00:00:00 2001 From: Tim Ryan Date: Thu, 11 May 2023 23:25:39 -0600 Subject: [PATCH 04/13] RM-86 update patch --- tests/unit/db/test_connect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/db/test_connect.py b/tests/unit/db/test_connect.py index aefd4473c..6c55b5b38 100644 --- a/tests/unit/db/test_connect.py +++ b/tests/unit/db/test_connect.py @@ -6,7 +6,7 @@ class TestConnect(unittest.TestCase): @patch('records_mover.db.connect.db_facts_from_lpass') @patch('records_mover.db.connect.sa.create_engine') - @patch('records_mover.db.connect.sa.engine.url.URL') + @patch('records_mover.db.connect.sa.engine.url.URL.create') def test_engine_from_lpass_entry(self, mock_url, mock_create_engine, From cdd562ca18afa2ac4985108d8bedcdf08e55b606 Mon Sep 17 00:00:00 2001 From: Tim Ryan Date: Thu, 11 May 2023 23:32:58 -0600 Subject: [PATCH 05/13] RM-86 update warn_bad_lines syntax --- records_mover/records/pandas/read_csv_options.py | 10 ++++------ .../component/records/test_pandas_read_csv_options.py | 9 +++------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/records_mover/records/pandas/read_csv_options.py b/records_mover/records/pandas/read_csv_options.py index 5e4f962bb..38d9e8f15 100644 --- a/records_mover/records/pandas/read_csv_options.py +++ b/records_mover/records/pandas/read_csv_options.py @@ -630,7 +630,7 @@ def day_first(dateish_format: str) -> bool: # (deprecated, so not supplying) # - # on_bad_lines : bool, default True + # on_bad_lines : string default 'error' # # Lines with too many fields (e.g. a csv line with too many # commas) will by default cause an exception to be raised, and no @@ -638,17 +638,15 @@ def day_first(dateish_format: str) -> bool: # will dropped from the DataFrame that is returned. # - pandas_options['on_bad_lines'] = processing_instructions.fail_if_row_invalid + pandas_options['on_bad_lines'] = 'error' if processing_instructions.fail_if_row_invalid else 'warn' # - # warn_bad_lines : bool, default True + # # - # If on_bad_lines is False, and warn_bad_lines is True, a + # If processing_instructions.fail_if_row_invalid is False, a # warning for each “bad line” will be output. # - pandas_options['warn_bad_lines'] = True - # # delim_whitespace : bool, default False # diff --git a/tests/component/records/test_pandas_read_csv_options.py b/tests/component/records/test_pandas_read_csv_options.py index b3d2c459c..62e6e52ee 100644 --- a/tests/component/records/test_pandas_read_csv_options.py +++ b/tests/component/records/test_pandas_read_csv_options.py @@ -38,13 +38,12 @@ def test_pandas_read_csv_options_bluelabs(self): 'doublequote': False, 'encoding': 'UTF8', 'engine': 'python', - 'on_bad_lines': True, + 'on_bad_lines': 'error', 'escapechar': '\\', 'header': None, 'prefix': 'untitled_', 'quotechar': '"', 'quoting': 3, - 'warn_bad_lines': True, 'parse_dates': [0, 1, 2, 3], } processing_instructions = ProcessingInstructions() @@ -116,11 +115,10 @@ def test_pandas_read_csv_options_csv(self): 'doublequote': True, 'encoding': 'UTF8', 'engine': 'python', - 'on_bad_lines': True, + 'on_bad_lines': 'error', 'header': 0, 'quotechar': '"', 'quoting': 0, - 'warn_bad_lines': True, 'parse_dates': [0, 1, 2, 3], } processing_instructions = ProcessingInstructions() @@ -141,13 +139,12 @@ def test_pandas_read_csv_options_vertica(self): 'delimiter': '\x01', 'doublequote': False, 'engine': 'c', - 'on_bad_lines': True, + 'on_bad_lines': 'error', 'header': None, 'lineterminator': '\x02', 'prefix': 'untitled_', 'quotechar': '"', 'quoting': 3, - 'warn_bad_lines': True, 'parse_dates': [0, 1, 2, 3], } processing_instructions = ProcessingInstructions() From 5cfde29ea04f9c5cc73fced55f0b5e40154781e2 Mon Sep 17 00:00:00 2001 From: Tim Ryan Date: Thu, 11 May 2023 23:44:55 -0600 Subject: [PATCH 06/13] RM-86 fix pandas iloc call --- records_mover/records/pandas/prep_for_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/records_mover/records/pandas/prep_for_csv.py b/records_mover/records/pandas/prep_for_csv.py index 4839f164f..8a4d6bfaf 100644 --- a/records_mover/records/pandas/prep_for_csv.py +++ b/records_mover/records/pandas/prep_for_csv.py @@ -110,5 +110,5 @@ def prep_df_for_csv_output(df: DataFrame, records_format, processing_instructions) if formatted_series is not None: - formatted_df.iloc[:, index] = formatted_series + formatted_df[formatted_df.columns[index]] = formatted_series return formatted_df From 5ace0a42ca5b469ed801d865eb8c61361d80af24 Mon Sep 17 00:00:00 2001 From: Tim Ryan Date: Thu, 11 May 2023 23:40:10 -0600 Subject: [PATCH 07/13] RM-86 silence SA 2.0 warning RM-86 silence SA 2.0 warning (separate ticket) --- .circleci/config.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 91737ca10..9f141def8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -35,6 +35,7 @@ commands: name: Install python deps in venv environment: PYENV_VERSION: <> + SQLALCHEMY_SILENCE_UBER_WARNING: 1 command: | if [ -f venv/bin/activate ] then @@ -113,6 +114,8 @@ jobs: description: "Enforce coverage not slipping" docker: - image: cimg/python:<> + environment: + SQLALCHEMY_SILENCE_UBER_WARNING: 1 steps: - checkout - add_ssh_keys: @@ -185,6 +188,8 @@ jobs: command: type: string description: "Command to run in records-mover venv" + environment: + SQLALCHEMY_SILENCE_UBER_WARNING: 1 docker: - image: cimg/python:<> - image: jbfavre/vertica:8.1.1-16_centos-7 @@ -294,7 +299,8 @@ jobs: type: boolean description: "If true, pass in the env variable specifying an GCS scratch bucket to Records Mover (for BigQuery)" default: true - + environment: + SQLALCHEMY_SILENCE_UBER_WARNING: 1 docker: - image: cimg/python:<> steps: @@ -351,6 +357,8 @@ jobs: default: '3.9' docker: - image: cimg/python:<> + environment: + SQLALCHEMY_SILENCE_UBER_WARNING: 1 steps: - checkout - installvenv: @@ -400,6 +408,8 @@ jobs: cli-extra-test: docker: - image: cimg/python:3.9 + environment: + SQLALCHEMY_SILENCE_UBER_WARNING: 1 steps: - checkout - installvenv: From 0d34685a6b100ce1923342496b73de7ab1bf6745 Mon Sep 17 00:00:00 2001 From: Tim Ryan Date: Mon, 15 May 2023 17:30:16 -0600 Subject: [PATCH 08/13] RM-86 ignore type error on URL.create (says method doesn't exist but it does) RM-86 --- records_mover/db/connect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/records_mover/db/connect.py b/records_mover/db/connect.py index 81b7729da..a2b85706e 100644 --- a/records_mover/db/connect.py +++ b/records_mover/db/connect.py @@ -113,7 +113,7 @@ def create_sqlalchemy_url(db_facts: DBFacts, return create_bigquery_sqlalchemy_url(db_facts) else: - return sa.engine.url.URL.create( + return sa.engine.url.URL.create( # type: ignore drivername=driver, username=username, password=db_facts['password'], From 0e4109c743bdaa34c7d1248e008b75a0795669db Mon Sep 17 00:00:00 2001 From: Tim Ryan Date: Mon, 15 May 2023 17:04:34 -0600 Subject: [PATCH 09/13] RM-86 remove use of prefix arg Revert "RM-86 remove use of prefix arg" This reverts commit 06927539e49960068f573fc4bd0cb7bbfab27b19. RM-86 remove instances of untitled_ prefix RM-86 try int index RM-86 revert column names RM-86 try unnamed rather than untitled RM-86 check for column datatype RM-86 print columns --- records_mover/pandas/__init__.py | 7 +++--- .../records/delimited/csv_streamer.py | 12 ---------- .../records/pandas/read_csv_options.py | 23 ------------------- .../records/pandas/test_read_csv_options.py | 8 +++---- .../records/test_pandas_read_csv_options.py | 3 --- 5 files changed, 8 insertions(+), 45 deletions(-) diff --git a/records_mover/pandas/__init__.py b/records_mover/pandas/__init__.py index 51dacab2e..5b7e0a14f 100644 --- a/records_mover/pandas/__init__.py +++ b/records_mover/pandas/__init__.py @@ -29,7 +29,8 @@ def purge_unnamed_unused_columns(df: DataFrame) -> DataFrame: # "unnamed: 1", or maybe "Unnamed: 1" (not sure why/when # that differs). Let's clean those up. for column in df: - if column.startswith('Unnamed: ') or column.startswith('unnamed: '): - if not df[column].notnull().any(): - df = df.drop(column, axis=1) + if type(column) == str: + if column.startswith('Unnamed: ') or column.startswith('unnamed: '): + if not df[column].notnull().any(): + df = df.drop(column, axis=1) return df diff --git a/records_mover/records/delimited/csv_streamer.py b/records_mover/records/delimited/csv_streamer.py index f36a1ff1f..3da57a029 100644 --- a/records_mover/records/delimited/csv_streamer.py +++ b/records_mover/records/delimited/csv_streamer.py @@ -44,18 +44,6 @@ def stream_csv(filepath_or_buffer: Union[str, IO[bytes]], 'iterator': True, 'engine': 'python' } - if header is None: - # Pandas only accepts the prefix argument (which makes for - # tidier column names when otherwise not provided) when the - # header is explicitly marked as missing, not when it's - # available or even when we ask Pandas to infer it. Bummer, - # as this means that when Pandas infers that there's no - # header, the column names will end up different than folks - # explicitly tell records mover that there is no header. - # - # https://github.com/pandas-dev/pandas/issues/27394 - # https://github.com/pandas-dev/pandas/pull/31383 - kwargs['prefix'] = 'untitled_' if 'quoting' in hints: quoting = hints['quoting'] kwargs['quoting'] = pandas_quoting_from_hint[quoting] diff --git a/records_mover/records/pandas/read_csv_options.py b/records_mover/records/pandas/read_csv_options.py index 38d9e8f15..2ee8544ee 100644 --- a/records_mover/records/pandas/read_csv_options.py +++ b/records_mover/records/pandas/read_csv_options.py @@ -146,29 +146,6 @@ def pandas_read_csv_options(records_format: DelimitedRecordsFormat, # (better to keep a standard format, no matter how many columsn) # - # - # prefix : str, optional - # - # Prefix to add to column numbers when no header, e.g. ‘X’ for X0, X1, - # - - # - # Not sure this actually does anything - when loading a CSV format - # file with an empty final column name - e.g., - # tests/integration/resources/delimited-csv-with-header.csv - the - # column still comes out as 'unnamed: 11'ead as 'untitled_11'. - # - # Leaving this in case a future version of Pandas behaves - # better. - # - if pandas_options['header'] is None: - # Pandas only accepts the prefix argument when the - # header is marked as missing. - # - # https://github.com/pandas-dev/pandas/issues/27394 - # https://github.com/pandas-dev/pandas/pull/31383 - pandas_options['prefix'] = 'untitled_' - # # mangle_dupe_cols : bool, default True # diff --git a/tests/component/records/pandas/test_read_csv_options.py b/tests/component/records/pandas/test_read_csv_options.py index dd39d1bd3..c69de8d7c 100644 --- a/tests/component/records/pandas/test_read_csv_options.py +++ b/tests/component/records/pandas/test_read_csv_options.py @@ -90,7 +90,7 @@ class DateFormatExpectations(TypedDict): fileobj = io.StringIO(create_sample(dateformat)) df = pandas.read_csv(filepath_or_buffer=fileobj, **options) - timestamp = df['untitled_0'][0] + timestamp = df[0][0] self.assertEqual(timestamp.year, SAMPLE_YEAR) self.assertEqual(timestamp.month, SAMPLE_MONTH) self.assertEqual(timestamp.day, SAMPLE_DAY) @@ -145,7 +145,7 @@ class DateTimeFormatTzExpectations(TypedDict): fileobj = io.StringIO(datetimetz) df = pandas.read_csv(filepath_or_buffer=fileobj, **options) - timestamp = df['untitled_0'][0] + timestamp = df[0][0] self.assertIsInstance(timestamp, pandas.Timestamp, f"Pandas did not parse {datetimetz} as a timestamp object") self.assertEqual(timestamp.year, SAMPLE_YEAR) @@ -208,7 +208,7 @@ class DateTimeFormatExpectations(TypedDict): fileobj = io.StringIO(datetimetz) df = pandas.read_csv(filepath_or_buffer=fileobj, **options) - timestamp = df['untitled_0'][0] + timestamp = df[0][0] self.assertIsInstance(timestamp, pandas.Timestamp, f"Pandas did not parse {datetimetz} as a timestamp object") self.assertEqual(timestamp.year, SAMPLE_YEAR) @@ -249,7 +249,7 @@ def test_timeonlyformat(self) -> None: fileobj = io.StringIO(timeonly) df = pandas.read_csv(filepath_or_buffer=fileobj, **options) - timestamp = df['untitled_0'][0] + timestamp = df[0][0] self.assertIsInstance(timestamp, pandas.Timestamp, f"Pandas did not parse {timeonly} as a timestamp object") self.assertEqual(timestamp.hour, SAMPLE_HOUR) diff --git a/tests/component/records/test_pandas_read_csv_options.py b/tests/component/records/test_pandas_read_csv_options.py index 62e6e52ee..47c5e629d 100644 --- a/tests/component/records/test_pandas_read_csv_options.py +++ b/tests/component/records/test_pandas_read_csv_options.py @@ -41,7 +41,6 @@ def test_pandas_read_csv_options_bluelabs(self): 'on_bad_lines': 'error', 'escapechar': '\\', 'header': None, - 'prefix': 'untitled_', 'quotechar': '"', 'quoting': 3, 'parse_dates': [0, 1, 2, 3], @@ -69,7 +68,6 @@ def test_pandas_read_csv_options_bluelabs(self): # 'on_bad_lines': True, # 'escapechar': '\\', # 'header': None, - # 'prefix': 'untitled_', # 'quotechar': '"', # 'quoting': 3, # 'warn_bad_lines': True, @@ -142,7 +140,6 @@ def test_pandas_read_csv_options_vertica(self): 'on_bad_lines': 'error', 'header': None, 'lineterminator': '\x02', - 'prefix': 'untitled_', 'quotechar': '"', 'quoting': 3, 'parse_dates': [0, 1, 2, 3], From 3830cf1515844dac826b4160078db084014d1f3e Mon Sep 17 00:00:00 2001 From: Tim Ryan Date: Mon, 15 May 2023 18:26:47 -0600 Subject: [PATCH 10/13] RM-86 allow multiple versions of lineterminator --- .../records/pandas/read_csv_options.py | 7 +- .../records/pandas/to_csv_options.py | 7 +- .../records/test_pandas_read_csv_options.py | 43 ++- .../test_pandas_to_csv_options_bluelabs.py | 40 ++- .../records/test_pandas_to_csv_options_csv.py | 37 +- .../test_pandas_to_csv_options_vertica.py | 34 +- tests/unit/records/targets/test_fileobj.py | 318 ++++++++++++------ 7 files changed, 338 insertions(+), 148 deletions(-) diff --git a/records_mover/records/pandas/read_csv_options.py b/records_mover/records/pandas/read_csv_options.py index 2ee8544ee..66d9ca5f2 100644 --- a/records_mover/records/pandas/read_csv_options.py +++ b/records_mover/records/pandas/read_csv_options.py @@ -6,6 +6,8 @@ from records_mover.records.schema import RecordsSchema import logging from typing import Set, Dict, Any +from packaging import version +import pandas as pd logger = logging.getLogger(__name__) @@ -499,7 +501,10 @@ def day_first(dateish_format: str) -> bool: # Character to break file into lines. Only valid with C parser. # if non_standard_record_terminator: - pandas_options['lineterminator'] = hints.record_terminator + if version.parse(pd.__version__) >= version.parse('1.5.0'): + pandas_options['lineterminator'] = hints.record_terminator + else: + pandas_options['line_terminator'] = hints.record_terminator quiet_remove(unhandled_hints, 'record-terminator') # diff --git a/records_mover/records/pandas/to_csv_options.py b/records_mover/records/pandas/to_csv_options.py index f43063a79..bee11a522 100644 --- a/records_mover/records/pandas/to_csv_options.py +++ b/records_mover/records/pandas/to_csv_options.py @@ -6,6 +6,8 @@ from records_mover.mover_types import _assert_never import logging from typing import Set, Dict +from packaging import version +import pandas as pd logger = logging.getLogger(__name__) @@ -111,7 +113,10 @@ def pandas_to_csv_options(records_format: DelimitedRecordsFormat, pandas_options['sep'] = hints.field_delimiter quiet_remove(unhandled_hints, 'field-delimiter') - pandas_options['lineterminator'] = hints.record_terminator + if version.parse(pd.__version__) >= version.parse('1.5.0'): + pandas_options['lineterminator'] = hints.record_terminator + else: + pandas_options['line_terminator'] = hints.record_terminator quiet_remove(unhandled_hints, 'record-terminator') return pandas_options diff --git a/tests/component/records/test_pandas_read_csv_options.py b/tests/component/records/test_pandas_read_csv_options.py index 47c5e629d..1f4020cd9 100644 --- a/tests/component/records/test_pandas_read_csv_options.py +++ b/tests/component/records/test_pandas_read_csv_options.py @@ -4,6 +4,8 @@ from records_mover.records.processing_instructions import ProcessingInstructions from records_mover.records.records_format import DelimitedRecordsFormat from records_mover.records.schema import RecordsSchema +from packaging import version +import pandas as pd class TestPandasReadCsvOptions(unittest.TestCase): @@ -131,19 +133,34 @@ def test_pandas_read_csv_options_csv(self): def test_pandas_read_csv_options_vertica(self): self.maxDiff = None - expected = { - 'dayfirst': False, - 'compression': None, - 'delimiter': '\x01', - 'doublequote': False, - 'engine': 'c', - 'on_bad_lines': 'error', - 'header': None, - 'lineterminator': '\x02', - 'quotechar': '"', - 'quoting': 3, - 'parse_dates': [0, 1, 2, 3], - } + if version.parse(pd.__version__) >= version.parse('1.5.0'): + expected = { + 'dayfirst': False, + 'compression': None, + 'delimiter': '\x01', + 'doublequote': False, + 'engine': 'c', + 'on_bad_lines': 'error', + 'header': None, + 'lineterminator': '\x02', + 'quotechar': '"', + 'quoting': 3, + 'parse_dates': [0, 1, 2, 3], + } + else: + expected = { + 'dayfirst': False, + 'compression': None, + 'delimiter': '\x01', + 'doublequote': False, + 'engine': 'c', + 'on_bad_lines': 'error', + 'header': None, + 'line_terminator': '\x02', + 'quotechar': '"', + 'quoting': 3, + 'parse_dates': [0, 1, 2, 3], + } processing_instructions = ProcessingInstructions() records_format = DelimitedRecordsFormat(hints=vertica_format_hints) unhandled_hints = set(records_format.hints) diff --git a/tests/component/records/test_pandas_to_csv_options_bluelabs.py b/tests/component/records/test_pandas_to_csv_options_bluelabs.py index cfbd9fb79..be450900d 100644 --- a/tests/component/records/test_pandas_to_csv_options_bluelabs.py +++ b/tests/component/records/test_pandas_to_csv_options_bluelabs.py @@ -3,22 +3,38 @@ from records_mover.records.pandas import pandas_to_csv_options from records_mover.records.processing_instructions import ProcessingInstructions from records_mover.records.records_format import DelimitedRecordsFormat +from packaging import version +import pandas as pd class TestPandasToCsvOptionsBlueLabs(unittest.TestCase): def test_pandas_to_csv_options_bluelabs(self): - expected = { - 'compression': 'gzip', - 'date_format': '%Y-%m-%d %H:%M:%S.%f%z', - 'doublequote': False, - 'encoding': 'UTF8', - 'escapechar': '\\', - 'header': False, - 'lineterminator': '\n', - 'quotechar': '"', - 'quoting': 3, - 'sep': ',', - } + if version.parse(pd.__version__) >= version.parse('1.5.0'): + expected = { + 'compression': 'gzip', + 'date_format': '%Y-%m-%d %H:%M:%S.%f%z', + 'doublequote': False, + 'encoding': 'UTF8', + 'escapechar': '\\', + 'header': False, + 'lineterminator': '\n', + 'quotechar': '"', + 'quoting': 3, + 'sep': ',', + } + else: + expected = { + 'compression': 'gzip', + 'date_format': '%Y-%m-%d %H:%M:%S.%f%z', + 'doublequote': False, + 'encoding': 'UTF8', + 'escapechar': '\\', + 'header': False, + 'line_terminator': '\n', + 'quotechar': '"', + 'quoting': 3, + 'sep': ',', + } processing_instructions = ProcessingInstructions() records_format = DelimitedRecordsFormat(hints=bluelabs_format_hints) unhandled_hints = set(records_format.hints) diff --git a/tests/component/records/test_pandas_to_csv_options_csv.py b/tests/component/records/test_pandas_to_csv_options_csv.py index 5218283f6..9dfc4a3fb 100644 --- a/tests/component/records/test_pandas_to_csv_options_csv.py +++ b/tests/component/records/test_pandas_to_csv_options_csv.py @@ -3,21 +3,36 @@ from records_mover.records.pandas import pandas_to_csv_options from records_mover.records.processing_instructions import ProcessingInstructions from records_mover.records.records_format import DelimitedRecordsFormat +from packaging import version +import pandas as pd class TestPandasToCsvOptionsCsv(unittest.TestCase): def test_pandas_to_csv_options_csv(self): - expected = { - 'compression': 'gzip', - 'date_format': '%m/%d/%y %H:%M', - 'doublequote': True, - 'encoding': 'UTF8', - 'header': True, - 'lineterminator': '\n', - 'quotechar': '"', - 'quoting': 0, - 'sep': ',' - } + if version.parse(pd.__version__) >= version.parse('1.5.0'): + expected = { + 'compression': 'gzip', + 'date_format': '%m/%d/%y %H:%M', + 'doublequote': True, + 'encoding': 'UTF8', + 'header': True, + 'lineterminator': '\n', + 'quotechar': '"', + 'quoting': 0, + 'sep': ',' + } + else: + expected = { + 'compression': 'gzip', + 'date_format': '%m/%d/%y %H:%M', + 'doublequote': True, + 'encoding': 'UTF8', + 'header': True, + 'line_terminator': '\n', + 'quotechar': '"', + 'quoting': 0, + 'sep': ',' + } processing_instructions =\ ProcessingInstructions(fail_if_cant_handle_hint=True) records_format = DelimitedRecordsFormat(hints=csv_format_hints) diff --git a/tests/component/records/test_pandas_to_csv_options_vertica.py b/tests/component/records/test_pandas_to_csv_options_vertica.py index f45cb341d..d18296b05 100644 --- a/tests/component/records/test_pandas_to_csv_options_vertica.py +++ b/tests/component/records/test_pandas_to_csv_options_vertica.py @@ -3,20 +3,34 @@ from records_mover.records.pandas import pandas_to_csv_options from records_mover.records.processing_instructions import ProcessingInstructions from records_mover.records.records_format import DelimitedRecordsFormat +from packaging import version +import pandas as pd class TestPandasToCsvOptionsVertica(unittest.TestCase): def test_pandas_to_csv_options_vertica(self): - expected = { - 'date_format': '%Y-%m-%d %H:%M:%S.%f%z', - 'doublequote': False, - 'encoding': 'UTF8', - 'header': False, - 'lineterminator': '\x02', - 'quotechar': '"', - 'quoting': 3, - 'sep': '\x01', - } + if version.parse(pd.__version__) >= version.parse('1.5.0'): + expected = { + 'date_format': '%Y-%m-%d %H:%M:%S.%f%z', + 'doublequote': False, + 'encoding': 'UTF8', + 'header': False, + 'lineterminator': '\x02', + 'quotechar': '"', + 'quoting': 3, + 'sep': '\x01', + } + else: + expected = { + 'date_format': '%Y-%m-%d %H:%M:%S.%f%z', + 'doublequote': False, + 'encoding': 'UTF8', + 'header': False, + 'line_terminator': '\x02', + 'quotechar': '"', + 'quoting': 3, + 'sep': '\x01', + } processing_instructions = ProcessingInstructions() records_format = DelimitedRecordsFormat(hints=vertica_format_hints) unhandled_hints = set(records_format.hints) diff --git a/tests/unit/records/targets/test_fileobj.py b/tests/unit/records/targets/test_fileobj.py index 4961dd9d8..57b6ad395 100644 --- a/tests/unit/records/targets/test_fileobj.py +++ b/tests/unit/records/targets/test_fileobj.py @@ -3,6 +3,8 @@ from records_mover.records.results import MoveResult from records_mover.records.records_format import DelimitedRecordsFormat from mock import patch, Mock, ANY +from packaging import version +import pandas as pd class TestFileobjTarget(unittest.TestCase): @@ -34,30 +36,58 @@ def test_move_from_dataframe_uncompressed_no_header_row(self, out = fileobj_target.move_from_dataframes_source(mock_dfs_source, mock_processing_instructions) mock_text_fileobj = mock_io.TextIOWrapper.return_value - mock_df_1.to_csv.assert_called_with(index=mock_dfs_source.include_index, - path_or_buf=mock_text_fileobj, - mode="a", - date_format='%Y-%m-%d %H:%M:%S.%f%z', - doublequote=False, - encoding='UTF8', - escapechar='\\', - header=False, - lineterminator='\n', - quotechar='"', - quoting=1, - sep=',') - mock_df_2.to_csv.assert_called_with(index=mock_dfs_source.include_index, - path_or_buf=mock_text_fileobj, - mode="a", - date_format='%Y-%m-%d %H:%M:%S.%f%z', - doublequote=False, - encoding='UTF8', - escapechar='\\', - header=False, - lineterminator='\n', - quotechar='"', - quoting=1, - sep=',') + if version.parse(pd.__version__) >= version.parse('1.5.0'): + mock_df_1.to_csv.assert_called_with(index=mock_dfs_source.include_index, + path_or_buf=mock_text_fileobj, + mode="a", + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=False, + lineterminator='\n', + quotechar='"', + quoting=1, + sep=',') + else: + mock_df_1.to_csv.assert_called_with(index=mock_dfs_source.include_index, + path_or_buf=mock_text_fileobj, + mode="a", + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=False, + line_terminator='\n', + quotechar='"', + quoting=1, + sep=',') + if version.parse(pd.__version__) >= version.parse('1.5.0'): + mock_df_2.to_csv.assert_called_with(index=mock_dfs_source.include_index, + path_or_buf=mock_text_fileobj, + mode="a", + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=False, + lineterminator='\n', + quotechar='"', + quoting=1, + sep=',') + else: + mock_df_2.to_csv.assert_called_with(index=mock_dfs_source.include_index, + path_or_buf=mock_text_fileobj, + mode="a", + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=False, + line_terminator='\n', + quotechar='"', + quoting=1, + sep=',') self.assertEqual(out, MoveResult(move_count=2, output_urls=None)) @patch('records_mover.records.pandas.prep_df_for_csv_output') @@ -88,30 +118,58 @@ def test_move_from_dataframe_uncompressed_with_header_row(self, out = fileobj_target.move_from_dataframes_source(mock_dfs_source, mock_processing_instructions) mock_text_fileobj = mock_io.TextIOWrapper.return_value - mock_df_1.to_csv.assert_called_with(index=mock_dfs_source.include_index, - path_or_buf=mock_text_fileobj, - mode="a", - date_format='%Y-%m-%d %H:%M:%S.%f%z', - doublequote=False, - encoding='UTF8', - escapechar='\\', - header=True, - lineterminator='\n', - quotechar='"', - quoting=1, - sep=',') - mock_df_2.to_csv.assert_called_with(index=mock_dfs_source.include_index, - path_or_buf=mock_text_fileobj, - mode="a", - date_format='%Y-%m-%d %H:%M:%S.%f%z', - doublequote=False, - encoding='UTF8', - escapechar='\\', - header=False, - lineterminator='\n', - quotechar='"', - quoting=1, - sep=',') + if version.parse(pd.__version__) >= version.parse('1.5.0'): + mock_df_1.to_csv.assert_called_with(index=mock_dfs_source.include_index, + path_or_buf=mock_text_fileobj, + mode="a", + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=True, + lineterminator='\n', + quotechar='"', + quoting=1, + sep=',') + else: + mock_df_1.to_csv.assert_called_with(index=mock_dfs_source.include_index, + path_or_buf=mock_text_fileobj, + mode="a", + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=True, + line_terminator='\n', + quotechar='"', + quoting=1, + sep=',') + if version.parse(pd.__version__) >= version.parse('1.5.0'): + mock_df_2.to_csv.assert_called_with(index=mock_dfs_source.include_index, + path_or_buf=mock_text_fileobj, + mode="a", + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=False, + lineterminator='\n', + quotechar='"', + quoting=1, + sep=',') + else: + mock_df_2.to_csv.assert_called_with(index=mock_dfs_source.include_index, + path_or_buf=mock_text_fileobj, + mode="a", + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=False, + line_terminator='\n', + quotechar='"', + quoting=1, + sep=',') self.assertEqual(out, MoveResult(move_count=2, output_urls=None)) @patch('records_mover.records.pandas.prep_df_for_csv_output') @@ -141,32 +199,62 @@ def test_move_from_dataframe_compressed_no_header_row(self, mock_prep_df_for_csv_output.side_effect = [mock_df_1, mock_df_2] out = fileobj_target.move_from_dataframes_source(mock_dfs_source, mock_processing_instructions) - mock_df_1.to_csv.assert_called_with(path_or_buf=ANY, - index=mock_dfs_source.include_index, - mode="a", - compression='gzip', - date_format='%Y-%m-%d %H:%M:%S.%f%z', - doublequote=False, - encoding='UTF8', - escapechar='\\', - header=False, - lineterminator='\n', - quotechar='"', - quoting=1, - sep=',') - mock_df_2.to_csv.assert_called_with(path_or_buf=ANY, - index=mock_dfs_source.include_index, - mode="a", - compression='gzip', - date_format='%Y-%m-%d %H:%M:%S.%f%z', - doublequote=False, - encoding='UTF8', - escapechar='\\', - header=False, - lineterminator='\n', - quotechar='"', - quoting=1, - sep=',') + if version.parse(pd.__version__) >= version.parse('1.5.0'): + mock_df_1.to_csv.assert_called_with(path_or_buf=ANY, + index=mock_dfs_source.include_index, + mode="a", + compression='gzip', + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=False, + lineterminator='\n', + quotechar='"', + quoting=1, + sep=',') + else: + mock_df_1.to_csv.assert_called_with(path_or_buf=ANY, + index=mock_dfs_source.include_index, + mode="a", + compression='gzip', + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=False, + line_terminator='\n', + quotechar='"', + quoting=1, + sep=',') + if version.parse(pd.__version__) >= version.parse('1.5.0'): + mock_df_2.to_csv.assert_called_with(path_or_buf=ANY, + index=mock_dfs_source.include_index, + mode="a", + compression='gzip', + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=False, + lineterminator='\n', + quotechar='"', + quoting=1, + sep=',') + else: + mock_df_2.to_csv.assert_called_with(path_or_buf=ANY, + index=mock_dfs_source.include_index, + mode="a", + compression='gzip', + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=False, + line_terminator='\n', + quotechar='"', + quoting=1, + sep=',') self.assertEqual(out, MoveResult(move_count=2, output_urls=None)) @patch('records_mover.records.pandas.prep_df_for_csv_output') @@ -196,30 +284,60 @@ def test_move_from_dataframe_compressed_with_header_row(self, mock_prep_df_for_csv_output.side_effect = [mock_df_1, mock_df_2] out = fileobj_target.move_from_dataframes_source(mock_dfs_source, mock_processing_instructions) - mock_df_1.to_csv.assert_called_with(path_or_buf=ANY, - index=mock_dfs_source.include_index, - mode="a", - compression='gzip', - date_format='%Y-%m-%d %H:%M:%S.%f%z', - doublequote=False, - encoding='UTF8', - escapechar='\\', - header=True, - lineterminator='\n', - quotechar='"', - quoting=1, - sep=',') - mock_df_2.to_csv.assert_called_with(path_or_buf=ANY, - index=mock_dfs_source.include_index, - mode="a", - compression='gzip', - date_format='%Y-%m-%d %H:%M:%S.%f%z', - doublequote=False, - encoding='UTF8', - escapechar='\\', - header=False, - lineterminator='\n', - quotechar='"', - quoting=1, - sep=',') + if version.parse(pd.__version__) >= version.parse('1.5.0'): + mock_df_1.to_csv.assert_called_with(path_or_buf=ANY, + index=mock_dfs_source.include_index, + mode="a", + compression='gzip', + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=True, + lineterminator='\n', + quotechar='"', + quoting=1, + sep=',') + else: + mock_df_1.to_csv.assert_called_with(path_or_buf=ANY, + index=mock_dfs_source.include_index, + mode="a", + compression='gzip', + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=True, + line_terminator='\n', + quotechar='"', + quoting=1, + sep=',') + if version.parse(pd.__version__) >= version.parse('1.5.0'): + mock_df_2.to_csv.assert_called_with(path_or_buf=ANY, + index=mock_dfs_source.include_index, + mode="a", + compression='gzip', + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=False, + lineterminator='\n', + quotechar='"', + quoting=1, + sep=',') + else: + mock_df_2.to_csv.assert_called_with(path_or_buf=ANY, + index=mock_dfs_source.include_index, + mode="a", + compression='gzip', + date_format='%Y-%m-%d %H:%M:%S.%f%z', + doublequote=False, + encoding='UTF8', + escapechar='\\', + header=False, + line_terminator='\n', + quotechar='"', + quoting=1, + sep=',') self.assertEqual(out, MoveResult(move_count=2, output_urls=None)) From e145eec3c9ba4b3cb33a9f89c26c27204d0b2d5d Mon Sep 17 00:00:00 2001 From: Tim Ryan Date: Mon, 15 May 2023 18:32:50 -0600 Subject: [PATCH 11/13] RM-86 add way out for old pandas tests --- .circleci/config.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 9f141def8..e8d896414 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -299,6 +299,10 @@ jobs: type: boolean description: "If true, pass in the env variable specifying an GCS scratch bucket to Records Mover (for BigQuery)" default: true + pytest_flags: + type: string + description: "Any flags you'd like to add to the pytest call" + default: "" environment: SQLALCHEMY_SILENCE_UBER_WARNING: 1 docker: @@ -333,7 +337,7 @@ jobs: # This is set by default in the CircleCI environment unset SCRATCH_GCS_URL fi - with-db <> pytest + with-db <> pytest <> - store_test_results: path: test-reports - store_artifacts: @@ -632,6 +636,7 @@ workflows: numpy_version: "<1.24" context: slack-secrets db_name: demo-itest + pytest_flags: "-W default::FutureWarning" filters: tags: only: /v\d+\.\d+\.\d+(-[\w]+)?/ From 749b1bb924d365f9137b86c3909d82b01031f3fa Mon Sep 17 00:00:00 2001 From: Tim Ryan Date: Mon, 15 May 2023 18:13:09 -0600 Subject: [PATCH 12/13] RM-86 reduce mypy watermark RM-86 lower mypy high water mark --- metrics/mypy_high_water_mark | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metrics/mypy_high_water_mark b/metrics/mypy_high_water_mark index d182afd09..c4c82ddc2 100644 --- a/metrics/mypy_high_water_mark +++ b/metrics/mypy_high_water_mark @@ -1 +1 @@ -89.7400 +89.7000 From 5a69b17a3ce7f524a24e41fe765a901e2cb22c89 Mon Sep 17 00:00:00 2001 From: Tim Ryan Date: Mon, 15 May 2023 19:02:21 -0600 Subject: [PATCH 13/13] RM-86 update GH workflow --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 00556f271..728676678 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,8 @@ jobs: fail-fast: true matrix: python-version: ["3.8", "3.9"] - + env: + SQLALCHEMY_SILENCE_UBER_WARNING: 1 steps: - uses: actions/checkout@v3 - name: Install python ${{ matrix.python-version }}