From 42b21b141d0e4633de8107c294b977872b9d27c9 Mon Sep 17 00:00:00 2001 From: lrcouto Date: Thu, 29 Feb 2024 13:36:24 -0300 Subject: [PATCH 1/4] Update spaceflights tutorial and starter requirements Signed-off-by: lrcouto --- docs/source/kedro_project_setup/dependencies.md | 2 +- docs/source/tutorial/tutorial_template.md | 2 +- features/environment.py | 2 +- .../test_starter/{{ cookiecutter.repo_name }}/requirements.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/kedro_project_setup/dependencies.md b/docs/source/kedro_project_setup/dependencies.md index 4af705b961..95fc3f29ce 100644 --- a/docs/source/kedro_project_setup/dependencies.md +++ b/docs/source/kedro_project_setup/dependencies.md @@ -62,4 +62,4 @@ To limit installation to dependencies specific to a data type: pip install "kedro-datasets[<group>.<dataset>]" ``` -For example, your workflow might require use of the `pandas.ExcelDataset`, so to install its dependencies, run `pip install "kedro-datasets[pandas.ExcelDataset]"`. +For example, your workflow might require use of the `pandas.ExcelDataset`, so to install its dependencies, run `pip install "kedro-datasets[pandas-exceldataset]"`. 
diff --git a/docs/source/tutorial/tutorial_template.md b/docs/source/tutorial/tutorial_template.md index 2b2c45cc82..9460eec53f 100644 --- a/docs/source/tutorial/tutorial_template.md +++ b/docs/source/tutorial/tutorial_template.md @@ -48,7 +48,7 @@ pytest~=7.2 # Kedro dependencies and datasets to work with different data formats (including CSV, Excel, and Parquet) kedro~=0.19.0 -kedro-datasets[pandas.CSVDataset, pandas.ExcelDataset, pandas.ParquetDataset]>=1.1 +kedro-datasets[pandas-csvdataset, pandas-exceldataset, pandas-parquetdataset]>=1.1 kedro-telemetry>=0.3.1 kedro-viz~=6.0 # Visualise pipelines diff --git a/features/environment.py b/features/environment.py index 26a6090a6e..f5b76df4ec 100644 --- a/features/environment.py +++ b/features/environment.py @@ -130,6 +130,6 @@ def _install_project_requirements(context): .splitlines() ) install_reqs = [req for req in install_reqs if "{" not in req and "#" not in req] - install_reqs.append("kedro-datasets[pandas.CSVDataset]") + install_reqs.append("kedro-datasets[pandas-csvdataset]") call([context.pip, "install", *install_reqs], env=context.env) return context diff --git a/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt b/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt index fb756bd0f3..8d70c587fd 100644 --- a/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt +++ b/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt @@ -3,7 +3,7 @@ ipython>=8.10 jupyterlab>=3.0 notebook kedro~={{ cookiecutter.kedro_version}} -kedro-datasets[pandas.CSVDataset] +kedro-datasets[pandas-csvdataset] kedro-telemetry>=0.3.1 pytest-cov~=3.0 pytest-mock>=1.7.1, <2.0 From 53dfc6408395b0ef53a5f3680f0774836e37aada Mon Sep 17 00:00:00 2001 From: lrcouto Date: Thu, 11 Apr 2024 02:33:25 -0300 Subject: [PATCH 2/4] fix e2e tests Signed-off-by: lrcouto --- features/steps/cli_steps.py | 6 ++++-- features/steps/util.py | 29 +++++++++++++++++++++++++++++ 2 
files changed, 33 insertions(+), 2 deletions(-) diff --git a/features/steps/cli_steps.py b/features/steps/cli_steps.py index 414d366136..d3c808472e 100644 --- a/features/steps/cli_steps.py +++ b/features/steps/cli_steps.py @@ -554,7 +554,8 @@ def check_one_node_run(context, number): def check_correct_nodes_run(context, node): expected_log_line = f"Running node: {node}" stdout = context.result.stdout - assert expected_log_line in stdout, ( + clean_logs = util.clean_up_log(stdout) + assert expected_log_line in clean_logs, ( "Expected the following message segment to be printed on stdout: " f"{expected_log_line},\nbut got {stdout}" ) @@ -595,7 +596,8 @@ def check_message_printed(context, msg): else: stdout = context.result.stdout - assert msg in stdout, ( + clean_logs = util.clean_up_log(stdout) + assert msg in clean_logs, ( "Expected the following message segment to be printed on stdout: " f"{msg},\nbut got {stdout}" ) diff --git a/features/steps/util.py b/features/steps/util.py index 74031232f1..f9c7b2c4e2 100644 --- a/features/steps/util.py +++ b/features/steps/util.py @@ -83,3 +83,32 @@ def parse_csv(text: str) -> list[str]: List of string tokens """ return re.findall(r"\"(.+?)\"\s*,?", text) + + +def clean_up_log(stdout: str) -> str: + """ + Cleans up log output by removing duplicate lines, extra whitespaces, + and log levels (INFO, WARNING, ERROR) along with .py filenames. + + Args: + stdout (str): The log output to be cleaned. + + Returns: + str: Cleaned log output without unnecessary information. 
+ """ + cleaned_lines = [] + already_extracted = set() + + for line in stdout.split("\n"): + if any(word in line for word in ["WARNING", "INFO", "ERROR"]): + # Remove log levels and .py filenames + cleaned_line = re.sub(r"\b(INFO|WARNING|ERROR)\b|\s+\w+\.py:\d+", "", line) + cleaned_lines.append(cleaned_line.strip()) + already_extracted.add(line) + elif line not in already_extracted: + cleaned_lines.append(line) + + cleaned_output = "\n".join(cleaned_lines) + cleaned_output = re.sub(r"\s+", " ", cleaned_output) + + return cleaned_output.strip() From 270cd499726339a7c6df3cf580232779c183ae2d Mon Sep 17 00:00:00 2001 From: Merel Theisen <49397448+merelcht@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:47:24 +0100 Subject: [PATCH 3/4] Fix e2e tests by distinguishing `kedro-datasets` dependency for different python versions (#3802) Signed-off-by: Merel Theisen --- Makefile | 2 +- features/environment.py | 9 ++++++++- .../{{ cookiecutter.repo_name }}/requirements.txt | 3 ++- pyproject.toml | 3 ++- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index d76892857c..40a2126f55 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,7 @@ package: clean install install-test-requirements: python -m pip install -U "pip>=21.2" - pip install .[test] + pip install -U .[test] install-pre-commit: pre-commit install --install-hooks diff --git a/features/environment.py b/features/environment.py index f5b76df4ec..14be1445ef 100644 --- a/features/environment.py +++ b/features/environment.py @@ -5,6 +5,7 @@ import os import shutil import subprocess +import sys import tempfile import venv from pathlib import Path @@ -14,6 +15,7 @@ _PATHS_TO_REMOVE: set[Path] = set() FRESH_VENV_TAG = "fresh_venv" +MINOR_PYTHON_38_VERSION = 8 def call(cmd, env): @@ -130,6 +132,11 @@ def _install_project_requirements(context): .splitlines() ) install_reqs = [req for req in install_reqs if "{" not in req and "#" not in req] - 
install_reqs.append("kedro-datasets[pandas-csvdataset]") + # For Python versions 3.9 and above we use the new dataset dependency format introduced in `kedro-datasets` 3.0.0 + if sys.version_info.minor > MINOR_PYTHON_38_VERSION: + install_reqs.append("kedro-datasets[pandas-csvdataset]") + # For Python 3.8 we use the older `kedro-datasets` dependency format + else: + install_reqs.append("kedro-datasets[pandas.CSVDataset]") call([context.pip, "install", *install_reqs], env=context.env) return context diff --git a/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt b/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt index 8d70c587fd..826097e88d 100644 --- a/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt +++ b/features/steps/test_starter/{{ cookiecutter.repo_name }}/requirements.txt @@ -3,7 +3,8 @@ ipython>=8.10 jupyterlab>=3.0 notebook kedro~={{ cookiecutter.kedro_version}} -kedro-datasets[pandas-csvdataset] +kedro-datasets[pandas-csvdataset]; python_version >= "3.9" +kedro-datasets[pandas.CSVDataset]<2.0.0; python_version < '3.9' kedro-telemetry>=0.3.1 pytest-cov~=3.0 pytest-mock>=1.7.1, <2.0 diff --git a/pyproject.toml b/pyproject.toml index 79a632fea5..5c0f6ee88a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,8 @@ test = [ "jupyterlab_server>=2.11.1", "jupyterlab>=3,<5", "jupyter~=1.0", - "kedro-datasets", + "kedro-datasets; python_version >= '3.9'", + "kedro-datasets<2.0.0; python_version < '3.9'", "mypy~=1.0", "pandas~=2.0", "pluggy>=1.0, <1.4", # pluggy 1.4 hide imports inside function and causing mocking issue From 6a936a6c8762e0d02947dc73cce8cdc3377e63ab Mon Sep 17 00:00:00 2001 From: "L. R. Couto" <57910428+lrcouto@users.noreply.github.com> Date: Thu, 11 Apr 2024 11:58:38 -0300 Subject: [PATCH 4/4] Update docs/source/tutorial/tutorial_template.md Co-authored-by: Merel Theisen <49397448+merelcht@users.noreply.github.com> Signed-off-by: L. R. 
Couto <57910428+lrcouto@users.noreply.github.com> --- docs/source/tutorial/tutorial_template.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/tutorial/tutorial_template.md b/docs/source/tutorial/tutorial_template.md index 9460eec53f..d8462f1b20 100644 --- a/docs/source/tutorial/tutorial_template.md +++ b/docs/source/tutorial/tutorial_template.md @@ -48,7 +48,7 @@ pytest~=7.2 # Kedro dependencies and datasets to work with different data formats (including CSV, Excel, and Parquet) kedro~=0.19.0 -kedro-datasets[pandas-csvdataset, pandas-exceldataset, pandas-parquetdataset]>=1.1 +kedro-datasets[pandas-csvdataset, pandas-exceldataset, pandas-parquetdataset]>=3.0 kedro-telemetry>=0.3.1 kedro-viz~=6.0 # Visualise pipelines