From 047d4650a4557e34f71d382b006b5a5da826a6db Mon Sep 17 00:00:00 2001 From: Dan Fuchs Date: Sun, 19 May 2024 15:23:21 -0500 Subject: [PATCH] Run all notebooks in repo directory with exclude option * NotebookRunner business now runs all notebooks in a repo by default * Add `disable_run_in_subdirs` option to NotebookRunner business config. This is `False` by default, so the default going forward is that all notebooks in a repo are run by this business. [This phalanx PR] sets this option to `true` in all existing configurations so as not to break any existing workflows. * Add `exclude_dirs` option to NotebookRunner business to list directories in which notebooks will NOT be run --- src/mobu/constants.py | 4 + src/mobu/models/business/notebookrunner.py | 20 ++- src/mobu/services/business/notebookrunner.py | 16 ++- tests/business/notebookrunner_test.py | 129 +++++++++++++++++- .../test-always-excluded-notebook.ipynb | 129 ++++++++++++++++++ .../some-dir/test-some-dir-notebook.ipynb | 116 ++++++++++++++++ .../test-double-nested-dir.ipynb | 92 +++++++++++++ .../some-other-dir/test-some-other-dir.ipynb | 116 ++++++++++++++++ 8 files changed, 618 insertions(+), 4 deletions(-) create mode 100644 tests/notebooks/mobu_exclude/test-always-excluded-notebook.ipynb create mode 100644 tests/notebooks/some-dir/test-some-dir-notebook.ipynb create mode 100644 tests/notebooks/some-other-dir/nested-dir/double-nested-dir/test-double-nested-dir.ipynb create mode 100644 tests/notebooks/some-other-dir/test-some-other-dir.ipynb diff --git a/src/mobu/constants.py b/src/mobu/constants.py index 349e6eb8..a384c4e4 100644 --- a/src/mobu/constants.py +++ b/src/mobu/constants.py @@ -5,6 +5,7 @@ from datetime import timedelta __all__ = [ + "NOTEBOOK_ALWAYS_EXCLUDE_DIRS", "NOTEBOOK_REPO_URL", "NOTEBOOK_REPO_BRANCH", "TOKEN_LIFETIME", @@ -12,6 +13,9 @@ "WEBSOCKET_OPEN_TIMEOUT", ] +NOTEBOOK_ALWAYS_EXCLUDE_DIRS = {"mobu_exclude"} +"""NotebookRunner always excludes notebooks in these dirs.""" + NOTEBOOK_REPO_URL = "https://github.com/lsst-sqre/notebook-demo.git" """Default notebook repository for NotebookRunner.""" diff --git a/src/mobu/models/business/notebookrunner.py b/src/mobu/models/business/notebookrunner.py index c0f52143..ad602b3f 100644 --- a/src/mobu/models/business/notebookrunner.py +++ b/src/mobu/models/business/notebookrunner.py @@ -2,11 +2,16 @@ from __future__ import annotations +from pathlib import Path from typing import Literal from pydantic import Field -from ...constants import NOTEBOOK_REPO_BRANCH, NOTEBOOK_REPO_URL +from ...constants import ( + NOTEBOOK_ALWAYS_EXCLUDE_DIRS, + NOTEBOOK_REPO_BRANCH, + NOTEBOOK_REPO_URL, +) from .base import BusinessConfig from .nublado import NubladoBusinessData, NubladoBusinessOptions @@ -48,6 +53,19 @@ class NotebookRunnerOptions(NubladoBusinessOptions): description="Only used by the NotebookRunner", ) + exclude_dirs: set[Path] = Field( + set(), + title="Any notebooks in these directories will not be run", + description=( + " These directories are relative to the repo root. Any notebooks" + " in child directories of these directories will also be excluded." + " Regardless of this value, notebooks in these dirs will always" + f" be excluded: {NOTEBOOK_ALWAYS_EXCLUDE_DIRS}" + " Only used by the NotebookRunner." + ), + examples=["some-dir", "some-dir/some-other-dir"], + ) + class NotebookRunnerConfig(BusinessConfig): """Configuration specialization for NotebookRunner.""" diff --git a/src/mobu/services/business/notebookrunner.py b/src/mobu/services/business/notebookrunner.py index c80f1bcd..043f7ecf 100644 --- a/src/mobu/services/business/notebookrunner.py +++ b/src/mobu/services/business/notebookrunner.py @@ -18,6 +18,7 @@ from httpx import AsyncClient from structlog.stdlib import BoundLogger +from ...constants import NOTEBOOK_ALWAYS_EXCLUDE_DIRS from ...exceptions import NotebookRepositoryError from ...models.business.notebookrunner import ( NotebookRunnerData, @@ -57,6 +58,7 @@ def __init__( self._notebook: Path | None = None self._notebook_paths: list[Path] | None = None self._repo_dir: Path | None = None + self._exclude_paths: set[Path] = set() self._running_code: str | None = None self._git = Git(logger=logger) @@ -72,6 +74,10 @@ async def startup(self) -> None: if self._repo_dir is None: self._repo_dir = Path(TemporaryDirectory().name) await self.clone_repo() + exclude_dirs = self.options.exclude_dirs | NOTEBOOK_ALWAYS_EXCLUDE_DIRS + self._exclude_paths = { + (self._repo_dir / path) for path in exclude_dirs + } self._notebook_paths = self.find_notebooks() self.logger.info("Repository cloned and ready") await super().startup() @@ -87,6 +93,12 @@ async def clone_repo(self) -> None: with self.timings.start("clone_repo"): await self._git.clone("-b", branch, url, str(self._repo_dir)) + def is_excluded(self, notebook: Path) -> bool: + # A notebook is excluded if any of its parent directories are excluded + parents = set(notebook.parents) + excluded_parents = parents & self._exclude_paths + return len(excluded_parents) != 0 + def find_notebooks(self) -> list[Path]: with self.timings.start("find_notebooks"): if self._repo_dir is None: @@ -94,7 +106,9 @@ def find_notebooks(self) -> list[Path]: "Repository directory must be set", self.user.username ) notebooks = [ - p for p in self._repo_dir.iterdir() if p.suffix == ".ipynb" + n + for n in self._repo_dir.glob("**/*.ipynb") + if not self.is_excluded(n) ] if not notebooks: msg = "No notebooks found in {self._repo_dir}" diff --git a/tests/business/notebookrunner_test.py b/tests/business/notebookrunner_test.py index 72d4e602..9469709e 100644 --- a/tests/business/notebookrunner_test.py +++ b/tests/business/notebookrunner_test.py @@ -59,7 +59,7 @@ async def test_run( "options": { "spawn_settle_time": 0, "execution_idle_time": 0, - "max_executions": 1, + "max_executions": 10, "repo_url": str(repo_path), "repo_branch": "main", "working_directory": str(repo_path), @@ -76,7 +76,7 @@ async def test_run( "business": { "failure_count": 0, "name": "NotebookRunner", - "notebook": "test-notebook.ipynb", + "notebook": ANY, "success_count": 1, "timings": ANY, }, @@ -93,9 +93,134 @@ async def test_run( # Get the log and check the cell output. r = await client.get("/mobu/flocks/test/monkeys/testuser1/log") assert r.status_code == 200 + + # Root notebook + assert "This is a test" in r.text + assert "This is another test" in r.text + assert "Final test" in r.text + + # some-dir notebook + assert "Test some-dir" in r.text + assert "Another test some-dir" in r.text + assert "Final test some-dir" in r.text + + # some-other-dir notebook + assert "Test some-other-dir" in r.text + assert "Another test some-other-dir" in r.text + assert "Final test some-other-dir" in r.text + + # double-nested-dir notebook + assert "Test double-nested-dir" in r.text + assert "Another test double-nested-dir" in r.text + assert "Final test double-nested-dir" in r.text + + # always excluded notebook + assert "This should never run." not in r.text + + # Exceptions + assert "Exception thrown" not in r.text + + +@pytest.mark.asyncio +async def test_exclude_dirs( + client: AsyncClient, respx_mock: respx.Router, tmp_path: Path +) -> None: + mock_gafaelfawr(respx_mock) + cwd = Path.cwd() + + # Set up a notebook repository. + source_path = Path(__file__).parent.parent / "notebooks" + repo_path = tmp_path / "notebooks" + + shutil.copytree(str(source_path), str(repo_path)) + # Remove exception notebook + (repo_path / "exception.ipynb").unlink() + + # Set up git client + git = Git(repo=repo_path) + await git.init("--initial-branch=main") + await git.config("user.email", "gituser@example.com") + await git.config("user.name", "Git User") + for path in repo_path.iterdir(): + if not path.name.startswith("."): + await git.add(str(path)) + await git.commit("-m", "Initial commit") + + # Start a monkey. We have to do this in a try/finally block since the + # runner will change working directories, which because working + # directories are process-global may mess up future tests. + try: + r = await client.put( + "/mobu/flocks", + json={ + "name": "test", + "count": 1, + "user_spec": {"username_prefix": "testuser"}, + "scopes": ["exec:notebook"], + "business": { + "type": "NotebookRunner", + "options": { + "spawn_settle_time": 0, + "execution_idle_time": 0, + "max_executions": 10, + "repo_url": str(repo_path), + "repo_branch": "main", + "working_directory": str(repo_path), + "exclude_dirs": [ + "some-other-dir/nested-dir", + "some-dir", + ], + }, + }, + }, + ) + assert r.status_code == 201 + + # Wait until we've finished one loop and check the results. + data = await wait_for_business(client, "testuser1") + assert data == { + "name": "testuser1", + "business": { + "failure_count": 0, + "name": "NotebookRunner", + "notebook": ANY, + "success_count": 1, + "timings": ANY, + }, + "state": "RUNNING", + "user": { + "scopes": ["exec:notebook"], + "token": ANY, + "username": "testuser1", + }, + } + finally: + os.chdir(cwd) + + # Get the log and check the cell output. + r = await client.get("/mobu/flocks/test/monkeys/testuser1/log") + assert r.status_code == 200 + + # Root notebook assert "This is a test" in r.text assert "This is another test" in r.text assert "Final test" in r.text + + # some-other-dir notebook + assert "Test some-other-dir" in r.text + assert "Another test some-other-dir" in r.text + assert "Final test some-other-dir" in r.text + + # some-dir notebook + assert "Test some-dir" not in r.text + + # nested-dir notebook + assert "Test double-nested-dir" not in r.text + + # always excluded notebook + assert "This should never run." not in r.text + + # Exceptions assert "Exception thrown" not in r.text diff --git a/tests/notebooks/mobu_exclude/test-always-excluded-notebook.ipynb b/tests/notebooks/mobu_exclude/test-always-excluded-notebook.ipynb new file mode 100644 index 00000000..c1831a44 --- /dev/null +++ b/tests/notebooks/mobu_exclude/test-always-excluded-notebook.ipynb @@ -0,0 +1,129 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5cb3e0b7", + "metadata": {}, + "source": "This notebook should never get run because it is in an always-excluded directory." + }, + { + "cell_type": "code", + "id": "f84f0959", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-20T14:45:05.038058Z", + "start_time": "2024-05-20T14:45:05.035661Z" + } + }, + "source": "print(\"This should never run.\")", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This should never run.\n" + ] + } + ], + "execution_count": 2 + }, + { + "cell_type": "code", + "id": "44ada997", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-20T14:45:05.073817Z", + "start_time": "2024-05-20T14:45:05.041797Z" + } + }, + "source": [ + "import os\n", + "print(\"This should never run either.\")" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "This should never run either.\n" + ] + } + ], + "execution_count": 3 + }, + { + "cell_type": "markdown", + "id": "d5cea3a7", + "metadata": {}, + "source": [ + "Another Markdown cell" + ] + }, + { + "cell_type": "markdown", + "id": "541caea0", + "metadata": {}, + "source": [ + "Yet another Markdown cell. This one has a list.\n", + "- one\n", + "- two\n", + "- three" + ] + }, + { + "cell_type": "code", + "id": "53a941a4", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-20T14:45:05.100321Z", + "start_time": "2024-05-20T14:45:05.074766Z" + } + }, + "source": "print(\"Finally, this should never run\")", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finally, this should never run\n" + ] + } + ], + "execution_count": 4 + }, + { + "cell_type": "code", + "id": "823560c6", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-20T14:45:05.102973Z", + "start_time": "2024-05-20T14:45:05.101128Z" + } + }, + "source": [], + "outputs": [], + "execution_count": 4 + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/notebooks/some-dir/test-some-dir-notebook.ipynb b/tests/notebooks/some-dir/test-some-dir-notebook.ipynb new file mode 100644 index 00000000..a3769af1 --- /dev/null +++ b/tests/notebooks/some-dir/test-some-dir-notebook.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5cb3e0b7", + "metadata": {}, + "source": "This is a test notebook to check the NotebookRunner business. It should run even though it's in a directory, unless this directory is excluded..." + }, + { + "cell_type": "code", + "id": "f84f0959", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-17T20:37:58.826140Z", + "start_time": "2024-05-17T20:37:58.823537Z" + } + }, + "source": "print(\"Test some-dir\")", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test some-dir\n" + ] + } + ], + "execution_count": 1 + }, + { + "cell_type": "code", + "id": "44ada997", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-17T20:38:00.590204Z", + "start_time": "2024-05-17T20:38:00.587773Z" + } + }, + "source": [ + "import os\n", + "print(\"Another test some-dir\")" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Another test some-dir\n" + ] + } + ], + "execution_count": 2 + }, + { + "cell_type": "markdown", + "id": "d5cea3a7", + "metadata": {}, + "source": [ + "Another Markdown cell" + ] + }, + { + "cell_type": "markdown", + "id": "541caea0", + "metadata": {}, + "source": [ + "Yet another Markdown cell. This one has a list.\n", + "- one\n", + "- two\n", + "- three" + ] + }, + { + "cell_type": "code", + "id": "53a941a4", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-17T20:38:02.105516Z", + "start_time": "2024-05-17T20:38:02.103211Z" + } + }, + "source": "print(\"Final test some-dir\")", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Final test some-dir\n" + ] + } + ], + "execution_count": 3 + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/notebooks/some-other-dir/nested-dir/double-nested-dir/test-double-nested-dir.ipynb b/tests/notebooks/some-other-dir/nested-dir/double-nested-dir/test-double-nested-dir.ipynb new file mode 100644 index 00000000..4c5c46b9 --- /dev/null +++ b/tests/notebooks/some-other-dir/nested-dir/double-nested-dir/test-double-nested-dir.ipynb @@ -0,0 +1,92 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5cb3e0b7", + "metadata": {}, + "source": " This is a test notebook to check the NotebookRunner business. It should run even though it's in a directory, unless this directory is excluded..." + }, + { + "cell_type": "code", + "id": "f84f0959", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-17T20:39:57.123503Z", + "start_time": "2024-05-17T20:39:57.121412Z" + } + }, + "source": "print(\"Test double-nested-dir\")", + "execution_count": 1, + "outputs": [] + }, + { + "cell_type": "code", + "id": "44ada997", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-17T20:39:57.150633Z", + "start_time": "2024-05-17T20:39:57.124284Z" + } + }, + "source": [ + "import os\n", + "print(\"Another test double-nested-dir\")" + ], + "execution_count": 2, + "outputs": [] + }, + { + "cell_type": "markdown", + "id": "d5cea3a7", + "metadata": {}, + "source": [ + "Another Markdown cell" + ] + }, + { + "cell_type": "markdown", + "id": "541caea0", + "metadata": {}, + "source": [ + "Yet another Markdown cell. This one has a list.\n", + "- one\n", + "- two\n", + "- three" + ] + }, + { + "cell_type": "code", + "id": "53a941a4", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-17T20:39:57.176580Z", + "start_time": "2024-05-17T20:39:57.151166Z" + } + }, + "source": "print(\"Final test double-nested-dir\")", + "execution_count": 3, + "outputs": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/notebooks/some-other-dir/test-some-other-dir.ipynb b/tests/notebooks/some-other-dir/test-some-other-dir.ipynb new file mode 100644 index 00000000..1bc4abaf --- /dev/null +++ b/tests/notebooks/some-other-dir/test-some-other-dir.ipynb @@ -0,0 +1,116 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5cb3e0b7", + "metadata": {}, + "source": "This is a test notebook to check the NotebookRunner business. It should run even though it's in a directory, unless this directory is excluded..." + }, + { + "cell_type": "code", + "id": "f84f0959", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-17T20:39:13.319752Z", + "start_time": "2024-05-17T20:39:13.317012Z" + } + }, + "source": "print(\"Test some-other-dir\")", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Test some-other-dir\n" + ] + } + ], + "execution_count": 1 + }, + { + "cell_type": "code", + "id": "44ada997", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-17T20:39:13.345548Z", + "start_time": "2024-05-17T20:39:13.320800Z" + } + }, + "source": [ + "import os\n", + "print(\"Another test some-other-dir\")" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Another test some-other-dir\n" + ] + } + ], + "execution_count": 2 + }, + { + "cell_type": "markdown", + "id": "d5cea3a7", + "metadata": {}, + "source": [ + "Another Markdown cell" + ] + }, + { + "cell_type": "markdown", + "id": "541caea0", + "metadata": {}, + "source": [ + "Yet another Markdown cell. This one has a list.\n", + "- one\n", + "- two\n", + "- three" + ] + }, + { + "cell_type": "code", + "id": "53a941a4", + "metadata": { + "ExecuteTime": { + "end_time": "2024-05-17T20:39:13.371583Z", + "start_time": "2024-05-17T20:39:13.346126Z" + } + }, + "source": "print(\"Final test some-other-dir\")", + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Final test some-other-dir\n" + ] + } + ], + "execution_count": 3 + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}