Skip to content

Commit

Permalink
Run all notebooks in all dirs, add exclude option
Browse files Browse the repository at this point in the history
  • Loading branch information
fajpunk committed May 20, 2024
1 parent dfa2141 commit 8e3f112
Show file tree
Hide file tree
Showing 12 changed files with 836 additions and 24 deletions.
10 changes: 10 additions & 0 deletions changelog.d/20240520_113001_danfuchs_notebook_directories.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!-- Delete the sections that don't apply -->

### Backwards-incompatible changes

- NotebookRunner business now runs all notebooks in a repo by default except in the `mobu_exclude` or its descendants
- Add `exclude_dirs` option to NotebookRunner business to list additional directories in which notebooks will not be run

### Other changes

- Updated python dependencies
4 changes: 4 additions & 0 deletions src/mobu/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
from datetime import timedelta

__all__ = [
"NOTEBOOK_ALWAYS_EXCLUDE_DIRS",
"NOTEBOOK_REPO_URL",
"NOTEBOOK_REPO_BRANCH",
"TOKEN_LIFETIME",
"USERNAME_REGEX",
"WEBSOCKET_OPEN_TIMEOUT",
]

NOTEBOOK_ALWAYS_EXCLUDE_DIRS = {"mobu_exclude"}
"""NotebookRunner always excludes notebooks in these dirs."""

NOTEBOOK_REPO_URL = "https://github.com/lsst-sqre/notebook-demo.git"
"""Default notebook repository for NotebookRunner."""

Expand Down
20 changes: 19 additions & 1 deletion src/mobu/models/business/notebookrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,16 @@

from __future__ import annotations

from pathlib import Path
from typing import Literal

from pydantic import Field

from ...constants import NOTEBOOK_REPO_BRANCH, NOTEBOOK_REPO_URL
from ...constants import (
NOTEBOOK_ALWAYS_EXCLUDE_DIRS,
NOTEBOOK_REPO_BRANCH,
NOTEBOOK_REPO_URL,
)
from .base import BusinessConfig
from .nublado import NubladoBusinessData, NubladoBusinessOptions

Expand Down Expand Up @@ -48,6 +53,19 @@ class NotebookRunnerOptions(NubladoBusinessOptions):
description="Only used by the NotebookRunner",
)

exclude_dirs: set[Path] = Field(
set(),
title="Any notebooks in these directories will not be run",
description=(
" These directories are relative to the repo root. Any notebooks"
" in child directories of these directories will also be excluded."
" Regardless of this value, notebooks in these dirs will always"
f" be excluded: {NOTEBOOK_ALWAYS_EXCLUDE_DIRS}"
" Only used by the NotebookRunner."
),
examples=["some-dir", "some-dir/some-other-dir"],
)


class NotebookRunnerConfig(BusinessConfig):
"""Configuration specialization for NotebookRunner."""
Expand Down
14 changes: 13 additions & 1 deletion src/mobu/services/business/notebookrunner.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from httpx import AsyncClient
from structlog.stdlib import BoundLogger

from ...constants import NOTEBOOK_ALWAYS_EXCLUDE_DIRS
from ...exceptions import NotebookRepositoryError
from ...models.business.notebookrunner import (
NotebookRunnerData,
Expand Down Expand Up @@ -57,6 +58,7 @@ def __init__(
self._notebook: Path | None = None
self._notebook_paths: list[Path] | None = None
self._repo_dir: Path | None = None
self._exclude_paths: set[Path] = set()
self._running_code: str | None = None
self._git = Git(logger=logger)

Expand All @@ -72,6 +74,10 @@ async def startup(self) -> None:
if self._repo_dir is None:
self._repo_dir = Path(TemporaryDirectory().name)
await self.clone_repo()
exclude_dirs = self.options.exclude_dirs | NOTEBOOK_ALWAYS_EXCLUDE_DIRS
self._exclude_paths = {
(self._repo_dir / path) for path in exclude_dirs
}
self._notebook_paths = self.find_notebooks()
self.logger.info("Repository cloned and ready")
await super().startup()
Expand All @@ -87,14 +93,20 @@ async def clone_repo(self) -> None:
with self.timings.start("clone_repo"):
await self._git.clone("-b", branch, url, str(self._repo_dir))

def is_excluded(self, notebook: Path) -> bool:
# A notebook is excluded if any of its parent directories are excluded
return bool(set(notebook.parents) & self._exclude_paths)

def find_notebooks(self) -> list[Path]:
with self.timings.start("find_notebooks"):
if self._repo_dir is None:
raise NotebookRepositoryError(
"Repository directory must be set", self.user.username
)
notebooks = [
p for p in self._repo_dir.iterdir() if p.suffix == ".ipynb"
n
for n in self._repo_dir.glob("**/*.ipynb")
if not self.is_excluded(n)
]
if not notebooks:
msg = "No notebooks found in {self._repo_dir}"
Expand Down
244 changes: 222 additions & 22 deletions tests/business/notebookrunner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@
from ..support.util import wait_for_business


async def setup_git_repo(repo_path: Path) -> None:
"""Initialize and populate a git repo at `repo_path`."""
git = Git(repo=repo_path)
await git.init("--initial-branch=main")
await git.config("user.email", "[email protected]")
await git.config("user.name", "Git User")
for path in repo_path.iterdir():
if not path.name.startswith("."):
await git.add(str(path))
await git.commit("-m", "Initial commit")


@pytest.mark.asyncio
async def test_run(
client: AsyncClient, respx_mock: respx.Router, tmp_path: Path
Expand All @@ -30,18 +42,9 @@ async def test_run(
repo_path = tmp_path / "notebooks"

shutil.copytree(str(source_path), str(repo_path))
# Remove exception notebook
(repo_path / "exception.ipynb").unlink()

# Set up git client
git = Git(repo=repo_path)
await git.init("--initial-branch=main")
await git.config("user.email", "[email protected]")
await git.config("user.name", "Git User")
for path in repo_path.iterdir():
if not path.name.startswith("."):
await git.add(str(path))
await git.commit("-m", "Initial commit")
# Set up git repo
await setup_git_repo(repo_path)

# Start a monkey. We have to do this in a try/finally block since the
# runner will change working directories, which because working
Expand Down Expand Up @@ -93,11 +96,216 @@ async def test_run(
# Get the log and check the cell output.
r = await client.get("/mobu/flocks/test/monkeys/testuser1/log")
assert r.status_code == 200

# Root notebook
assert "This is a test" in r.text
assert "This is another test" in r.text
assert "Final test" in r.text

# Exceptions
assert "Exception thrown" not in r.text

# Make sure mobu ran all of the notebooks it thinks it should have
assert "Done with this cycle of notebooks" in r.text


@pytest.mark.asyncio
async def test_run_recursive(
client: AsyncClient, respx_mock: respx.Router, tmp_path: Path
) -> None:
mock_gafaelfawr(respx_mock)
cwd = Path.cwd()

# Set up a notebook repository.
source_path = Path(__file__).parent.parent / "notebooks_recursive"
repo_path = tmp_path / "notebooks"

shutil.copytree(str(source_path), str(repo_path))
# Remove exception notebook
(repo_path / "exception.ipynb").unlink()

# Set up git repo
await setup_git_repo(repo_path)

# Start a monkey. We have to do this in a try/finally block since the
# runner will change working directories, which because working
# directories are process-global may mess up future tests.
try:
r = await client.put(
"/mobu/flocks",
json={
"name": "test",
"count": 1,
"user_spec": {"username_prefix": "testuser"},
"scopes": ["exec:notebook"],
"business": {
"type": "NotebookRunner",
"options": {
"spawn_settle_time": 0,
"execution_idle_time": 0,
"max_executions": 4,
"repo_url": str(repo_path),
"repo_branch": "main",
"working_directory": str(repo_path),
},
},
},
)
assert r.status_code == 201

# Wait until we've finished one loop and check the results.
data = await wait_for_business(client, "testuser1")
assert data == {
"name": "testuser1",
"business": {
"failure_count": 0,
"name": "NotebookRunner",
"notebook": ANY,
"success_count": 1,
"timings": ANY,
},
"state": "RUNNING",
"user": {
"scopes": ["exec:notebook"],
"token": ANY,
"username": "testuser1",
},
}
finally:
os.chdir(cwd)

# Get the log and check the cell output.
r = await client.get("/mobu/flocks/test/monkeys/testuser1/log")
assert r.status_code == 200

# Root notebook
assert "This is a test" in r.text
assert "This is another test" in r.text
assert "Final test" in r.text

# some-dir notebook
assert "Test some-dir" in r.text
assert "Another test some-dir" in r.text
assert "Final test some-dir" in r.text

# some-other-dir notebook
assert "Test some-other-dir" in r.text
assert "Another test some-other-dir" in r.text
assert "Final test some-other-dir" in r.text

# double-nested-dir notebook
assert "Test double-nested-dir" in r.text
assert "Another test double-nested-dir" in r.text
assert "Final test double-nested-dir" in r.text

# always excluded notebook
assert "This should never run." not in r.text

# Exceptions
assert "Exception thrown" not in r.text

# Make sure mobu ran all of the notebooks it thinks it should have
assert "Done with this cycle of notebooks" in r.text


@pytest.mark.asyncio
async def test_exclude_dirs(
client: AsyncClient, respx_mock: respx.Router, tmp_path: Path
) -> None:
mock_gafaelfawr(respx_mock)
cwd = Path.cwd()

# Set up a notebook repository.
source_path = Path(__file__).parent.parent / "notebooks_recursive"
repo_path = tmp_path / "notebooks"

shutil.copytree(str(source_path), str(repo_path))
# Remove exception notebook
(repo_path / "exception.ipynb").unlink()

# Set up git repo
await setup_git_repo(repo_path)

# Start a monkey. We have to do this in a try/finally block since the
# runner will change working directories, which because working
# directories are process-global may mess up future tests.
try:
r = await client.put(
"/mobu/flocks",
json={
"name": "test",
"count": 1,
"user_spec": {"username_prefix": "testuser"},
"scopes": ["exec:notebook"],
"business": {
"type": "NotebookRunner",
"options": {
"spawn_settle_time": 0,
"execution_idle_time": 0,
"max_executions": 2,
"repo_url": str(repo_path),
"repo_branch": "main",
"working_directory": str(repo_path),
"exclude_dirs": [
"some-other-dir/nested-dir",
"some-dir",
],
},
},
},
)
assert r.status_code == 201

# Wait until we've finished one loop and check the results.
data = await wait_for_business(client, "testuser1")
assert data == {
"name": "testuser1",
"business": {
"failure_count": 0,
"name": "NotebookRunner",
"notebook": ANY,
"success_count": 1,
"timings": ANY,
},
"state": "RUNNING",
"user": {
"scopes": ["exec:notebook"],
"token": ANY,
"username": "testuser1",
},
}
finally:
os.chdir(cwd)

# Get the log and check the cell output.
r = await client.get("/mobu/flocks/test/monkeys/testuser1/log")
assert r.status_code == 200

# Root notebook
assert "This is a test" in r.text
assert "This is another test" in r.text
assert "Final test" in r.text

# some-other-dir notebook
assert "Test some-other-dir" in r.text
assert "Another test some-other-dir" in r.text
assert "Final test some-other-dir" in r.text

# some-dir notebook
assert "Test some-dir" not in r.text

# nested-dir notebook
assert "Test double-nested-dir" not in r.text

# always excluded notebook
assert "This should never run." not in r.text

# Exceptions
assert "Exception thrown" not in r.text

# Make sure mobu ran all of the notebooks it thinks it should have
assert "Done with this cycle of notebooks" in r.text


@pytest.mark.asyncio
async def test_alert(
Expand All @@ -109,21 +317,13 @@ async def test_alert(
mock_gafaelfawr(respx_mock)

# Set up a notebook repository with the exception notebook.
source_path = Path(__file__).parent.parent / "notebooks"
source_path = Path(__file__).parent.parent / "notebooks_recursive"
repo_path = tmp_path / "notebooks"
repo_path.mkdir()
shutil.copy(str(source_path / "exception.ipynb"), str(repo_path))

# Set up git client
git = Git(repo=repo_path)
await git.init("--initial-branch=main")
await git.config("--local", "user.email", "[email protected]")
await git.config("--local", "user.name", "Git User")

for path in repo_path.iterdir():
if not path.name.startswith("."):
await git.add(str(path))
await git.commit("-m", "Initial commit")
# Set up git repo
await setup_git_repo(repo_path)

# The bad code run by the exception test.
bad_code = 'foo = {"bar": "baz"}\nfoo["nothing"]'
Expand Down
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 8e3f112

Please sign in to comment.