Skip to content

Commit

Permalink
Add VSCode/ IPython support for %load_node and printing support for…
Browse files Browse the repository at this point in the history
… Databricks (#3604)

* Add option to print node content to load_node

Signed-off-by: lrcouto <[email protected]>

* Add option to print node content to load_node

Signed-off-by: lrcouto <[email protected]>

* Change formatting on --print output

Signed-off-by: lrcouto <[email protected]>

* Fix merge error

Signed-off-by: lrcouto <[email protected]>

* Detect if the user is in a Jupyter environment

Signed-off-by: lrcouto <[email protected]>

* add _guess_environment() method to determine Ipython/jupyter

Signed-off-by: Nok Lam Chan <[email protected]>

* add platform args

Signed-off-by: Nok Lam Chan <[email protected]>

* remove print

Signed-off-by: Nok Lam Chan <[email protected]>

* changes based on feedback

Signed-off-by: Nok Lam Chan <[email protected]>

* remove line number

Signed-off-by: Nok Lam Chan <[email protected]>

* fix linting

Signed-off-by: Nok Lam Chan <[email protected]>

* please mypy

Signed-off-by: Nok Lam Chan <[email protected]>

* add a few more unit tests

Signed-off-by: Nok Lam Chan <[email protected]>

* fix tests

Signed-off-by: Nok Lam Chan <[email protected]>

* clean up print and detect VSCode better

Signed-off-by: Nok Lam Chan <[email protected]>

* fix coverage

Signed-off-by: Nok Lam Chan <[email protected]>

* fix lint

Signed-off-by: Nok Lam Chan <[email protected]>

* Explicitly import get_ipython()

Signed-off-by: lrcouto <[email protected]>

* fix lint

Signed-off-by: Nok <[email protected]>

* fix mypy

Signed-off-by: Nok <[email protected]>

---------

Signed-off-by: lrcouto <[email protected]>
Signed-off-by: Nok Lam Chan <[email protected]>
Signed-off-by: Nok <[email protected]>
Co-authored-by: Nok Lam Chan <[email protected]>
  • Loading branch information
lrcouto and noklam authored Feb 16, 2024
1 parent cd026f5 commit b3637db
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 15 deletions.
1 change: 1 addition & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Major features and improvements
* Create the debugging line magic `%load_node` for Jupyter Notebook and Jupyter Lab.
* Add better IPython and VSCode Notebook support for `%load_node`, and minimal support for Databricks.

## Bug fixes and other changes
* Updated CLI Command `kedro catalog resolve` to work with dataset factories that use `PartitionedDataset`.
Expand Down
64 changes: 53 additions & 11 deletions kedro/ipython/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,23 @@
This script creates an IPython extension to load Kedro-related variables in
local scope.
"""

from __future__ import annotations

import inspect
import logging
import os
import sys
import typing
import warnings
from pathlib import Path
from typing import Any, Callable

import IPython
from IPython.core.getipython import get_ipython
from IPython.core.magic import needs_local_scope, register_line_magic
from IPython.core.magic_arguments import argument, magic_arguments, parse_argstring
from rich.console import Console
from rich.syntax import Syntax

from kedro.framework.cli import load_entry_points
from kedro.framework.cli.project import CONF_SOURCE_HELP, PARAMS_ARG_HELP
Expand All @@ -28,6 +32,7 @@
from kedro.framework.session import KedroSession
from kedro.framework.startup import _is_project, bootstrap_project
from kedro.pipeline.node import Node
from kedro.utils import _is_databricks

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -114,7 +119,7 @@ def reload_kedro(
context = session.load_context()
catalog = context.catalog

IPython.get_ipython().push( # type: ignore[attr-defined, no-untyped-call]
get_ipython().push( # type: ignore[no-untyped-call]
variables={
"context": context,
"catalog": catalog,
Expand Down Expand Up @@ -187,6 +192,20 @@ def _find_kedro_project(current_dir: Path) -> Any: # pragma: no cover
return None


def _guess_run_environment() -> str:  # pragma: no cover
    """Make a best-effort guess at the interactive environment in use.

    The checks run in a fixed order and the first match wins.

    Returns:
        ``"vscode"`` when ``VSCODE_PID`` or ``VSCODE_CWD`` holds a non-empty
        value, ``"databricks"`` when ``_is_databricks()`` is true,
        ``"jupyter"`` when the active IPython shell has a ``kernel``
        attribute, and ``"ipython"`` otherwise.
    """
    # Background on these variables:
    # https://github.com/microsoft/vscode-jupyter/issues/7380
    vscode_marker = os.environ.get("VSCODE_PID") or os.environ.get("VSCODE_CWD")
    if vscode_marker:
        return "vscode"

    if _is_databricks():
        return "databricks"

    # The IPython terminal shell does not have a ``kernel`` attribute.
    shell = get_ipython()  # type: ignore[no-untyped-call]
    return "jupyter" if hasattr(shell, "kernel") else "ipython"


@typing.no_type_check
@magic_arguments()
@argument(
Expand All @@ -196,25 +215,48 @@ def _find_kedro_project(current_dir: Path) -> Any: # pragma: no cover
nargs="?",
default=None,
)
def magic_load_node(node: str) -> None:
def magic_load_node(args: str) -> None:
"""The line magic %load_node <node_name>
Currently it only supports Jupyter Notebook (>7.0) and Jupyter Lab. This line magic
will generate code in multiple cells to load datasets from `DataCatalog`, import
relevant functions and modules, node function definition and a function call.
Currently this feature has better supports with Jupyter Notebook (>7.0) and Jupyter Lab
and VSCode Notebook. This line magic will generate code in multiple cells to load
datasets from `DataCatalog`, import relevant functions and modules, node function
definition and a function call. If generating code is not possible, it will print
the code instead.
"""
cells = _load_node(node, pipelines)
from ipylab import JupyterFrontEnd

app = JupyterFrontEnd()
parameters = parse_argstring(magic_load_node, args)
cells = _load_node(parameters.node, pipelines)

run_environment = _guess_run_environment()
if run_environment == "jupyter":
# Only create cells if it is jupyter
for cell in cells:
_create_cell_with_text(cell, is_jupyter=True)
elif run_environment in ("ipython", "vscode"):
# Combine multiple cells into one
combined_cell = "\n\n".join(cells)
_create_cell_with_text(combined_cell, is_jupyter=False)
else:
_print_cells(cells)

def _create_cell_with_text(text: str) -> None:

def _create_cell_with_text(text: str, is_jupyter: bool = True) -> None:
    """Hand ``text`` to the front end as new input.

    Args:
        text: The source code to insert.
        is_jupyter: When true, insert a cell below the current one through
            ``ipylab`` and put ``text`` in it; when false, queue ``text`` as
            the next IPython input instead.
    """
    if not is_jupyter:
        get_ipython().set_next_input(text)  # type: ignore[no-untyped-call]
        return

    # Imported here so that ``ipylab`` is only required on the Jupyter path.
    from ipylab import JupyterFrontEnd

    frontend = JupyterFrontEnd()
    # NOTE: this only works with Notebook >7.0 or Jupyter Lab. It doesn't work
    # with VS Code Notebook due to incompatible backends.
    frontend.commands.execute("notebook:insert-cell-below")
    frontend.commands.execute("notebook:replace-selection", {"text": text})


def _print_cells(cells: list[str]) -> None:
    """Print every cell as syntax-highlighted Python, separated by blank lines.

    This function only prints; it never tries to create notebook cells, so it
    is safe to use where cell creation is unavailable.

    Args:
        cells: Source-code snippets to display, in order.
    """
    # One console is enough for the whole loop; it carries no per-cell state.
    console = Console()
    for cell in cells:
        console.print("")
        console.print(Syntax(cell, "python", theme="monokai", line_numbers=False))


def _load_node(node_name: str, pipelines: _ProjectPipelines) -> list[str]:
Expand Down
5 changes: 3 additions & 2 deletions kedro/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
"""

import logging
import os
import sys
from pathlib import Path
from typing import Any
Expand All @@ -13,6 +12,8 @@
import rich.pretty
import rich.traceback

from kedro.utils import _is_databricks


class RichHandler(rich.logging.RichHandler):
"""Identical to rich's logging handler but with a few extra behaviours:
Expand Down Expand Up @@ -52,7 +53,7 @@ def __init__(self, *args: Any, **kwargs: Any):
else:
traceback_install_kwargs[key_prefix_removed] = value

if self.rich_tracebacks and "DATABRICKS_RUNTIME_VERSION" not in os.environ:
if self.rich_tracebacks and not _is_databricks():
# Rich traceback handling does not work on databricks. Hopefully this will be
# fixed on their side at some point, but until then we disable it.
# See https://github.com/Textualize/rich/issues/2455
Expand Down
5 changes: 5 additions & 0 deletions kedro/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
of kedro package.
"""
import importlib
import os
from typing import Any


Expand All @@ -24,3 +25,7 @@ def load_obj(obj_path: str, default_obj_path: str = "") -> Any:
obj_name = obj_path_list[0]
module_obj = importlib.import_module(obj_path)
return getattr(module_obj, obj_name)


def _is_databricks() -> bool:
    """Tell whether ``DATABRICKS_RUNTIME_VERSION`` is set in the environment.

    Only the presence of the variable matters; an empty value still counts.
    """
    return os.environ.get("DATABRICKS_RUNTIME_VERSION") is not None
54 changes: 52 additions & 2 deletions tests/ipython/test_ipython.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pytest
from IPython.core.error import UsageError

import kedro.ipython
from kedro.framework.project import pipelines
from kedro.ipython import (
_find_node,
Expand Down Expand Up @@ -198,13 +199,13 @@ def test_load_extension_register_line_magic(self, mocker, ipython):
"--conf-source=new_conf",
],
)
def test_line_magic_with_valid_arguments(self, mocker, args, ipython):
def test_reload_kedro_magic_with_valid_arguments(self, mocker, args, ipython):
mocker.patch("kedro.ipython._find_kedro_project")
mocker.patch("kedro.ipython.reload_kedro")

ipython.magic(f"reload_kedro {args}")

def test_line_magic_with_invalid_arguments(self, mocker, ipython):
def test_reload_kedro_with_invalid_arguments(self, mocker, ipython):
mocker.patch("kedro.ipython._find_kedro_project")
mocker.patch("kedro.ipython.reload_kedro")
load_ipython_extension(ipython)
Expand Down Expand Up @@ -380,3 +381,52 @@ def test_get_nested_function_body(self, dummy_nested_function_literal):
def test_get_function_with_loop_body(self, dummy_function_with_loop_literal):
result = _prepare_function_body(dummy_function_with_loop)
assert result == dummy_function_with_loop_literal

def test_load_node_magic_with_valid_arguments(self, mocker, ipython):
    """Smoke test: ``%load_node`` with a bare node name runs without raising."""
    # Project discovery and node loading are both replaced by mocks.
    for target in ("kedro.ipython._find_kedro_project", "kedro.ipython._load_node"):
        mocker.patch(target)

    ipython.magic("load_node dummy_node")

def test_load_node_with_invalid_arguments(self, mocker, ipython):
    """An unknown option passed to ``%load_node`` is reported as a ``UsageError``."""
    for target in ("kedro.ipython._find_kedro_project", "kedro.ipython._load_node"):
        mocker.patch(target)
    load_ipython_extension(ipython)

    expected_message = r"unrecognized arguments: --invalid_arg=dummy_node"
    with pytest.raises(UsageError, match=expected_message):
        ipython.magic("load_node --invalid_arg=dummy_node")

def test_load_node_with_jupyter(self, mocker, ipython):
    """In a Jupyter environment every generated cell is created on its own."""
    mocker.patch("kedro.ipython._find_kedro_project")
    mocker.patch("kedro.ipython._load_node", return_value=["cell1", "cell2"])
    mocker.patch("kedro.ipython._guess_run_environment", return_value="jupyter")
    create_cell_spy = mocker.spy(kedro.ipython, "_create_cell_with_text")

    load_ipython_extension(ipython)
    ipython.magic("load_node dummy_node")

    # One call per cell, each flagged as a Jupyter insertion.
    expected_calls = [
        mocker.call(cell_text, is_jupyter=True) for cell_text in ("cell1", "cell2")
    ]
    create_cell_spy.assert_has_calls(expected_calls)

@pytest.mark.parametrize("run_env", ["ipython", "vscode"])
def test_load_node_with_ipython(self, mocker, ipython, run_env):
    """For IPython and VS Code the cells are merged into one next-input block.

    The assertion pins the merged text and ``is_jupyter=False``, so a
    regression in how cells are combined is caught rather than just the
    call count.
    """
    mocker.patch("kedro.ipython._find_kedro_project")
    mocker.patch("kedro.ipython._load_node", return_value=["cell1", "cell2"])
    mocker.patch("kedro.ipython._guess_run_environment", return_value=run_env)
    spy = mocker.spy(kedro.ipython, "_create_cell_with_text")

    load_ipython_extension(ipython)
    ipython.magic("load_node dummy_node")

    # The magic joins the cells with a blank line between them.
    spy.assert_called_once_with("cell1\n\ncell2", is_jupyter=False)

@pytest.mark.parametrize("run_env", ["databricks", "colab", "dummy"])
def test_load_node_with_other(self, mocker, ipython, run_env):
    """Any other environment falls back to printing the generated cells.

    The assertion checks that the cells reach ``_print_cells`` unchanged, not
    only that the function was called.
    """
    mocker.patch("kedro.ipython._find_kedro_project")
    mocker.patch("kedro.ipython._load_node", return_value=["cell1", "cell2"])
    mocker.patch("kedro.ipython._guess_run_environment", return_value=run_env)
    spy = mocker.spy(kedro.ipython, "_print_cells")

    load_ipython_extension(ipython)
    ipython.magic("load_node dummy_node")

    spy.assert_called_once_with(["cell1", "cell2"])

0 comments on commit b3637db

Please sign in to comment.