-
Notifications
You must be signed in to change notification settings - Fork 87
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added more value inference for dbutils.notebook.run(...)
#1860
Changes from 16 commits
f7e4bbc
3caa312
7b1e8f5
b245148
91bce51
f3323ca
757d5d8
3f5e621
d4d7389
01c213f
37fee3e
04d1a21
bbaca0f
b6f27b0
394be93
6880e7c
525cc75
469de9b
5048314
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -9,6 +9,7 @@ | |||||
Attribute, | ||||||
Call, | ||||||
Const, | ||||||
InferenceError, | ||||||
Import, | ||||||
ImportFrom, | ||||||
Name, | ||||||
|
@@ -81,12 +82,28 @@ class NotebookRunCall(NodeBase): | |||||
def __init__(self, node: Call): | ||||||
super().__init__(node) | ||||||
|
||||||
def get_notebook_path(self) -> str | None: | ||||||
node = DbutilsLinter.get_dbutils_notebook_run_path_arg(cast(Call, self.node)) | ||||||
inferred = next(node.infer(), None) | ||||||
if isinstance(inferred, Const): | ||||||
return inferred.value.strip().strip("'").strip('"') | ||||||
return None | ||||||
def get_notebook_paths(self) -> list[str | None]: | ||||||
"""we return multiple paths because astroid can infer them in scenarios such as: | ||||||
paths = ["p1", "p2"] | ||||||
for path in paths: | ||||||
dbutils.notebook.run(path) | ||||||
""" | ||||||
node = DbutilsLinter.get_dbutils_notebook_run_path_arg(self.node) | ||||||
try: | ||||||
return self._get_notebook_paths(node.infer()) | ||||||
except InferenceError: | ||||||
logger.debug(f"Can't infer value(s) of {node.as_string()}") | ||||||
return [None] | ||||||
|
||||||
@classmethod | ||||||
def _get_notebook_paths(cls, nodes: Iterable[NodeNG]) -> list[str | None]: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
let's avoid nullability There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||
paths: list[str | None] = [] | ||||||
for node in nodes: | ||||||
if isinstance(node, Const): | ||||||
nfx marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
paths.append(node.as_string().strip("'").strip('"')) | ||||||
continue | ||||||
paths.append(None) | ||||||
return paths | ||||||
|
||||||
|
||||||
class DbutilsLinter(Linter): | ||||||
|
@@ -99,16 +116,17 @@ def lint(self, code: str) -> Iterable[Advice]: | |||||
@classmethod | ||||||
def _convert_dbutils_notebook_run_to_advice(cls, node: NodeNG) -> Advisory: | ||||||
assert isinstance(node, Call) | ||||||
path = cls.get_dbutils_notebook_run_path_arg(node) | ||||||
if isinstance(path, Const): | ||||||
call = NotebookRunCall(cast(Call, node)) | ||||||
paths = call.get_notebook_paths() | ||||||
if None in paths: | ||||||
return Advisory.from_node( | ||||||
'dbutils-notebook-run-literal', | ||||||
"Call to 'dbutils.notebook.run' will be migrated automatically", | ||||||
'dbutils-notebook-run-dynamic', | ||||||
"Path for 'dbutils.notebook.run' cannot be computed and requires adjusting the notebook path(s)", | ||||||
node=node, | ||||||
) | ||||||
return Advisory.from_node( | ||||||
'dbutils-notebook-run-dynamic', | ||||||
"Path for 'dbutils.notebook.run' is not a constant and requires adjusting the notebook path", | ||||||
'dbutils-notebook-run-literal', | ||||||
"Call to 'dbutils.notebook.run' will be migrated automatically", | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we won't be migrating notebook.run() logic. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So what should be the message (the above was existing) ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we drop this advice altogether ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. sure There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||
node=node, | ||||||
) | ||||||
|
||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
import pytest | ||
from astroid import Attribute, Call, Const, Expr # type: ignore | ||
|
||
from databricks.labs.ucx.source_code.linters.python_ast import Tree | ||
|
||
|
||
def test_extract_call_by_name(): | ||
tree = Tree.parse("o.m1().m2().m3()") | ||
stmt = tree.first_statement() | ||
assert isinstance(stmt, Expr) | ||
assert isinstance(stmt.value, Call) | ||
act = Tree.extract_call_by_name(stmt.value, "m2") | ||
assert isinstance(act, Call) | ||
assert isinstance(act.func, Attribute) | ||
assert act.func.attrname == "m2" | ||
|
||
|
||
def test_extract_call_by_name_none(): | ||
tree = Tree.parse("o.m1().m2().m3()") | ||
stmt = tree.first_statement() | ||
assert isinstance(stmt, Expr) | ||
assert isinstance(stmt.value, Call) | ||
act = Tree.extract_call_by_name(stmt.value, "m5000") | ||
assert act is None | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"code, arg_index, arg_name, expected", | ||
[ | ||
("o.m1()", 1, "second", None), | ||
("o.m1(3)", 1, "second", None), | ||
("o.m1(first=3)", 1, "second", None), | ||
("o.m1(4, 3)", None, None, None), | ||
("o.m1(4, 3)", None, "second", None), | ||
("o.m1(4, 3)", 1, "second", 3), | ||
("o.m1(4, 3)", 1, None, 3), | ||
("o.m1(first=4, second=3)", 1, "second", 3), | ||
("o.m1(second=3, first=4)", 1, "second", 3), | ||
("o.m1(second=3, first=4)", None, "second", 3), | ||
("o.m1(second=3)", 1, "second", 3), | ||
("o.m1(4, 3, 2)", 1, "second", 3), | ||
], | ||
) | ||
def test_linter_gets_arg(code, arg_index, arg_name, expected): | ||
tree = Tree.parse(code) | ||
stmt = tree.first_statement() | ||
assert isinstance(stmt, Expr) | ||
assert isinstance(stmt.value, Call) | ||
act = Tree.get_arg(stmt.value, arg_index, arg_name) | ||
if expected is None: | ||
assert act is None | ||
else: | ||
assert isinstance(act, Const) | ||
assert act.value == expected | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"code, expected", | ||
[ | ||
("o.m1()", 0), | ||
("o.m1(3)", 1), | ||
("o.m1(first=3)", 1), | ||
("o.m1(3, 3)", 2), | ||
("o.m1(first=3, second=3)", 2), | ||
("o.m1(3, second=3)", 2), | ||
("o.m1(3, *b, **c, second=3)", 4), | ||
], | ||
) | ||
def test_args_count(code, expected): | ||
tree = Tree.parse(code) | ||
stmt = tree.first_statement() | ||
assert isinstance(stmt, Expr) | ||
assert isinstance(stmt.value, Call) | ||
act = Tree.args_count(stmt.value) | ||
assert act == expected | ||
|
||
|
||
def test_tree_walks_nodes_once(): | ||
nodes = set() | ||
count = 0 | ||
tree = Tree.parse("o.m1().m2().m3()") | ||
for node in tree.walk(): | ||
nodes.add(node) | ||
count += 1 | ||
assert len(nodes) == count |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,10 @@ | ||
from __future__ import annotations | ||
|
||
import functools | ||
import operator | ||
|
||
import pytest | ||
from astroid import Attribute, Call, Const, Expr # type: ignore | ||
|
||
from databricks.labs.ucx.source_code.graph import DependencyProblem | ||
|
||
from databricks.labs.ucx.source_code.linters.imports import DbutilsLinter, ImportSource, SysPathChange | ||
|
@@ -137,77 +139,6 @@ def test_linter_returns_appended_relative_paths_with_os_path_abspath_alias(): | |
assert "relative_path" in [p.path for p in appended] | ||
|
||
|
||
def test_extract_call_by_name(): | ||
tree = Tree.parse("o.m1().m2().m3()") | ||
stmt = tree.first_statement() | ||
assert isinstance(stmt, Expr) | ||
assert isinstance(stmt.value, Call) | ||
act = Tree.extract_call_by_name(stmt.value, "m2") | ||
assert isinstance(act, Call) | ||
assert isinstance(act.func, Attribute) | ||
assert act.func.attrname == "m2" | ||
|
||
|
||
def test_extract_call_by_name_none(): | ||
tree = Tree.parse("o.m1().m2().m3()") | ||
stmt = tree.first_statement() | ||
assert isinstance(stmt, Expr) | ||
assert isinstance(stmt.value, Call) | ||
act = Tree.extract_call_by_name(stmt.value, "m5000") | ||
assert act is None | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"code, arg_index, arg_name, expected", | ||
[ | ||
("o.m1()", 1, "second", None), | ||
("o.m1(3)", 1, "second", None), | ||
("o.m1(first=3)", 1, "second", None), | ||
("o.m1(4, 3)", None, None, None), | ||
("o.m1(4, 3)", None, "second", None), | ||
("o.m1(4, 3)", 1, "second", 3), | ||
("o.m1(4, 3)", 1, None, 3), | ||
("o.m1(first=4, second=3)", 1, "second", 3), | ||
("o.m1(second=3, first=4)", 1, "second", 3), | ||
("o.m1(second=3, first=4)", None, "second", 3), | ||
("o.m1(second=3)", 1, "second", 3), | ||
("o.m1(4, 3, 2)", 1, "second", 3), | ||
], | ||
) | ||
def test_linter_gets_arg(code, arg_index, arg_name, expected): | ||
tree = Tree.parse(code) | ||
stmt = tree.first_statement() | ||
assert isinstance(stmt, Expr) | ||
assert isinstance(stmt.value, Call) | ||
act = Tree.get_arg(stmt.value, arg_index, arg_name) | ||
if expected is None: | ||
assert act is None | ||
else: | ||
assert isinstance(act, Const) | ||
assert act.value == expected | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"code, expected", | ||
[ | ||
("o.m1()", 0), | ||
("o.m1(3)", 1), | ||
("o.m1(first=3)", 1), | ||
("o.m1(3, 3)", 2), | ||
("o.m1(first=3, second=3)", 2), | ||
("o.m1(3, second=3)", 2), | ||
("o.m1(3, *b, **c, second=3)", 4), | ||
], | ||
) | ||
def test_args_count(code, expected): | ||
tree = Tree.parse(code) | ||
stmt = tree.first_statement() | ||
assert isinstance(stmt, Expr) | ||
assert isinstance(stmt.value, Call) | ||
act = Tree.args_count(stmt.value) | ||
assert act == expected | ||
|
||
|
||
@pytest.mark.parametrize( | ||
"code, expected", | ||
[ | ||
|
@@ -216,22 +147,27 @@ def test_args_count(code, expected): | |
name = "xyz" | ||
dbutils.notebook.run(name) | ||
""", | ||
"xyz", | ||
) | ||
["xyz"], | ||
), | ||
( | ||
""" | ||
name = "xyz" + "-" + "abc" | ||
dbutils.notebook.run(name) | ||
""", | ||
["xyz-abc"], | ||
), | ||
( | ||
""" | ||
names = ["abc", "xyz"] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we already do
or does it require building a small type-aware interpreter? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we can! added corresponding test in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice! |
||
for name in names: | ||
dbutils.notebook.run(name) | ||
""", | ||
["abc", "xyz"], | ||
), | ||
], | ||
) | ||
def test_infers_string_variable_value(code, expected): | ||
def test_infers_dbutils_notebook_run_dynamic_value(code, expected): | ||
tree = Tree.parse(code) | ||
calls = DbutilsLinter.list_dbutils_notebook_run_calls(tree) | ||
actual = list(call.get_notebook_path() for call in calls) | ||
assert [expected] == actual | ||
|
||
|
||
def test_tree_walker_walks_nodes_once(): | ||
nodes = set() | ||
count = 0 | ||
tree = Tree.parse("o.m1().m2().m3()") | ||
for node in tree.walk(): | ||
nodes.add(node) | ||
count += 1 | ||
assert len(nodes) == count | ||
actual = functools.reduce(operator.iconcat, list(call.get_notebook_paths() for call in calls), []) | ||
assert expected == actual |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
from pathlib import Path | ||
|
||
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader | ||
from databricks.sdk.service.workspace import Language | ||
|
||
|
||
def test_detects_language(): | ||
|
||
class NotebookLoaderForTesting(NotebookLoader): | ||
|
||
@classmethod | ||
def detect_language(cls, path: Path, content: str): | ||
return cls._detect_language(path, content) | ||
|
||
assert NotebookLoaderForTesting.detect_language(Path("hi.py"), "stuff") == Language.PYTHON | ||
assert NotebookLoaderForTesting.detect_language(Path("hi.sql"), "stuff") == Language.SQL | ||
assert NotebookLoaderForTesting.detect_language(Path("hi"), "# Databricks notebook source") == Language.PYTHON | ||
assert NotebookLoaderForTesting.detect_language(Path("hi"), "-- Databricks notebook source") == Language.SQL | ||
assert not NotebookLoaderForTesting.detect_language(Path("hi"), "stuff") |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# Databricks notebook source | ||
a = "./leaf1.py" | ||
# ucx[dbutils-notebook-run-literal:+1:0:+1:23] Call to 'dbutils.notebook.run' will be migrated automatically | ||
dbutils.notebook.run(a) | ||
b = some_function() | ||
# ucx[dbutils-notebook-run-dynamic:+1:0:+1:23] Path for 'dbutils.notebook.run' cannot be computed and requires adjusting the notebook path(s) | ||
dbutils.notebook.run(b) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why do we need to change the signature?.. dbutils.notebook.run() can have at most two arguments - path and parameters - it can't have multiple paths.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we need this because astroid is clever enough to return multiple inferred nodes in a scenario such as:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, interesting. Please add it as a code comment, so that the next time reading this code won't catch by surprise
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done