Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix false advice when linting homonymous method names #2114

Merged
merged 11 commits into from
Jul 9, 2024
7 changes: 6 additions & 1 deletion src/databricks/labs/ucx/source_code/linters/pyspark.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,12 @@ class Matcher(ABC):
session_state: CurrentSessionState | None = None

def matches(self, node: NodeNG):
return isinstance(node, Call) and isinstance(node.func, Attribute) and self._get_table_arg(node) is not None
return (
isinstance(node, Call)
and self._get_table_arg(node) is not None
and isinstance(node.func, Attribute)
and Tree(node.func.expr).is_from_module("spark")
)

@abstractmethod
def lint(
Expand Down
24 changes: 24 additions & 0 deletions src/databricks/labs/ucx/source_code/linters/python_ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from astroid import ( # type: ignore
Assign,
AssignName,
Attribute,
Call,
Const,
Expand Down Expand Up @@ -223,6 +224,27 @@ def append_statements(self, tree: Tree) -> Tree:
# the following may seem strange but it's actually ok to use the original module as tree root
return tree

def is_from_module(self, module_name: str):
# if his is the call's root node, check it against the required module
if isinstance(self._node, Name):
if self._node.name == module_name:
return True
root = self.root
if not isinstance(root, Module):
return False
for value in root.globals.get(self._node.name, []):
if not isinstance(value, AssignName) or not isinstance(value.parent, Assign):
continue
if Tree(value.parent.value).is_from_module(module_name):
return True
return False
# walk up intermediate calls such as spark.range(...)
if isinstance(self._node, Call):
return isinstance(self._node.func, Attribute) and Tree(self._node.func.expr).is_from_module(module_name)
if isinstance(self._node, Attribute):
return Tree(self._node.expr).is_from_module(module_name)
return False


class TreeVisitor:

Expand Down Expand Up @@ -282,6 +304,8 @@ def visit_importfrom(self, node: ImportFrom):
def _matches(self, node: NodeNG, depth: int):
if depth >= len(self._match_nodes):
return False
if isinstance(node, Call):
return self._matches(node.func, depth)
name, match_node = self._match_nodes[depth]
if not isinstance(node, match_node):
return False
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ def get_advice_span(self, node: NodeNG) -> Range | None:
if call_args_count < self.min_args or call_args_count > self.max_args:
return None

# Check 3: check presence of the format specifier:
# Check 3: ensure this is a spark call
if not Tree(node.func.expr).is_from_module("spark"):
return None

# Check 4: check presence of the format specifier:
# Option A: format specifier may be given as a direct parameter to the table-creating call
# >>> df.saveToTable("c.db.table", format="csv")
format_arg = Tree.get_arg(node, self.format_arg_index, self.format_arg_name)
Expand Down
8 changes: 6 additions & 2 deletions tests/unit/source_code/linters/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,9 @@ def test_folder_has_repr():


@pytest.mark.skip("Manual testing for troubleshooting")
@pytest.mark.parametrize("path", [(Path(site_packages, "mypy", "build.py"))])
@pytest.mark.parametrize(
"path", [Path("/Users/eric.vergnaud/development/ucx/.venv/lib/python3.10/site-packages/spacy/pipe_analysis.py")]
)
def test_known_issues(path: Path, migration_index):
file_loader = FileLoader()
folder_loader = FolderLoader(file_loader)
Expand All @@ -181,4 +183,6 @@ def test_known_issues(path: Path, migration_index):
resolver,
lambda: LinterContext(migration_index, session_state),
)
linter.lint(MockPrompts({}), path)
advices = linter.lint(MockPrompts({}), path)
for advice in advices:
print(repr(advice))
14 changes: 13 additions & 1 deletion tests/unit/source_code/linters/test_python_ast.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from astroid import Assign, AstroidSyntaxError, Attribute, Call, Const, Expr # type: ignore
from astroid import Assign, AstroidSyntaxError, Attribute, Call, Const, Expr, Name # type: ignore

from databricks.labs.ucx.source_code.linters.python_ast import Tree
from databricks.labs.ucx.source_code.linters.python_infer import InferredValue
Expand Down Expand Up @@ -147,3 +147,15 @@ def test_appends_statements():
values = list(InferredValue.infer_from_node(tree.node))
strings = list(value.as_string() for value in values)
assert strings == ["Hello John!"]


def test_is_from_module():
source = """
df = spark.read.csv("hi")
df.write.format("delta").saveAsTable("old.things")
"""
tree = Tree.normalize_and_parse(source)
save_call = tree.locate(
Call, [("saveAsTable", Attribute), ("format", Attribute), ("write", Attribute), ("df", Name)]
)[0]
assert Tree(save_call).is_from_module("spark")
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,5 @@
spark.catalog.cacheTable(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
# FIXME: This is a false positive; any method named 'cacheTable' is triggering the warning.
# ucx[table-migrate:+1:0:+1:39] Table old.things is migrated to brand.new.stuff in Unity Catalog
something_else.cacheTable("old.things")
a_function("old.things")
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,5 @@
do_stuff_with(df)

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
# FIXME: This is a false positive; any method named 'createExternalTable' is triggering the warning.
# ucx[table-migrate:+1:4:+1:52] Table old.things is migrated to brand.new.stuff in Unity Catalog
something_else.createExternalTable("old.things")
a_function("old.things")
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,5 @@
do_stuff_with(df)

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
# FIXME: This is a false positive; any method named 'createTable' is triggering the warning.
# ucx[table-migrate:+1:4:+1:44] Table old.things is migrated to brand.new.stuff in Unity Catalog
something_else.createTable("old.things")
a_function("old.things")
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,5 @@
do_stuff_with(table)

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
# FIXME: This is a false positive; any method named 'getTable' is triggering the warning.
# ucx[table-migrate:+1:4:+1:41] Table old.things is migrated to brand.new.stuff in Unity Catalog
something_else.getTable("old.things")
a_function("old.things")
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,5 @@
cached_previously = spark.catalog.isCached(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
# FIXME: This is a false positive; any method named 'isCached' is triggering the warning.
# ucx[table-migrate:+1:4:+1:41] Table old.things is migrated to brand.new.stuff in Unity Catalog
something_else.isCached("old.things")
a_function("old.things")
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,5 @@
columns = spark.catalog.listColumns(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
# FIXME: This is a false positive; any method named 'listColumns' is triggering the warning.
# ucx[table-migrate:+1:4:+1:44] Table old.things is migrated to brand.new.stuff in Unity Catalog
something_else.listColumns("old.things")
a_function("old.things")
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,5 @@
spark.catalog.recoverPartitions(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
# FIXME: This is a false positive; any method named 'recoverPartitions' is triggering the warning.
# ucx[table-migrate:+1:4:+1:50] Table old.things is migrated to brand.new.stuff in Unity Catalog
something_else.recoverPartitions("old.things")
a_function("old.things")
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,5 @@
spark.catalog.refreshTable(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
# FIXME: This is a false positive; any method named 'refreshTable' is triggering the warning.
# ucx[table-migrate:+1:4:+1:45] Table old.things is migrated to brand.new.stuff in Unity Catalog
something_else.refreshTable("old.things")
a_function("old.things")
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,5 @@
pass

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
# FIXME: This is a false positive; any method named 'tableExists' is triggering the warning.
# ucx[table-migrate:+1:4:+1:44] Table old.things is migrated to brand.new.stuff in Unity Catalog
something_else.tableExists("old.things")
a_function("old.things")
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,5 @@
spark.catalog.uncacheTable(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
# FIXME: This is a false positive; any method named 'uncacheTable' is triggering the warning.
# ucx[table-migrate:+1:4:+1:45] Table old.things is migrated to brand.new.stuff in Unity Catalog
something_else.uncacheTable("old.things")
a_function("old.things")
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,5 @@
df.write.insertInto(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
# FIXME: This is a false positive; any method named 'insertInto' is triggering the warning.
# ucx[table-migrate:+1:4:+1:43] Table old.things is migrated to brand.new.stuff in Unity Catalog
something_else.insertInto("old.things")
a_function("old.things")
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,5 @@
df.write.format("delta").saveAsTable(f"boop{stuff}")

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
# FIXME: This are false positives; any method named 'saveAsTable' is triggering the warnings.
# ucx[table-migrate:+2:4:+2:44] Table old.things is migrated to brand.new.stuff in Unity Catalog
# ucx[table-migrate:+1:4:+1:44] The default format changed in Databricks Runtime 8.0, from Parquet to Delta
something_else.saveAsTable("old.things")
a_function("old.things")
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,6 @@
df = spark.table(f"boop{stuff}")
do_stuff_with(df)

## Some trivial references to the method or table in unrelated contexts that should not trigger warnigns.
# FIXME: These are false positives; any method named 'table' is triggering the warnings.
# ucx[table-migrate:+2:4:+2:38] Table old.things is migrated to brand.new.stuff in Unity Catalog
# ucx[table-migrate:+1:4:+1:38] The default format changed in Databricks Runtime 8.0, from Parquet to Delta
## Some trivial references to the method or table in unrelated contexts that should not trigger warnings.
something_else.table("old.things")
a_function("old.things")