Skip to content

Commit

Permalink
Added message confidence levels
Browse files: browse the repository at this point in the history
  • Loading branch information
nfx committed Mar 16, 2024
1 parent 4d55ccb commit 027e08a
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 29 deletions.
7 changes: 5 additions & 2 deletions src/databricks/labs/pylint/airflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import astroid
from pylint.checkers import BaseChecker
from pylint.interfaces import INFERENCE


class AirflowChecker(BaseChecker):
Expand Down Expand Up @@ -37,9 +38,11 @@ def visit_call(self, node: astroid.Call):

# Check one `new_cluster` spec (found under `key`) and report problems against `node`.
# NOTE(review): this is a rendered diff with indentation stripped — where a bare add_message
# call is immediately followed by the same call carrying `confidence=`, the first appears to
# be the removed line and the second its replacement.
def _check_new_cluster(self, key: str, new_cluster: Dict[str, Any], node: astroid.NodeNG):
# A spec that has no "data_security_mode" key is reported as missing-data-security-mode.
if "data_security_mode" not in new_cluster:
self.add_message("missing-data-security-mode", node=node, args=(key,))
self.add_message("missing-data-security-mode", node=node, args=(key,), confidence=INFERENCE)
# "spark_version" is validated only when the key is present; _is_supported decides acceptability.
if "spark_version" in new_cluster and not self._is_supported(new_cluster["spark_version"]):
self.add_message("unsupported-runtime", node=node, args=(key, new_cluster["spark_version"]))
self.add_message(
"unsupported-runtime", node=node, args=(key, new_cluster["spark_version"]), confidence=INFERENCE
)

@staticmethod
def _is_supported(spark_version: str):
Expand Down
32 changes: 19 additions & 13 deletions src/databricks/labs/pylint/dbutils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# pylint checker for databricks dbutils
import astroid
from pylint.checkers import BaseChecker
from pylint.interfaces import HIGH


class DbutilsChecker(BaseChecker):
Expand Down Expand Up @@ -63,13 +64,15 @@ def visit_call(self, node: astroid.Call):
# add message if dbutils.fs.cp() is used
func_as_string = node.func.as_string()
if func_as_string == "dbutils.fs.cp":
self.add_message("dbutils-fs-cp", node=node, args=(node.args[0].as_string(), node.args[1].as_string()))
self.add_message(
"dbutils-fs-cp", node=node, args=(node.args[0].as_string(), node.args[1].as_string()), confidence=HIGH
)
# add message if dbutils.fs.head() is used
elif func_as_string == "dbutils.fs.head":
self.add_message("dbutils-fs-head", node=node, args=(node.args[0].as_string(),))
self.add_message("dbutils-fs-head", node=node, args=(node.args[0].as_string(),), confidence=HIGH)
# add message if dbutils.fs.ls("/tmp") is used
elif func_as_string == "dbutils.fs.ls":
self.add_message("dbutils-fs-ls", node=node, args=(node.args[0].as_string(),))
self.add_message("dbutils-fs-ls", node=node, args=(node.args[0].as_string(),), confidence=HIGH)
# add message if dbutils.fs.mount("s3a://%s" % aws_bucket_name, "/mnt/%s" % mount_name) is used
elif func_as_string in {
"dbutils.fs.mount",
Expand All @@ -78,43 +81,46 @@ def visit_call(self, node: astroid.Call):
"dbutils.fs.updateMount",
"dbutils.fs.refreshMounts",
}:
self.add_message("dbutils-fs-mount", node=node)
self.add_message("dbutils-fs-mount", node=node, confidence=HIGH)
# add message if dbutils.credentials.* is used
elif func_as_string.startswith("dbutils.credentials."):
self.add_message("dbutils-credentials", node=node)
self.add_message("dbutils-credentials", node=node, confidence=HIGH)
# add message if dbutils.notebook.run("My Other Notebook", 60) is used
elif func_as_string == "dbutils.notebook.run":
self.add_message(
"dbutils-notebook-run", node=node, args=(node.args[0].as_string(), node.args[1].as_string())
"dbutils-notebook-run",
node=node,
args=(node.args[0].as_string(), node.args[1].as_string()),
confidence=HIGH,
)
elif func_as_string.endswith("getDbutils"):
self.add_message("internal-api", node=node, args=(node.as_string(),))
self.add_message("internal-api", node=node, args=(node.as_string(),), confidence=HIGH)
elif ".notebook().getContext()" in func_as_string:
self.add_message("internal-api", node=node, args=(node.as_string(),))
self.add_message("internal-api", node=node, args=(node.as_string(),), confidence=HIGH)
elif ".notebook.entry_point" in func_as_string:
self.add_message("internal-api", node=node, args=(node.as_string(),))
self.add_message("internal-api", node=node, args=(node.as_string(),), confidence=HIGH)
elif ".apiToken" in func_as_string:
self.add_message("internal-api", node=node, args=(node.as_string(),))
self.add_message("internal-api", node=node, args=(node.as_string(),), confidence=HIGH)

# Report string constants that begin with one of the token prefixes as pat-token-leaked.
# NOTE(review): rendered diff with indentation stripped — the bare add_message call appears to
# be the removed line and the following call with `confidence=HIGH` its replacement.
def visit_const(self, node: astroid.Const):
value = node.value
# Non-string constants cannot carry a token prefix, so they are ignored.
if not isinstance(value, str):
return
# add a message if string matches dapi[0-9a-f]{32}, dkea[0-9a-f]{32}, or dosa[0-9a-f]{32}
# NOTE(review): only the 4-character prefix is tested below; the 32 hex digits described
# above are not verified, so any string starting with these prefixes is reported.
if value.startswith("dapi") or value.startswith("dkea") or value.startswith("dosa"):
self.add_message("pat-token-leaked", node=node)
self.add_message("pat-token-leaked", node=node, confidence=HIGH)

# Report `import` statements that name a module starting with "dbruntime" as internal-api.
# NOTE(review): rendered diff with indentation stripped — the bare add_message call appears to
# be the removed line and the following call with `confidence=HIGH` its replacement.
def visit_import(self, node: astroid.Import):
# add a message if dbruntime is imported
# Each entry of node.names is unpacked as a pair; only its first element is inspected.
for name_tuple in node.names:
real_name, _ = name_tuple
if real_name.startswith("dbruntime"):
# The full statement text is passed as the message argument.
self.add_message("internal-api", node=node, args=(node.as_string(),))
self.add_message("internal-api", node=node, args=(node.as_string(),), confidence=HIGH)

# Report `from <module> import ...` statements whose module path starts with "dbruntime".
# NOTE(review): rendered diff with indentation stripped — the bare add_message call appears to
# be the removed line and the following call with `confidence=HIGH` its replacement.
def visit_importfrom(self, node: astroid.ImportFrom):
# add a message if dbruntime is imported
if node.modname.startswith("dbruntime"):
# The full statement text is passed as the message argument.
self.add_message("internal-api", node=node, args=(node.as_string(),))
self.add_message("internal-api", node=node, args=(node.as_string(),), confidence=HIGH)


def register(linter):
Expand Down
11 changes: 6 additions & 5 deletions src/databricks/labs/pylint/legacy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import astroid
from pylint.checkers import BaseChecker
from pylint.interfaces import HIGH, INFERENCE


class LegacyChecker(BaseChecker):
Expand Down Expand Up @@ -68,26 +69,26 @@ def visit_import(self, node: astroid.Import):
# add message if databricks_cli is imported
for name, _ in node.names:
if name.startswith("databricks_cli"):
self.add_message("legacy-cli", node=node)
self.add_message("legacy-cli", node=node, confidence=HIGH)
# very coarse check for UC incompatibility
for needle in self.UC_INCOMPATIBLE_BRUTE_FORCE:
if needle in name:
self.add_message("incompatible-with-uc", node=node, args=(node.as_string(),))
self.add_message("incompatible-with-uc", node=node, args=(node.as_string(),), confidence=INFERENCE)

def visit_importfrom(self, node: astroid.ImportFrom):
    """Flag `from <module> import ...` statements that use legacy or UC-incompatible modules.

    Emits `legacy-cli` when the module path starts with `databricks_cli`, and
    `incompatible-with-uc` once for every entry of `UC_INCOMPATIBLE_BRUTE_FORCE`
    that occurs as a substring of the module path.
    """
    if node.modname.startswith("databricks_cli"):
        # Literal prefix match on the module path. Carry the same HIGH confidence
        # that visit_import attaches to `legacy-cli`, so the message is not
        # reported with the default UNDEFINED level from this code path only.
        self.add_message("legacy-cli", node=node, confidence=HIGH)
    # very coarse check for UC incompatibility
    for needle in self.UC_INCOMPATIBLE_BRUTE_FORCE:
        if needle in node.modname:
            # Substring heuristics can misfire, hence the weaker INFERENCE level.
            self.add_message("incompatible-with-uc", node=node, args=(node.as_string(),), confidence=INFERENCE)

# Report calls whose callee text contains any entry of UC_INCOMPATIBLE_BRUTE_FORCE.
# NOTE(review): rendered diff with indentation stripped — the bare add_message call appears to
# be the removed line and the following call with `confidence=INFERENCE` its replacement.
def visit_call(self, node: astroid.Call):
# The callee expression is rendered back to source text so it can be searched by substring.
func_as_string = node.func.as_string()
# very coarse check for UC incompatibility
# There is no break: one message is emitted per matching needle.
for needle in self.UC_INCOMPATIBLE_BRUTE_FORCE:
if needle in func_as_string:
self.add_message("incompatible-with-uc", node=node, args=(node.as_string(),))
self.add_message("incompatible-with-uc", node=node, args=(node.as_string(),), confidence=INFERENCE)

def visit_const(self, node: astroid.Const):
# very coarse check for UC incompatibility
Expand All @@ -96,7 +97,7 @@ def visit_const(self, node: astroid.Const):
return
for needle in self.UC_INCOMPATIBLE_BRUTE_FORCE:
if needle in value:
self.add_message("incompatible-with-uc", node=node, args=(node.as_string(),))
self.add_message("incompatible-with-uc", node=node, args=(node.as_string(),), confidence=INFERENCE)


def register(linter):
Expand Down
5 changes: 3 additions & 2 deletions src/databricks/labs/pylint/notebooks.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import astroid
from pylint.checkers import BaseRawFileChecker
from pylint.interfaces import CONTROL_FLOW, HIGH


class NotebookChecker(BaseRawFileChecker):
Expand Down Expand Up @@ -55,11 +56,11 @@ def process_module(self, node: astroid.Module):
if line == b"# COMMAND ----------\n":
cells += 1
if cells > self.linter.config.max_cells and not too_many_cells_raised:
self.add_message("notebooks-too-many-cells", line=lineno + 1)
self.add_message("notebooks-too-many-cells", line=lineno + 1, confidence=CONTROL_FLOW)
too_many_cells_raised = True
continue
if line.startswith(b"# MAGIC %run"):
self.add_message("notebooks-percent-run", line=lineno)
self.add_message("notebooks-percent-run", line=lineno, confidence=HIGH)


def register(linter):
Expand Down
11 changes: 8 additions & 3 deletions src/databricks/labs/pylint/spark.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import astroid
from pylint.checkers import BaseChecker
from pylint.interfaces import CONTROL_FLOW, INFERENCE


class SparkChecker(BaseChecker):
Expand Down Expand Up @@ -33,19 +34,23 @@ def visit_name(self, node: astroid.Name):
while in_node and not isinstance(in_node, astroid.FunctionDef):
in_node = in_node.parent
if not in_node:
self.add_message("spark-outside-function", node=node)
self.add_message("spark-outside-function", node=node, confidence=CONTROL_FLOW)
return
has_spark_arg = False
for arg in in_node.args.arguments:
if arg.name == "spark":
has_spark_arg = True
break
if not has_spark_arg:
self.add_message("no-spark-argument-in-function", node=in_node, args=(in_node.name,))
self.add_message(
"no-spark-argument-in-function", node=in_node, args=(in_node.name,), confidence=CONTROL_FLOW
)

# Report `.show` accessed directly on the result of a call expression as use-display-instead-of-show.
# NOTE(review): rendered diff with indentation stripped — the single-line add_message call
# appears to be the removed line and the multi-line call with `confidence=INFERENCE` its replacement.
def visit_attribute(self, node: astroid.Attribute):
# Only attributes named "show" whose receiver is itself a Call node are reported;
# the receiver's source text is passed as the message argument.
if node.attrname == "show" and isinstance(node.expr, astroid.Call):
self.add_message("use-display-instead-of-show", node=node, args=(node.expr.as_string(),))
self.add_message(
"use-display-instead-of-show", node=node, args=(node.expr.as_string(),), confidence=INFERENCE
)


def register(linter):
Expand Down
2 changes: 1 addition & 1 deletion tests/samples/m/many_cells.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
notebooks-too-many-cells:303:0:None:None::Notebooks should not have more than 75 cells:UNDEFINED
notebooks-too-many-cells:303:0:None:None::Notebooks should not have more than 75 cells:CONTROL_FLOW
6 changes: 3 additions & 3 deletions tests/samples/p/percent_run.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
notebooks-percent-run:7:0:None:None::Using %run is not allowed:UNDEFINED
spark-outside-function:15:5:15:10::Using spark outside the function is leading to untestable code:UNDEFINED
spark-outside-function:24:5:24:10::Using spark outside the function is leading to untestable code:UNDEFINED
notebooks-percent-run:7:0:None:None::Using %run is not allowed:HIGH
spark-outside-function:15:5:15:10::Using spark outside the function is leading to untestable code:CONTROL_FLOW
spark-outside-function:24:5:24:10::Using spark outside the function is leading to untestable code:CONTROL_FLOW

0 comments on commit 027e08a

Please sign in to comment.