Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed Unsupported schema: XXX error on assess_workflows #3104

Merged
merged 1 commit into from
Oct 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions src/databricks/labs/ucx/mixins/cached_workspace_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,11 @@ def unlink(self, missing_ok: bool = False) -> None:
_CachedPathT = TypeVar("_CachedPathT", bound=_CachedPath)


class WorkspaceCache:
class InvalidPath(ValueError):
pass


class InvalidWorkspacePath(ValueError):
pass
class WorkspaceCache:

def __init__(self, ws: WorkspaceClient, max_entries: int = 2048) -> None:
self._ws = ws
Expand All @@ -129,10 +130,10 @@ def get_workspace_path(self, path: str) -> WorkspacePath:
Args:
path: a valid workspace path (must be absolute)
Raises:
WorkspaceCache.InvalidWorkspacePath: this is raised immediately if the supplied path is not a syntactically
InvalidPath: this is raised immediately if the supplied path is not a syntactically
valid workspace path. (This is not raised if the path is syntactically valid but does not exist.)
"""
if not path.startswith("/"):
msg = f"Invalid workspace path; must be absolute and start with a slash ('/'): {path}"
raise WorkspaceCache.InvalidWorkspacePath(msg)
raise InvalidPath(msg)
return _CachedPath(self._cache, self._ws, path)
30 changes: 18 additions & 12 deletions src/databricks/labs/ucx/source_code/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from databricks.labs.ucx.assessment.crawlers import runtime_version_tuple
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
from databricks.labs.ucx.mixins.cached_workspace_path import WorkspaceCache
from databricks.labs.ucx.mixins.cached_workspace_path import WorkspaceCache, InvalidPath
from databricks.labs.ucx.source_code.base import (
CurrentSessionState,
LocatedAdvice,
Expand Down Expand Up @@ -160,7 +160,7 @@ def _as_path(self, path: str) -> Path:
return DBFSPath(self._ws, parsed_path.path)
case other:
msg = f"Unsupported schema: {other} (only DBFS or Workspace paths are allowed)"
raise ValueError(msg)
raise InvalidPath(msg)

@classmethod
@contextmanager
Expand All @@ -183,7 +183,7 @@ def _register_library(self, graph: DependencyGraph, library: compute.Library) ->
yield from self._register_whl(graph, library)
if library.requirements:
yield from self._register_requirements_txt(graph, library)
except WorkspaceCache.InvalidWorkspacePath as e:
except InvalidPath as e:
yield DependencyProblem('cannot-load-file', str(e))
except BadRequest as e:
# see https://github.com/databrickslabs/ucx/issues/2916
Expand All @@ -209,9 +209,12 @@ def _register_requirements_txt(self, graph, library) -> Iterable[DependencyProbl
yield from graph.register_library(clean_requirement)

def _register_whl(self, graph, library) -> Iterable[DependencyProblem]:
wheel_path = self._as_path(library.whl)
with self._temporary_copy(wheel_path) as local_file:
yield from graph.register_library(local_file.as_posix())
try:
wheel_path = self._as_path(library.whl)
with self._temporary_copy(wheel_path) as local_file:
yield from graph.register_library(local_file.as_posix())
except InvalidPath as e:
yield DependencyProblem('cannot-load-file', str(e))

def _register_egg(self, graph, library) -> Iterable[DependencyProblem]:
if self.runtime_version > (14, 0):
Expand All @@ -220,9 +223,12 @@ def _register_egg(self, graph, library) -> Iterable[DependencyProblem]:
message='Installing eggs is no longer supported on Databricks 14.0 or higher',
)
logger.info(f"Registering library from {library.egg}")
egg_path = self._as_path(library.egg)
with self._temporary_copy(egg_path) as local_file:
yield from graph.register_library(local_file.as_posix())
try:
egg_path = self._as_path(library.egg)
with self._temporary_copy(egg_path) as local_file:
yield from graph.register_library(local_file.as_posix())
except InvalidPath as e:
yield DependencyProblem('cannot-load-file', str(e))

def _register_notebook(self, graph: DependencyGraph) -> Iterable[DependencyProblem]:
if not self._task.notebook_task:
Expand All @@ -237,7 +243,7 @@ def _register_notebook(self, graph: DependencyGraph) -> Iterable[DependencyProbl
try:
# Notebooks can't be on DBFS.
path = self._cache.get_workspace_path(notebook_path)
except WorkspaceCache.InvalidWorkspacePath as e:
except InvalidPath as e:
return [DependencyProblem('cannot-load-notebook', str(e))]
return graph.register_notebook(path, False)

Expand All @@ -249,7 +255,7 @@ def _register_spark_python_task(self, graph: DependencyGraph) -> Iterable[Depend
logger.info(f'Discovering {self._task.task_key} entrypoint: {python_file}')
try:
path = self._as_path(python_file)
except WorkspaceCache.InvalidWorkspacePath as e:
except InvalidPath as e:
return [DependencyProblem('cannot-load-file', str(e))]
return graph.register_file(path)

Expand Down Expand Up @@ -326,7 +332,7 @@ def _register_notebook_path(self, graph: DependencyGraph, notebook_path: str) ->
try:
# Notebooks can't be on DBFS.
path = self._cache.get_workspace_path(notebook_path)
except WorkspaceCache.InvalidWorkspacePath as e:
except InvalidPath as e:
yield DependencyProblem('cannot-load-notebook', str(e))
return
# the notebook is the root of the graph, so there's no context to inherit
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/mixins/test_cached_workspace_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from databricks.sdk import WorkspaceClient
from databricks.sdk.service.workspace import ObjectInfo, ObjectType

from databricks.labs.ucx.mixins.cached_workspace_path import WorkspaceCache
from databricks.labs.ucx.mixins.cached_workspace_path import WorkspaceCache, InvalidPath
from databricks.labs.ucx.source_code.base import decode_with_bom


Expand All @@ -29,7 +29,7 @@ def test_path_like_returns_cached_instance() -> None:

def test_non_absolute_path_error() -> None:
cache = _WorkspaceCacheFriend(mock_workspace_client())
with pytest.raises(WorkspaceCache.InvalidWorkspacePath, match="Invalid workspace path; must be absolute"):
with pytest.raises(InvalidPath, match="Invalid workspace path; must be absolute"):
_ = cache.get_workspace_path("not/an/absolute/path")


Expand Down