From 21e45556360d23385b90b2b8e0bc7ebc0290c5a3 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Wed, 11 Oct 2023 18:47:19 +0200 Subject: [PATCH 1/4] Merge `workspace_access.Crawler` and `workspace_access.Applier` interfaces to `workspace_access.AclSupport` This PR simplifies integration testing and removes technical debt. --- docs/local-group-migration.md | 30 +++--- .../labs/ucx/workspace_access/base.py | 33 ++---- .../labs/ucx/workspace_access/generic.py | 95 +++++++++------- .../labs/ucx/workspace_access/groups.py | 1 + .../labs/ucx/workspace_access/manager.py | 101 ++++++------------ .../labs/ucx/workspace_access/redash.py | 33 ++++-- .../labs/ucx/workspace_access/scim.py | 28 ++--- .../labs/ucx/workspace_access/secrets.py | 54 +++++----- .../labs/ucx/workspace_access/tacl.py | 20 ++-- tests/unit/workspace_access/test_base.py | 34 ++++-- tests/unit/workspace_access/test_generic.py | 20 ++-- tests/unit/workspace_access/test_groups.py | 8 +- tests/unit/workspace_access/test_listing.py | 54 +++++----- tests/unit/workspace_access/test_manager.py | 32 ++---- tests/unit/workspace_access/test_redash.py | 20 ++-- tests/unit/workspace_access/test_tacl.py | 2 +- 16 files changed, 269 insertions(+), 296 deletions(-) diff --git a/docs/local-group-migration.md b/docs/local-group-migration.md index 9843d013a4..a6f5e0ebe1 100644 --- a/docs/local-group-migration.md +++ b/docs/local-group-migration.md @@ -17,18 +17,12 @@ To deliver this migration, the following steps are performed: > Please note that inherited permissions will not be inventorized / migrated. We only cover direct permissions. -On a very high-level, the permissions inventorization process is split into two steps: +On a very high-level, the permissions crawling process is split into two steps: -1. collect all existing permissions into a persistent storage. -2. apply the collected permissions to the target resources. +1. collect all existing permissions into a persistent storage - see `workspace_access.AclSupport.get_crawler_tasks`. +2. apply the collected permissions to the target resources - see `workspace_access.AclSupport.get_apply_task`. -The first step is performed by the `Crawler` and the second by the `Applier`. - -Crawler and applier are intrinsically connected to each other due to SerDe (serialization/deserialization) logic. - -We implement separate crawlers and applier for each supported resource type. - -Please note that `table ACLs` logic is currently handled separately from the logic described in this document. +We implement `workspace_access.AclSupport` for each supported resource type. ## Logical objects and relevant APIs @@ -147,7 +141,9 @@ Additional info: #### Known issues -- Folder names with forward-slash (`/`) in directory name will be skipped by the inventory. Databricks UI no longer allows creating folders with a forward slash. See [this issue](https://github.com/databrickslabs/ucx/issues/230) for more details. +- Folder names with forward-slash (`/`) in directory name will be skipped by the inventory. Databricks UI no longer +allows creating folders with a forward slash. See [this issue](https://github.com/databrickslabs/ucx/issues/230) for +more details. ### Secrets (uses Secrets API) @@ -163,16 +159,16 @@ Additional info: - put method: `ws.secrets.put_acl` -## Crawler and serialization logic +## AclSupport and serialization logic Crawlers are expected to return a list of callable functions that will be later used to get the permissions. -Each of these functions shall return a `PermissionInventoryItem` that should be serializable into a Delta Table. +Each of these functions shall return a `workspace_access.Permissions` that should be serializable into a Delta Table. The permission payload differs between different crawlers, therefore each crawler should implement a serialization method. ## Applier and deserialization logic -Appliers are expected to accept a list of `PermissionInventoryItem` and generate a list of callables that will apply the +Appliers are expected to accept a list of `workspace_access.Permissions` and generate a list of callables that will apply the given permissions. Each applier should implement a deserialization method that will convert the raw payload into a typed one. Each permission item should have a crawler type associated with it, so that the applier can use the correct @@ -189,10 +185,10 @@ We do this inside the `applier`, by returning a `noop` callable if the object is To crawl the permissions, we use the following logic: 1. Go through the list of all crawlers. 2. Get the list of all objects of the given type. -3. For each object, generate a callable that will return a `PermissionInventoryItem`. +3. For each object, generate a callable that will return a `workspace_access.Permissions`. 4. Execute the callables in parallel -5. Collect the results into a list of `PermissionInventoryItem`. -6. Save the list of `PermissionInventoryItem` into a Delta Table. +5. Collect the results into a list of `workspace_access.Permissions`. +6. Save the list of `workspace_access.Permissions` into a Delta Table. ## Applying the permissions diff --git a/src/databricks/labs/ucx/workspace_access/base.py b/src/databricks/labs/ucx/workspace_access/base.py index 5fb6011fd8..5b20b4f12a 100644 --- a/src/databricks/labs/ucx/workspace_access/base.py +++ b/src/databricks/labs/ucx/workspace_access/base.py @@ -1,7 +1,6 @@ from abc import abstractmethod from collections.abc import Callable, Iterator from dataclasses import dataclass -from functools import partial from logging import Logger from typing import Literal @@ -21,7 +20,7 @@ class Permissions: Destination = Literal["backup", "account"] -class Crawler: +class AclSupport: @abstractmethod def get_crawler_tasks(self) -> Iterator[Callable[..., Permissions | None]]: """ @@ -29,31 +28,13 @@ def get_crawler_tasks(self) -> Iterator[Callable[..., Permissions | None]]: :return: """ - -# TODO: this class has to become typing.Protocol and keep only abstract methods -# See https://www.oreilly.com/library/view/fluent-python-2nd/9781492056348/ch13.html -class Applier: - @abstractmethod - def is_item_relevant(self, item: Permissions, migration_state: GroupMigrationState) -> bool: - """TODO: remove it, see https://github.com/databrickslabs/ucx/issues/410""" - @abstractmethod - def _get_apply_task( - self, item: Permissions, migration_state: GroupMigrationState, destination: Destination - ) -> partial: - """ - This method should return an instance of ApplierTask. - """ - def get_apply_task( self, item: Permissions, migration_state: GroupMigrationState, destination: Destination - ) -> partial: - # we explicitly put the relevance check here to avoid "forgotten implementation" in child classes - if self.is_item_relevant(item, migration_state): - return self._get_apply_task(item, migration_state, destination) - else: + ) -> Callable[[], None] | None: + """This method returns a Callable, that applies permissions to a destination group, based on + the group migration state. The callable is required not to have any shared mutable state.""" - def noop(): - pass - - return partial(noop) + @abstractmethod + def object_types(self) -> set[str]: + """This method returns a set of strings, that represent object types that are applicable by this instance.""" diff --git a/src/databricks/labs/ucx/workspace_access/generic.py b/src/databricks/labs/ucx/workspace_access/generic.py index b7a124f6b4..9d28b4c7ae 100644 --- a/src/databricks/labs/ucx/workspace_access/generic.py +++ b/src/databricks/labs/ucx/workspace_access/generic.py @@ -1,3 +1,4 @@ +import datetime import json import logging from collections.abc import Callable, Iterator @@ -11,8 +12,7 @@ from databricks.labs.ucx.mixins.hardening import rate_limited from databricks.labs.ucx.workspace_access.base import ( - Applier, - Crawler, + AclSupport, Destination, Permissions, ) @@ -31,17 +31,49 @@ class RetryableError(DatabricksError): pass -class GenericPermissionsSupport(Crawler, Applier): - def __init__(self, ws: WorkspaceClient, listings: list[Callable[..., Iterator[GenericPermissionsInfo]]]): +class Listing: + def __init__(self, func: Callable[..., list], id_attribute: str, object_type: str): + self._func = func + self._id_attribute = id_attribute + self._object_type = object_type + + def object_types(self) -> set[str]: + return set(self._object_type) + + def __iter__(self): + started = datetime.datetime.now() + for item in self._func(): + yield GenericPermissionsInfo(getattr(item, self._id_attribute), self._object_type) + since = datetime.datetime.now() - started + logger.info(f"Listed {self._object_type} in {since}") + + +class GenericPermissionsSupport(AclSupport): + def __init__(self, ws: WorkspaceClient, listings: list[Listing]): self._ws = ws self._listings = listings def get_crawler_tasks(self): for listing in self._listings: - for info in listing(): + for info in listing: yield partial(self._crawler_task, info.request_type, info.object_id) - def is_item_relevant(self, item: Permissions, migration_state: GroupMigrationState) -> bool: + def object_types(self) -> set[str]: + all_object_types = set() + for listing in self._listings: + for object_type in listing.object_types(): + all_object_types.add(object_type) + return all_object_types + + def get_apply_task(self, item: Permissions, migration_state: GroupMigrationState, destination: Destination): + if not self._is_item_relevant(item, migration_state): + return None + object_permissions = iam.ObjectPermissions.from_dict(json.loads(item.raw)) + new_acl = self._prepare_new_acl(object_permissions, migration_state, destination) + return partial(self._applier_task, item.object_type, item.object_id, new_acl) + + @staticmethod + def _is_item_relevant(item: Permissions, migration_state: GroupMigrationState) -> bool: # passwords and tokens are represented on the workspace-level if item.object_id in ("tokens", "passwords"): return True @@ -50,14 +82,6 @@ def is_item_relevant(self, item: Permissions, migration_state: GroupMigrationSta ] return any(g in mentioned_groups for g in [info.workspace.display_name for info in migration_state.groups]) - def _get_apply_task( - self, item: Permissions, migration_state: GroupMigrationState, destination: Destination - ) -> partial: - new_acl = self._prepare_new_acl( - iam.ObjectPermissions.from_dict(json.loads(item.raw)), migration_state, destination - ) - return partial(self._applier_task, item.object_type, item.object_id, new_acl) - @rate_limited(max_requests=30) def _applier_task(self, object_type: str, object_id: str, acl: list[iam.AccessControlRequest]): self._ws.permissions.update(object_type, object_id, access_control_list=acl) @@ -121,20 +145,17 @@ def _prepare_new_acl( return acl_requests -def listing_wrapper( - func: Callable[..., list], id_attribute: str, object_type: str -) -> Callable[..., Iterator[GenericPermissionsInfo]]: - def wrapper() -> Iterator[GenericPermissionsInfo]: - for item in func(): - yield GenericPermissionsInfo( - object_id=getattr(item, id_attribute), - request_type=object_type, - ) - - return wrapper +class WorkspaceListing(Listing): + def __init__(self, ws: WorkspaceClient, num_threads=20, start_path: str | None = "/"): + super().__init__(..., ..., ...) + self._ws = ws + self._num_threads = num_threads + self._start_path = start_path + def object_types(self) -> set[str]: + return {"notebooks", "directories", "repos", "files"} -def workspace_listing(ws: WorkspaceClient, num_threads=20, start_path: str | None = "/"): + @staticmethod def _convert_object_type_to_request_type(_object: workspace.ObjectInfo) -> str | None: match _object.object_type: case workspace.ObjectType.NOTEBOOK: @@ -151,17 +172,15 @@ def _convert_object_type_to_request_type(_object: workspace.ObjectInfo) -> str | case None: return None - def inner(): + def __iter__(self): from databricks.labs.ucx.workspace_access.listing import WorkspaceListing - ws_listing = WorkspaceListing(ws, num_threads=num_threads, with_directories=False) - for _object in ws_listing.walk(start_path): - request_type = _convert_object_type_to_request_type(_object) + ws_listing = WorkspaceListing(self._ws, num_threads=self._num_threads, with_directories=False) + for _object in ws_listing.walk(self._start_path): + request_type = self._convert_object_type_to_request_type(_object) if request_type: yield GenericPermissionsInfo(object_id=str(_object.object_id), request_type=request_type) - return inner - def models_listing(ws: WorkspaceClient): def inner() -> Iterator[ml.ModelDatabricks]: @@ -193,12 +212,6 @@ def inner() -> Iterator[ml.Experiment]: return inner -def authorization_listing(): - def inner(): - for _value in ["passwords", "tokens"]: - yield GenericPermissionsInfo( - object_id=_value, - request_type="authorization", - ) - - return inner +def tokens_and_passwords(): + for _value in ["passwords", "tokens"]: + yield GenericPermissionsInfo(_value, "authorization") diff --git a/src/databricks/labs/ucx/workspace_access/groups.py b/src/databricks/labs/ucx/workspace_access/groups.py index 7ca25cf0c9..f9fee127b8 100644 --- a/src/databricks/labs/ucx/workspace_access/groups.py +++ b/src/databricks/labs/ucx/workspace_access/groups.py @@ -42,6 +42,7 @@ def is_in_scope(self, attr: str, group: Group) -> bool: return False def get_by_workspace_group_name(self, workspace_group_name: str) -> MigrationGroupInfo | None: + # TODO: this method is deprecated, replace all usages by get_target_principal() found = [g for g in self.groups if g.workspace.display_name == workspace_group_name] if len(found) == 0: return None diff --git a/src/databricks/labs/ucx/workspace_access/manager.py b/src/databricks/labs/ucx/workspace_access/manager.py index 38251c7aba..7441d064ea 100644 --- a/src/databricks/labs/ucx/workspace_access/manager.py +++ b/src/databricks/labs/ucx/workspace_access/manager.py @@ -11,7 +11,7 @@ from databricks.labs.ucx.framework.parallel import Threads from databricks.labs.ucx.hive_metastore import GrantsCrawler, TablesCrawler from databricks.labs.ucx.workspace_access import generic, redash, scim, secrets -from databricks.labs.ucx.workspace_access.base import Applier, Crawler, Permissions +from databricks.labs.ucx.workspace_access.base import AclSupport, Permissions from databricks.labs.ucx.workspace_access.groups import GroupMigrationState from databricks.labs.ucx.workspace_access.tacl import TableAclSupport @@ -19,12 +19,9 @@ class PermissionManager(CrawlerBase): - def __init__( - self, backend: SqlBackend, inventory_database: str, crawlers: list[Crawler], appliers: dict[str, Applier] - ): + def __init__(self, backend: SqlBackend, inventory_database: str, crawlers: list[AclSupport]): super().__init__(backend, "hive_metastore", inventory_database, "permissions", Permissions) - self._crawlers = crawlers - self._appliers = appliers + self._acl_support = crawlers @classmethod def factory( @@ -39,86 +36,42 @@ def factory( if num_threads is None: num_threads = os.cpu_count() * 2 generic_acl_listing = [ - generic.listing_wrapper(ws.clusters.list, "cluster_id", "clusters"), - generic.listing_wrapper(ws.cluster_policies.list, "policy_id", "cluster-policies"), - generic.listing_wrapper(ws.instance_pools.list, "instance_pool_id", "instance-pools"), - generic.listing_wrapper(ws.warehouses.list, "id", "sql/warehouses"), - generic.listing_wrapper(ws.jobs.list, "job_id", "jobs"), - generic.listing_wrapper(ws.pipelines.list_pipelines, "pipeline_id", "pipelines"), - generic.listing_wrapper(generic.experiments_listing(ws), "experiment_id", "experiments"), - generic.listing_wrapper(generic.models_listing(ws), "id", "registered-models"), - generic.workspace_listing(ws, num_threads=num_threads, start_path=workspace_start_path), - generic.authorization_listing(), + generic.Listing(ws.clusters.list, "cluster_id", "clusters"), + generic.Listing(ws.cluster_policies.list, "policy_id", "cluster-policies"), + generic.Listing(ws.instance_pools.list, "instance_pool_id", "instance-pools"), + generic.Listing(ws.warehouses.list, "id", "sql/warehouses"), + generic.Listing(ws.jobs.list, "job_id", "jobs"), + generic.Listing(ws.pipelines.list_pipelines, "pipeline_id", "pipelines"), + generic.Listing(generic.experiments_listing(ws), "experiment_id", "experiments"), + generic.Listing(generic.models_listing(ws), "id", "registered-models"), + generic.Listing(generic.tokens_and_passwords, "object_id", "authorization"), + generic.WorkspaceListing(ws, num_threads=num_threads, start_path=workspace_start_path), ] redash_acl_listing = [ - redash.redash_listing_wrapper(ws.alerts.list, sql.ObjectTypePlural.ALERTS), - redash.redash_listing_wrapper(ws.dashboards.list, sql.ObjectTypePlural.DASHBOARDS), - redash.redash_listing_wrapper(ws.queries.list, sql.ObjectTypePlural.QUERIES), + redash.Listing(ws.alerts.list, sql.ObjectTypePlural.ALERTS), + redash.Listing(ws.dashboards.list, sql.ObjectTypePlural.DASHBOARDS), + redash.Listing(ws.queries.list, sql.ObjectTypePlural.QUERIES), ] generic_support = generic.GenericPermissionsSupport(ws, generic_acl_listing) - sql_support = redash.SqlPermissionsSupport(ws, redash_acl_listing) + sql_support = redash.RedashPermissionsSupport(ws, redash_acl_listing) secrets_support = secrets.SecretScopesSupport(ws) scim_support = scim.ScimSupport(ws) tables_crawler = TablesCrawler(sql_backend, inventory_database) grants_crawler = GrantsCrawler(tables_crawler) tacl_support = TableAclSupport(grants_crawler, sql_backend) return cls( - sql_backend, - inventory_database, - [generic_support, sql_support, secrets_support, scim_support, tacl_support], - cls._object_type_appliers(generic_support, sql_support, secrets_support, scim_support, tacl_support), + sql_backend, inventory_database, [generic_support, sql_support, secrets_support, scim_support, tacl_support] ) - @staticmethod - def _object_type_appliers(generic_support, sql_support, secrets_support, scim_support, tacl_support): - return { - # SCIM-based API - "entitlements": scim_support, - "roles": scim_support, - # Generic Permissions API - "authorization": generic_support, - "clusters": generic_support, - "cluster-policies": generic_support, - "instance-pools": generic_support, - "sql/warehouses": generic_support, - "jobs": generic_support, - "pipelines": generic_support, - "experiments": generic_support, - "registered-models": generic_support, - "notebooks": generic_support, - "files": generic_support, - "directories": generic_support, - "repos": generic_support, - # Redash equivalent of Generic Permissions API - "alerts": sql_support, - "queries": sql_support, - "dashboards": sql_support, - # Secret Scope ACL API - "secrets": secrets_support, - # Legacy Table ACLs - "TABLE": tacl_support, - "VIEW": tacl_support, - "DATABASE": tacl_support, - "ANY FILE": tacl_support, - "ANONYMOUS FUNCTION": tacl_support, - "CATALOG": tacl_support, - } - def inventorize_permissions(self): logger.debug("Crawling permissions") crawler_tasks = list(self._get_crawler_tasks()) logger.info(f"Starting to crawl permissions. Total tasks: {len(crawler_tasks)}") - results, errors = Threads.gather("crawl permissions", crawler_tasks) + items, errors = Threads.gather("crawl permissions", crawler_tasks) if len(errors) > 0: # TODO: https://github.com/databrickslabs/ucx/issues/406 logger.error(f"Detected {len(errors)} errors while crawling permissions") - items = [] - for item in results: - if item.object_type not in self._appliers: - msg = f"unknown object_type: {item.object_type}" - raise KeyError(msg) - items.append(item) - logger.info(f"Total crawled permissions after filtering: {len(items)}") + logger.info(f"Total crawled permissions: {len(items)}") self._save(items) logger.info(f"Saved {len(items)} to {self._full_name}") @@ -138,14 +91,22 @@ def apply_group_permissions(self, migration_state: GroupMigrationState, destinat support: list(items_subset) for support, items_subset in groupby(items, key=lambda i: i.object_type) } + appliers = {} + for support in self._acl_support: + for object_type in support.object_types(): + if object_type in appliers: + msg = f"{object_type} is already supported by {type(appliers[object_type]).__name__}" + raise KeyError(msg) + appliers[object_type] = support + # we first check that all supports are valid. for object_type in supports_to_items: - if object_type not in self._appliers: + if object_type not in appliers: msg = f"Could not find support for {object_type}. Please check the inventory table." raise ValueError(msg) for object_type, items_subset in supports_to_items.items(): - relevant_support = self._appliers[object_type] + relevant_support = appliers[object_type] tasks_for_support = [ relevant_support.get_apply_task(item, migration_state, destination) for item in items_subset ] @@ -178,5 +139,5 @@ def _load_all(self) -> list[Permissions]: ] def _get_crawler_tasks(self) -> Iterator[Callable[..., Permissions | None]]: - for support in self._crawlers: + for support in self._acl_support: yield from support.get_crawler_tasks() diff --git a/src/databricks/labs/ucx/workspace_access/redash.py b/src/databricks/labs/ucx/workspace_access/redash.py index 49695e0657..91312a504e 100644 --- a/src/databricks/labs/ucx/workspace_access/redash.py +++ b/src/databricks/labs/ucx/workspace_access/redash.py @@ -10,8 +10,7 @@ from databricks.labs.ucx.mixins.hardening import rate_limited from databricks.labs.ucx.workspace_access.base import ( - Applier, - Crawler, + AclSupport, Destination, Permissions, logger, @@ -28,12 +27,24 @@ class SqlPermissionsInfo: # This module is called redash to disambiguate from databricks.sdk.service.sql -class SqlPermissionsSupport(Crawler, Applier): - def __init__(self, ws: WorkspaceClient, listings: list[Callable[..., list[SqlPermissionsInfo]]]): +class Listing: + def __init__(self, func: Callable[..., list], request_type: sql.ObjectTypePlural): + self._func = func + self._request_type = request_type + self.object_type = request_type.value + + def __iter__(self): + for item in self._func(): + yield SqlPermissionsInfo(item.id, self._request_type) + + +class RedashPermissionsSupport(AclSupport): + def __init__(self, ws: WorkspaceClient, listings: list[Listing]): self._ws = ws self._listings = listings - def is_item_relevant(self, item: Permissions, migration_state: GroupMigrationState) -> bool: + @staticmethod + def _is_item_relevant(item: Permissions, migration_state: GroupMigrationState) -> bool: mentioned_groups = [ acl.group_name for acl in sql.GetResponse.from_dict(json.loads(item.raw)).access_control_list ] @@ -41,10 +52,18 @@ def is_item_relevant(self, item: Permissions, migration_state: GroupMigrationSta def get_crawler_tasks(self): for listing in self._listings: - for item in listing(): + for item in listing: yield partial(self._crawler_task, item.object_id, item.request_type) - def _get_apply_task(self, item: Permissions, migration_state: GroupMigrationState, destination: Destination): + def object_types(self) -> set[str]: + all_object_types = set() + for listing in self._listings: + all_object_types.add(listing.object_type) + return all_object_types + + def get_apply_task(self, item: Permissions, migration_state: GroupMigrationState, destination: Destination): + if not self._is_item_relevant(item, migration_state): + return None new_acl = self._prepare_new_acl( sql.GetResponse.from_dict(json.loads(item.raw)).access_control_list, migration_state, diff --git a/src/databricks/labs/ucx/workspace_access/scim.py b/src/databricks/labs/ucx/workspace_access/scim.py index c9d53dee73..2be29623dc 100644 --- a/src/databricks/labs/ucx/workspace_access/scim.py +++ b/src/databricks/labs/ucx/workspace_access/scim.py @@ -8,19 +8,19 @@ from databricks.labs.ucx.mixins.hardening import rate_limited from databricks.labs.ucx.workspace_access.base import ( - Applier, - Crawler, + AclSupport, Destination, Permissions, ) from databricks.labs.ucx.workspace_access.groups import GroupMigrationState -class ScimSupport(Crawler, Applier): +class ScimSupport(AclSupport): def __init__(self, ws: WorkspaceClient): self._ws = ws - def is_item_relevant(self, item: Permissions, migration_state: GroupMigrationState) -> bool: + @staticmethod + def _is_item_relevant(item: Permissions, migration_state: GroupMigrationState) -> bool: return any(g.workspace.id == item.object_id for g in migration_state.groups) def get_crawler_tasks(self): @@ -37,17 +37,19 @@ def get_crawler_tasks(self): def _get_groups(self): return self._ws.groups.list(attributes="id,displayName,roles,entitlements") - def _get_apply_task(self, item: Permissions, migration_state: GroupMigrationState, destination: Destination): + def object_types(self) -> set[str]: + return {"roles", "entitlements"} + + def get_apply_task(self, item: Permissions, migration_state: GroupMigrationState, destination: Destination): + if not self._is_item_relevant(item, migration_state): + return None value = [iam.ComplexValue.from_dict(e) for e in json.loads(item.raw)] target_info = [g for g in migration_state.groups if g.workspace.id == item.object_id] - if len(target_info) == 0: - msg = f"Could not find group with ID {item.object_id}" - raise ValueError(msg) - else: - target_group_id = getattr(target_info[0], destination).id - return partial(self._applier_task, group_id=target_group_id, value=value, property_name=item.object_type) - - def _crawler_task(self, group: iam.Group, property_name: str): + target_group_id = getattr(target_info[0], destination).id + return partial(self._applier_task, group_id=target_group_id, value=value, property_name=item.object_type) + + @staticmethod + def _crawler_task(group: iam.Group, property_name: str): return Permissions( object_id=group.id, object_type=property_name, diff --git a/src/databricks/labs/ucx/workspace_access/secrets.py b/src/databricks/labs/ucx/workspace_access/secrets.py index 0523b2ddf0..633ba0258e 100644 --- a/src/databricks/labs/ucx/workspace_access/secrets.py +++ b/src/databricks/labs/ucx/workspace_access/secrets.py @@ -8,15 +8,14 @@ from databricks.labs.ucx.mixins.hardening import rate_limited from databricks.labs.ucx.workspace_access.base import ( - Applier, - Crawler, + AclSupport, Destination, Permissions, ) from databricks.labs.ucx.workspace_access.groups import GroupMigrationState -class SecretScopesSupport(Crawler, Applier): +class SecretScopesSupport(AclSupport): def __init__(self, ws: WorkspaceClient): self._ws = ws @@ -34,7 +33,33 @@ def _crawler_task(scope: workspace.SecretScope): for scope in scopes: yield partial(_crawler_task, scope) - def is_item_relevant(self, item: Permissions, migration_state: GroupMigrationState) -> bool: + def object_types(self) -> set[str]: + return {"secrets"} + + def get_apply_task(self, item: Permissions, migration_state: GroupMigrationState, destination: Destination): + if not self._is_item_relevant(item, migration_state): + return None + + acls = [workspace.AclItem.from_dict(acl) for acl in json.loads(item.raw)] + new_acls = [] + + for acl in acls: + if acl.principal in [i.workspace.display_name for i in migration_state.groups]: + source_info = migration_state.get_by_workspace_group_name(acl.principal) + target: iam.Group = getattr(source_info, destination) + new_acls.append(workspace.AclItem(principal=target.display_name, permission=acl.permission)) + else: + new_acls.append(acl) + + def apply_acls(): + for acl in new_acls: + self._rate_limited_put_acl(item.object_id, acl.principal, acl.permission) + return True + + return partial(apply_acls) + + @staticmethod + def _is_item_relevant(item: Permissions, migration_state: GroupMigrationState) -> bool: acls = [workspace.AclItem.from_dict(acl) for acl in json.loads(item.raw)] mentioned_groups = [acl.principal for acl in acls] return any(g in mentioned_groups for g in [info.workspace.display_name for info in migration_state.groups]) @@ -74,24 +99,3 @@ def _inflight_check( def _rate_limited_put_acl(self, object_id: str, principal: str, permission: workspace.AclPermission): self._ws.secrets.put_acl(object_id, principal, permission) self._inflight_check(scope_name=object_id, group_name=principal, expected_permission=permission) - - def _get_apply_task( - self, item: Permissions, migration_state: GroupMigrationState, destination: Destination - ) -> partial: - acls = [workspace.AclItem.from_dict(acl) for acl in json.loads(item.raw)] - new_acls = [] - - for acl in acls: - if acl.principal in [i.workspace.display_name for i in migration_state.groups]: - source_info = migration_state.get_by_workspace_group_name(acl.principal) - target: iam.Group = getattr(source_info, destination) - new_acls.append(workspace.AclItem(principal=target.display_name, permission=acl.permission)) - else: - new_acls.append(acl) - - def apply_acls(): - for acl in new_acls: - self._rate_limited_put_acl(item.object_id, acl.principal, acl.permission) - return True - - return partial(apply_acls) diff --git a/src/databricks/labs/ucx/workspace_access/tacl.py b/src/databricks/labs/ucx/workspace_access/tacl.py index ee4db3f81d..b2d3a3cb5c 100644 --- a/src/databricks/labs/ucx/workspace_access/tacl.py +++ b/src/databricks/labs/ucx/workspace_access/tacl.py @@ -8,15 +8,14 @@ from databricks.labs.ucx.hive_metastore import GrantsCrawler from databricks.labs.ucx.hive_metastore.grants import Grant from databricks.labs.ucx.workspace_access.base import ( - Applier, - Crawler, + AclSupport, Destination, Permissions, ) from databricks.labs.ucx.workspace_access.groups import GroupMigrationState -class TableAclSupport(Crawler, Applier): +class TableAclSupport(AclSupport): def __init__(self, grants_crawler: GrantsCrawler, sql_backend: SqlBackend): self._grants_crawler = grants_crawler self._sql_backend = sql_backend @@ -29,22 +28,15 @@ def inner(grant: Grant) -> Permissions: for grant in self._grants_crawler.snapshot(): yield functools.partial(inner, grant) - def is_item_relevant(self, _1: Permissions, _2: GroupMigrationState) -> bool: - # TODO: this abstract method is a design flaw https://github.com/databrickslabs/ucx/issues/410 - return True + def object_types(self) -> set[str]: + return {"TABLE", "DATABASE", "VIEW", "CATALOG", "ANONYMOUS FUNCTION", "ANY FILE"} - def _noop(self): - pass - - def _get_apply_task( - self, item: Permissions, migration_state: GroupMigrationState, destination: Destination - ) -> partial: + def get_apply_task(self, item: Permissions, migration_state: GroupMigrationState, destination: Destination): grant = Grant(**json.loads(item.raw)) target_principal = migration_state.get_target_principal(grant.principal, destination) if target_principal is None: # this is a grant for user, service principal, or irrelevant group - # technically, we should be able just to `return self._noop` - return partial(self._noop) + return None target_grant = dataclasses.replace(grant, principal=target_principal) sql = target_grant.hive_grant_sql() # this has to be executed on tacl cluster, otherwise - use SQLExecutionAPI backend & Warehouse diff --git a/tests/unit/workspace_access/test_base.py b/tests/unit/workspace_access/test_base.py index bf9d108672..88e1f61c1c 100644 --- a/tests/unit/workspace_access/test_base.py +++ b/tests/unit/workspace_access/test_base.py @@ -1,8 +1,13 @@ +from collections.abc import Callable, Iterator from functools import partial from databricks.sdk.service import iam -from databricks.labs.ucx.workspace_access.base import Applier, Permissions +from databricks.labs.ucx.workspace_access.base import ( + AclSupport, + Destination, + Permissions, +) from databricks.labs.ucx.workspace_access.groups import ( GroupMigrationState, MigrationGroupInfo, @@ -10,13 +15,27 @@ def test_applier(): - class SampleApplier(Applier): - def is_item_relevant(self, item: Permissions, migration_state: GroupMigrationState) -> bool: + class SampleApplier(AclSupport): + def __init__(self): + self.called = False + + def get_crawler_tasks(self) -> Iterator[Callable[..., Permissions | None]]: + return [] + + def object_types(self) -> set[str]: + return {"test"} + + @staticmethod + def _is_item_relevant(item: Permissions, migration_state: GroupMigrationState) -> bool: workspace_groups = [info.workspace.display_name for info in migration_state.groups] return item.object_id in workspace_groups - def _get_apply_task(self, _, __, ___): + def get_apply_task(self, item: Permissions, migration_state: GroupMigrationState, _: Destination): + if not self._is_item_relevant(item, migration_state): + return None + def test_task(): + self.called = True print("here!") return partial(test_task) @@ -33,8 +52,9 @@ def test_task(): ) task = applier.get_apply_task(positive_item, migration_state, "backup") - assert task.func.__name__ == "test_task" + task() + assert applier.called negative_item = Permissions(object_id="not-here", object_type="test", raw="test") - new_task = applier.get_apply_task(negative_item, migration_state, "backup") - new_task.func() + task = applier.get_apply_task(negative_item, migration_state, "backup") + assert task is None diff --git a/tests/unit/workspace_access/test_generic.py b/tests/unit/workspace_access/test_generic.py index 3bddd174ad..1b746d4f0c 100644 --- a/tests/unit/workspace_access/test_generic.py +++ b/tests/unit/workspace_access/test_generic.py @@ -6,11 +6,11 @@ from databricks.labs.ucx.workspace_access.generic import ( GenericPermissionsSupport, + Listing, Permissions, - authorization_listing, experiments_listing, - listing_wrapper, models_listing, + tokens_and_passwords, ) @@ -38,7 +38,7 @@ def test_crawler(): sup = GenericPermissionsSupport( ws=ws, listings=[ - listing_wrapper(ws.clusters.list, "cluster_id", "clusters"), + Listing(ws.clusters.list, "cluster_id", "clusters"), ], ) @@ -96,7 +96,7 @@ def test_apply(migration_state): def test_relevance(): sup = GenericPermissionsSupport(ws=MagicMock(), listings=[]) # no listings since only apply is tested - result = sup.is_item_relevant( + result = sup._is_item_relevant( item=Permissions(object_id="passwords", object_type="passwords", raw="some-stuff"), migration_state=MagicMock(), ) @@ -127,7 +127,7 @@ def test_no_permissions(): sup = GenericPermissionsSupport( ws=ws, listings=[ - listing_wrapper(ws.clusters.list, "cluster_id", "clusters"), + Listing(ws.clusters.list, "cluster_id", "clusters"), ], ) tasks = list(sup.get_crawler_tasks()) @@ -153,7 +153,7 @@ def test_passwords_tokens_crawler(migration_state): iam.ObjectPermissions(object_id="tokens", object_type="authorization", access_control_list=basic_acl), ] - sup = GenericPermissionsSupport(ws=ws, listings=[authorization_listing()]) + sup = GenericPermissionsSupport(ws=ws, listings=[Listing(tokens_and_passwords, "object_id", "authorization")]) tasks = list(sup.get_crawler_tasks()) assert len(tasks) == 2 auth_items = [task() for task in tasks] @@ -182,8 +182,8 @@ def test_models_listing(): ) ) - wrapped = listing_wrapper(models_listing(ws), id_attribute="id", object_type="registered-models") - result = list(wrapped()) + wrapped = Listing(models_listing(ws), id_attribute="id", object_type="registered-models") + result = list(wrapped) assert len(result) == 1 assert result[0].object_id == "some-id" assert result[0].request_type == "registered-models" @@ -199,8 +199,8 @@ def test_experiment_listing(): experiment_id="test4", tags=[ml.ExperimentTag(key="mlflow.experiment.sourceType", value="REPO_NOTEBOOK")] ), ] - wrapped = listing_wrapper(experiments_listing(ws), id_attribute="experiment_id", object_type="experiments") - results = list(wrapped()) + wrapped = Listing(experiments_listing(ws), id_attribute="experiment_id", object_type="experiments") + results = list(wrapped) assert len(results) == 2 for res in results: assert res.request_type == "experiments" diff --git a/tests/unit/workspace_access/test_groups.py b/tests/unit/workspace_access/test_groups.py index 053aafe9de..91dd674c75 100644 --- a/tests/unit/workspace_access/test_groups.py +++ b/tests/unit/workspace_access/test_groups.py @@ -73,8 +73,8 @@ def test_no_group_in_migration_state(migration_state): object_type="roles", raw=json.dumps([p.as_dict() for p in sample_permissions]), ) - with pytest.raises(ValueError): - sup._get_apply_task(item, migration_state, "backup") + task = sup.get_apply_task(item, migration_state, "backup") + assert task is None def test_non_relevant(migration_state): @@ -91,8 +91,8 @@ def test_non_relevant(migration_state): object_type="roles", raw=json.dumps([p.as_dict() for p in sample_permissions]), ) - assert sup.is_item_relevant(relevant_item, migration_state) - assert not sup.is_item_relevant(irrelevant_item, migration_state) + assert sup._is_item_relevant(relevant_item, migration_state) + assert not sup._is_item_relevant(irrelevant_item, migration_state) def compare(s, t): diff --git a/tests/unit/workspace_access/test_listing.py b/tests/unit/workspace_access/test_listing.py index 21d1d4e4cc..e0816dd1fb 100644 --- a/tests/unit/workspace_access/test_listing.py +++ b/tests/unit/workspace_access/test_listing.py @@ -4,13 +4,12 @@ from databricks.sdk.service import workspace from databricks.sdk.service.workspace import ObjectInfo, ObjectType -from databricks.labs.ucx.workspace_access.generic import workspace_listing -from databricks.labs.ucx.workspace_access.listing import WorkspaceListing +from databricks.labs.ucx.workspace_access import generic, listing def test_logging_calls(): ws = MagicMock() - workspace_listing = WorkspaceListing(ws=ws, num_threads=1) + workspace_listing = listing.WorkspaceListing(ws=ws, num_threads=1) workspace_listing.start_time = dt.datetime.now() workspace_listing._counter = 9 # with patch.object(logger, "info") as mock_info: @@ -19,8 +18,8 @@ def test_logging_calls(): def test_workspace_listing(): - listing = MagicMock(spec=WorkspaceListing) - listing.walk.return_value = [ + listing_instance = MagicMock(spec=listing.WorkspaceListing) + listing_instance.walk.return_value = [ workspace.ObjectInfo(object_id=1, object_type=workspace.ObjectType.NOTEBOOK), workspace.ObjectInfo(object_id=2, object_type=workspace.ObjectType.DIRECTORY), workspace.ObjectInfo(object_id=3, object_type=workspace.ObjectType.LIBRARY), @@ -29,10 +28,10 @@ def test_workspace_listing(): workspace.ObjectInfo(object_id=6, object_type=None), # MLflow Experiment ] - with patch("databricks.labs.ucx.workspace_access.listing.WorkspaceListing", return_value=listing): - results = workspace_listing(ws=MagicMock())() + with patch("databricks.labs.ucx.workspace_access.listing.WorkspaceListing", return_value=listing_instance): + results = generic.WorkspaceListing(ws=MagicMock()) assert len(list(results)) == 4 - listing.walk.assert_called_once() + listing_instance.walk.assert_called_once() for res in results: assert res.request_type in [ "notebooks", @@ -40,7 +39,7 @@ def test_workspace_listing(): "repos", "files", ] - assert res.object_id in [1, 2, 4, 5] + assert int(res.object_id) in [1, 2, 4, 5] # Helper to compare an unordered list of objects @@ -64,8 +63,8 @@ def test_list_and_analyze_should_separate_folders_and_other_objects(): client = Mock() client.workspace.list.return_value = [file, directory, notebook] - listing = WorkspaceListing(client, 1) - directories, others = listing._list_and_analyze(rootobj) + listing_instance = listing.WorkspaceListing(client, 1) + directories, others = listing_instance._list_and_analyze(rootobj) assert compare(others, [file, notebook]) assert compare(directories, [directory]) @@ -78,11 +77,11 @@ def test_walk_with_an_empty_folder_should_return_it(): client.workspace.list.return_value = [] client.workspace.get_status.return_value = rootobj - listing = WorkspaceListing(client, 1) - listing.walk("/rootPath") + listing_instance = listing.WorkspaceListing(client, 1) + listing_instance.walk("/rootPath") - assert len(listing.results) == 1 - assert listing.results == [rootobj] + assert len(listing_instance.results) == 1 + assert listing_instance.results == [rootobj] def test_walk_with_two_files_should_return_rootpath_and_two_files(): @@ -94,11 +93,11 @@ def test_walk_with_two_files_should_return_rootpath_and_two_files(): client.workspace.list.return_value = [file, notebook] client.workspace.get_status.return_value = rootobj - listing = WorkspaceListing(client, 1) - listing.walk("/rootPath") + listing_instance = listing.WorkspaceListing(client, 1) + listing_instance.walk("/rootPath") - assert len(listing.results) == 3 - assert compare(listing.results, [rootobj, file, notebook]) + assert len(listing_instance.results) == 3 + assert compare(listing_instance.results, [rootobj, file, notebook]) def test_walk_with_nested_folders_should_return_nested_objects(): @@ -117,11 +116,11 @@ def my_side_effect(path, **kwargs): client.workspace.list.side_effect = my_side_effect client.workspace.get_status.return_value = rootobj - listing = WorkspaceListing(client, 1) - listing.walk("/rootPath") + listing_instance = listing.WorkspaceListing(client, 1) + listing_instance.walk("/rootPath") - assert len(listing.results) == 4 - assert compare(listing.results, [rootobj, file, nested_folder, nested_notebook]) + assert len(listing_instance.results) == 4 + assert compare(listing_instance.results, [rootobj, file, nested_folder, nested_notebook]) def test_walk_with_three_level_nested_folders_returns_three_levels(): @@ -147,10 +146,11 @@ def my_side_effect(path, **kwargs): client = Mock() client.workspace.list.side_effect = my_side_effect client.workspace.get_status.return_value = rootobj - listing = WorkspaceListing(client, 2) - listing.walk("/rootPath") + listing_instance = listing.WorkspaceListing(client, 2) + listing_instance.walk("/rootPath") - assert len(listing.results) == 6 + assert len(listing_instance.results) == 6 assert compare( - listing.results, [rootobj, file, nested_folder, nested_notebook, second_nested_folder, second_nested_notebook] + listing_instance.results, + [rootobj, file, nested_folder, nested_notebook, second_nested_folder, second_nested_notebook], ) diff --git a/tests/unit/workspace_access/test_manager.py b/tests/unit/workspace_access/test_manager.py index fd784e6534..2e4ab709b4 100644 --- a/tests/unit/workspace_access/test_manager.py +++ b/tests/unit/workspace_access/test_manager.py @@ -21,13 +21,13 @@ def b(): def test_inventory_table_manager_init(b): - pi = PermissionManager(b, "test_database", [], {}) + pi = PermissionManager(b, "test_database", []) assert pi._full_name == "hive_metastore.test_database.permissions" def test_cleanup(b): - pi = PermissionManager(b, "test_database", [], {}) + pi = PermissionManager(b, "test_database", []) pi.cleanup() @@ -35,7 +35,7 @@ def test_cleanup(b): def test_save(b): - pi = PermissionManager(b, "test_database", [], {}) + pi = PermissionManager(b, "test_database", []) pi._save([Permissions("object1", "clusters", "test acl")]) @@ -58,7 +58,7 @@ def test_load_all(): ] } ) - pi = PermissionManager(b, "test_database", [], {}) + pi = PermissionManager(b, "test_database", []) output = pi._load_all() assert output[0] == Permissions("object1", "clusters", "test acl") @@ -67,7 +67,7 @@ def test_load_all(): def test_manager_inventorize(b, mocker): some_crawler = mocker.Mock() some_crawler.get_crawler_tasks = lambda: [lambda: None, lambda: Permissions("a", "b", "c"), lambda: None] - pm = PermissionManager(b, "test_database", [some_crawler], {"b": mocker.Mock()}) + pm = PermissionManager(b, "test_database", [some_crawler]) pm.inventorize_permissions() @@ -76,15 +76,6 @@ def test_manager_inventorize(b, mocker): ) -def test_manager_inventorize_unknown_object_type_raises_error(b, mocker): - some_crawler = mocker.Mock() - some_crawler.get_crawler_tasks = lambda: [lambda: None, lambda: Permissions("a", "b", "c"), lambda: None] - pm = PermissionManager(b, "test_database", [some_crawler], {}) - - with pytest.raises(KeyError): - pm.inventorize_permissions() - - def test_manager_apply(mocker): b = MockBackend( rows={ @@ -132,20 +123,13 @@ def test_manager_apply(mocker): # has to be set, as it's going to be appended through multiple threads applied_items = set() mock_applier = mocker.Mock() + mock_applier.object_types = lambda: {"clusters", "cluster-policies"} # this emulates a real applier and call to an API mock_applier.get_apply_task = lambda item, _, dst: lambda: applied_items.add( f"{item.object_id} {item.object_id} {dst}" ) - pm = PermissionManager( - b, - "test_database", - [], - { - "clusters": mock_applier, - "cluster-policies": mock_applier, - }, - ) + pm = PermissionManager(b, "test_database", [mock_applier]) group_migration_state: GroupMigrationState = MagicMock() group_migration_state.groups = [ MigrationGroupInfo( @@ -168,7 +152,7 @@ def test_unregistered_support(): ] } ) - pm = PermissionManager(b, "test", [], {}) + pm = PermissionManager(b, "test", []) pm.apply_group_permissions(migration_state=MagicMock(), destination="backup") diff --git a/tests/unit/workspace_access/test_redash.py b/tests/unit/workspace_access/test_redash.py index 84587db7d8..44844028bf 100644 --- a/tests/unit/workspace_access/test_redash.py +++ b/tests/unit/workspace_access/test_redash.py @@ -6,9 +6,9 @@ from databricks.sdk.service import sql from databricks.labs.ucx.workspace_access.redash import ( + Listing, Permissions, - SqlPermissionsSupport, - redash_listing_wrapper, + RedashPermissionsSupport, ) @@ -39,12 +39,12 @@ def test_crawlers(): for ot in [sql.ObjectType.ALERT, sql.ObjectType.QUERY, sql.ObjectType.DASHBOARD] ] - sup = SqlPermissionsSupport( + sup = RedashPermissionsSupport( ws=ws, listings=[ - redash_listing_wrapper(ws.alerts.list, sql.ObjectTypePlural.ALERTS), - redash_listing_wrapper(ws.dashboards.list, sql.ObjectTypePlural.DASHBOARDS), - redash_listing_wrapper(ws.queries.list, sql.ObjectTypePlural.QUERIES), + Listing(ws.alerts.list, sql.ObjectTypePlural.ALERTS), + Listing(ws.dashboards.list, sql.ObjectTypePlural.DASHBOARDS), + Listing(ws.queries.list, sql.ObjectTypePlural.QUERIES), ], ) @@ -62,7 +62,7 @@ def test_crawlers(): def test_apply(migration_state): ws = MagicMock() - sup = SqlPermissionsSupport(ws=ws, listings=[]) + sup = RedashPermissionsSupport(ws=ws, listings=[]) item = Permissions( object_id="test", object_type="alerts", @@ -104,14 +104,14 @@ def test_apply(migration_state): def test_safe_getter_known(): ws = MagicMock() ws.dbsql_permissions.get.side_effect = DatabricksError(error_code="RESOURCE_DOES_NOT_EXIST") - sup = SqlPermissionsSupport(ws=ws, listings=[]) + sup = RedashPermissionsSupport(ws=ws, listings=[]) assert sup._safe_get_dbsql_permissions(object_type=sql.ObjectTypePlural.ALERTS, object_id="test") is None def test_safe_getter_unknown(): ws = MagicMock() ws.dbsql_permissions.get.side_effect = DatabricksError(error_code="SOMETHING_NON_EXPECTED") - sup = SqlPermissionsSupport(ws=ws, listings=[]) + sup = RedashPermissionsSupport(ws=ws, listings=[]) with pytest.raises(DatabricksError): sup._safe_get_dbsql_permissions(object_type=sql.ObjectTypePlural.ALERTS, object_id="test") @@ -119,5 +119,5 @@ def test_safe_getter_unknown(): def test_empty_permissions(): ws = MagicMock() ws.dbsql_permissions.get.side_effect = DatabricksError(error_code="RESOURCE_DOES_NOT_EXIST") - sup = SqlPermissionsSupport(ws=ws, listings=[]) + sup = RedashPermissionsSupport(ws=ws, listings=[]) assert sup._crawler_task(object_id="test", object_type=sql.ObjectTypePlural.ALERTS) is None diff --git a/tests/unit/workspace_access/test_tacl.py b/tests/unit/workspace_access/test_tacl.py index 13b538e912..fa2195230f 100644 --- a/tests/unit/workspace_access/test_tacl.py +++ b/tests/unit/workspace_access/test_tacl.py @@ -90,6 +90,6 @@ def test_tacl_applier_no_target_principal(mocker): ) ) task = table_acl_support.get_apply_task(permissions, migration_state, "backup") - task() + assert task is None assert [] == sql_backend.queries From b4630dccef7445d2ad025c87f4028fa930675bd7 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Wed, 11 Oct 2023 19:10:24 +0200 Subject: [PATCH 2/4] .. --- .../labs/ucx/workspace_access/generic.py | 2 +- .../labs/ucx/workspace_access/manager.py | 18 ++++++----- tests/unit/workspace_access/test_manager.py | 30 ++++++++++++++++++- 3 files changed, 41 insertions(+), 9 deletions(-) diff --git a/src/databricks/labs/ucx/workspace_access/generic.py b/src/databricks/labs/ucx/workspace_access/generic.py index 9d28b4c7ae..69a2f6db35 100644 --- a/src/databricks/labs/ucx/workspace_access/generic.py +++ b/src/databricks/labs/ucx/workspace_access/generic.py @@ -38,7 +38,7 @@ def __init__(self, func: Callable[..., list], id_attribute: str, object_type: st self._object_type = object_type def object_types(self) -> set[str]: - return set(self._object_type) + return {self._object_type} def __iter__(self): started = datetime.datetime.now() diff --git a/src/databricks/labs/ucx/workspace_access/manager.py b/src/databricks/labs/ucx/workspace_access/manager.py index 7441d064ea..53121f36a2 100644 --- a/src/databricks/labs/ucx/workspace_access/manager.py +++ b/src/databricks/labs/ucx/workspace_access/manager.py @@ -91,13 +91,7 @@ def apply_group_permissions(self, migration_state: GroupMigrationState, destinat support: list(items_subset) for support, items_subset in groupby(items, key=lambda i: i.object_type) } - appliers = {} - for support in self._acl_support: - for object_type in support.object_types(): - if object_type in appliers: - msg = f"{object_type} is already supported by {type(appliers[object_type]).__name__}" - raise KeyError(msg) - appliers[object_type] = support + appliers = self._appliers() # we first check that all supports are valid. for object_type in supports_to_items: @@ -122,6 +116,16 @@ def apply_group_permissions(self, migration_state: GroupMigrationState, destinat logger.info("Permissions were applied") return True + def _appliers(self) -> dict[str, AclSupport]: + appliers = {} + for support in self._acl_support: + for object_type in support.object_types(): + if object_type in appliers: + msg = f"{object_type} is already supported by {type(appliers[object_type]).__name__}" + raise KeyError(msg) + appliers[object_type] = support + return appliers + def cleanup(self): logger.info(f"Cleaning up inventory table {self._full_name}") self._exec(f"DROP TABLE IF EXISTS {self._full_name}") diff --git a/tests/unit/workspace_access/test_manager.py b/tests/unit/workspace_access/test_manager.py index 2e4ab709b4..d71d90824c 100644 --- a/tests/unit/workspace_access/test_manager.py +++ b/tests/unit/workspace_access/test_manager.py @@ -159,4 +159,32 @@ def test_unregistered_support(): def test_factory(mocker): ws = mocker.Mock() b = MockBackend() - PermissionManager.factory(ws, b, "test") + permission_manager = PermissionManager.factory(ws, b, "test") + appliers = permission_manager._appliers() + assert { + "sql/warehouses", + "registered-models", + "instance-pools", + "jobs", + "directories", + "experiments", + "clusters", + "notebooks", + "repos", + "files", + "authorization", + "pipelines", + "cluster-policies", + "dashboards", + "queries", + "alerts", + "secrets", + "entitlements", + "roles", + "ANONYMOUS FUNCTION", + "CATALOG", + "TABLE", + "ANY FILE", + "VIEW", + "DATABASE", + } == appliers.keys() From 1b11264d638d7692d611ba159fe85c0b1e48a36c Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Wed, 11 Oct 2023 21:15:17 +0200 Subject: [PATCH 3/4] .. --- src/databricks/labs/ucx/mixins/fixtures.py | 9 ++++----- tests/integration/assessment/test_assessment.py | 5 ++--- .../workspace_access/test_permissions_manager.py | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/src/databricks/labs/ucx/mixins/fixtures.py b/src/databricks/labs/ucx/mixins/fixtures.py index 1293359b3d..1b6e142ad3 100644 --- a/src/databricks/labs/ucx/mixins/fixtures.py +++ b/src/databricks/labs/ucx/mixins/fixtures.py @@ -471,15 +471,14 @@ def create( if single_node: kwargs["num_workers"] = 0 if "spark_conf" in kwargs: - kwargs["spark_conf"] = { - **kwargs["spark_conf"], - **{"spark.databricks.cluster.profile": "singleNode", "spark.master": "local[*]"}, + kwargs["spark_conf"] = kwargs["spark_conf"] | { + "spark.databricks.cluster.profile": "singleNode", + "spark.master": "local[*]" } else: kwargs["spark_conf"] = {"spark.databricks.cluster.profile": "singleNode", "spark.master": "local[*]"} kwargs["custom_tags"] = {"ResourceClass": "SingleNode"} - kwargs["node_type_id"] = ws.clusters.select_node_type(local_disk=True) - elif "instance_pool_id" not in kwargs: + if "instance_pool_id" not in kwargs: kwargs["node_type_id"] = ws.clusters.select_node_type(local_disk=True) return ws.clusters.create( diff --git a/tests/integration/assessment/test_assessment.py b/tests/integration/assessment/test_assessment.py index 39877b2c58..7d8a644df1 100644 --- a/tests/integration/assessment/test_assessment.py +++ b/tests/integration/assessment/test_assessment.py @@ -72,18 +72,17 @@ def test_pipeline_with_secret_conf_crawler(ws, make_pipeline, inventory_schema, def test_cluster_crawler(ws, make_cluster, inventory_schema, sql_backend): created_cluster = make_cluster(single_node=True, spark_conf=_SPARK_CONF) - new_cluster = created_cluster.result() cluster_crawler = ClustersCrawler(ws=ws, sbe=sql_backend, schema=inventory_schema) clusters = cluster_crawler.snapshot() results = [] for cluster in clusters: if cluster.success != 0: continue - if cluster.cluster_id == new_cluster.cluster_id: + if cluster.cluster_id == created_cluster.cluster_id: results.append(cluster) assert len(results) >= 1 - assert results[0].cluster_id == new_cluster.cluster_id + assert results[0].cluster_id == created_cluster.cluster_id def test_job_crawler(ws, make_job, inventory_schema, sql_backend): diff --git a/tests/integration/workspace_access/test_permissions_manager.py b/tests/integration/workspace_access/test_permissions_manager.py index 4defb01401..cd8b306292 100644 --- a/tests/integration/workspace_access/test_permissions_manager.py +++ b/tests/integration/workspace_access/test_permissions_manager.py @@ -3,7 +3,7 @@ def test_permissions_save_and_load(ws, sql_backend, inventory_schema, env_or_skip): - pi = PermissionManager(sql_backend, inventory_schema, [], {}) + pi = PermissionManager(sql_backend, inventory_schema, []) saved = [ Permissions(object_id="abc", object_type="bcd", raw="def"), From 6d11dc04446c8ab121a9fab8c346af0026155772 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Wed, 11 Oct 2023 21:42:00 +0200 Subject: [PATCH 4/4] .. --- src/databricks/labs/ucx/mixins/fixtures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/mixins/fixtures.py b/src/databricks/labs/ucx/mixins/fixtures.py index 1b6e142ad3..9d3f56dc83 100644 --- a/src/databricks/labs/ucx/mixins/fixtures.py +++ b/src/databricks/labs/ucx/mixins/fixtures.py @@ -473,7 +473,7 @@ def create( if "spark_conf" in kwargs: kwargs["spark_conf"] = kwargs["spark_conf"] | { "spark.databricks.cluster.profile": "singleNode", - "spark.master": "local[*]" + "spark.master": "local[*]", } else: kwargs["spark_conf"] = {"spark.databricks.cluster.profile": "singleNode", "spark.master": "local[*]"}