From bf6491d206f77ae207aaaf2c2ddddefd0e9d7048 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 16 Dec 2024 15:49:34 +0100 Subject: [PATCH 1/5] Add exclude dashboard ids to LakeviewDashboard --- src/databricks/labs/ucx/assessment/dashboards.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/databricks/labs/ucx/assessment/dashboards.py b/src/databricks/labs/ucx/assessment/dashboards.py index 67fe12e629..4b2146b47c 100644 --- a/src/databricks/labs/ucx/assessment/dashboards.py +++ b/src/databricks/labs/ucx/assessment/dashboards.py @@ -310,11 +310,13 @@ def __init__( schema: str, *, include_dashboard_ids: list[str] | None = None, + exclude_dashboard_ids: list[str] | None = None, include_query_ids: list[str] | None = None, ): super().__init__(sql_backend, "hive_metastore", schema, "lakeview_dashboards", Dashboard) self._ws = ws self._include_dashboard_ids = include_dashboard_ids + self._exclude_dashboard_ids = exclude_dashboard_ids self._include_query_ids = include_query_ids def _crawl(self) -> Iterable[Dashboard]: From 917a666d7d6faa8e3ab838a879a0b62cb9a29d6a Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 16 Dec 2024 15:49:57 +0100 Subject: [PATCH 2/5] Test lakeview crawler to not persist dashboard with ids in exclude dashboard ids --- tests/unit/assessment/test_dashboards.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/unit/assessment/test_dashboards.py b/tests/unit/assessment/test_dashboards.py index cf0ae8f719..b5a0a7badc 100644 --- a/tests/unit/assessment/test_dashboards.py +++ b/tests/unit/assessment/test_dashboards.py @@ -458,6 +458,19 @@ def get_dashboards(dashboard_id: str) -> SdkLakeviewDashboard: ws.lakeview.list.assert_not_called() +def test_lakeview_dashboard_crawler_skips_exclude_dashboard_ids(caplog, mock_backend) -> None: + ws = create_autospec(WorkspaceClient) + dashboards = [SdkLakeviewDashboard(dashboard_id="did1"), SdkLakeviewDashboard(dashboard_id="did2")] + ws.lakeview.list.side_effect = lambda: (dashboard for dashboard in dashboards) # Expects an iterator + crawler = LakeviewDashboardCrawler(ws, mock_backend, "test", exclude_dashboard_ids=["did2"]) + + crawler.snapshot() + + rows = mock_backend.rows_written_for("hive_metastore.test.lakeview_dashboards", "overwrite") + assert rows == [Row(id="did1", name=None, parent=None, query_ids=[], tags=[], creator_id=None)] + ws.lakeview.list.assert_called_once() + + def test_lakeview_dashboard_crawler_list_queries_includes_query_ids(mock_backend) -> None: ws = create_autospec(WorkspaceClient) datasets = [ From f6de9b50520df909309a5523bf11bcac1effc14c Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 16 Dec 2024 15:50:19 +0100 Subject: [PATCH 3/5] Skip when dashboard id in exclude dashboard ids --- src/databricks/labs/ucx/assessment/dashboards.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/databricks/labs/ucx/assessment/dashboards.py b/src/databricks/labs/ucx/assessment/dashboards.py index 4b2146b47c..66a472c8e5 100644 --- a/src/databricks/labs/ucx/assessment/dashboards.py +++ b/src/databricks/labs/ucx/assessment/dashboards.py @@ -324,6 +324,8 @@ def _crawl(self) -> Iterable[Dashboard]: for sdk_dashboard in self._list_dashboards(): if sdk_dashboard.dashboard_id is None: continue + if sdk_dashboard.dashboard_id in (self._exclude_dashboard_ids or []): + continue dashboard = Dashboard.from_sdk_lakeview_dashboard(sdk_dashboard) dashboards.append(dashboard) return dashboards From 9436237fe07d24af98c2ae47c165071db5164a0c Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 16 Dec 2024 15:53:37 +0100 Subject: [PATCH 4/5] Test both include and exclude dashboard ids are present --- tests/unit/assessment/test_dashboards.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/unit/assessment/test_dashboards.py b/tests/unit/assessment/test_dashboards.py index b5a0a7badc..b9ffa948d6 100644 --- a/tests/unit/assessment/test_dashboards.py +++ b/tests/unit/assessment/test_dashboards.py @@ -471,6 +471,23 @@ def test_lakeview_dashboard_crawler_skips_exclude_dashboard_ids(caplog, mock_bac ws.lakeview.list.assert_called_once() +def test_lakeview_dashboard_crawler_skips_exclude_dashboard_ids_takes_priority_over_include_dashboard_ids( + caplog, mock_backend +) -> None: + ws = create_autospec(WorkspaceClient) + ws.lakeview.get.side_effect = lambda dashboard_id: SdkLakeviewDashboard(dashboard_id=dashboard_id) + crawler = LakeviewDashboardCrawler( + ws, mock_backend, "test", include_dashboard_ids=["did1", "did2"], exclude_dashboard_ids=["did2"] + ) + + crawler.snapshot() + + rows = mock_backend.rows_written_for("hive_metastore.test.lakeview_dashboards", "overwrite") + assert rows == [Row(id="did1", name=None, parent=None, query_ids=[], tags=[], creator_id=None)] + ws.lakeview.list.assert_not_called() + ws.lakeview.get.assert_has_calls([call("did1"), call("did2")]) + + def test_lakeview_dashboard_crawler_list_queries_includes_query_ids(mock_backend) -> None: ws = create_autospec(WorkspaceClient) datasets = [ From da928af7df02661e92355eef4734d9373c272267 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 16 Dec 2024 16:13:23 +0100 Subject: [PATCH 5/5] Pass lakeview dashboard ids from state to exclude when crawling Lakeview dashboards --- src/databricks/labs/ucx/contexts/application.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index 65a0c7a871..102fcdb432 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -303,6 +303,7 @@ def lakeview_crawler(self) -> LakeviewDashboardCrawler: self.sql_backend, self.inventory_database, include_dashboard_ids=self.config.include_dashboard_ids, + exclude_dashboard_ids=list(self.install_state.dashboards.values()), include_query_ids=self.config.include_query_ids, )