From b7ecacd1f5e45bb740c395ca0c40a359437446ef Mon Sep 17 00:00:00 2001 From: micheledaddetta-databricks Date: Thu, 19 Dec 2024 11:10:45 +0100 Subject: [PATCH] Implement disposition field in SQL backend Having this feature will allow the execution of queries with results of non-negligible size. Using this field during configuration will allow exporting assessment results for larger workspaces. --- src/databricks/labs/ucx/config.py | 4 ++++ src/databricks/labs/ucx/contexts/workspace_cli.py | 4 +++- src/databricks/labs/ucx/install.py | 8 ++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/config.py b/src/databricks/labs/ucx/config.py index b7755baf9d..fa0daf7813 100644 --- a/src/databricks/labs/ucx/config.py +++ b/src/databricks/labs/ucx/config.py @@ -1,6 +1,7 @@ from dataclasses import dataclass from databricks.sdk.core import Config +from databricks.sdk.service.sql import Disposition __all__ = ["WorkspaceConfig"] @@ -92,6 +93,9 @@ class WorkspaceConfig: # pylint: disable=too-many-instance-attributes # Skip TACL migration during table migration skip_tacl_migration: bool = False + # Select SQL query statement disposition, default to INLINE + query_statement_disposition: Disposition | None = None + def replace_inventory_variable(self, text: str) -> str: return text.replace("$inventory", f"hive_metastore.{self.inventory_database}") diff --git a/src/databricks/labs/ucx/contexts/workspace_cli.py b/src/databricks/labs/ucx/contexts/workspace_cli.py index 9ad07d12ca..13e63c95f4 100644 --- a/src/databricks/labs/ucx/contexts/workspace_cli.py +++ b/src/databricks/labs/ucx/contexts/workspace_cli.py @@ -43,7 +43,9 @@ def workspace_client(self) -> WorkspaceClient: @cached_property def sql_backend(self) -> SqlBackend: - return StatementExecutionBackend(self.workspace_client, self.config.warehouse_id) + return StatementExecutionBackend( + self.workspace_client, self.config.warehouse_id, 
disposition=self.config.query_statement_disposition + ) @cached_property def cluster_access(self) -> ClusterAccess: diff --git a/src/databricks/labs/ucx/install.py b/src/databricks/labs/ucx/install.py index c32edeff84..ae12076101 100644 --- a/src/databricks/labs/ucx/install.py +++ b/src/databricks/labs/ucx/install.py @@ -45,6 +45,7 @@ CreateWarehouseRequestWarehouseType, EndpointInfoWarehouseType, SpotInstancePolicy, + Disposition, ) from databricks.sdk.useragent import with_extra @@ -259,6 +260,12 @@ def _prompt_for_new_installation(self) -> WorkspaceConfig: recon_tolerance_percent = int( self.prompts.question("Reconciliation threshold, in percentage", default="5", valid_number=True) ) + + query_statement_disposition = self.prompts.confirm( + "Do you want to use the `EXTERNAL_LINKS` disposition for query statements? (Only needed when exporting more than 25 MiB of data from workspaces with many resources)" + ) + query_statement_disposition = Disposition.EXTERNAL_LINKS if query_statement_disposition else None + return WorkspaceConfig( inventory_database=inventory_database, ucx_catalog=ucx_catalog, @@ -276,6 +283,7 @@ def _prompt_for_new_installation(self) -> WorkspaceConfig: recon_tolerance_percent=recon_tolerance_percent, upload_dependencies=upload_dependencies, default_owner_group=default_owner_group, + query_statement_disposition=query_statement_disposition, ) def _compare_remote_local_versions(self):