Skip to content

Commit

Permalink
Upgrade script Implementation (#777)
Browse files Browse the repository at this point in the history
closes #769
  • Loading branch information
vijaypavann-db authored and vil1 committed Sep 20, 2024
1 parent 6d46e99 commit 7d28298
Show file tree
Hide file tree
Showing 7 changed files with 316 additions and 8 deletions.
12 changes: 12 additions & 0 deletions src/databricks/labs/remorph/contexts/application.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# pylint: disable=too-many-public-methods
import logging
from functools import cached_property

from databricks.labs.blueprint.installation import Installation
from databricks.labs.blueprint.installer import InstallState
from databricks.labs.blueprint.upgrades import Upgrades
from databricks.labs.blueprint.tui import Prompts
from databricks.labs.blueprint.wheels import ProductInfo
from databricks.labs.lsql.backends import SqlBackend
Expand Down Expand Up @@ -123,4 +125,14 @@ def workspace_installation(self) -> WorkspaceInstallation:
self.prompts,
self.installation,
self.recon_deployment,
self.wheels,
self.upgrades,
)

@cached_property
def upgrades(self):
    """Return an ``Upgrades`` instance built from this context's product info and installation."""
    product, current_installation = self.product_info, self.installation
    return Upgrades(product, current_installation)

@cached_property
def wheels(self):
    """Return the object produced by ``product_info.wheels`` for the workspace client."""
    build_wheels = self.product_info.wheels
    return build_wheels(self.workspace_client)
23 changes: 23 additions & 0 deletions src/databricks/labs/remorph/deployment/installation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@

from databricks.labs.remorph.config import RemorphConfigs
from databricks.labs.remorph.deployment.recon import ReconDeployment
from databricks.labs.blueprint.wheels import WheelsV2

from databricks.sdk.errors.platform import InvalidParameterValue
from databricks.labs.blueprint.upgrades import Upgrades


logger = logging.getLogger("databricks.labs.remorph.install")

Expand All @@ -18,15 +23,33 @@ def __init__(
prompts: Prompts,
installation: Installation,
recon_deployment: ReconDeployment,
wheels: WheelsV2,
upgrades: Upgrades,
):
self._ws = ws
self._prompts = prompts
self._installation = installation
self._recon_deployment = recon_deployment
self._wheels = wheels
self._upgrades = upgrades

def _apply_upgrades(self):
try:
self._upgrades.apply(self._ws)
except (InvalidParameterValue, NotFound) as err:
logger.warning(f"Unable to apply Upgrades due to: {err}")

def _upload_wheel(self):
with self._wheels:
wheel_paths = [self._wheels.upload_to_wsfs()]
wheel_paths = [f"/Workspace{wheel}" for wheel in wheel_paths]
return wheel_paths

# Install the Remorph components described by `config`.
# Calls the reconcile deployment with `config.reconcile` when it is truthy, and
# invokes `_upload_wheel` and `_apply_upgrades`. The list returned by
# `_upload_wheel` is not used here.
# NOTE(review): indentation is not visible in this view; the wheel upload and
# upgrade calls are presumed to run regardless of `config.reconcile` — confirm.
def install(self, config: RemorphConfigs):
if config.reconcile:
self._recon_deployment.install(config.reconcile)
self._upload_wheel()
self._apply_upgrades()

def uninstall(self, config: RemorphConfigs):
# This will remove all the Remorph modules
Expand Down
2 changes: 2 additions & 0 deletions src/databricks/labs/remorph/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
from databricks.labs.blueprint.installer import InstallState
from databricks.labs.blueprint.tui import Prompts
from databricks.labs.blueprint.wheels import ProductInfo


from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound, PermissionDenied
from databricks.sdk.service.catalog import Privilege
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,9 @@ CREATE TABLE IF NOT EXISTS main (
operation_name STRING NOT NULL,
start_ts TIMESTAMP,
end_ts TIMESTAMP
)
TBLPROPERTIES (
'delta.columnMapping.mode' = 'name',
'delta.minReaderVersion' = '2',
'delta.minWriterVersion' = '5'
);
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
# pylint: disable=invalid-name
import logging
import re
from importlib.resources import files

import databricks.labs.remorph.resources

from databricks.labs.blueprint.installation import Installation
from databricks.labs.blueprint.tui import Prompts
from databricks.sdk import WorkspaceClient

from databricks.labs.remorph.contexts.application import ApplicationContext
from databricks.labs.remorph.deployment.recon import RECON_JOB_NAME
from databricks.labs.remorph.helpers import db_sql

logger = logging.getLogger(__name__)


def _replace_patterns(sql_text: str) -> str:
"""
Replace the STRUCT and MAP datatypes in the SQL text with empty string
"""
parsed_sql_text = sql_text
for pattern in (r'STRUCT<.*?>', r'MAP<.*?>'):
parsed_sql_text = re.sub(pattern, "", parsed_sql_text, flags=re.DOTALL)
return parsed_sql_text


def _extract_columns_with_datatype(sql_text: str) -> list[str]:
"""
Extract the columns with datatype from the SQL text
Example:
Input: CREATE TABLE main (
recon_table_id BIGINT NOT NULL,
report_type STRING NOT NULL
);
Output: [recon_table_id BIGINT NOT NULL,
report_type STRING NOT NULL]
"""
return sql_text[sql_text.index("(") + 1 : sql_text.index(")")].strip().split(",")


def _extract_column_name(column_with_datatype: str) -> str:
"""
Extract the column name from the column with datatype.
Example:
Input: \n recon_table_id BIGINT NOT NULL,
Output: recon_table_id
"""
return column_with_datatype.strip("\n").strip().split(" ")[0]


def _main_table_query() -> str:
    """
    Read the main table DDL from the packaged main.sql resource file
    :return: str
    """
    ddl_file = (
        files(databricks.labs.remorph.resources)
        .joinpath("reconcile/queries/installation")
        .joinpath("main.sql")
    )
    return ddl_file.read_text()


def _current_main_table_columns() -> list[str]:
    """
    Derive the column names from the main table DDL, after the STRUCT and MAP
    datatypes have been removed from it
    :return: column_names: list[str]
    """
    simplified_ddl = _replace_patterns(_main_table_query())
    column_definitions = _extract_columns_with_datatype(simplified_ddl)
    return list(map(_extract_column_name, column_definitions))


def _installed_main_table_columns(ws: WorkspaceClient, table_identifier: str) -> list[str]:
    """
    Run `DESC <table_identifier>` through the SQL backend obtained for the workspace
    client and collect the `col_name` value of every returned row
    :return: column_names: list[str]
    """
    backend = db_sql.get_sql_backend(ws)
    described_rows = list(backend.fetch(f"DESC {table_identifier}"))
    return [described.col_name for described in described_rows]


def _main_table_mismatch(installed_main_table_columns, current_main_table_columns) -> bool:
# Compare the current main table columns with the installed main table columns
mismatch = False
if "operation_name" in installed_main_table_columns and len(installed_main_table_columns) != len(
current_main_table_columns
):
mismatch = True
if sorted(installed_main_table_columns) != sorted(current_main_table_columns):
mismatch = True
return mismatch


def _recreate_main_table_sql(
    table_identifier: str,
    installed_main_table_columns: list[str],
    current_main_table_columns: list[str],
    prompts: Prompts,
) -> str | None:
    """
    Choose the SQL statement used to recreate the main table.
    * When every current main table column exists in the installed main table, return a CTAS statement
      that rebuilds the table from its own data with the current columns
    * Otherwise ask the user whether the main table should be recreated:
        - on confirmation return the DDL from the main DDL file
        - else log an error message and return None
    :param table_identifier: name of the table to recreate
    :param installed_main_table_columns: column names of the installed table
    :param current_main_table_columns: column names from the current DDL
    :param prompts: used to ask the user for confirmation
    :return: the SQL to execute, or None when nothing should be executed
    """
    missing_columns = set(current_main_table_columns).difference(installed_main_table_columns)
    if not missing_columns:
        return f"CREATE OR REPLACE TABLE {table_identifier} AS SELECT {','.join(current_main_table_columns)} FROM {table_identifier}"

    if prompts.confirm("The `main` table columns are not as expected. Do you want to recreate the `main` table?"):
        return _main_table_query()

    logger.error("The `main` table columns are not as expected. Please check and recreate the `main` table.")
    return None


def _upgrade_reconcile_metadata_main_table(
    installation: Installation,
    ws: WorkspaceClient,
    app_context: ApplicationContext,
):
    """
    Add operation_name column to the `main` table as part of the upgrade process.
    - Compare the current main table columns with the installed main table columns. If there is any mismatch:
        * Verify all the current main table columns are present in the installed main table and then use CTAS to recreate the main table
        * If any of the current main table columns are missing in the installed main table, prompt the user to recreate the main table:
            - If the user confirms, recreate the main table using the main DDL file, else log an error message and exit
    :param installation: used to save the reconcile config once the SQL has been executed
    :param ws: workspace client used to obtain the SQL backend
    :param app_context: provides the reconcile config and the prompts
    :raises AssertionError: when the reconcile config is not present
    """
    reconcile_config = app_context.recon_config
    # Explicit raise instead of `assert` so the check survives `python -O`.
    if not reconcile_config:
        raise AssertionError("Reconcile config must be present to upgrade the reconcile metadata main table")

    metadata_config = reconcile_config.metadata_config
    # The metadata table is named `main` (see the DDL and the prompts); `main1` never matched it.
    table_identifier = f"{metadata_config.catalog}.{metadata_config.schema}.main"
    installed_main_table_columns = _installed_main_table_columns(ws, table_identifier)
    # Parse the DDL once and reuse the result for both the comparison and the recreate step.
    current_main_table_columns = _current_main_table_columns()

    # NOTE(review): this ALTER statement is what gets executed when NO mismatch is detected,
    # i.e. when the installed columns already equal the current ones — confirm this is intended.
    sql: str | None = f"ALTER TABLE {table_identifier} ADD COLUMN operation_name STRING AFTER report_type"
    if _main_table_mismatch(installed_main_table_columns, current_main_table_columns):
        logger.info("Recreating main table")
        sql = _recreate_main_table_sql(
            table_identifier, installed_main_table_columns, current_main_table_columns, app_context.prompts
        )
    if sql:
        logger.debug(f"Executing SQL to upgrade main table: \n{sql}")
        db_sql.get_sql_backend(ws).execute(sql)
        installation.save(reconcile_config)
        logger.debug("Upgraded Reconcile main table")


def _upgrade_reconcile_workflow(app_context: ApplicationContext):
    """
    Redeploy the reconcile job when a reconcile config is present; do nothing otherwise.
    :param app_context: provides the reconcile config and the job deployment
    """
    if not app_context.recon_config:
        return
    logger.info("Upgrading reconcile workflow")
    job_deployment = app_context.job_deployment
    job_deployment.deploy_recon_job(RECON_JOB_NAME, app_context.recon_config)
    logger.debug("Upgraded reconcile workflow")


def upgrade(installation: Installation, ws: WorkspaceClient):
    """
    Entry point of the upgrade script: upgrade the reconcile metadata main table and the reconcile workflow.

    The main table upgrade requires a reconcile config and raises AssertionError without one,
    so it is only attempted when the config is present. The workflow upgrade performs its own check.
    :param installation: installation used to save the reconcile config
    :param ws: workspace client
    """
    app_context = ApplicationContext(ws)
    if app_context.recon_config:
        _upgrade_reconcile_metadata_main_table(installation, ws, app_context)
    else:
        logger.info("Reconcile config not found, skipping the reconcile metadata main table upgrade")
    _upgrade_reconcile_workflow(app_context)
38 changes: 31 additions & 7 deletions tests/unit/deployment/test_installation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import pytest
from databricks.labs.blueprint.installation import MockInstallation, Installation
from databricks.labs.blueprint.tui import MockPrompts
from databricks.labs.blueprint.wheels import WheelsV2
from databricks.labs.blueprint.upgrades import Upgrades

from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound
from databricks.sdk.service import iam
Expand Down Expand Up @@ -35,6 +38,9 @@ def test_install_all(ws):
)
recon_deployment = create_autospec(ReconDeployment)
installation = create_autospec(Installation)
wheels = create_autospec(WheelsV2)
upgrades = create_autospec(Upgrades)

transpile_config = MorphConfig(
source="snowflake",
input_sql="/tmp/queries/snow6",
Expand All @@ -60,14 +66,17 @@ def test_install_all(ws):
),
)
config = RemorphConfigs(morph=transpile_config, reconcile=reconcile_config)
installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment)
installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment, wheels, upgrades)
installation.install(config)


def test_no_recon_component_installation(ws):
prompts = MockPrompts({})
recon_deployment = create_autospec(ReconDeployment)
installation = create_autospec(Installation)
wheels = create_autospec(WheelsV2)
upgrades = create_autospec(Upgrades)

transpile_config = MorphConfig(
source="snowflake",
input_sql="/tmp/queries/snow7",
Expand All @@ -78,7 +87,7 @@ def test_no_recon_component_installation(ws):
mode="current",
)
config = RemorphConfigs(morph=transpile_config)
installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment)
installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment, wheels, upgrades)
installation.install(config)
recon_deployment.install.assert_not_called()

Expand All @@ -87,6 +96,9 @@ def test_recon_component_installation(ws):
recon_deployment = create_autospec(ReconDeployment)
installation = create_autospec(Installation)
prompts = MockPrompts({})
wheels = create_autospec(WheelsV2)
upgrades = create_autospec(Upgrades)

reconcile_config = ReconcileConfig(
data_source="oracle",
report_type="all",
Expand All @@ -103,7 +115,7 @@ def test_recon_component_installation(ws):
),
)
config = RemorphConfigs(reconcile=reconcile_config)
installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment)
installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment, wheels, upgrades)
installation.install(config)
recon_deployment.install.assert_called()

Expand All @@ -116,7 +128,10 @@ def test_negative_uninstall_confirmation(ws):
)
installation = create_autospec(Installation)
recon_deployment = create_autospec(ReconDeployment)
ws_installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment)
wheels = create_autospec(WheelsV2)
upgrades = create_autospec(Upgrades)

ws_installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment, wheels, upgrades)
config = RemorphConfigs()
ws_installation.uninstall(config)
installation.remove.assert_not_called()
Expand All @@ -132,7 +147,10 @@ def test_missing_installation(ws):
installation.files.side_effect = NotFound("Installation not found")
installation.install_folder.return_value = "~/mock"
recon_deployment = create_autospec(ReconDeployment)
ws_installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment)
wheels = create_autospec(WheelsV2)
upgrades = create_autospec(Upgrades)

ws_installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment, wheels, upgrades)
config = RemorphConfigs()
ws_installation.uninstall(config)
installation.remove.assert_not_called()
Expand Down Expand Up @@ -175,7 +193,10 @@ def test_uninstall_configs_exist(ws):
config = RemorphConfigs(morph=transpile_config, reconcile=reconcile_config)
installation = MockInstallation({})
recon_deployment = create_autospec(ReconDeployment)
ws_installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment)
wheels = create_autospec(WheelsV2)
upgrades = create_autospec(Upgrades)

ws_installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment, wheels, upgrades)
ws_installation.uninstall(config)
recon_deployment.uninstall.assert_called()
installation.assert_removed()
Expand All @@ -189,7 +210,10 @@ def test_uninstall_configs_missing(ws):
)
installation = MockInstallation()
recon_deployment = create_autospec(ReconDeployment)
ws_installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment)
wheels = create_autospec(WheelsV2)
upgrades = create_autospec(Upgrades)

ws_installation = WorkspaceInstallation(ws, prompts, installation, recon_deployment, wheels, upgrades)
config = RemorphConfigs()
ws_installation.uninstall(config)
recon_deployment.uninstall.assert_not_called()
Expand Down
Loading

0 comments on commit 7d28298

Please sign in to comment.