Skip to content

Commit

Permalink
Refactor Python transpiler code (databrickslabs#1314)
Browse files Browse the repository at this point in the history
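This PR gives the existing Python transpiler code a clearer package
structure (moving it under `transpiler/`), and also renames a few classes
and functions for clarity (for example, `MorphConfig` becomes
`TranspileConfig`, `Snow` becomes `Snowflake`, and `morph` becomes `transpile`).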

Progresses databrickslabs#1298
  • Loading branch information
ericvergnaud authored Dec 9, 2024
1 parent ad69ec0 commit fb9fc70
Show file tree
Hide file tree
Showing 40 changed files with 166 additions and 163 deletions.
8 changes: 4 additions & 4 deletions src/databricks/labs/remorph/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@

from databricks.labs.blueprint.cli import App
from databricks.labs.blueprint.entrypoint import get_logger
from databricks.labs.remorph.config import SQLGLOT_DIALECTS, MorphConfig
from databricks.labs.remorph.config import SQLGLOT_DIALECTS, TranspileConfig
from databricks.labs.remorph.contexts.application import ApplicationContext
from databricks.labs.remorph.helpers.recon_config_utils import ReconConfigPrompts
from databricks.labs.remorph.reconcile.runner import ReconcileRunner
from databricks.labs.remorph.lineage import lineage_generator
from databricks.labs.remorph.transpiler.execute import morph
from databricks.labs.remorph.transpiler.execute import transpile as do_transpile
from databricks.labs.remorph.reconcile.execute import RECONCILE_OPERATION_NAME, AGG_RECONCILE_OPERATION_NAME
from databricks.labs.remorph.jvmproxy import proxy_command

Expand Down Expand Up @@ -69,7 +69,7 @@ def transpile(
catalog_name = catalog_name if catalog_name else default_config.catalog_name
schema_name = schema_name if schema_name else default_config.schema_name

config = MorphConfig(
config = TranspileConfig(
source=source.lower(),
input_sql=input_sql,
output_folder=output_folder,
Expand All @@ -80,7 +80,7 @@ def transpile(
sdk_config=sdk_config,
)

status = morph(ctx.workspace_client, config)
status = do_transpile(ctx.workspace_client, config)

print(json.dumps(status))

Expand Down
11 changes: 6 additions & 5 deletions src/databricks/labs/remorph/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@

from sqlglot.dialects.dialect import Dialect, Dialects, DialectType

from databricks.labs.remorph.helpers.morph_status import ParserError
from databricks.labs.remorph.transpiler.transpile_status import ParserError
from databricks.labs.remorph.reconcile.recon_config import Table
from databricks.labs.remorph.snow import databricks, oracle, snowflake, presto
from databricks.labs.remorph.transpiler.sqlglot.generator import databricks
from databricks.labs.remorph.transpiler.sqlglot.parsers import oracle, presto, snowflake

logger = logging.getLogger(__name__)

Expand All @@ -19,7 +20,7 @@
"postgresql": Dialects.POSTGRES,
"presto": presto.Presto,
"redshift": Dialects.REDSHIFT,
"snowflake": snowflake.Snow,
"snowflake": snowflake.Snowflake,
"sqlite": Dialects.SQLITE,
"teradata": Dialects.TERADATA,
"trino": Dialects.TRINO,
Expand All @@ -37,7 +38,7 @@ def get_key_from_dialect(input_dialect: Dialect) -> str:


@dataclass
class MorphConfig:
class TranspileConfig:
__file__ = "config.yml"
__version__ = 1

Expand Down Expand Up @@ -126,5 +127,5 @@ class ReconcileConfig:

@dataclass
class RemorphConfigs:
morph: MorphConfig | None = None
transpile: TranspileConfig | None = None
reconcile: ReconcileConfig | None = None
8 changes: 4 additions & 4 deletions src/databricks/labs/remorph/contexts/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from databricks.sdk.errors import NotFound
from databricks.sdk.service.iam import User

from databricks.labs.remorph.config import MorphConfig, ReconcileConfig, RemorphConfigs
from databricks.labs.remorph.config import TranspileConfig, ReconcileConfig, RemorphConfigs
from databricks.labs.remorph.deployment.configurator import ResourceConfigurator
from databricks.labs.remorph.deployment.dashboard import DashboardDeployment
from databricks.labs.remorph.deployment.installation import WorkspaceInstallation
Expand Down Expand Up @@ -50,9 +50,9 @@ def installation(self) -> Installation:
return Installation.assume_user_home(self.workspace_client, self.product_info.product_name())

@cached_property
def transpile_config(self) -> MorphConfig | None:
def transpile_config(self) -> TranspileConfig | None:
try:
return self.installation.load(MorphConfig)
return self.installation.load(TranspileConfig)
except NotFound as err:
logger.debug(f"Couldn't find existing `transpile` installation: {err}")
return None
Expand All @@ -67,7 +67,7 @@ def recon_config(self) -> ReconcileConfig | None:

@cached_property
def remorph_config(self) -> RemorphConfigs:
return RemorphConfigs(morph=self.transpile_config, reconcile=self.recon_config)
return RemorphConfigs(transpile=self.transpile_config, reconcile=self.recon_config)

@cached_property
def connect_config(self) -> Config:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from databricks.labs.blueprint.wheels import ProductInfo
from databricks.labs.remorph.coverage import commons
from databricks.labs.remorph.snow.databricks import Databricks
from databricks.labs.remorph.snow.snowflake import Snow
from databricks.labs.remorph.transpiler.sqlglot.generator.databricks import Databricks
from databricks.labs.remorph.transpiler.sqlglot.parsers.snowflake import Snowflake

if __name__ == "__main__":
input_dir = commons.get_env_var("INPUT_DIR_PARENT", required=True)
Expand All @@ -22,7 +22,7 @@
"Remorph",
REMORPH_COMMIT_HASH,
remorph_version,
Snow,
Snowflake,
Databricks,
Path(input_dir) / 'snowflake',
Path(output_dir),
Expand Down
6 changes: 3 additions & 3 deletions src/databricks/labs/remorph/deployment/installation.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,10 @@ def uninstall(self, config: RemorphConfigs):
logger.error(f"Check if {self._installation.install_folder()} is present. Aborting uninstallation.")
return

if config.morph:
if config.transpile:
logging.info(
f"Won't remove transpile validation schema `{config.morph.schema_name}` "
f"from catalog `{config.morph.catalog_name}`. Please remove it manually."
f"Won't remove transpile validation schema `{config.transpile.schema_name}` "
f"from catalog `{config.transpile.catalog_name}`. Please remove it manually."
)

if config.reconcile:
Expand Down
4 changes: 2 additions & 2 deletions src/databricks/labs/remorph/helpers/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from io import StringIO

from databricks.labs.lsql.backends import SqlBackend
from databricks.labs.remorph.config import MorphConfig, ValidationResult
from databricks.labs.remorph.config import TranspileConfig, ValidationResult
from databricks.sdk.errors.base import DatabricksError

logger = logging.getLogger(__name__)
Expand All @@ -16,7 +16,7 @@ class Validator:
def __init__(self, sql_backend: SqlBackend):
self._sql_backend = sql_backend

def validate_format_result(self, config: MorphConfig, input_sql: str) -> ValidationResult:
def validate_format_result(self, config: TranspileConfig, input_sql: str) -> ValidationResult:
"""
Validates the SQL query and formats the result.
Expand Down
14 changes: 7 additions & 7 deletions src/databricks/labs/remorph/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

from databricks.labs.remorph.__about__ import __version__
from databricks.labs.remorph.config import (
MorphConfig,
TranspileConfig,
ReconcileConfig,
SQLGLOT_DIALECTS,
DatabaseConfig,
Expand Down Expand Up @@ -93,9 +93,9 @@ def configure(self, module: str | None = None) -> RemorphConfigs:
def _is_testing(self):
return self._product_info.product_name() != "remorph"

def _configure_transpile(self) -> MorphConfig:
def _configure_transpile(self) -> TranspileConfig:
try:
self._installation.load(MorphConfig)
self._installation.load(TranspileConfig)
logger.info("Remorph `transpile` is already installed on this workspace.")
if not self._prompts.confirm("Do you want to override the existing installation?"):
raise SystemExit(
Expand All @@ -113,7 +113,7 @@ def _configure_transpile(self) -> MorphConfig:
logger.info("Finished configuring remorph `transpile`.")
return config

def _configure_new_transpile_installation(self) -> MorphConfig:
def _configure_new_transpile_installation(self) -> TranspileConfig:
default_config = self._prompt_for_new_transpile_installation()
runtime_config = None
catalog_name = "remorph"
Expand All @@ -133,7 +133,7 @@ def _configure_new_transpile_installation(self) -> MorphConfig:
self._save_config(config)
return config

def _prompt_for_new_transpile_installation(self) -> MorphConfig:
def _prompt_for_new_transpile_installation(self) -> TranspileConfig:
logger.info("Please answer a few questions to configure remorph `transpile`")
source = self._prompts.choice("Select the source:", list(SQLGLOT_DIALECTS.keys()))
input_sql = self._prompts.question("Enter input SQL path (directory/file)")
Expand All @@ -142,7 +142,7 @@ def _prompt_for_new_transpile_installation(self) -> MorphConfig:
"Would you like to validate the syntax and semantics of the transpiled queries?"
)

return MorphConfig(
return TranspileConfig(
source=source,
skip_validation=(not run_validation),
mode="current", # mode will not have a prompt as this is a hidden flag
Expand Down Expand Up @@ -269,7 +269,7 @@ def _configure_volume(
default_volume_name,
)

def _save_config(self, config: MorphConfig | ReconcileConfig):
def _save_config(self, config: TranspileConfig | ReconcileConfig):
logger.info(f"Saving configuration file {config.__file__}")
self._installation.save(config)
ws_file_url = self._installation.workspace_link(config.__file__)
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/remorph/intermediate/engine_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from sqlglot.dialects.dialect import Dialect

from databricks.labs.remorph.snow.sql_transpiler import SqlglotEngine
from databricks.labs.remorph.transpiler.sqlglot.sqlglot_engine import SqlglotEngine

logger = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from databricks.labs.remorph.reconcile.connectors.databricks import DatabricksDataSource
from databricks.labs.remorph.reconcile.connectors.oracle import OracleDataSource
from databricks.labs.remorph.reconcile.connectors.snowflake import SnowflakeDataSource
from databricks.labs.remorph.snow.databricks import Databricks
from databricks.labs.remorph.snow.oracle import Oracle
from databricks.labs.remorph.snow.snowflake import Snow
from databricks.labs.remorph.transpiler.sqlglot.generator.databricks import Databricks
from databricks.labs.remorph.transpiler.sqlglot.parsers.oracle import Oracle
from databricks.labs.remorph.transpiler.sqlglot.parsers.snowflake import Snowflake
from databricks.sdk import WorkspaceClient


Expand All @@ -17,7 +17,7 @@ def create_adapter(
ws: WorkspaceClient,
secret_scope: str,
) -> DataSource:
if isinstance(engine, Snow):
if isinstance(engine, Snowflake):
return SnowflakeDataSource(engine, spark, ws, secret_scope)
if isinstance(engine, Oracle):
return OracleDataSource(engine, spark, ws, secret_scope)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
coalesce,
)
from databricks.labs.remorph.reconcile.recon_config import ColumnThresholds
from databricks.labs.remorph.snow.databricks import Databricks
from databricks.labs.remorph.transpiler.sqlglot.generator.databricks import Databricks

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/remorph/reconcile/schema_compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
SchemaReconcileOutput,
Table,
)
from databricks.labs.remorph.snow.databricks import Databricks
from databricks.labs.remorph.transpiler.sqlglot.generator.databricks import Databricks

logger = logging.getLogger(__name__)

Expand Down
36 changes: 18 additions & 18 deletions src/databricks/labs/remorph/transpiler/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sqlglot.dialects.dialect import Dialect
from databricks.labs.remorph.__about__ import __version__
from databricks.labs.remorph.config import (
MorphConfig,
TranspileConfig,
get_dialect,
TranspilationResult,
ValidationResult,
Expand All @@ -18,14 +18,14 @@
make_dir,
remove_bom,
)
from databricks.labs.remorph.helpers.morph_status import (
MorphStatus,
from databricks.labs.remorph.transpiler.transpile_status import (
TranspileStatus,
ParserError,
ValidationError,
)
from databricks.labs.remorph.helpers.validation import Validator
from databricks.labs.remorph.snow import lca_utils
from databricks.labs.remorph.snow.sql_transpiler import SqlglotEngine
from databricks.labs.remorph.transpiler.sqlglot import lca_utils
from databricks.labs.remorph.transpiler.sqlglot.sqlglot_engine import SqlglotEngine
from databricks.sdk import WorkspaceClient

# pylint: disable=unspecified-encoding
Expand All @@ -34,7 +34,7 @@


def _process_file(
config: MorphConfig,
config: TranspileConfig,
validator: Validator | None,
transpiler: SqlglotEngine,
input_file: str | Path,
Expand Down Expand Up @@ -82,7 +82,7 @@ def _process_file(


def _process_directory(
config: MorphConfig,
config: TranspileConfig,
validator: Validator | None,
transpiler: SqlglotEngine,
root: str | Path,
Expand Down Expand Up @@ -121,7 +121,7 @@ def _process_directory(


def _process_recursive_dirs(
config: MorphConfig, input_sql_path: Path, validator: Validator | None, transpiler: SqlglotEngine
config: TranspileConfig, input_sql_path: Path, validator: Validator | None, transpiler: SqlglotEngine
):
input_sql = input_sql_path
parse_error_list = []
Expand All @@ -144,11 +144,11 @@ def _process_recursive_dirs(

error_log = parse_error_list + validate_error_list

return MorphStatus(file_list, counter, len(parse_error_list), len(validate_error_list), error_log)
return TranspileStatus(file_list, counter, len(parse_error_list), len(validate_error_list), error_log)


@timeit
def morph(workspace_client: WorkspaceClient, config: MorphConfig):
def transpile(workspace_client: WorkspaceClient, config: TranspileConfig):
"""
[Experimental] Transpiles the SQL queries from one dialect to another.
Expand All @@ -161,7 +161,7 @@ def morph(workspace_client: WorkspaceClient, config: MorphConfig):

input_sql = Path(config.input_sql)
status = []
result = MorphStatus([], 0, 0, 0, [])
result = TranspileStatus([], 0, 0, 0, [])

read_dialect = config.get_read_dialect()
transpiler = SqlglotEngine(read_dialect)
Expand All @@ -186,7 +186,7 @@ def morph(workspace_client: WorkspaceClient, config: MorphConfig):
config, validator, transpiler, input_sql, output_file
)
error_log = parse_error + validation_error
result = MorphStatus([str(input_sql)], no_of_sqls, len(parse_error), len(validation_error), error_log)
result = TranspileStatus([str(input_sql)], no_of_sqls, len(parse_error), len(validation_error), error_log)
else:
msg = f"{input_sql} is not a SQL file."
logger.warning(msg)
Expand Down Expand Up @@ -246,16 +246,16 @@ def _parse(

def _validation(
validator: Validator,
config: MorphConfig,
config: TranspileConfig,
sql: str,
) -> ValidationResult:
return validator.validate_format_result(config, sql)


@timeit
def morph_sql(
def transpile_sql(
workspace_client: WorkspaceClient,
config: MorphConfig,
config: TranspileConfig,
sql: str,
) -> tuple[TranspilationResult, ValidationResult | None]:
"""[Experimental] Transpile a single SQL query from one dialect to another."""
Expand All @@ -277,14 +277,14 @@ def morph_sql(


@timeit
def morph_column_exp(
def transpile_column_exp(
workspace_client: WorkspaceClient,
config: MorphConfig,
config: TranspileConfig,
expressions: list[str],
) -> list[tuple[TranspilationResult, ValidationResult | None]]:
"""[Experimental] Transpile a list of SQL expressions from one dialect to another."""
config.skip_validation = True
result = []
for sql in expressions:
result.append(morph_sql(workspace_client, config, sql))
result.append(transpile_sql(workspace_client, config, sql))
return result
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from sqlglot.errors import UnsupportedError
from sqlglot.helper import apply_index_offset, csv

from databricks.labs.remorph.snow import lca_utils, local_expression
from databricks.labs.remorph.transpiler.sqlglot import lca_utils, local_expression

# pylint: disable=too-many-public-methods

Expand Down
Loading

0 comments on commit fb9fc70

Please sign in to comment.