diff --git a/CHANGES.md b/CHANGES.md index e586e13a..16718b4f 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,6 +3,11 @@ ## Unreleased - MongoDB: Added Zyp transformations to the CDC subsystem, making it more symmetric to the full-load procedure. +- Query Converter: Added very basic expression converter utility with + CLI interface +- DynamoDB: Added query expression converter for relocating object + references, to support query migrations after the breaking change + with the SQL DDL schema, by v0.0.27. ## 2024/10/09 v0.0.28 - IO: Improved `BulkProcessor` when running per-record operations by diff --git a/codecov.yml b/codecov.yml index dcf7d6db..f5fdefe2 100644 --- a/codecov.yml +++ b/codecov.yml @@ -6,7 +6,7 @@ coverage: project: default: - target: 60% + target: 50% patch: default: diff --git a/cratedb_toolkit/cli.py b/cratedb_toolkit/cli.py index 36a37c8b..a45e7f15 100644 --- a/cratedb_toolkit/cli.py +++ b/cratedb_toolkit/cli.py @@ -8,6 +8,7 @@ from .cluster.cli import cli as cloud_cli from .io.cli import cli as io_cli from .job.cli import cli_list_jobs +from .query.cli import cli as query_cli from .shell.cli import cli as shell_cli from .wtf.cli import cli as wtf_cli @@ -24,6 +25,7 @@ def cli(ctx: click.Context, verbose: bool, debug: bool): cli.add_command(cfr_cli, name="cfr") cli.add_command(cloud_cli, name="cluster") cli.add_command(io_cli, name="load") +cli.add_command(query_cli, name="query") cli.add_command(rockset_cli, name="rockset") cli.add_command(shell_cli, name="shell") cli.add_command(wtf_cli, name="wtf") diff --git a/cratedb_toolkit/query/__init__.py b/cratedb_toolkit/query/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cratedb_toolkit/query/cli.py b/cratedb_toolkit/query/cli.py new file mode 100644 index 00000000..3333cf22 --- /dev/null +++ b/cratedb_toolkit/query/cli.py @@ -0,0 +1,24 @@ +import logging + +import click +from click_aliases import ClickAliasedGroup + +from ..util.cli import boot_click +from .convert.cli import 
convert_query + +logger = logging.getLogger(__name__) + + +@click.group(cls=ClickAliasedGroup) # type: ignore[arg-type] +@click.option("--verbose", is_flag=True, required=False, help="Turn on logging") +@click.option("--debug", is_flag=True, required=False, help="Turn on logging with debug level") +@click.version_option() +@click.pass_context +def cli(ctx: click.Context, verbose: bool, debug: bool): + """ + Query expression utilities. + """ + return boot_click(ctx, verbose, debug) + + +cli.add_command(convert_query, name="convert") diff --git a/cratedb_toolkit/query/convert/__init__.py b/cratedb_toolkit/query/convert/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cratedb_toolkit/query/convert/basic.py b/cratedb_toolkit/query/convert/basic.py new file mode 100644 index 00000000..13f6b8f3 --- /dev/null +++ b/cratedb_toolkit/query/convert/basic.py @@ -0,0 +1,44 @@ +import re +import typing as t + +from attr import define + + +@define +class ObjectAttributeRelocation: + """ + Rule for rewriting `container_from['key']` into `container_to['key']`. + """ + + container_from: str + container_to: str + key: str + + def apply(self, data: str) -> str: + """ + Return `data` with all references to the attribute relocated. + + References are matched on identifier boundaries, so a column merely + ending with the container name, like `metadata['PK']`, stays untouched. + """ + pattern = rf"(?<!\w){re.escape(self.container_from)}\['{re.escape(self.key)}'\]" + replacement = f"{self.container_to}['{self.key}']" + # Use a callable, so backslashes in the replacement are taken literally. + return re.sub(pattern, lambda _: replacement, data) + + +def sql_relocate_attribute(data: str, rules: t.List[ObjectAttributeRelocation]) -> str: + """ + Apply all relocation rules to the SQL expression, in the given order. + """ + for rule in rules: + data = rule.apply(data) + return data + + +def sql_relocate_pks_dynamodb_ctk_0_0_27(data: str, pks: t.List[str]) -> str: + """ + Relocate references to the primary key attributes `pks` from the `data` to the `pk` container. + """ + rules = [ObjectAttributeRelocation("data", "pk", pk) for pk in pks] + return sql_relocate_attribute(data, rules) diff --git a/cratedb_toolkit/query/convert/cli.py b/cratedb_toolkit/query/convert/cli.py new file mode 100644 index 00000000..de7ad41e --- /dev/null +++ b/cratedb_toolkit/query/convert/cli.py @@ -0,0 +1,43 @@ +import logging +import sys +from pathlib import Path + +import click + +from cratedb_toolkit.query.convert.basic import sql_relocate_pks_dynamodb_ctk_0_0_27 +from cratedb_toolkit.util.cli import split_list + +logger = logging.getLogger(__name__) + +
+@click.command() +@click.argument("input") +@click.option("--type", "type_", type=str, required=True, help="Select converter type") +@click.option("--primary-keys", type=str, required=False, help="Define primary keys, using a comma-separated list") +@click.pass_context +def convert_query( + ctx: click.Context, + input: str, # noqa: A002 + type_: str, + primary_keys: str, # not required on the CLI; presumably None when omitted - TODO confirm split_list accepts that +): + """ + Query expression conversion. + """ + + if type_ == "ddb-relocate-pks": # the only converter type implemented here + data = sql_relocate_pks_dynamodb_ctk_0_0_27(read_resource(input), pks=split_list(primary_keys)) + sys.stdout.write(data) # emitted as-is, no trailing newline is appended + else: + raise ValueError(f"Unknown converter: {type_}") + + +def read_resource(resource: str) -> str: + if resource == "-": # "-" selects STDIN as the input + return sys.stdin.read() + + resource_path = Path(resource) # anything else is treated as a filesystem path + if resource_path.exists(): + return resource_path.read_text() + + raise IOError(f"Could not find or access resource: {resource}") # only reached when the path does not exist diff --git a/doc/index.md b/doc/index.md index 60823a02..eea44f6c 100644 --- a/doc/index.md +++ b/doc/index.md @@ -25,6 +25,7 @@ install datasets adapter/index io/index +query/index retention Cluster Flight Recorder (CFR) Ad Hoc Diagnosis (WTF) diff --git a/doc/query/convert.md b/doc/query/convert.md new file mode 100644 index 00000000..c70cc562 --- /dev/null +++ b/doc/query/convert.md @@ -0,0 +1,47 @@ +# Query Expression Converter + +A very basic query expression converter framework with CLI interface. + +## Synopsis +Convert expression using specified converter type. +```shell +ctk query convert --type= +``` + +## Help +```shell +ctk query convert --help +``` + +## Converters + +## DynamoDB primary key relocator +With CTK 0.0.27, there was a breaking change on the DDL schema where +data from DynamoDB is relayed into. + +> DynamoDB: Change CrateDB data model to use (`pk`, `data`, `aux`) columns. +> +> **Attention:** This is a breaking change. + +This converter adjusts SQL query expressions to account for that change, +specifically amending primary key object references.
+ +### Usage + +Supply query expression via STDIN. +```shell +echo "SELECT * FROM foobar WHERE data['PK']" | \ + ctk query convert --type=ddb-relocate-pks --primary-keys=PK,SK - +``` + +Supply query expression via filesystem resource. +```shell +echo "SELECT * FROM foobar WHERE data['PK']" > input.sql +ctk query convert --type=ddb-relocate-pks --primary-keys=PK,SK input.sql > output.sql +cat output.sql +``` + +Result: +```sql +SELECT * FROM foobar WHERE pk['PK'] +``` diff --git a/doc/query/index.md b/doc/query/index.md new file mode 100644 index 00000000..9e154999 --- /dev/null +++ b/doc/query/index.md @@ -0,0 +1,11 @@ +# Query Utilities + +A collection of utilities for working with query expressions, mostly +converters and migration support tasks, still in its infancy. + +## Converters +```{toctree} +:maxdepth: 2 + +convert +``` diff --git a/tests/query/__init__.py b/tests/query/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/query/test_convert.py b/tests/query/test_convert.py new file mode 100644 index 00000000..00791a5f --- /dev/null +++ b/tests/query/test_convert.py @@ -0,0 +1,17 @@ +from click.testing import CliRunner + +from cratedb_toolkit.query.cli import cli + + +def test_query_convert_ddb_relocate_pks(): + """ + Run `ctk query convert --type=ddb-relocate-pks` on a query supplied via STDIN. + """ + legacy_query = "SELECT * FROM foobar WHERE data['PK']" + command_line = "convert --type=ddb-relocate-pks --primary-keys=PK,SK -" + + outcome = CliRunner().invoke(cli, args=command_line, input=legacy_query, catch_exceptions=False) + + # The converter must succeed and emit the query with the reference moved to `pk`. + assert outcome.exit_code == 0 + assert outcome.output == "SELECT * FROM foobar WHERE pk['PK']"