Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Core refactors #126

Merged
merged 18 commits into from
Jun 22, 2023
Merged
71 changes: 32 additions & 39 deletions src/gretel_trainer/relational/backup.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
from __future__ import annotations

from dataclasses import asdict, dataclass
from typing import Any, Optional
from typing import Any, Optional, Union

from gretel_trainer.relational.artifacts import ArtifactCollection
from gretel_trainer.relational.core import ForeignKey, RelationalData
from gretel_trainer.relational.core import ForeignKey, RelationalData, Scope
from gretel_trainer.relational.json import InventedTableMetadata, ProducerMetadata


@dataclass
class BackupRelationalDataTable:
    """Serializable snapshot of one table's schema details.

    The two metadata fields hold plain dictionaries (or ``None`` when the
    table has no such metadata) rather than metadata objects.
    """

    # Column name(s) that make up the table's primary key.
    primary_key: list[str]
    # Names of the table's columns.
    columns: list[str]
    # Dict form of the table's invented-table metadata, if any.
    invented_table_metadata: Optional[dict[str, Any]] = None
    # Dict form of the table's producer metadata, if any.
    producer_metadata: Optional[dict[str, Any]] = None


@dataclass
Expand All @@ -31,51 +33,46 @@ def from_fk(cls, fk: ForeignKey) -> BackupForeignKey:
)


@dataclass
class BackupRelationalJson:
    """Plain-data copy of the fields kept for one relational JSON entry."""

    # Name of the source table the entry was built from.
    original_table_name: str
    # Primary key column name(s) of that source table.
    original_primary_key: list[str]
    # Column names of that source table.
    original_columns: list[str]
    # NOTE(review): direction of this name mapping is not visible here;
    # confirm against the relational JSON implementation.
    table_name_mappings: dict[str, str]


@dataclass
class BackupRelationalData:
    """Serializable snapshot of a ``RelationalData`` instance's tables and keys."""

    # Per-table schema snapshots, keyed by table name.
    tables: dict[str, BackupRelationalDataTable]
    # Foreign keys collected across the tables.
    foreign_keys: list[BackupForeignKey]

    @classmethod
    def from_relational_data(cls, rel_data: RelationalData) -> BackupRelationalData:
        """Build a backup from ``rel_data``.

        Every table returned by ``rel_data.list_all_tables(Scope.ALL)`` is
        recorded with its primary key, columns and (when present) invented
        table / producer metadata converted to dictionaries.

        Args:
            rel_data: The relational data to snapshot.

        Returns:
            A ``BackupRelationalData`` holding the table snapshots and the
            foreign keys of all non-producer tables.
        """
        tables = {}
        foreign_keys = []
        for table in rel_data.list_all_tables(Scope.ALL):
            tables[table] = BackupRelationalDataTable(
                primary_key=rel_data.get_primary_key(table),
                columns=rel_data.get_table_columns(table),
                invented_table_metadata=_optionally_as_dict(
                    rel_data.get_invented_table_metadata(table)
                ),
                producer_metadata=_optionally_as_dict(
                    rel_data.get_producer_metadata(table)
                ),
            )

            # Producer tables delegate their foreign keys to root invented tables.
            # We exclude producers here to avoid adding duplicate foreign keys.
            if not rel_data.is_producer_of_invented_tables(table):
                foreign_keys.extend(
                    [
                        BackupForeignKey.from_fk(key)
                        for key in rel_data.get_foreign_keys(table)
                    ]
                )
        return BackupRelationalData(tables=tables, foreign_keys=foreign_keys)


def _optionally_as_dict(
    metadata: Optional[Union[InventedTableMetadata, ProducerMetadata]]
) -> Optional[dict[str, Any]]:
    """Convert a metadata dataclass instance to a dict, passing ``None`` through.

    Args:
        metadata: A metadata dataclass instance, or ``None`` when absent.

    Returns:
        ``dataclasses.asdict(metadata)``, or ``None`` if ``metadata`` is ``None``.
    """
    if metadata is None:
        return None

    return asdict(metadata)


@dataclass
Expand Down Expand Up @@ -139,10 +136,6 @@ def from_dict(cls, b: dict[str, Any]):
)
for fk in relational_data.get("foreign_keys", [])
],
relational_jsons={
k: BackupRelationalJson(**v)
for k, v in relational_data.get("relational_jsons", {}).items()
},
)

backup = Backup(
Expand Down
Loading