From 3f0b1341221512992f761ba7636b86f01506a02b Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Mon, 24 Jul 2023 17:13:56 -0500 Subject: [PATCH 01/19] checkpoint: mostly working, tests failing --- dbt_meshify/cli.py | 24 +++ dbt_meshify/linker.py | 87 +++++++-- dbt_meshify/main.py | 73 ++++++-- dbt_meshify/storage/dbt_project_creator.py | 178 +++++++++++-------- dbt_meshify/storage/file_content_editors.py | 81 ++++++++- dbt_meshify/storage/file_manager.py | 5 +- tests/integration/test_subproject_creator.py | 2 +- tests/unit/test_update_ref_functions.py | 4 +- 8 files changed, 336 insertions(+), 118 deletions(-) diff --git a/dbt_meshify/cli.py b/dbt_meshify/cli.py index f4b9f46..c29076a 100644 --- a/dbt_meshify/cli.py +++ b/dbt_meshify/cli.py @@ -12,6 +12,30 @@ help="The path to the dbt project to operate on. Defaults to the current directory.", ) +project_paths = click.option( + "--project-paths", + cls=MultiOption, + multiple=True, + type=tuple, + default=None, + help="The paths to the set of dbt projects to connect. Must supply 2+ paths.", +) + +projects_dir = click.option( + "--projects-dir", + type=click.Path(exists=True), + help="The path to a directory containing multiple dbt projects. 
Directory must contain 2+ projects.", +) + +exclude_projects = click.option( + "--exclude-projects", + "-e", + multiple=True, + type=tuple, + default=None, + help="The set of dbt projects to exclude from the operation when using the --projects-dir option.", +) + create_path = click.option( "--create-path", type=click.Path(exists=False), diff --git a/dbt_meshify/linker.py b/dbt_meshify/linker.py index 1f49d39..b626ab2 100644 --- a/dbt_meshify/linker.py +++ b/dbt_meshify/linker.py @@ -1,8 +1,12 @@ from dataclasses import dataclass from enum import Enum -from typing import Set +from typing import Set, Union -from dbt_meshify.dbt_projects import BaseDbtProject +from dbt.node_types import AccessType + +from dbt_meshify.dbt_projects import BaseDbtProject, DbtProject +from dbt_meshify.storage.dbt_project_creator import DbtProjectEditor +from dbt_meshify.storage.file_content_editors import DbtMeshConstructor class ProjectDependencyType(str, Enum): @@ -16,12 +20,14 @@ class ProjectDependencyType(str, Enum): class ProjectDependency: """ProjectDependencies define shared resources between two different projects""" - upstream: str - downstream: str + upstream_resource: str + upstream_project_name: str + downstream_resource: str + downstream_project_name: str type: ProjectDependencyType def __key(self): - return self.upstream, self.downstream, self.type + return self.upstream_resource, self.downstream_resource, self.type def __hash__(self): return hash(self.__key()) @@ -50,7 +56,9 @@ def _find_relation_dependencies( return source_relations.intersection(target_relations) def _source_dependencies( - self, project: BaseDbtProject, other_project: BaseDbtProject + self, + project: Union[BaseDbtProject, DbtProject], + other_project: Union[BaseDbtProject, DbtProject], ) -> Set[ProjectDependency]: """ Identify source-hack dependencies between projects. 
@@ -74,8 +82,10 @@ def _source_dependencies( forward_dependencies = { ProjectDependency( - upstream=project.model_relation_names[relation], - downstream=other_project.source_relation_names[relation], + upstream_resource=project.model_relation_names[relation], + upstream_project_name=project.name, + downstream_resource=other_project.source_relation_names[relation], + downstream_project_name=other_project.name, type=ProjectDependencyType.Source, ) for relation in relations @@ -96,8 +106,10 @@ def _source_dependencies( backward_dependencies = { ProjectDependency( - upstream=other_project.model_relation_names[relation], - downstream=project.source_relation_names[relation], + upstream_resource=other_project.model_relation_names[relation], + upstream_project_name=other_project.name, + downstream_resource=project.source_relation_names[relation], + downstream_project_name=project.name, type=ProjectDependencyType.Source, ) for relation in backwards_relations @@ -106,7 +118,9 @@ def _source_dependencies( return forward_dependencies | backward_dependencies def _package_dependencies( - self, project: BaseDbtProject, other_project: BaseDbtProject + self, + project: Union[BaseDbtProject, DbtProject], + other_project: Union[BaseDbtProject, DbtProject], ) -> Set[ProjectDependency]: """ Identify package-imported dependencies between projects. 
@@ -133,15 +147,19 @@ def _package_dependencies( return { ProjectDependency( - upstream=project.model_relation_names[relation], - downstream=other_project.model_relation_names[relation], + upstream_resource=project.model_relation_names[relation], + upstream_project_name=project.model_relation_names[relation], + downstream_resource=other_project.model_relation_names[relation], + downstream_project_name=other_project.model_relation_names[relation], type=ProjectDependencyType.Package, ) for relation in relations } def dependencies( - self, project: BaseDbtProject, other_project: BaseDbtProject + self, + project: Union[BaseDbtProject, DbtProject], + other_project: Union[BaseDbtProject, DbtProject], ) -> Set[ProjectDependency]: """Detect dependencies between two projects and return a list of resources shared.""" @@ -156,3 +174,44 @@ def dependencies( dependencies.update(package_dependencies) return dependencies + + def resolve_dependency( + self, + dependency: ProjectDependency, + upstream_project: Union[DbtProject, BaseDbtProject], + downstream_project: Union[DbtProject, BaseDbtProject], + ): + upstream_manifest_entry = upstream_project.get_manifest_node(dependency.upstream_resource) + downstream_manifest_entry = downstream_project.get_manifest_node( + dependency.downstream_resource + ) + upstream_catalog_entry = upstream_project.get_catalog_entry(dependency.upstream_resource) + upstream_mesh_constructor = DbtMeshConstructor( + project_path=upstream_project.path, + node=upstream_manifest_entry, + catalog=upstream_catalog_entry, + ) + # upstream_editor = DbtProjectEditor(upstream_project) + downstream_mesh_constructor = DbtMeshConstructor( + project_path=downstream_project.path, + node=downstream_manifest_entry, + catalog=None, + ) + downstream_editor = DbtProjectEditor(downstream_project) + if dependency.type == ProjectDependencyType.Source: + upstream_mesh_constructor.add_model_access(AccessType.Public) + upstream_mesh_constructor.add_model_contract() + for child in 
downstream_project.manifest.child_map[dependency.downstream_resource]: + constructor = DbtMeshConstructor( + project_path=downstream_project.path, + node=downstream_project.get_manifest_node(child), + catalog=None, + ) + constructor.replace_source_with_refs( + source_unique_id=dependency.downstream_resource, + model_unique_id=dependency.upstream_resource, + ) + downstream_editor.update_resource_yml_entry( + downstream_mesh_constructor, operation_type="delete" + ) + downstream_editor.update_dependencies_yml(name=upstream_project.name) diff --git a/dbt_meshify/main.py b/dbt_meshify/main.py index da7ccaf..968fa65 100644 --- a/dbt_meshify/main.py +++ b/dbt_meshify/main.py @@ -1,7 +1,8 @@ import os import sys +from itertools import combinations from pathlib import Path -from typing import Optional +from typing import List, Optional import click import yaml @@ -14,18 +15,22 @@ TupleCompatibleCommand, create_path, exclude, + exclude_projects, group_yml_path, owner, owner_email, owner_name, owner_properties, project_path, + project_paths, + projects_dir, read_catalog, select, selector, ) from .dbt_projects import DbtProject, DbtProjectHolder from .exceptions import FatalMeshifyException +from .linker import Linker from .storage.file_content_editors import DbtMeshConstructor log_format = "{time:HH:mm:ss} | {level} | {message}" @@ -48,26 +53,56 @@ def operation(): @cli.command(name="connect") -@click.argument("projects-dir", type=click.Path(exists=True), default=".") -def connect(projects_dir): +@project_paths +@projects_dir +@exclude_projects +@read_catalog +def connect( + project_paths: tuple, projects_dir: Path, exclude_projects: List[str], read_catalog: bool +): """ - !!! 
info - This command is not yet implemented Connects multiple dbt projects together by adding all necessary dbt Mesh constructs - """ - holder = DbtProjectHolder() - - while True: - path_string = input("Enter the relative path to a dbt project (enter 'done' to finish): ") - if path_string == "done": - break - path = Path(path_string).expanduser().resolve() - project = DbtProject.from_directory(path, read_catalog) - holder.register_project(project) - - print(holder.project_map()) + """ + if project_paths and projects_dir: + raise click.BadOptionUsage( + option_name="project_paths", + message="Cannot specify both project_paths and projects_dir", + ) + # 1. initialize all the projects supplied to the command + # 2. compute the dependency graph between each combination of 2 projects in that set + # 3. for each dependency, add the necessary dbt Mesh constructs to each project. + # This includes: + # - adding the dependency to the dependencies.yml file of the downstream project + # - adding contracts and public access to the upstream models + # - deleting the source definition of the upstream models in the downstream project + # - updating the `{{ source }}` macro in the downstream project to a {{ ref }} to the upstream project + + linker = Linker() + if project_paths: + dbt_projects = [ + DbtProject.from_directory(project_path, read_catalog) for project_path in project_paths + ] + dbt_project_combinations = [combo for combo in combinations(dbt_projects, 2)] + for dbt_project_combo in dbt_project_combinations: + project_map = {project.name: project for project in dbt_project_combo} + for dependency in linker.dependencies(dbt_project_combo[0], dbt_project_combo[1]): + logger.info( + f"Adding found dependency between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}: {dependency}" + ) + try: + linker.resolve_dependency( + dependency, + project_map[dependency.upstream_project_name], + project_map[dependency.downstream_project_name], + ) + except Exception as e: + 
raise FatalMeshifyException(f"Error resolving dependency : {dependency}") + + if projects_dir: + # TODO: Implement this -- glob the directory for dbt_project.yml files, read projects from there + dbt_projects = [] @cli.command( @@ -230,7 +265,7 @@ def create_group( ) group_owner: Owner = Owner( - name=owner_name, email=owner_email, _extra=yaml.safe_load(owner_properties or '{}') + name=owner_name, email=owner_email, _extra=yaml.safe_load(owner_properties or "{}") ) grouper = ResourceGrouper(project) @@ -279,4 +314,4 @@ def group( Detects the edges of the group, makes their access public, and adds contracts to them """ ctx.forward(create_group) - ctx.invoke(add_contract, select=f'group:{name}', project_path=project_path, public_only=True) + ctx.invoke(add_contract, select=f"group:{name}", project_path=project_path, public_only=True) diff --git a/dbt_meshify/storage/dbt_project_creator.py b/dbt_meshify/storage/dbt_project_creator.py index e049763..ce77676 100644 --- a/dbt_meshify/storage/dbt_project_creator.py +++ b/dbt_meshify/storage/dbt_project_creator.py @@ -1,11 +1,12 @@ +from enum import Enum from pathlib import Path -from typing import Optional, Set +from typing import Optional, Set, Union from dbt.contracts.graph.nodes import ManifestNode from dbt.node_types import AccessType from loguru import logger -from dbt_meshify.dbt_projects import DbtSubProject +from dbt_meshify.dbt_projects import DbtProject, DbtSubProject from dbt_meshify.storage.file_content_editors import ( DbtMeshConstructor, filter_empty_dict_items, @@ -14,28 +15,108 @@ from dbt_meshify.utilities.grouper import ResourceGrouper -class DbtSubprojectCreator: +class YMLOperationType(str, Enum): + """ProjectDependencyTypes define how the dependency relationship was defined.""" + + Move = "move" + Delete = "delete" + + +class DbtProjectEditor: + def __init__(self, project: Union[DbtSubProject, DbtProject]): + self.project = project + self.file_manager = DbtFileManager( + 
read_project_path=project.path, + ) + + def move_resource(self, meshify_constructor: DbtMeshConstructor) -> None: + """ + move a resource file from one project to another + + """ + current_path = meshify_constructor.get_resource_path() + self.file_manager.move_file(current_path) + + def copy_resource(self, meshify_constructor: DbtMeshConstructor) -> None: + """ + copy a resource file from one project to another + + """ + resource_path = meshify_constructor.get_resource_path() + contents = self.file_manager.read_file(resource_path) + self.file_manager.write_file(resource_path, contents) + + def update_resource_yml_entry( + self, + meshify_constructor: DbtMeshConstructor, + operation_type: YMLOperationType = YMLOperationType.Move, + ) -> None: + """ + move a resource yml entry from one project to another + """ + current_yml_path = meshify_constructor.get_patch_path() + new_yml_path = self.file_manager.write_project_path / current_yml_path + full_yml_entry = self.file_manager.read_file(current_yml_path) + source_name = ( + meshify_constructor.node.source_name + if hasattr(meshify_constructor.node, "source_name") + else None + ) + resource_entry, remainder = meshify_constructor.get_yml_entry( + resource_name=meshify_constructor.node.name, + full_yml=full_yml_entry, # type: ignore + resource_type=meshify_constructor.node.resource_type, + source_name=source_name, + ) + try: + existing_yml = self.file_manager.read_file(new_yml_path) + except FileNotFoundError: + existing_yml = None + if operation_type == YMLOperationType.Move: + new_yml_contents = meshify_constructor.add_entry_to_yml( + resource_entry, existing_yml, meshify_constructor.node.resource_type # type: ignore + ) + self.file_manager.write_file(current_yml_path, new_yml_contents) + if remainder: + self.file_manager.write_file(current_yml_path, remainder, writeback=True) + else: + self.file_manager.delete_file(current_yml_path) + + def update_dependencies_yml(self, name: str) -> None: + import pdb + + 
pdb.set_trace() + try: + contents = self.file_manager.read_file(Path("dependencies.yml")) + except FileNotFoundError: + contents = {"projects": []} + + contents["projects"].append({"name": name if name else self.project.name}) # type: ignore + self.file_manager.write_file(Path("dependencies.yml"), contents, writeback=True) + + +class DbtSubprojectCreator(DbtProjectEditor): """ Takes a `DbtSubProject` and creates the directory structure and files for it. """ - def __init__(self, subproject: DbtSubProject, target_directory: Optional[Path] = None): - self.subproject = subproject - self.target_directory = target_directory if target_directory else subproject.path + def __init__(self, project: DbtSubProject, target_directory: Optional[Path] = None): + super().__init__(project) + self.target_directory = target_directory if target_directory else project.path self.file_manager = DbtFileManager( - read_project_path=subproject.parent_project.path, + read_project_path=project.parent_project.path, write_project_path=self.target_directory, ) - self.subproject_boundary_models = self._get_subproject_boundary_models() + self.project_boundary_models = self._get_subproject_boundary_models() def _get_subproject_boundary_models(self) -> Set[str]: """ get a set of boundary model unique_ids for all the selected resources """ - nodes = set(filter(lambda x: not x.startswith("source"), self.subproject.resources)) - parent_project_name = self.subproject.parent_project.name + nodes = set(filter(lambda x: not x.startswith("source"), self.project.resources)) + parent_project_name = self.project.parent_project.name interface = ResourceGrouper.identify_interface( - graph=self.subproject.graph.graph, selected_bunch=nodes + graph=self.project.graph.graph, selected_bunch=nodes ) boundary_models = set( filter( @@ -49,7 +130,7 @@ def write_project_file(self) -> None: """ Writes the dbt_project.yml file for the subproject in the specified subdirectory """ - contents = self.subproject.project.to_dict() 
+ contents = self.project.project.to_dict() # was gettinga weird serialization error from ruamel on this value # it's been deprecated, so no reason to keep it contents.pop("version") @@ -71,35 +152,26 @@ def copy_packages_dir(self) -> None: """ raise NotImplementedError("copy_packages_dir not implemented yet") - def update_dependencies_yml(self) -> None: - try: - contents = self.file_manager.read_file(Path("dependencies.yml")) - except FileNotFoundError: - contents = {"projects": []} - - contents["projects"].append({"name": self.subproject.name}) # type: ignore - self.file_manager.write_file(Path("dependencies.yml"), contents, writeback=True) - def update_child_refs(self, resource: ManifestNode) -> None: downstream_models = [ node.unique_id - for node in self.subproject.manifest.nodes.values() + for node in self.project.manifest.nodes.values() if node.resource_type == "model" and resource.unique_id in node.depends_on.nodes # type: ignore ] for model in downstream_models: - model_node = self.subproject.get_manifest_node(model) + model_node = self.project.get_manifest_node(model) if not model_node: raise KeyError(f"Resource {model} not found in manifest") meshify_constructor = DbtMeshConstructor( - project_path=self.subproject.parent_project.path, node=model_node, catalog=None + project_path=self.project.parent_project.path, node=model_node, catalog=None ) meshify_constructor.update_model_refs( - model_name=resource.name, project_name=self.subproject.name + model_name=resource.name, project_name=self.project.name ) def initialize(self) -> None: """Initialize this subproject as a full dbt project at the provided `target_directory`.""" - subproject = self.subproject + subproject = self.project for unique_id in subproject.resources | subproject.custom_macros | subproject.groups: resource = subproject.get_manifest_node(unique_id) catalog = subproject.get_catalog_entry(unique_id) @@ -112,7 +184,7 @@ def initialize(self) -> None: # ignore generic tests, as moving the yml 
entry will move the test too if resource.resource_type == "test" and len(resource.unique_id.split(".")) == 4: continue - if resource.unique_id in self.subproject_boundary_models: + if resource.unique_id in self.project_boundary_models: logger.info( f"Adding contract to and publicizing boundary node {resource.unique_id}" ) @@ -145,7 +217,7 @@ def initialize(self) -> None: ) try: self.move_resource(meshify_constructor) - self.move_resource_yml_entry(meshify_constructor) + self.update_resource_yml_entry(meshify_constructor) logger.success( f"Successfully moved {resource.unique_id} and associated YML to subproject {subproject.name}" ) @@ -172,7 +244,7 @@ def initialize(self) -> None: f"Moving resource {resource.unique_id} to subproject {subproject.name}..." ) try: - self.move_resource_yml_entry(meshify_constructor) + self.update_resource_yml_entry(meshify_constructor) logger.success( f"Successfully moved resource {resource.unique_id} to subproject {subproject.name}" ) @@ -186,51 +258,3 @@ def initialize(self) -> None: self.copy_packages_yml_file() self.update_dependencies_yml() # self.copy_packages_dir() - - def move_resource(self, meshify_constructor: DbtMeshConstructor) -> None: - """ - move a resource file from one project to another - - """ - current_path = meshify_constructor.get_resource_path() - self.file_manager.move_file(current_path) - - def copy_resource(self, meshify_constructor: DbtMeshConstructor) -> None: - """ - copy a resource file from one project to another - - """ - resource_path = meshify_constructor.get_resource_path() - contents = self.file_manager.read_file(resource_path) - self.file_manager.write_file(resource_path, contents) - - def move_resource_yml_entry(self, meshify_constructor: DbtMeshConstructor) -> None: - """ - move a resource yml entry from one project to another - """ - current_yml_path = meshify_constructor.get_patch_path() - new_yml_path = self.file_manager.write_project_path / current_yml_path - full_yml_entry = 
self.file_manager.read_file(current_yml_path) - source_name = ( - meshify_constructor.node.source_name - if hasattr(meshify_constructor.node, "source_name") - else None - ) - resource_entry, remainder = meshify_constructor.get_yml_entry( - resource_name=meshify_constructor.node.name, - full_yml=full_yml_entry, # type: ignore - resource_type=meshify_constructor.node.resource_type, - source_name=source_name, - ) - try: - existing_yml = self.file_manager.read_file(new_yml_path) - except FileNotFoundError: - existing_yml = None - new_yml_contents = meshify_constructor.add_entry_to_yml( - resource_entry, existing_yml, meshify_constructor.node.resource_type # type: ignore - ) - self.file_manager.write_file(current_yml_path, new_yml_contents) - if remainder: - self.file_manager.write_file(current_yml_path, remainder, writeback=True) - else: - self.file_manager.delete_file(current_yml_path) diff --git a/dbt_meshify/storage/file_content_editors.py b/dbt_meshify/storage/file_content_editors.py index 4900972..eea4262 100644 --- a/dbt_meshify/storage/file_content_editors.py +++ b/dbt_meshify/storage/file_content_editors.py @@ -281,7 +281,7 @@ def add_model_version_to_yml( models_yml["models"] = list(models.values()) return models_yml - def update_sql_refs(self, model_code: str, model_name: str, project_name: str): + def update_refs__sql(self, model_code: str, model_name: str, project_name: str): import re # pattern to search for ref() with optional spaces and either single or double quotes @@ -295,7 +295,32 @@ def update_sql_refs(self, model_code: str, model_name: str, project_name: str): return new_code - def update_python_refs(self, model_code: str, model_name: str, project_name: str): + def replace_source_with_ref__sql( + self, model_code: str, source_unique_id: str, model_unique_id: str + ): + import re + + source_parsed = source_unique_id.split(".") + model_parsed = model_unique_id.split(".") + + # pattern to search for source() with optional spaces and either single or 
double quotes + pattern = re.compile( + r"{{\s*source\s*\(\s*['\"]" + + re.escape(source_parsed[2]) + + r"['\"]\s*,\s*['\"]" + + re.escape(source_parsed[3]) + + r"['\"]\s*\)\s*}}" + ) + + # replacement string with the new format + replacement = f"{{{{ ref('{model_parsed[1]}', '{model_parsed[2]}') }}}}" + + # perform replacement + new_code = re.sub(pattern, replacement, model_code) + + return new_code + + def update_refs__python(self, model_code: str, model_name: str, project_name: str): import re # pattern to search for ref() with optional spaces and either single or double quotes @@ -309,6 +334,31 @@ def update_python_refs(self, model_code: str, model_name: str, project_name: str return new_code + def replace_source_with_ref__python( + self, model_code: str, source_unique_id: str, model_unique_id: str + ): + import re + + source_parsed = source_unique_id.split(".") + model_parsed = model_unique_id.split(".") + + # pattern to search for source() with optional spaces and either single or double quotes + pattern = re.compile( + r"dbt\.source\s*\(\s*['\"]" + + re.escape(source_parsed[2]) + + r"['\"]\s*,\s*['\"]" + + re.escape(source_parsed[3]) + + r"['\"]\s*\)" + ) + + # replacement string with the new format + replacement = f'dbt.ref("{model_parsed[1]}", "{model_parsed[2]}")' + + # perform replacement + new_code = re.sub(pattern, replacement, model_code) + + return new_code + class DbtMeshConstructor(DbtMeshFileEditor): def __init__( @@ -478,7 +528,7 @@ def update_model_refs(self, model_name: str, project_name: str) -> None: # read the model file model_code = str(self.file_manager.read_file(model_path)) # This can be defined in the init for this clas. - ref_update_methods = {'sql': self.update_sql_refs, 'python': self.update_python_refs} + ref_update_methods = {'sql': self.update_refs__sql, 'python': self.update_refs__python} # Here, we're trusting the dbt-core code to check the languages for us. 
🐉 updated_code = ref_update_methods[self.node.language]( model_name=model_name, @@ -487,3 +537,28 @@ def update_model_refs(self, model_name: str, project_name: str) -> None: ) # write the updated model code to the file self.file_manager.write_file(model_path, updated_code) + + def replace_source_with_refs(self, source_unique_id: str, model_unique_id: str) -> None: + """Updates the model refs in the model's sql file""" + model_path = self.get_resource_path() + + if model_path is None: + raise ModelFileNotFoundError( + f"Unable to find path to model {self.node.name}. Aborting." + ) + + # read the model file + model_code = str(self.file_manager.read_file(model_path)) + # This can be defined in the init for this clas. + ref_update_methods = { + 'sql': self.replace_source_with_ref__sql, + 'python': self.replace_source_with_ref__python, + } + # Here, we're trusting the dbt-core code to check the languages for us. 🐉 + updated_code = ref_update_methods[self.node.language]( + model_code=model_code, + source_unique_id=source_unique_id, + model_unique_id=model_unique_id, + ) + # write the updated model code to the file + self.file_manager.write_file(model_path, updated_code) diff --git a/dbt_meshify/storage/file_manager.py b/dbt_meshify/storage/file_manager.py index 850b4a4..3fb2179 100644 --- a/dbt_meshify/storage/file_manager.py +++ b/dbt_meshify/storage/file_manager.py @@ -55,7 +55,7 @@ def __init__( self.write_project_path = write_project_path if write_project_path else read_project_path def read_file(self, path: Path) -> Union[Dict[str, Any], str]: - """Returns the yaml for a model in the dbt project's manifest""" + """Returns the file contents at a given path""" full_path = self.read_project_path / path if full_path.suffix == ".yml": return yaml.load(full_path.read_text()) @@ -96,4 +96,5 @@ def move_file(self, path: Path) -> None: def delete_file(self, path: Path) -> None: """deletes the specified file""" - path.unlink() + delete_path = self.read_project_path / path + 
delete_path.unlink() diff --git a/tests/integration/test_subproject_creator.py b/tests/integration/test_subproject_creator.py index 22fc3a1..d090bf6 100644 --- a/tests/integration/test_subproject_creator.py +++ b/tests/integration/test_subproject_creator.py @@ -106,7 +106,7 @@ def test_move_yml_entry(self) -> None: subproject = split_project() meshify_constructor = get_meshify_constructor(subproject, model_unique_id) creator = DbtSubprojectCreator(subproject) - creator.move_resource_yml_entry(meshify_constructor) + creator.update_resource_yml_entry(meshify_constructor) # the original path should still exist, since we take only the single model entry assert Path("test/models/example/schema.yml").exists() assert Path("test/subdir/models/example/schema.yml").exists() diff --git a/tests/unit/test_update_ref_functions.py b/tests/unit/test_update_ref_functions.py index 4a8d92f..941d150 100644 --- a/tests/unit/test_update_ref_functions.py +++ b/tests/unit/test_update_ref_functions.py @@ -38,7 +38,7 @@ def read_yml(yml_str): class TestRemoveResourceYml: def test_update_sql_ref_function__basic(self): - updated_sql = meshify.update_sql_refs( + updated_sql = meshify.update_refs__sql( model_code=simple_model_sql, model_name=upstream_model_name, project_name=upstream_project_name, @@ -46,7 +46,7 @@ def test_update_sql_ref_function__basic(self): assert updated_sql == expected_simple_model_sql def test_update_python_ref_function__basic(self): - updated_python = meshify.update_python_refs( + updated_python = meshify.update_refs__python( model_code=simple_model_python, model_name=upstream_model_name, project_name=upstream_project_name, From e0be64e341a5907f68d7f39e2222f482b10a2704 Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Tue, 25 Jul 2023 08:26:12 -0500 Subject: [PATCH 02/19] handle identifier class for ruamel --- dbt_meshify/main.py | 8 ++++---- dbt_meshify/storage/dbt_project_creator.py | 11 ++++++----- dbt_meshify/storage/file_manager.py | 4 +++- 3 files changed, 13 
insertions(+), 10 deletions(-) diff --git a/dbt_meshify/main.py b/dbt_meshify/main.py index 968fa65..3bc7abc 100644 --- a/dbt_meshify/main.py +++ b/dbt_meshify/main.py @@ -89,7 +89,7 @@ def connect( project_map = {project.name: project for project in dbt_project_combo} for dependency in linker.dependencies(dbt_project_combo[0], dbt_project_combo[1]): logger.info( - f"Adding found dependency between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}: {dependency}" + f"Found dependency between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}: {dependency}" ) try: linker.resolve_dependency( @@ -98,7 +98,7 @@ def connect( project_map[dependency.downstream_project_name], ) except Exception as e: - raise FatalMeshifyException(f"Error resolving dependency : {dependency}") + raise FatalMeshifyException(f"Error resolving dependency : {dependency} {e}") if projects_dir: # TODO: Implement this -- glob the directory for dbt_project.yml files, read projects from there @@ -133,13 +133,13 @@ def split(ctx, project_name, select, exclude, project_path, selector, create_pat create_path = Path(create_path).expanduser().resolve() create_path.parent.mkdir(parents=True, exist_ok=True) - subproject_creator = DbtSubprojectCreator(subproject=subproject, target_directory=create_path) + subproject_creator = DbtSubprojectCreator(project=subproject, target_directory=create_path) logger.info(f"Creating subproject {subproject.name}...") try: subproject_creator.initialize() logger.success(f"Successfully created subproject {subproject.name}") except Exception as e: - raise FatalMeshifyException(f"Error creating subproject {subproject.name}") + raise FatalMeshifyException(f"Error creating subproject {subproject.name}: error {e}") @operation.command(name="add-contract") diff --git a/dbt_meshify/storage/dbt_project_creator.py b/dbt_meshify/storage/dbt_project_creator.py index ce77676..cfcbc4a 100644 --- a/dbt_meshify/storage/dbt_project_creator.py +++ 
b/dbt_meshify/storage/dbt_project_creator.py @@ -3,6 +3,7 @@ from typing import Optional, Set, Union from dbt.contracts.graph.nodes import ManifestNode +from dbt.contracts.util import Identifier from dbt.node_types import AccessType from loguru import logger @@ -82,16 +83,16 @@ def update_resource_yml_entry( else: self.file_manager.delete_file(current_yml_path) - def update_dependencies_yml(self, name: str) -> None: - import pdb - - pdb.set_trace() + def update_dependencies_yml(self, name: Union[str, None] = None) -> None: try: contents = self.file_manager.read_file(Path("dependencies.yml")) except FileNotFoundError: contents = {"projects": []} - contents["projects"].append({"name": name if name else self.project.name}) # type: ignore + import pdb + + pdb.set_trace() + contents["projects"].append({"name": str(Identifier(name)) if name else self.project.name}) # type: ignore self.file_manager.write_file(Path("dependencies.yml"), contents, writeback=True) diff --git a/dbt_meshify/storage/file_manager.py b/dbt_meshify/storage/file_manager.py index 3fb2179..137085b 100644 --- a/dbt_meshify/storage/file_manager.py +++ b/dbt_meshify/storage/file_manager.py @@ -5,8 +5,10 @@ from pathlib import Path from typing import Any, Dict, Optional, Union +from dbt.contracts.util import Identifier from ruamel.yaml import YAML from ruamel.yaml.compat import StringIO +from ruamel.yaml.representer import Representer class DbtYAML(YAML): @@ -29,7 +31,7 @@ def dump(self, data, stream=None, **kw): yaml = DbtYAML() - +yaml.register_class(Identifier) FileContent = Union[Dict[str, str], str] From 5fcbc509d4bf0f0ae4ddf7ae353db93d53ec577d Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Tue, 25 Jul 2023 08:39:54 -0500 Subject: [PATCH 03/19] mypy type updates - ignore DbtSubproject specific attributes in DbtSubprojectCreator class --- dbt_meshify/storage/dbt_project_creator.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git 
a/dbt_meshify/storage/dbt_project_creator.py b/dbt_meshify/storage/dbt_project_creator.py index cfcbc4a..00147f8 100644 --- a/dbt_meshify/storage/dbt_project_creator.py +++ b/dbt_meshify/storage/dbt_project_creator.py @@ -102,6 +102,8 @@ class DbtSubprojectCreator(DbtProjectEditor): """ def __init__(self, project: DbtSubProject, target_directory: Optional[Path] = None): + if not isinstance(project, DbtSubProject): + raise TypeError(f"DbtSubprojectCreator requires a DbtSubProject, got {type(project)}") super().__init__(project) self.target_directory = target_directory if target_directory else project.path self.file_manager = DbtFileManager( @@ -114,8 +116,8 @@ def _get_subproject_boundary_models(self) -> Set[str]: """ get a set of boundary model unique_ids for all the selected resources """ - nodes = set(filter(lambda x: not x.startswith("source"), self.project.resources)) - parent_project_name = self.project.parent_project.name + nodes = set(filter(lambda x: not x.startswith("source"), self.project.resources)) # type: ignore + parent_project_name = self.project.parent_project.name # type: ignore interface = ResourceGrouper.identify_interface( graph=self.project.graph.graph, selected_bunch=nodes ) @@ -164,7 +166,7 @@ def update_child_refs(self, resource: ManifestNode) -> None: if not model_node: raise KeyError(f"Resource {model} not found in manifest") meshify_constructor = DbtMeshConstructor( - project_path=self.project.parent_project.path, node=model_node, catalog=None + project_path=self.project.parent_project.path, node=model_node, catalog=None # type: ignore ) meshify_constructor.update_model_refs( model_name=resource.name, project_name=self.project.name @@ -173,13 +175,13 @@ def update_child_refs(self, resource: ManifestNode) -> None: def initialize(self) -> None: """Initialize this subproject as a full dbt project at the provided `target_directory`.""" subproject = self.project - for unique_id in subproject.resources | subproject.custom_macros | 
subproject.groups: + for unique_id in subproject.resources | subproject.custom_macros | subproject.groups: # type: ignore resource = subproject.get_manifest_node(unique_id) catalog = subproject.get_catalog_entry(unique_id) if not resource: raise KeyError(f"Resource {unique_id} not found in manifest") meshify_constructor = DbtMeshConstructor( - project_path=subproject.parent_project.path, node=resource, catalog=catalog + project_path=subproject.parent_project.path, node=resource, catalog=catalog # type: ignore ) if resource.resource_type in ["model", "test", "snapshot", "seed"]: # ignore generic tests, as moving the yml entry will move the test too @@ -203,7 +205,7 @@ def initialize(self) -> None: # apply access method too logger.info(f"Updating ref functions for children of {resource.unique_id}...") try: - self.update_child_refs(resource) + self.update_child_refs(resource) # type: ignore logger.success( f"Successfully updated ref functions for children of {resource.unique_id}" ) From c22ce02323c43dfc247f934c551e35b7314c4929 Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Tue, 25 Jul 2023 10:09:50 -0500 Subject: [PATCH 04/19] update package logic to look for children of package model --- dbt_meshify/linker.py | 37 ++++++++++++------- dbt_meshify/storage/dbt_project_creator.py | 3 -- .../integration/test_dependency_detection.py | 18 ++++++--- 3 files changed, 36 insertions(+), 22 deletions(-) diff --git a/dbt_meshify/linker.py b/dbt_meshify/linker.py index b626ab2..c76732d 100644 --- a/dbt_meshify/linker.py +++ b/dbt_meshify/linker.py @@ -5,7 +5,7 @@ from dbt.node_types import AccessType from dbt_meshify.dbt_projects import BaseDbtProject, DbtProject -from dbt_meshify.storage.dbt_project_creator import DbtProjectEditor +from dbt_meshify.storage.dbt_project_creator import DbtProjectEditor, YMLOperationType from dbt_meshify.storage.file_content_editors import DbtMeshConstructor @@ -51,7 +51,7 @@ def _find_relation_dependencies( source_relations: Set[str], 
target_relations: Set[str] ) -> Set[str]: """ - Identify dependencies between projects using shared relations. + Identify dependencies between projects using shared relation names. """ return source_relations.intersection(target_relations) @@ -132,6 +132,7 @@ def _package_dependencies( if project.project_id not in other_project.installed_packages(): return set() + # find which models are in both manifests relations = self._find_relation_dependencies( source_relations={ model.relation_name @@ -145,15 +146,25 @@ def _package_dependencies( }, ) + # find the children of the shared models in the downstream project + package_children = [ + { + 'upstream_resource': project.model_relation_names[relation], + 'downstream_resource': child, + } + for relation in relations + for child in other_project.manifest.child_map[project.model_relation_names[relation]] + ] + return { ProjectDependency( - upstream_resource=project.model_relation_names[relation], - upstream_project_name=project.model_relation_names[relation], - downstream_resource=other_project.model_relation_names[relation], - downstream_project_name=other_project.model_relation_names[relation], + upstream_resource=child['upstream_resource'], + upstream_project_name=project.name, + downstream_resource=child['downstream_resource'], + downstream_project_name=other_project.name, type=ProjectDependencyType.Package, ) - for relation in relations + for child in package_children } def dependencies( @@ -178,8 +189,8 @@ def dependencies( def resolve_dependency( self, dependency: ProjectDependency, - upstream_project: Union[DbtProject, BaseDbtProject], - downstream_project: Union[DbtProject, BaseDbtProject], + upstream_project: DbtProject, + downstream_project: DbtProject, ): upstream_manifest_entry = upstream_project.get_manifest_node(dependency.upstream_resource) downstream_manifest_entry = downstream_project.get_manifest_node( @@ -188,13 +199,13 @@ def resolve_dependency( upstream_catalog_entry = 
upstream_project.get_catalog_entry(dependency.upstream_resource) upstream_mesh_constructor = DbtMeshConstructor( project_path=upstream_project.path, - node=upstream_manifest_entry, + node=upstream_manifest_entry, # type: ignore catalog=upstream_catalog_entry, ) # upstream_editor = DbtProjectEditor(upstream_project) downstream_mesh_constructor = DbtMeshConstructor( project_path=downstream_project.path, - node=downstream_manifest_entry, + node=downstream_manifest_entry, # type: ignore catalog=None, ) downstream_editor = DbtProjectEditor(downstream_project) @@ -204,7 +215,7 @@ def resolve_dependency( for child in downstream_project.manifest.child_map[dependency.downstream_resource]: constructor = DbtMeshConstructor( project_path=downstream_project.path, - node=downstream_project.get_manifest_node(child), + node=downstream_project.get_manifest_node(child), # type: ignore catalog=None, ) constructor.replace_source_with_refs( @@ -212,6 +223,6 @@ def resolve_dependency( model_unique_id=dependency.upstream_resource, ) downstream_editor.update_resource_yml_entry( - downstream_mesh_constructor, operation_type="delete" + downstream_mesh_constructor, operation_type=YMLOperationType.Delete ) downstream_editor.update_dependencies_yml(name=upstream_project.name) diff --git a/dbt_meshify/storage/dbt_project_creator.py b/dbt_meshify/storage/dbt_project_creator.py index 00147f8..2f09fe3 100644 --- a/dbt_meshify/storage/dbt_project_creator.py +++ b/dbt_meshify/storage/dbt_project_creator.py @@ -89,9 +89,6 @@ def update_dependencies_yml(self, name: Union[str, None] = None) -> None: except FileNotFoundError: contents = {"projects": []} - import pdb - - pdb.set_trace() contents["projects"].append({"name": str(Identifier(name)) if name else self.project.name}) # type: ignore self.file_manager.write_file(Path("dependencies.yml"), contents, writeback=True) diff --git a/tests/integration/test_dependency_detection.py b/tests/integration/test_dependency_detection.py index 1aa749e..d9b5ec2 
100644 --- a/tests/integration/test_dependency_detection.py +++ b/tests/integration/test_dependency_detection.py @@ -50,8 +50,10 @@ def test_linker_detects_source_dependencies(self, src_proj_a, src_proj_b): assert dependencies == { ProjectDependency( - upstream="model.src_proj_a.shared_model", - downstream="source.src_proj_b.src_proj_a.shared_model", + upstream_resource="model.src_proj_a.shared_model", + upstream_project_name="src_proj_a", + downstream_resource="source.src_proj_b.src_proj_a.shared_model", + downstream_project_name="src_proj_b", type=ProjectDependencyType.Source, ) } @@ -64,8 +66,10 @@ def test_linker_detects_source_dependencies_bidirectionally(self, src_proj_a, sr assert dependencies == { ProjectDependency( - upstream="model.src_proj_a.shared_model", - downstream="source.src_proj_b.src_proj_a.shared_model", + upstream_resource="model.src_proj_a.shared_model", + upstream_project_name="src_proj_a", + downstream_resource="source.src_proj_b.src_proj_a.shared_model", + downstream_project_name="src_proj_b", type=ProjectDependencyType.Source, ) } @@ -78,8 +82,10 @@ def test_linker_detects_package_import_dependencies(self, src_proj_a, dest_proj_ assert dependencies == { ProjectDependency( - upstream="model.src_proj_a.shared_model", - downstream="model.src_proj_a.shared_model", + upstream_resource="model.src_proj_a.shared_model", + upstream_project_name="src_proj_a", + downstream_resource="model.dest_proj_a.downstream_model", + downstream_project_name="dest_proj_a", type=ProjectDependencyType.Package, ) } From 83b4aa1a70b959c7b6e2d0aa25ccaf2182604fb1 Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Tue, 25 Jul 2023 10:19:05 -0500 Subject: [PATCH 05/19] support package dependencies --- dbt_meshify/linker.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/dbt_meshify/linker.py b/dbt_meshify/linker.py index c76732d..d7e570c 100644 --- a/dbt_meshify/linker.py +++ b/dbt_meshify/linker.py @@ -193,6 +193,10 @@ def
resolve_dependency( downstream_project: DbtProject, ): upstream_manifest_entry = upstream_project.get_manifest_node(dependency.upstream_resource) + if not upstream_manifest_entry: + raise ValueError( + f"Could not find upstream resource {dependency.upstream_resource} in project {upstream_project.name}" + ) downstream_manifest_entry = downstream_project.get_manifest_node( dependency.downstream_resource ) @@ -209,9 +213,11 @@ def resolve_dependency( catalog=None, ) downstream_editor = DbtProjectEditor(downstream_project) + # for either dependency type, add contracts and make model public + upstream_mesh_constructor.add_model_access(AccessType.Public) + upstream_mesh_constructor.add_model_contract() + if dependency.type == ProjectDependencyType.Source: - upstream_mesh_constructor.add_model_access(AccessType.Public) - upstream_mesh_constructor.add_model_contract() for child in downstream_project.manifest.child_map[dependency.downstream_resource]: constructor = DbtMeshConstructor( project_path=downstream_project.path, @@ -225,4 +231,12 @@ def resolve_dependency( downstream_editor.update_resource_yml_entry( downstream_mesh_constructor, operation_type=YMLOperationType.Delete ) - downstream_editor.update_dependencies_yml(name=upstream_project.name) + + if dependency.type == ProjectDependencyType.Package: + downstream_mesh_constructor.update_model_refs( + model_name=upstream_manifest_entry.name, + project_name=dependency.upstream_project_name, + ) + + # for both types, add upstream project to downstream project's dependencies.yml + downstream_editor.update_dependencies_yml(name=upstream_project.name) From bbc2b5b6948c2602613094b403f7057b2fb91401 Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Tue, 25 Jul 2023 10:34:34 -0500 Subject: [PATCH 06/19] logging --- dbt_meshify/linker.py | 74 +++++++++++++++++---- dbt_meshify/storage/file_content_editors.py | 4 +- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/dbt_meshify/linker.py b/dbt_meshify/linker.py index 
d7e570c..d14db3f 100644 --- a/dbt_meshify/linker.py +++ b/dbt_meshify/linker.py @@ -3,8 +3,10 @@ from typing import Set, Union from dbt.node_types import AccessType +from loguru import logger from dbt_meshify.dbt_projects import BaseDbtProject, DbtProject +from dbt_meshify.exceptions import FatalMeshifyException, FileEditorException from dbt_meshify.storage.dbt_project_creator import DbtProjectEditor, YMLOperationType from dbt_meshify.storage.file_content_editors import DbtMeshConstructor @@ -214,8 +216,20 @@ def resolve_dependency( ) downstream_editor = DbtProjectEditor(downstream_project) # for either dependency type, add contracts and make model public - upstream_mesh_constructor.add_model_access(AccessType.Public) - upstream_mesh_constructor.add_model_contract() + try: + upstream_mesh_constructor.add_model_access(AccessType.Public) + logger.success( + f"Successfully update model access : {dependency.upstream_resource} is now {AccessType.Public}" + ) + except FatalMeshifyException as e: + logger.error(f"Failed to update model access: {dependency.upstream_resource}") + raise e + try: + upstream_mesh_constructor.add_model_contract() + logger.success(f"Successfully added contract to model: {dependency.upstream_resource}") + except FatalMeshifyException as e: + logger.error(f"Failed to add contract to model: {dependency.upstream_resource}") + raise e if dependency.type == ProjectDependencyType.Source: for child in downstream_project.manifest.child_map[dependency.downstream_resource]: @@ -224,19 +238,51 @@ def resolve_dependency( node=downstream_project.get_manifest_node(child), # type: ignore catalog=None, ) - constructor.replace_source_with_refs( - source_unique_id=dependency.downstream_resource, - model_unique_id=dependency.upstream_resource, - ) - downstream_editor.update_resource_yml_entry( - downstream_mesh_constructor, operation_type=YMLOperationType.Delete - ) + try: + constructor.replace_source_with_refs( + source_unique_id=dependency.downstream_resource, + 
model_unique_id=dependency.upstream_resource, + ) + logger.success( + f"Successfully replaced source function with ref to upstream resource: {dependency.downstream_resource} now calls {dependency.upstream_resource} directly" + ) + except FileEditorException as e: + logger.error( + f"Failed to replace source function with ref to upstream resource" + ) + raise e + try: + downstream_editor.update_resource_yml_entry( + downstream_mesh_constructor, operation_type=YMLOperationType.Delete + ) + logger.success( + f"Successfully deleted unnecessary source: {dependency.downstream_resource}" + ) + except FatalMeshifyException as e: + logger.error(f"Failed to delete unnecessary source") + raise e if dependency.type == ProjectDependencyType.Package: - downstream_mesh_constructor.update_model_refs( - model_name=upstream_manifest_entry.name, - project_name=dependency.upstream_project_name, - ) + try: + downstream_mesh_constructor.update_model_refs( + model_name=upstream_manifest_entry.name, + project_name=dependency.upstream_project_name, + ) + logger.success( + f"Successfully updated model refs: {dependency.downstream_resource} now references {dependency.upstream_resource}" + ) + except FileEditorException as e: + logger.error(f"Failed to update model refs") + raise e # for both types, add upstream project to downstream project's dependencies.yml - downstream_editor.update_dependencies_yml(name=upstream_project.name) + try: + downstream_editor.update_dependencies_yml(name=upstream_project.name) + logger.success( + f"Successfully added {dependency.upstream_project_name} to {dependency.downstream_project_name}'s dependencies.yml" + ) + except FileEditorException as e: + logger.error( + f"Failed to add {dependency.upstream_project_name} to {dependency.downstream_project_name}'s dependencies.yml" + ) + raise e diff --git a/dbt_meshify/storage/file_content_editors.py b/dbt_meshify/storage/file_content_editors.py index eea4262..d2831cc 100644 --- 
a/dbt_meshify/storage/file_content_editors.py +++ b/dbt_meshify/storage/file_content_editors.py @@ -397,7 +397,6 @@ def get_patch_path(self) -> Path: filename = f"_{self.node.resource_type.pluralize()}.yml" yml_path = resource_path.parent / filename self.file_manager.write_file(yml_path, {}) - logger.info(f"Schema entry for {self.node.unique_id} written to {yml_path}") return yml_path def get_resource_path(self) -> Path: @@ -412,6 +411,7 @@ def get_resource_path(self) -> Path: def add_model_contract(self) -> None: """Adds a model contract to the model's yaml""" yml_path = self.get_patch_path() + logger.info(f"Adding contract to {self.node.name} at {yml_path}") # read the yml file # pass empty dict if no file contents returned models_yml = self.file_manager.read_file(yml_path) @@ -432,7 +432,7 @@ def add_model_contract(self) -> None: def add_model_access(self, access_type: AccessType) -> None: """Adds a model contract to the model's yaml""" yml_path = self.get_patch_path() - logger.info(f"Adding model contract for {self.node.name} at {yml_path}") + logger.info(f"Adding {access_type} access to {self.node.name} at {yml_path}") # read the yml file # pass empty dict if no file contents returned models_yml = self.file_manager.read_file(yml_path) From aa4dc53c484c285457023013b36beee069e18618 Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Tue, 25 Jul 2023 10:53:48 -0500 Subject: [PATCH 07/19] passing tests exclamation point --- tests/integration/test_connect_command.py | 77 +++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tests/integration/test_connect_command.py diff --git a/tests/integration/test_connect_command.py b/tests/integration/test_connect_command.py new file mode 100644 index 0000000..0c877c3 --- /dev/null +++ b/tests/integration/test_connect_command.py @@ -0,0 +1,77 @@ +from pathlib import Path + +import yaml +from click.testing import CliRunner + +from dbt_meshify.main import connect +from tests.dbt_project_utils import 
setup_test_project, teardown_test_project + +producer_project = "test-projects/source-hack/src_proj_a" +source_consumer_project = "test-projects/source-hack/src_proj_b" +package_consumer_project = "test-projects/source-hack/dest_proj_a" + + +class TestConnectCommand: + def test_connect_source_hack(self): + copy_producer_project = producer_project + "_copy" + copy_source_consumer_project = source_consumer_project + "_copy" + setup_test_project(producer_project, copy_producer_project) + setup_test_project(source_consumer_project, copy_source_consumer_project) + runner = CliRunner() + result = runner.invoke( + connect, + ["--project-paths", copy_producer_project, copy_source_consumer_project], + ) + + assert result.exit_code == 0 + + # assert that the source is replaced with a ref + x_proj_ref = "{{ ref('src_proj_a', 'shared_model') }}" + source_func = "{{ source('src_proj_a', 'shared_model') }}" + child_sql = ( + Path(copy_source_consumer_project) / "models" / "downstream_model.sql" + ).read_text() + assert x_proj_ref in child_sql + assert source_func not in child_sql + + # assert that the source was deleted + # may want to add some nuance in the future for cases where we delete a single source out of a set + assert not ( + Path(copy_source_consumer_project) / "models" / "staging" / "_sources.yml" + ).exists() + + # assert that the dependencies yml was created with a pointer to the upstream project + assert ( + "src_proj_a" in (Path(copy_source_consumer_project) / "dependencies.yml").read_text() + ) + + teardown_test_project(copy_producer_project) + teardown_test_project(copy_source_consumer_project) + + def test_connect_package(self): + copy_producer_project = producer_project + "_copy" + copy_package_consumer_project = package_consumer_project + "_copy" + setup_test_project(producer_project, copy_producer_project) + setup_test_project(package_consumer_project, copy_package_consumer_project) + runner = CliRunner() + result = runner.invoke( + connect, + 
["--project-paths", copy_producer_project, copy_package_consumer_project], + ) + + assert result.exit_code == 0 + + # assert that the source is replaced with a ref + x_proj_ref = "{{ ref('src_proj_a', 'shared_model') }}" + child_sql = ( + Path(copy_package_consumer_project) / "models" / "downstream_model.sql" + ).read_text() + assert x_proj_ref in child_sql + + # assert that the dependencies yml was created with a pointer to the upstream project + assert ( + "src_proj_a" in (Path(copy_package_consumer_project) / "dependencies.yml").read_text() + ) + + teardown_test_project(copy_producer_project) + teardown_test_project(copy_package_consumer_project) From 68b897d0fb47beb2a85e173b508f7b24daa8ad0c Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Tue, 25 Jul 2023 11:08:08 -0500 Subject: [PATCH 08/19] remove dest_proj_b -- it can't compile, and it's not used in any tests at the moment --- .../source-hack/dest_proj_b/.gitignore | 4 -- .../source-hack/dest_proj_b/.user.yml | 1 - .../source-hack/dest_proj_b/README.md | 15 -------- .../source-hack/dest_proj_b/analyses/.gitkeep | 0 .../source-hack/dest_proj_b/dbt_project.yml | 38 ------------------- .../source-hack/dest_proj_b/macros/.gitkeep | 0 .../dest_proj_b/models/downstream_model.sql | 8 ---- .../source-hack/dest_proj_b/profiles.yml | 7 ---- .../source-hack/dest_proj_b/seeds/.gitkeep | 0 .../dest_proj_b/snapshots/.gitkeep | 0 .../source-hack/dest_proj_b/tests/.gitkeep | 0 11 files changed, 73 deletions(-) delete mode 100644 test-projects/source-hack/dest_proj_b/.gitignore delete mode 100644 test-projects/source-hack/dest_proj_b/.user.yml delete mode 100644 test-projects/source-hack/dest_proj_b/README.md delete mode 100644 test-projects/source-hack/dest_proj_b/analyses/.gitkeep delete mode 100644 test-projects/source-hack/dest_proj_b/dbt_project.yml delete mode 100644 test-projects/source-hack/dest_proj_b/macros/.gitkeep delete mode 100644 test-projects/source-hack/dest_proj_b/models/downstream_model.sql delete mode 
100644 test-projects/source-hack/dest_proj_b/profiles.yml delete mode 100644 test-projects/source-hack/dest_proj_b/seeds/.gitkeep delete mode 100644 test-projects/source-hack/dest_proj_b/snapshots/.gitkeep delete mode 100644 test-projects/source-hack/dest_proj_b/tests/.gitkeep diff --git a/test-projects/source-hack/dest_proj_b/.gitignore b/test-projects/source-hack/dest_proj_b/.gitignore deleted file mode 100644 index 49f147c..0000000 --- a/test-projects/source-hack/dest_proj_b/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ - -target/ -dbt_packages/ -logs/ diff --git a/test-projects/source-hack/dest_proj_b/.user.yml b/test-projects/source-hack/dest_proj_b/.user.yml deleted file mode 100644 index 9873cdd..0000000 --- a/test-projects/source-hack/dest_proj_b/.user.yml +++ /dev/null @@ -1 +0,0 @@ -id: 9d149f56-cf27-4d94-815c-01a7bee0e8ca diff --git a/test-projects/source-hack/dest_proj_b/README.md b/test-projects/source-hack/dest_proj_b/README.md deleted file mode 100644 index 7874ac8..0000000 --- a/test-projects/source-hack/dest_proj_b/README.md +++ /dev/null @@ -1,15 +0,0 @@ -Welcome to your new dbt project! 
- -### Using the starter project - -Try running the following commands: -- dbt run -- dbt test - - -### Resources: -- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) -- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers -- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support -- Find [dbt events](https://events.getdbt.com) near you -- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/test-projects/source-hack/dest_proj_b/analyses/.gitkeep b/test-projects/source-hack/dest_proj_b/analyses/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/test-projects/source-hack/dest_proj_b/dbt_project.yml b/test-projects/source-hack/dest_proj_b/dbt_project.yml deleted file mode 100644 index 70ee64e..0000000 --- a/test-projects/source-hack/dest_proj_b/dbt_project.yml +++ /dev/null @@ -1,38 +0,0 @@ - -# Name your project! Project names should contain only lowercase characters -# and underscores. A good package name should reflect your organization's -# name or the intended use of these models -name: 'dest_proj_b' -version: '1.0.0' -config-version: 2 - -# This setting configures which "profile" dbt uses for this project. -profile: 'dest_proj_b' - -# These configurations specify where dbt should look for different types of files. -# The `model-paths` config, for example, states that models in this project can be -# found in the "models/" directory. You probably won't need to change these! 
-model-paths: ["models"] -analysis-paths: ["analyses"] -test-paths: ["tests"] -seed-paths: ["seeds"] -macro-paths: ["macros"] -snapshot-paths: ["snapshots"] - -target-path: "target" # directory which will store compiled SQL files -clean-targets: # directories to be removed by `dbt clean` - - "target" - - "dbt_packages" - - -# Configuring models -# Full documentation: https://docs.getdbt.com/docs/configuring-models - -# In this example config, we tell dbt to build all models in the example/ -# directory as views. These settings can be overridden in the individual model -# files using the `{{ config(...) }}` macro. -models: - dest_proj_b: - # Config indicated by + and applies to all files under models/example/ - example: - +materialized: view diff --git a/test-projects/source-hack/dest_proj_b/macros/.gitkeep b/test-projects/source-hack/dest_proj_b/macros/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/test-projects/source-hack/dest_proj_b/models/downstream_model.sql b/test-projects/source-hack/dest_proj_b/models/downstream_model.sql deleted file mode 100644 index df8f32c..0000000 --- a/test-projects/source-hack/dest_proj_b/models/downstream_model.sql +++ /dev/null @@ -1,8 +0,0 @@ -with - -upstream as ( - select * from {{ ref('src_proj_a', 'shared_model') }} -) - -select * from upstream -where colleague = 'grace' diff --git a/test-projects/source-hack/dest_proj_b/profiles.yml b/test-projects/source-hack/dest_proj_b/profiles.yml deleted file mode 100644 index 11b83df..0000000 --- a/test-projects/source-hack/dest_proj_b/profiles.yml +++ /dev/null @@ -1,7 +0,0 @@ -dest_proj_b: - outputs: - dev: - path: ../src_proj_a/database.db - threads: 2 - type: duckdb - target: dev diff --git a/test-projects/source-hack/dest_proj_b/seeds/.gitkeep b/test-projects/source-hack/dest_proj_b/seeds/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/test-projects/source-hack/dest_proj_b/snapshots/.gitkeep 
b/test-projects/source-hack/dest_proj_b/snapshots/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/test-projects/source-hack/dest_proj_b/tests/.gitkeep b/test-projects/source-hack/dest_proj_b/tests/.gitkeep deleted file mode 100644 index e69de29..0000000 From 1999c054e938222ee3aef59aec6aaddb6a9b9844 Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Tue, 25 Jul 2023 13:44:20 -0500 Subject: [PATCH 09/19] add feature for --projects-dir and --exclude-projects --- dbt_meshify/cli.py | 2 + dbt_meshify/linker.py | 44 +++++++++++++++++++++- dbt_meshify/main.py | 46 ++++++++++++++--------- tests/integration/test_connect_command.py | 37 ++++++++++++++++++ 4 files changed, 110 insertions(+), 19 deletions(-) diff --git a/dbt_meshify/cli.py b/dbt_meshify/cli.py index c29076a..7a62408 100644 --- a/dbt_meshify/cli.py +++ b/dbt_meshify/cli.py @@ -24,12 +24,14 @@ projects_dir = click.option( "--projects-dir", type=click.Path(exists=True), + default=None, help="The path to a directory containing multiple dbt projects. Directory must contain 2+ projects.", ) exclude_projects = click.option( "--exclude-projects", "-e", + cls=MultiOption, multiple=True, type=tuple, default=None, diff --git a/dbt_meshify/linker.py b/dbt_meshify/linker.py index d14db3f..c4f0470 100644 --- a/dbt_meshify/linker.py +++ b/dbt_meshify/linker.py @@ -131,7 +131,10 @@ def _package_dependencies( project (Project B) imports Project A and references the model. 
""" - if project.project_id not in other_project.installed_packages(): + if ( + project.project_id not in other_project.installed_packages() + and other_project.project_id not in project.installed_packages() + ): return set() # find which models are in both manifests @@ -158,7 +161,7 @@ def _package_dependencies( for child in other_project.manifest.child_map[project.model_relation_names[relation]] ] - return { + forward_dependencies = { ProjectDependency( upstream_resource=child['upstream_resource'], upstream_project_name=project.name, @@ -169,6 +172,43 @@ def _package_dependencies( for child in package_children } + # find which models are in both manifests + backward_relations = self._find_relation_dependencies( + source_relations={ + model.relation_name + for model in other_project.models.values() + if model.relation_name is not None + }, + target_relations={ + model.relation_name + for model in project.models.values() + if model.relation_name is not None + }, + ) + + # find the children of the shared models in the downstream project + backward_package_children = [ + { + 'upstream_resource': other_project.model_relation_names[relation], + 'downstream_resource': child, + } + for relation in backward_relations + for child in project.manifest.child_map[other_project.model_relation_names[relation]] + ] + + backward_dependencies = { + ProjectDependency( + upstream_resource=child['upstream_resource'], + upstream_project_name=other_project.name, + downstream_resource=child['downstream_resource'], + downstream_project_name=project.name, + type=ProjectDependencyType.Package, + ) + for child in backward_package_children + } + + return forward_dependencies | backward_dependencies + def dependencies( self, project: Union[BaseDbtProject, DbtProject], diff --git a/dbt_meshify/main.py b/dbt_meshify/main.py index 3bc7abc..3054728 100644 --- a/dbt_meshify/main.py +++ b/dbt_meshify/main.py @@ -84,25 +84,37 @@ def connect( dbt_projects = [ DbtProject.from_directory(project_path, 
read_catalog) for project_path in project_paths ] - dbt_project_combinations = [combo for combo in combinations(dbt_projects, 2)] - for dbt_project_combo in dbt_project_combinations: - project_map = {project.name: project for project in dbt_project_combo} - for dependency in linker.dependencies(dbt_project_combo[0], dbt_project_combo[1]): - logger.info( - f"Found dependency between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}: {dependency}" - ) - try: - linker.resolve_dependency( - dependency, - project_map[dependency.upstream_project_name], - project_map[dependency.downstream_project_name], - ) - except Exception as e: - raise FatalMeshifyException(f"Error resolving dependency : {dependency} {e}") if projects_dir: - # TODO: Implement this -- glob the directory for dbt_project.yml files, read projects from there - dbt_projects = [] + dbt_project_paths = [path.parent for path in Path(projects_dir).glob("**/dbt_project.yml")] + all_dbt_projects = [ + DbtProject.from_directory(project_path, read_catalog) + for project_path in dbt_project_paths + ] + dbt_projects = [ + project for project in all_dbt_projects if project.name not in exclude_projects + ] + + dbt_project_combinations = [combo for combo in combinations(dbt_projects, 2)] + for dbt_project_combo in dbt_project_combinations: + project_map = {project.name: project for project in dbt_project_combo} + dependencies = linker.dependencies(dbt_project_combo[0], dbt_project_combo[1]) + if len(dependencies) == 0: + logger.info( + f"No dependencies found between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}" + ) + for dependency in linker.dependencies(dbt_project_combo[0], dbt_project_combo[1]): + logger.info( + f"Found dependency between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}: {dependency}" + ) + try: + linker.resolve_dependency( + dependency, + project_map[dependency.upstream_project_name], + project_map[dependency.downstream_project_name], + ) + except Exception as e: 
+ raise FatalMeshifyException(f"Error resolving dependency : {dependency} {e}") @@ cli.command( diff --git a/tests/integration/test_connect_command.py b/tests/integration/test_connect_command.py index 0c877c3..1f16fdb 100644 --- a/tests/integration/test_connect_command.py +++ b/tests/integration/test_connect_command.py @@ -75,3 +75,40 @@ def test_connect_package(self): teardown_test_project(copy_producer_project) teardown_test_project(copy_package_consumer_project) + + def test_packages_dir_exclude_packages(self): + # copy all three packages into a subdirectory to ensure no repeat project names in one directory + copy_producer_project = "test-projects/source-hack/subdir/src_proj_a" + copy_source_consumer_project = "test-projects/source-hack/subdir/src_proj_b" + copy_package_consumer_project = "test-projects/source-hack/subdir/dest_proj_a" + setup_test_project(producer_project, copy_producer_project) + setup_test_project(source_consumer_project, copy_source_consumer_project) + setup_test_project(package_consumer_project, copy_package_consumer_project) + runner = CliRunner() + result = runner.invoke( + connect, + [ + "--projects-dir", + "test-projects/source-hack/subdir", + "--exclude-projects", + "src_proj_b", + ], + ) + + assert result.exit_code == 0 + + # assert that the source is replaced with a ref + x_proj_ref = "{{ ref('src_proj_a', 'shared_model') }}" + child_sql = ( + Path(copy_package_consumer_project) / "models" / "downstream_model.sql" + ).read_text() + assert x_proj_ref in child_sql + + # assert that the dependencies yml was created with a pointer to the upstream project + assert ( + "src_proj_a" in (Path(copy_package_consumer_project) / "dependencies.yml").read_text() + ) + + teardown_test_project(copy_producer_project) + teardown_test_project(copy_package_consumer_project) + teardown_test_project(copy_source_consumer_project) From 3c8f9140432df80892610951a9445cf4db8cc3b1 Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Tue, 25 Jul 2023 14:02:27 -0500 
Subject: [PATCH 10/19] rename file --- dbt_meshify/linker.py | 2 +- dbt_meshify/main.py | 2 +- .../storage/{dbt_project_creator.py => dbt_project_editors.py} | 0 tests/integration/test_subproject_creator.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename dbt_meshify/storage/{dbt_project_creator.py => dbt_project_editors.py} (100%) diff --git a/dbt_meshify/linker.py b/dbt_meshify/linker.py index c4f0470..1b92861 100644 --- a/dbt_meshify/linker.py +++ b/dbt_meshify/linker.py @@ -7,7 +7,7 @@ from dbt_meshify.dbt_projects import BaseDbtProject, DbtProject from dbt_meshify.exceptions import FatalMeshifyException, FileEditorException -from dbt_meshify.storage.dbt_project_creator import DbtProjectEditor, YMLOperationType +from dbt_meshify.storage.dbt_project_editors import DbtProjectEditor, YMLOperationType from dbt_meshify.storage.file_content_editors import DbtMeshConstructor diff --git a/dbt_meshify/main.py b/dbt_meshify/main.py index 3054728..7f7f346 100644 --- a/dbt_meshify/main.py +++ b/dbt_meshify/main.py @@ -9,7 +9,7 @@ from dbt.contracts.graph.unparsed import Owner from loguru import logger -from dbt_meshify.storage.dbt_project_creator import DbtSubprojectCreator +from dbt_meshify.storage.dbt_project_editors import DbtSubprojectCreator from .cli import ( TupleCompatibleCommand, diff --git a/dbt_meshify/storage/dbt_project_creator.py b/dbt_meshify/storage/dbt_project_editors.py similarity index 100% rename from dbt_meshify/storage/dbt_project_creator.py rename to dbt_meshify/storage/dbt_project_editors.py diff --git a/tests/integration/test_subproject_creator.py b/tests/integration/test_subproject_creator.py index d090bf6..ddc998e 100644 --- a/tests/integration/test_subproject_creator.py +++ b/tests/integration/test_subproject_creator.py @@ -6,7 +6,7 @@ from dbt_meshify.dbt import Dbt from dbt_meshify.dbt_projects import DbtProject -from dbt_meshify.storage.dbt_project_creator import DbtSubprojectCreator +from dbt_meshify.storage.dbt_project_editors 
import DbtSubprojectCreator from dbt_meshify.storage.file_content_editors import DbtMeshConstructor test_project_profile = yaml.safe_load( From 9ab2bc1bd87d9a6e4d67c9ddcf56ded91063b953 Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Tue, 25 Jul 2023 14:03:27 -0500 Subject: [PATCH 11/19] Revert "remove dest_proj_b -- it can't compile, and it's not used in any tests at the moment" This reverts commit 68b897d0fb47beb2a85e173b508f7b24daa8ad0c. --- .../source-hack/dest_proj_b/.gitignore | 4 ++ .../source-hack/dest_proj_b/.user.yml | 1 + .../source-hack/dest_proj_b/README.md | 15 ++++++++ .../source-hack/dest_proj_b/analyses/.gitkeep | 0 .../source-hack/dest_proj_b/dbt_project.yml | 38 +++++++++++++++++++ .../source-hack/dest_proj_b/macros/.gitkeep | 0 .../dest_proj_b/models/downstream_model.sql | 8 ++++ .../source-hack/dest_proj_b/profiles.yml | 7 ++++ .../source-hack/dest_proj_b/seeds/.gitkeep | 0 .../dest_proj_b/snapshots/.gitkeep | 0 .../source-hack/dest_proj_b/tests/.gitkeep | 0 11 files changed, 73 insertions(+) create mode 100644 test-projects/source-hack/dest_proj_b/.gitignore create mode 100644 test-projects/source-hack/dest_proj_b/.user.yml create mode 100644 test-projects/source-hack/dest_proj_b/README.md create mode 100644 test-projects/source-hack/dest_proj_b/analyses/.gitkeep create mode 100644 test-projects/source-hack/dest_proj_b/dbt_project.yml create mode 100644 test-projects/source-hack/dest_proj_b/macros/.gitkeep create mode 100644 test-projects/source-hack/dest_proj_b/models/downstream_model.sql create mode 100644 test-projects/source-hack/dest_proj_b/profiles.yml create mode 100644 test-projects/source-hack/dest_proj_b/seeds/.gitkeep create mode 100644 test-projects/source-hack/dest_proj_b/snapshots/.gitkeep create mode 100644 test-projects/source-hack/dest_proj_b/tests/.gitkeep diff --git a/test-projects/source-hack/dest_proj_b/.gitignore b/test-projects/source-hack/dest_proj_b/.gitignore new file mode 100644 index 0000000..49f147c --- /dev/null 
+++ b/test-projects/source-hack/dest_proj_b/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/test-projects/source-hack/dest_proj_b/.user.yml b/test-projects/source-hack/dest_proj_b/.user.yml new file mode 100644 index 0000000..9873cdd --- /dev/null +++ b/test-projects/source-hack/dest_proj_b/.user.yml @@ -0,0 +1 @@ +id: 9d149f56-cf27-4d94-815c-01a7bee0e8ca diff --git a/test-projects/source-hack/dest_proj_b/README.md b/test-projects/source-hack/dest_proj_b/README.md new file mode 100644 index 0000000..7874ac8 --- /dev/null +++ b/test-projects/source-hack/dest_proj_b/README.md @@ -0,0 +1,15 @@ +Welcome to your new dbt project! + +### Using the starter project + +Try running the following commands: +- dbt run +- dbt test + + +### Resources: +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support +- Find [dbt events](https://events.getdbt.com) near you +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/test-projects/source-hack/dest_proj_b/analyses/.gitkeep b/test-projects/source-hack/dest_proj_b/analyses/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/test-projects/source-hack/dest_proj_b/dbt_project.yml b/test-projects/source-hack/dest_proj_b/dbt_project.yml new file mode 100644 index 0000000..70ee64e --- /dev/null +++ b/test-projects/source-hack/dest_proj_b/dbt_project.yml @@ -0,0 +1,38 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'dest_proj_b' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. 
+profile: 'dest_proj_b' + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views. These settings can be overridden in the individual model +# files using the `{{ config(...) }}` macro. +models: + dest_proj_b: + # Config indicated by + and applies to all files under models/example/ + example: + +materialized: view diff --git a/test-projects/source-hack/dest_proj_b/macros/.gitkeep b/test-projects/source-hack/dest_proj_b/macros/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/test-projects/source-hack/dest_proj_b/models/downstream_model.sql b/test-projects/source-hack/dest_proj_b/models/downstream_model.sql new file mode 100644 index 0000000..df8f32c --- /dev/null +++ b/test-projects/source-hack/dest_proj_b/models/downstream_model.sql @@ -0,0 +1,8 @@ +with + +upstream as ( + select * from {{ ref('src_proj_a', 'shared_model') }} +) + +select * from upstream +where colleague = 'grace' diff --git a/test-projects/source-hack/dest_proj_b/profiles.yml b/test-projects/source-hack/dest_proj_b/profiles.yml new file mode 100644 index 0000000..11b83df --- /dev/null +++ b/test-projects/source-hack/dest_proj_b/profiles.yml @@ -0,0 +1,7 @@ +dest_proj_b: + outputs: + dev: + path: ../src_proj_a/database.db + threads: 2 + type: duckdb + target: dev diff --git 
a/test-projects/source-hack/dest_proj_b/seeds/.gitkeep b/test-projects/source-hack/dest_proj_b/seeds/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/test-projects/source-hack/dest_proj_b/snapshots/.gitkeep b/test-projects/source-hack/dest_proj_b/snapshots/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/test-projects/source-hack/dest_proj_b/tests/.gitkeep b/test-projects/source-hack/dest_proj_b/tests/.gitkeep new file mode 100644 index 0000000..e69de29 From db236e8c8d0efa66fee961280c99587a11f4c8a3 Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Thu, 27 Jul 2023 08:37:05 -0500 Subject: [PATCH 12/19] edits from code review --- dbt_meshify/linker.py | 16 +--------------- dbt_meshify/main.py | 2 +- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/dbt_meshify/linker.py b/dbt_meshify/linker.py index 1b92861..a5f3785 100644 --- a/dbt_meshify/linker.py +++ b/dbt_meshify/linker.py @@ -172,27 +172,13 @@ def _package_dependencies( for child in package_children } - # find which models are in both manifests - backward_relations = self._find_relation_dependencies( - source_relations={ - model.relation_name - for model in other_project.models.values() - if model.relation_name is not None - }, - target_relations={ - model.relation_name - for model in project.models.values() - if model.relation_name is not None - }, - ) - # find the children of the shared models in the downstream project backward_package_children = [ { 'upstream_resource': other_project.model_relation_names[relation], 'downstream_resource': child, } - for relation in backward_relations + for relation in relations for child in project.manifest.child_map[other_project.model_relation_names[relation]] ] diff --git a/dbt_meshify/main.py b/dbt_meshify/main.py index 7f7f346..e62a27f 100644 --- a/dbt_meshify/main.py +++ b/dbt_meshify/main.py @@ -103,7 +103,7 @@ def connect( logger.info( f"No dependencies found between {dbt_project_combo[0].name} and 
{dbt_project_combo[1].name}" ) - for dependency in linker.dependencies(dbt_project_combo[0], dbt_project_combo[1]): + for dependency in dependencies: logger.info( f"Found dependency between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}: {dependency}" ) From 3b055ecaf84eba00c22cafd4f14689b03aa7d2ac Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Thu, 27 Jul 2023 08:38:39 -0500 Subject: [PATCH 13/19] remove old comment --- dbt_meshify/linker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbt_meshify/linker.py b/dbt_meshify/linker.py index a5f3785..551bdd9 100644 --- a/dbt_meshify/linker.py +++ b/dbt_meshify/linker.py @@ -234,7 +234,7 @@ def resolve_dependency( node=upstream_manifest_entry, # type: ignore catalog=upstream_catalog_entry, ) - # upstream_editor = DbtProjectEditor(upstream_project) + downstream_mesh_constructor = DbtMeshConstructor( project_path=downstream_project.path, node=downstream_manifest_entry, # type: ignore From 335db9d7a47355894ecbe865f4ae2f0316a568e2 Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Mon, 7 Aug 2023 16:26:35 -0500 Subject: [PATCH 14/19] remove whitespace for help text --- dbt_meshify/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dbt_meshify/main.py b/dbt_meshify/main.py index e62a27f..eb62ac1 100644 --- a/dbt_meshify/main.py +++ b/dbt_meshify/main.py @@ -61,9 +61,7 @@ def connect( project_paths: tuple, projects_dir: Path, exclude_projects: List[str], read_catalog: bool ): """ - Connects multiple dbt projects together by adding all necessary dbt Mesh constructs - """ if project_paths and projects_dir: raise click.BadOptionUsage( From 4e857848a035d92976486344068433faf446aa1d Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Mon, 7 Aug 2023 16:46:12 -0500 Subject: [PATCH 15/19] pool set of dependencies before resolving --- dbt_meshify/main.py | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/dbt_meshify/main.py
b/dbt_meshify/main.py index eb62ac1..2deb751 100644 --- a/dbt_meshify/main.py +++ b/dbt_meshify/main.py @@ -93,26 +93,37 @@ def connect( project for project in all_dbt_projects if project.name not in exclude_projects ] + project_map = {project.name: project for project in dbt_projects} dbt_project_combinations = [combo for combo in combinations(dbt_projects, 2)] + all_dependencies = set() for dbt_project_combo in dbt_project_combinations: - project_map = {project.name: project for project in dbt_project_combo} dependencies = linker.dependencies(dbt_project_combo[0], dbt_project_combo[1]) if len(dependencies) == 0: logger.info( f"No dependencies found between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}" ) - for dependency in dependencies: + else: logger.info( - f"Found dependency between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}: {dependency}" + f"Found {len(dependencies)} dependencies between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}" ) - try: - linker.resolve_dependency( - dependency, - project_map[dependency.upstream_project_name], - project_map[dependency.downstream_project_name], - ) - except Exception as e: - raise FatalMeshifyException(f"Error resolving dependency : {dependency} {e}") + all_dependencies.update(dependencies) + if len(all_dependencies) == 0: + logger.info("No dependencies found between any of the projects") + return + else: + logger.info(f"Found {len(all_dependencies)} unique dependencies between all projects.") + for dependency in all_dependencies: + logger.info( + f"Resolving dependency between {dependency.upstream_resource} and {dependency.downstream_resource}" + ) + try: + linker.resolve_dependency( + dependency, + project_map[dependency.upstream_project_name], + project_map[dependency.downstream_project_name], + ) + except Exception as e: + raise FatalMeshifyException(f"Error resolving dependency : {dependency} {e}") @cli.command( From a5034e88e7d947ff8d7592d83148a0fbce0295ef Mon Sep 17 
00:00:00 2001 From: Dave Connors Date: Mon, 7 Aug 2023 16:49:45 -0500 Subject: [PATCH 16/19] grammar ha ha --- dbt_meshify/main.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dbt_meshify/main.py b/dbt_meshify/main.py index 2deb751..abecefd 100644 --- a/dbt_meshify/main.py +++ b/dbt_meshify/main.py @@ -103,15 +103,17 @@ def connect( f"No dependencies found between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}" ) else: + noun = "dependency" if len(dependencies) == 1 else "dependencies" logger.info( - f"Found {len(dependencies)} dependencies between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}" + f"Found {len(dependencies)} {noun} between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}" ) all_dependencies.update(dependencies) if len(all_dependencies) == 0: logger.info("No dependencies found between any of the projects") return else: - logger.info(f"Found {len(all_dependencies)} unique dependencies between all projects.") + noun = "dependency" if len(all_dependencies) == 1 else "dependencies" + logger.info(f"Found {len(all_dependencies)} unique {noun} between all projects.") for dependency in all_dependencies: logger.info( f"Resolving dependency between {dependency.upstream_resource} and {dependency.downstream_resource}" From 8b4c1a3d0f0554a951a7819b26eec96902472c67 Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Mon, 7 Aug 2023 17:10:41 -0500 Subject: [PATCH 17/19] use fixture to clean up first two tests --- tests/integration/test_connect_command.py | 81 ++++++++++++----------- 1 file changed, 43 insertions(+), 38 deletions(-) diff --git a/tests/integration/test_connect_command.py b/tests/integration/test_connect_command.py index 1f16fdb..48cfe90 100644 --- a/tests/integration/test_connect_command.py +++ b/tests/integration/test_connect_command.py @@ -1,26 +1,38 @@ from pathlib import Path -import yaml +import pytest from click.testing import CliRunner from dbt_meshify.main import connect from 
tests.dbt_project_utils import setup_test_project, teardown_test_project -producer_project = "test-projects/source-hack/src_proj_a" -source_consumer_project = "test-projects/source-hack/src_proj_b" -package_consumer_project = "test-projects/source-hack/dest_proj_a" +producer_project_path = "test-projects/source-hack/src_proj_a" +source_consumer_project_path = "test-projects/source-hack/src_proj_b" +package_consumer_project_path = "test-projects/source-hack/dest_proj_a" +copy_producer_project_path = producer_project_path + "_copy" +copy_source_consumer_project_path = source_consumer_project_path + "_copy" +copy_package_consumer_project_path = package_consumer_project_path + "_copy" + + +@pytest.fixture +def producer_project(): + setup_test_project(producer_project_path, copy_producer_project_path) + setup_test_project(source_consumer_project_path, copy_source_consumer_project_path) + setup_test_project(package_consumer_project_path, copy_package_consumer_project_path) + # yield to the test. We'll come back here after the test returns. 
+ yield + + teardown_test_project(copy_producer_project_path) + teardown_test_project(copy_source_consumer_project_path) + teardown_test_project(copy_package_consumer_project_path) class TestConnectCommand: - def test_connect_source_hack(self): - copy_producer_project = producer_project + "_copy" - copy_source_consumer_project = source_consumer_project + "_copy" - setup_test_project(producer_project, copy_producer_project) - setup_test_project(source_consumer_project, copy_source_consumer_project) + def test_connect_source_hack(self, producer_project): runner = CliRunner() result = runner.invoke( connect, - ["--project-paths", copy_producer_project, copy_source_consumer_project], + ["--project-paths", copy_producer_project_path, copy_source_consumer_project_path], ) assert result.exit_code == 0 @@ -29,7 +41,7 @@ def test_connect_source_hack(self): x_proj_ref = "{{ ref('src_proj_a', 'shared_model') }}" source_func = "{{ source('src_proj_a', 'shared_model') }}" child_sql = ( - Path(copy_source_consumer_project) / "models" / "downstream_model.sql" + Path(copy_source_consumer_project_path) / "models" / "downstream_model.sql" ).read_text() assert x_proj_ref in child_sql assert source_func not in child_sql @@ -37,26 +49,20 @@ def test_connect_source_hack(self): # assert that the source was deleted # may want to add some nuance in the future for cases where we delete a single source out of a set assert not ( - Path(copy_source_consumer_project) / "models" / "staging" / "_sources.yml" + Path(copy_source_consumer_project_path) / "models" / "staging" / "_sources.yml" ).exists() # assert that the dependecies yml was created with a pointer to the upstream project assert ( - "src_proj_a" in (Path(copy_source_consumer_project) / "dependencies.yml").read_text() + "src_proj_a" + in (Path(copy_source_consumer_project_path) / "dependencies.yml").read_text() ) - teardown_test_project(copy_producer_project) - teardown_test_project(copy_source_consumer_project) - - def 
test_connect_package(self): - copy_producer_project = producer_project + "_copy" - copy_package_consumer_project = package_consumer_project + "_copy" - setup_test_project(producer_project, copy_producer_project) - setup_test_project(package_consumer_project, copy_package_consumer_project) + def test_connect_package(self, producer_project): runner = CliRunner() result = runner.invoke( connect, - ["--project-paths", copy_producer_project, copy_package_consumer_project], + ["--project-paths", copy_producer_project_path, copy_package_consumer_project_path], ) assert result.exit_code == 0 @@ -64,26 +70,24 @@ def test_connect_package(self): # assert that the source is replaced with a ref x_proj_ref = "{{ ref('src_proj_a', 'shared_model') }}" child_sql = ( - Path(copy_package_consumer_project) / "models" / "downstream_model.sql" + Path(copy_package_consumer_project_path) / "models" / "downstream_model.sql" ).read_text() assert x_proj_ref in child_sql # assert that the dependecies yml was created with a pointer to the upstream project assert ( - "src_proj_a" in (Path(copy_package_consumer_project) / "dependencies.yml").read_text() + "src_proj_a" + in (Path(copy_package_consumer_project_path) / "dependencies.yml").read_text() ) - teardown_test_project(copy_producer_project) - teardown_test_project(copy_package_consumer_project) - def test_packages_dir_exclude_packages(self): # copy all three packages into a subdirectory to ensure no repeat project names in one directory - copy_producer_project = "test-projects/source-hack/subdir/src_proj_a" - copy_source_consumer_project = "test-projects/source-hack/subdir/src_proj_b" - copy_package_consumer_project = "test-projects/source-hack/subdir/dest_proj_a" - setup_test_project(producer_project, copy_producer_project) - setup_test_project(source_consumer_project, copy_source_consumer_project) - setup_test_project(package_consumer_project, copy_package_consumer_project) + subdir_producer_project = 
"test-projects/source-hack/subdir/src_proj_a" + subdir_source_consumer_project = "test-projects/source-hack/subdir/src_proj_b" + subdir_package_consumer_project = "test-projects/source-hack/subdir/dest_proj_a" + setup_test_project(producer_project_path, subdir_producer_project) + setup_test_project(source_consumer_project_path, subdir_source_consumer_project) + setup_test_project(package_consumer_project_path, subdir_package_consumer_project) runner = CliRunner() result = runner.invoke( connect, @@ -100,15 +104,16 @@ def test_packages_dir_exclude_packages(self): # assert that the source is replaced with a ref x_proj_ref = "{{ ref('src_proj_a', 'shared_model') }}" child_sql = ( - Path(copy_package_consumer_project) / "models" / "downstream_model.sql" + Path(subdir_package_consumer_project) / "models" / "downstream_model.sql" ).read_text() assert x_proj_ref in child_sql # assert that the dependecies yml was created with a pointer to the upstream project assert ( - "src_proj_a" in (Path(copy_package_consumer_project) / "dependencies.yml").read_text() + "src_proj_a" + in (Path(subdir_package_consumer_project) / "dependencies.yml").read_text() ) - teardown_test_project(copy_producer_project) - teardown_test_project(copy_package_consumer_project) - teardown_test_project(copy_source_consumer_project) + teardown_test_project(subdir_producer_project) + teardown_test_project(subdir_package_consumer_project) + teardown_test_project(subdir_source_consumer_project) From 5af261f8a7df3f4d8cdf73ce0f3f50d262714894 Mon Sep 17 00:00:00 2001 From: dave-connors-3 <73915542+dave-connors-3@users.noreply.github.com> Date: Tue, 8 Aug 2023 08:17:41 -0500 Subject: [PATCH 18/19] Update dbt_meshify/main.py Co-authored-by: Nicholas A. 
Yager --- dbt_meshify/main.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/dbt_meshify/main.py b/dbt_meshify/main.py index abecefd..2485fa0 100644 --- a/dbt_meshify/main.py +++ b/dbt_meshify/main.py @@ -102,12 +102,13 @@ def connect( logger.info( f"No dependencies found between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}" ) - else: - noun = "dependency" if len(dependencies) == 1 else "dependencies" - logger.info( - f"Found {len(dependencies)} {noun} between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}" - ) - all_dependencies.update(dependencies) + continue + + noun = "dependency" if len(dependencies) == 1 else "dependencies" + logger.info( + f"Found {len(dependencies)} {noun} between {dbt_project_combo[0].name} and {dbt_project_combo[1].name}" + ) + all_dependencies.update(dependencies) if len(all_dependencies) == 0: logger.info("No dependencies found between any of the projects") return From d29b4310f12cb0d7330168f83f9b3d954802426d Mon Sep 17 00:00:00 2001 From: dave-connors-3 <73915542+dave-connors-3@users.noreply.github.com> Date: Tue, 8 Aug 2023 08:18:06 -0500 Subject: [PATCH 19/19] Apply suggestions from code review Co-authored-by: Nicholas A. 
Yager --- dbt_meshify/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbt_meshify/main.py b/dbt_meshify/main.py index 2485fa0..4999148 100644 --- a/dbt_meshify/main.py +++ b/dbt_meshify/main.py @@ -112,9 +112,9 @@ def connect( if len(all_dependencies) == 0: logger.info("No dependencies found between any of the projects") return - else: - noun = "dependency" if len(all_dependencies) == 1 else "dependencies" - logger.info(f"Found {len(all_dependencies)} unique {noun} between all projects.") + + noun = "dependency" if len(all_dependencies) == 1 else "dependencies" + logger.info(f"Found {len(all_dependencies)} unique {noun} between all projects.") for dependency in all_dependencies: logger.info( f"Resolving dependency between {dependency.upstream_resource} and {dependency.downstream_resource}"