From 6687ccc7cdf7b16ab5dbaab57d8aae5f9905b21a Mon Sep 17 00:00:00 2001 From: Kshitij Aranke Date: Tue, 18 Jun 2024 21:27:35 +0100 Subject: [PATCH 1/8] Add `get_catalog_for_single_relation` macro and capability (#231) --- .../unreleased/Features-20240528-013623.yaml | 6 ++ .../test_get_catalog_for_single_relation.py | 87 +++++++++++++++++++ dbt/adapters/base/impl.py | 15 +++- dbt/adapters/capability.py | 7 +- dbt/adapters/sql/impl.py | 9 +- .../macros/adapters/metadata.sql | 9 ++ pyproject.toml | 5 +- 7 files changed, 132 insertions(+), 6 deletions(-) create mode 100644 .changes/unreleased/Features-20240528-013623.yaml create mode 100644 dbt-tests-adapter/dbt/tests/adapter/basic/test_get_catalog_for_single_relation.py diff --git a/.changes/unreleased/Features-20240528-013623.yaml b/.changes/unreleased/Features-20240528-013623.yaml new file mode 100644 index 000000000..2c01e9728 --- /dev/null +++ b/.changes/unreleased/Features-20240528-013623.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Add get_catalog_for_single_relation macro and capability to enable adapters to optimize catalog generation +time: 2024-05-28T01:36:23.588295+01:00 +custom: + Author: aranke + Issue: "231" diff --git a/dbt-tests-adapter/dbt/tests/adapter/basic/test_get_catalog_for_single_relation.py b/dbt-tests-adapter/dbt/tests/adapter/basic/test_get_catalog_for_single_relation.py new file mode 100644 index 000000000..78055cc59 --- /dev/null +++ b/dbt-tests-adapter/dbt/tests/adapter/basic/test_get_catalog_for_single_relation.py @@ -0,0 +1,87 @@ +import pytest + +from dbt.tests.util import run_dbt, get_connection + +models__my_table_model_sql = """ +select * from {{ ref('my_seed') }} +""" + + +models__my_view_model_sql = """ +{{ + config( + materialized='view', + ) +}} + +select * from {{ ref('my_seed') }} +""" + +seed__my_seed_csv = """id,first_name,email,ip_address,updated_at +1,Larry,lking0@miitbeian.gov.cn,69.135.206.194,2008-09-12 19:08:31 +""" + + +class BaseGetCatalogForSingleRelation: + 
@pytest.fixture(scope="class") + def project_config_update(self): + return {"name": "get_catalog_for_single_relation"} + + @pytest.fixture(scope="class") + def seeds(self): + return { + "my_seed.csv": seed__my_seed_csv, + } + + @pytest.fixture(scope="class") + def models(self): + return { + "my_view_model.sql": models__my_view_model_sql, + "my_table_model.sql": models__my_table_model_sql, + } + + @pytest.fixture(scope="class") + def expected_catalog_my_seed(self, project): + raise NotImplementedError( + "To use this test, please implement `get_catalog_for_single_relation`, inherited from `SQLAdapter`." + ) + + @pytest.fixture(scope="class") + def expected_catalog_my_model(self, project): + raise NotImplementedError( + "To use this test, please implement `get_catalog_for_single_relation`, inherited from `SQLAdapter`." + ) + + def get_relation_for_identifier(self, project, identifier): + return project.adapter.get_relation( + database=project.database, + schema=project.test_schema, + identifier=identifier, + ) + + def test_get_catalog_for_single_relation( + self, project, expected_catalog_my_seed, expected_catalog_my_view_model + ): + results = run_dbt(["seed"]) + assert len(results) == 1 + + my_seed_relation = self.get_relation_for_identifier(project, "my_seed") + + with get_connection(project.adapter): + actual_catalog_my_seed = project.adapter.get_catalog_for_single_relation( + my_seed_relation + ) + + assert actual_catalog_my_seed == expected_catalog_my_seed + + results = run_dbt(["run"]) + assert len(results) == 2 + + my_view_model_relation = self.get_relation_for_identifier(project, "my_view_model") + + with get_connection(project.adapter): + actual_catalog_my_view_model = project.adapter.get_catalog_for_single_relation( + my_view_model_relation + ) + + assert actual_catalog_my_view_model == expected_catalog_my_view_model diff --git a/dbt/adapters/base/impl.py b/dbt/adapters/base/impl.py index f58f8aba0..1fdbce488 100644 --- a/dbt/adapters/base/impl.py +++ 
b/dbt/adapters/base/impl.py @@ -1,10 +1,10 @@ import abc +import time from concurrent.futures import as_completed, Future from contextlib import contextmanager from datetime import datetime from enum import Enum from multiprocessing.context import SpawnContext -import time from typing import ( Any, Callable, @@ -23,12 +23,15 @@ TYPE_CHECKING, ) +import pytz from dbt_common.clients.jinja import CallableMacroGenerator from dbt_common.contracts.constraints import ( ColumnLevelConstraint, ConstraintType, ModelLevelConstraint, ) +from dbt_common.contracts.metadata import CatalogTable +from dbt_common.events.functions import fire_event, warn_or_error from dbt_common.exceptions import ( DbtInternalError, DbtRuntimeError, @@ -38,14 +41,12 @@ NotImplementedError, UnexpectedNullError, ) -from dbt_common.events.functions import fire_event, warn_or_error from dbt_common.utils import ( AttrDict, cast_to_str, executor, filter_null_values, ) -import pytz from dbt.adapters.base.column import Column as BaseColumn from dbt.adapters.base.connections import ( @@ -222,6 +223,7 @@ class BaseAdapter(metaclass=AdapterMeta): - truncate_relation - rename_relation - get_columns_in_relation + - get_catalog_for_single_relation - get_column_schema_from_query - expand_column_types - list_relations_without_caching @@ -627,6 +629,13 @@ def get_columns_in_relation(self, relation: BaseRelation) -> List[BaseColumn]: """Get a list of the columns in the given Relation.""" raise NotImplementedError("`get_columns_in_relation` is not implemented for this adapter!") + @abc.abstractmethod + def get_catalog_for_single_relation(self, relation: BaseRelation) -> Optional[CatalogTable]: + """Get catalog information including table-level and column-level metadata for a single relation.""" + raise NotImplementedError( + "`get_catalog_for_single_relation` is not implemented for this adapter!" 
+ ) + @available.deprecated("get_columns_in_relation", lambda *a, **k: []) def get_columns_in_table(self, schema: str, identifier: str) -> List[BaseColumn]: """DEPRECATED: Get a list of the columns in the given table.""" diff --git a/dbt/adapters/capability.py b/dbt/adapters/capability.py index 305604c71..2bd491123 100644 --- a/dbt/adapters/capability.py +++ b/dbt/adapters/capability.py @@ -14,7 +14,12 @@ class Capability(str, Enum): """Indicates support for determining the time of the last table modification by querying database metadata.""" TableLastModifiedMetadataBatch = "TableLastModifiedMetadataBatch" - """Indicates support for performantly determining the time of the last table modification by querying database metadata in batch.""" + """Indicates support for performantly determining the time of the last table modification by querying database + metadata in batch.""" + + GetCatalogForSingleRelation = "GetCatalogForSingleRelation" + """Indicates support for getting catalog information including table-level and column-level metadata for a single + relation.""" class Support(str, Enum): diff --git a/dbt/adapters/sql/impl.py b/dbt/adapters/sql/impl.py index 8c6e0e8e4..91ca3ebbb 100644 --- a/dbt/adapters/sql/impl.py +++ b/dbt/adapters/sql/impl.py @@ -1,5 +1,6 @@ from typing import Any, List, Optional, Tuple, Type, TYPE_CHECKING +from dbt_common.contracts.metadata import CatalogTable from dbt_common.events.functions import fire_event from dbt.adapters.base import BaseAdapter, BaseRelation, available @@ -9,9 +10,9 @@ from dbt.adapters.exceptions import RelationTypeNullError from dbt.adapters.sql.connections import SQLConnectionManager - LIST_RELATIONS_MACRO_NAME = "list_relations_without_caching" GET_COLUMNS_IN_RELATION_MACRO_NAME = "get_columns_in_relation" +GET_CATALOG_FOR_SINGLE_RELATION_NAME = "get_catalog_for_single_relation" LIST_SCHEMAS_MACRO_NAME = "list_schemas" CHECK_SCHEMA_EXISTS_MACRO_NAME = "check_schema_exists" CREATE_SCHEMA_MACRO_NAME = 
"create_schema" @@ -41,6 +42,7 @@ class SQLAdapter(BaseAdapter): - get_catalog - list_relations_without_caching - get_columns_in_relation + - get_catalog_for_single_relation """ ConnectionManager: Type[SQLConnectionManager] @@ -158,6 +160,11 @@ def get_columns_in_relation(self, relation): GET_COLUMNS_IN_RELATION_MACRO_NAME, kwargs={"relation": relation} ) + def get_catalog_for_single_relation(self, relation: BaseRelation) -> Optional[CatalogTable]: + return self.execute_macro( + GET_CATALOG_FOR_SINGLE_RELATION_NAME, kwargs={"relation": relation} + ) + def create_schema(self, relation: BaseRelation) -> None: relation = relation.without_identifier() fire_event(SchemaCreation(relation=_make_ref_key_dict(relation))) diff --git a/dbt/include/global_project/macros/adapters/metadata.sql b/dbt/include/global_project/macros/adapters/metadata.sql index c8e8a4140..0aa7aabb4 100644 --- a/dbt/include/global_project/macros/adapters/metadata.sql +++ b/dbt/include/global_project/macros/adapters/metadata.sql @@ -77,6 +77,15 @@ 'list_relations_without_caching macro not implemented for adapter '+adapter.type()) }} {% endmacro %} +{% macro get_catalog_for_single_relation(relation) %} + {{ return(adapter.dispatch('get_catalog_for_single_relation', 'dbt')(relation)) }} +{% endmacro %} + +{% macro default__get_catalog_for_single_relation(relation) %} + {{ exceptions.raise_not_implemented( + 'get_catalog_for_single_relation macro not implemented for adapter '+adapter.type()) }} +{% endmacro %} + {% macro get_relations() %} {{ return(adapter.dispatch('get_relations', 'dbt')()) }} {% endmacro %} diff --git a/pyproject.toml b/pyproject.toml index a4b011a8f..e50aa63ab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ "Programming Language :: Python :: 3.12", ] dependencies = [ - "dbt-common<2.0", + "dbt-common>=1.3,<2.0", "pytz>=2015.7", # installed via dbt-common but used directly "agate>=1.0,<2.0", @@ -43,6 +43,9 @@ Changelog = 
"https://github.com/dbt-labs/dbt-adapters/blob/main/CHANGELOG.md" requires = ["hatchling"] build-backend = "hatchling.build" +[tool.hatch.metadata] +allow-direct-references = true + [tool.hatch.version] path = "dbt/adapters/__about__.py" From 9f504d3fc7a822ebeab7da1000ad78b9a7efc8b3 Mon Sep 17 00:00:00 2001 From: Github Build Bot Date: Tue, 18 Jun 2024 22:01:45 +0000 Subject: [PATCH 2/8] Bumping version to 1.3.0 and generate changelog --- .changes/1.3.0.md | 5 +++++ .changes/unreleased/Features-20240528-013623.yaml | 6 ------ CHANGELOG.md | 6 ++++++ dbt/adapters/__about__.py | 2 +- 4 files changed, 12 insertions(+), 7 deletions(-) create mode 100644 .changes/1.3.0.md delete mode 100644 .changes/unreleased/Features-20240528-013623.yaml diff --git a/.changes/1.3.0.md b/.changes/1.3.0.md new file mode 100644 index 000000000..dae1f819a --- /dev/null +++ b/.changes/1.3.0.md @@ -0,0 +1,5 @@ +## dbt-adapters 1.3.0 - June 18, 2024 + +### Features + +* Add get_catalog_for_single_relation macro and capability to enable adapters to optimize catalog generation diff --git a/.changes/unreleased/Features-20240528-013623.yaml b/.changes/unreleased/Features-20240528-013623.yaml deleted file mode 100644 index 2c01e9728..000000000 --- a/.changes/unreleased/Features-20240528-013623.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Features -body: Add get_catalog_for_single_relation macro and capability to enable adapters to optimize catalog generation -time: 2024-05-28T01:36:23.588295+01:00 -custom: - Author: aranke - Issue: "231" diff --git a/CHANGELOG.md b/CHANGELOG.md index 43c3937ec..5f7560f72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,12 @@ and is generated by [Changie](https://github.com/miniscruff/changie). 
* Update Clone test to reflect core change removing `deferred` attribute from nodes +## dbt-adapters 1.3.0 - June 18, 2024 + +### Features + +* Add get_catalog_for_single_relation macro and capability to enable adapters to optimize catalog generation + ## dbt-adapters 1.2.1 - May 21, 2024 ### Features diff --git a/dbt/adapters/__about__.py b/dbt/adapters/__about__.py index eb1d9a0ff..d28b3ddc3 100644 --- a/dbt/adapters/__about__.py +++ b/dbt/adapters/__about__.py @@ -1 +1 @@ -version = "1.2.1" +version = "1.3.0" From 3460543a155eeb4ccb749fe185d65c8f579dbd04 Mon Sep 17 00:00:00 2001 From: Github Build Bot Date: Tue, 18 Jun 2024 22:58:01 +0000 Subject: [PATCH 3/8] Bumping version to 1.9.0 and generate changelog --- .changes/1.9.0.md | 1 + CHANGELOG.md | 2 ++ dbt-tests-adapter/dbt/tests/__about__.py | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 .changes/1.9.0.md diff --git a/.changes/1.9.0.md b/.changes/1.9.0.md new file mode 100644 index 000000000..2f016563b --- /dev/null +++ b/.changes/1.9.0.md @@ -0,0 +1 @@ +## dbt-adapters 1.9.0 - June 18, 2024 diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f7560f72..016c13eec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html), and is generated by [Changie](https://github.com/miniscruff/changie). 
+## dbt-adapters 1.9.0 - June 18, 2024 + ## dbt-adapters 1.8.0 - May 09, 2024 ### Features diff --git a/dbt-tests-adapter/dbt/tests/__about__.py b/dbt-tests-adapter/dbt/tests/__about__.py index 6aaa73b80..7aba64097 100644 --- a/dbt-tests-adapter/dbt/tests/__about__.py +++ b/dbt-tests-adapter/dbt/tests/__about__.py @@ -1 +1 @@ -version = "1.8.0" +version = "1.9.0" From 46e470cbbe8cefc98b00a2f0cd7223c43c2be4d1 Mon Sep 17 00:00:00 2001 From: Kshitij Aranke Date: Wed, 19 Jun 2024 16:50:08 +0100 Subject: [PATCH 4/8] Make `get_catalog_for_single_relation` a concrete method in `BaseAdapter` (#240) --- dbt/adapters/base/impl.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dbt/adapters/base/impl.py b/dbt/adapters/base/impl.py index 1fdbce488..3b36022f3 100644 --- a/dbt/adapters/base/impl.py +++ b/dbt/adapters/base/impl.py @@ -629,7 +629,6 @@ def get_columns_in_relation(self, relation: BaseRelation) -> List[BaseColumn]: """Get a list of the columns in the given Relation.""" raise NotImplementedError("`get_columns_in_relation` is not implemented for this adapter!") - @abc.abstractmethod def get_catalog_for_single_relation(self, relation: BaseRelation) -> Optional[CatalogTable]: """Get catalog information including table-level and column-level metadata for a single relation.""" raise NotImplementedError( From ecf3e1d52bc31a4814f25fd67bc186bbb4ed132b Mon Sep 17 00:00:00 2001 From: Kshitij Aranke Date: Thu, 20 Jun 2024 17:41:48 +0100 Subject: [PATCH 5/8] Remove get_catalog_for_single_relation from sql/impl.py (#241) --- dbt/adapters/sql/impl.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/dbt/adapters/sql/impl.py b/dbt/adapters/sql/impl.py index 91ca3ebbb..8a8473f27 100644 --- a/dbt/adapters/sql/impl.py +++ b/dbt/adapters/sql/impl.py @@ -1,6 +1,5 @@ from typing import Any, List, Optional, Tuple, Type, TYPE_CHECKING -from dbt_common.contracts.metadata import CatalogTable from dbt_common.events.functions import fire_event from dbt.adapters.base import BaseAdapter, 
BaseRelation, available @@ -12,7 +11,6 @@ LIST_RELATIONS_MACRO_NAME = "list_relations_without_caching" GET_COLUMNS_IN_RELATION_MACRO_NAME = "get_columns_in_relation" -GET_CATALOG_FOR_SINGLE_RELATION_NAME = "get_catalog_for_single_relation" LIST_SCHEMAS_MACRO_NAME = "list_schemas" CHECK_SCHEMA_EXISTS_MACRO_NAME = "check_schema_exists" CREATE_SCHEMA_MACRO_NAME = "create_schema" @@ -160,11 +158,6 @@ def get_columns_in_relation(self, relation): GET_COLUMNS_IN_RELATION_MACRO_NAME, kwargs={"relation": relation} ) - def get_catalog_for_single_relation(self, relation: BaseRelation) -> Optional[CatalogTable]: - return self.execute_macro( - GET_CATALOG_FOR_SINGLE_RELATION_NAME, kwargs={"relation": relation} - ) - def create_schema(self, relation: BaseRelation) -> None: relation = relation.without_identifier() fire_event(SchemaCreation(relation=_make_ref_key_dict(relation))) From 10f197a09ebca9404e79710b49ec04ac71d23b99 Mon Sep 17 00:00:00 2001 From: Github Build Bot Date: Thu, 20 Jun 2024 19:17:15 +0000 Subject: [PATCH 6/8] Bumping version to 1.3.1 and generate changelog --- .changes/1.3.1.md | 1 + CHANGELOG.md | 2 ++ dbt/adapters/__about__.py | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 .changes/1.3.1.md diff --git a/.changes/1.3.1.md b/.changes/1.3.1.md new file mode 100644 index 000000000..b8ec73740 --- /dev/null +++ b/.changes/1.3.1.md @@ -0,0 +1 @@ +## dbt-adapters 1.3.1 - June 20, 2024 diff --git a/CHANGELOG.md b/CHANGELOG.md index 016c13eec..b06ec9f27 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ and is generated by [Changie](https://github.com/miniscruff/changie). 
* Update Clone test to reflect core change removing `deferred` attribute from nodes +## dbt-adapters 1.3.1 - June 20, 2024 + ## dbt-adapters 1.3.0 - June 18, 2024 ### Features diff --git a/dbt/adapters/__about__.py b/dbt/adapters/__about__.py index d28b3ddc3..35ec9a371 100644 --- a/dbt/adapters/__about__.py +++ b/dbt/adapters/__about__.py @@ -1 +1 @@ -version = "1.3.0" +version = "1.3.1" From f4880e57ae59215d6f4beedcaee290b5ffe22ae3 Mon Sep 17 00:00:00 2001 From: Github Build Bot Date: Thu, 20 Jun 2024 19:27:26 +0000 Subject: [PATCH 7/8] Bumping version to 1.9.1 and generate changelog --- .changes/1.9.1.md | 1 + CHANGELOG.md | 2 ++ dbt-tests-adapter/dbt/tests/__about__.py | 2 +- 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 .changes/1.9.1.md diff --git a/.changes/1.9.1.md b/.changes/1.9.1.md new file mode 100644 index 000000000..900e6b755 --- /dev/null +++ b/.changes/1.9.1.md @@ -0,0 +1 @@ +## dbt-adapters 1.9.1 - June 20, 2024 diff --git a/CHANGELOG.md b/CHANGELOG.md index b06ec9f27..9114bcd1d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html), and is generated by [Changie](https://github.com/miniscruff/changie). 
+## dbt-adapters 1.9.1 - June 20, 2024 + ## dbt-adapters 1.9.0 - June 18, 2024 ## dbt-adapters 1.8.0 - May 09, 2024 diff --git a/dbt-tests-adapter/dbt/tests/__about__.py b/dbt-tests-adapter/dbt/tests/__about__.py index 7aba64097..702279763 100644 --- a/dbt-tests-adapter/dbt/tests/__about__.py +++ b/dbt-tests-adapter/dbt/tests/__about__.py @@ -1 +1 @@ -version = "1.9.0" +version = "1.9.1" From 267cf5e31a132be6bf994c95ccce1d628043be64 Mon Sep 17 00:00:00 2001 From: Matthew McKnight <91097623+McKnight-42@users.noreply.github.com> Date: Thu, 20 Jun 2024 14:44:33 -0500 Subject: [PATCH 8/8] update user docs-issue workflow (#238) Co-authored-by: Mike Alfare <13974384+mikealfare@users.noreply.github.com> --- .github/workflows/docs-issue.yml | 41 ++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 .github/workflows/docs-issue.yml diff --git a/.github/workflows/docs-issue.yml b/.github/workflows/docs-issue.yml new file mode 100644 index 000000000..f49cf517c --- /dev/null +++ b/.github/workflows/docs-issue.yml @@ -0,0 +1,41 @@ +# **what?** +# Open an issue in docs.getdbt.com when an issue is labeled `user docs` and closed as completed + +# **why?** +# To reduce barriers for keeping docs up to date + +# **when?** +# When an issue is labeled `user docs` and is closed as completed. Can be labeled before or after the issue is closed. + + +name: Open issues in docs.getdbt.com repo when an issue is labeled +run-name: "Open an issue in docs.getdbt.com for issue #${{ github.event.issue.number }}" + +on: + issues: + types: [labeled, closed] + +defaults: + run: + shell: bash + +permissions: + issues: write # comments on issues + +jobs: + open_issues: + # we only want to run this when the issue is closed as completed and the label `user docs` has been assigned. 
+ # If this logic does not exist in this workflow, it runs the + # risk of duplication of issues being created due to merge and label both triggering this workflow to run and neither having + # generated the comment before the other runs. This lives here instead of the shared workflow because this is where we + # decide if it should run or not. + if: | + (github.event.issue.state == 'closed' && github.event.issue.state_reason == 'completed') && ( + (github.event.action == 'closed' && contains(github.event.issue.labels.*.name, 'user docs')) || + (github.event.action == 'labeled' && github.event.label.name == 'user docs')) + uses: dbt-labs/actions/.github/workflows/open-issue-in-repo.yml@main + with: + issue_repository: "dbt-labs/docs.getdbt.com" + issue_title: "Docs Changes Needed from ${{ github.event.repository.name }} Issue #${{ github.event.issue.number }}" + issue_body: "At a minimum, update body to include a link to the page on docs.getdbt.com requiring updates and what part(s) of the page you would like to see updated." + secrets: inherit