Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: Support top level properties in model_defaults #3696

Merged
merged 2 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions docs/reference/model_configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,85 @@ Configuration options for SQLMesh model properties. Supported by all model kinds

The SQLMesh project-level configuration must contain the `model_defaults` key and must specify a value for its `dialect` key. Other values are set automatically unless explicitly overridden in the model definition. Learn more about project-level configuration in the [configuration guide](../guides/configuration.md).

When `physical_properties`, `virtual_properties`, or `session_properties` are defined at both the project level and the model level, the two sets are merged key by key, with model-level values taking precedence. To unset a project-wide property for a specific model, set it to `None` in the model's `MODEL` DDL properties or in the `@model` decorator for Python models.

For example, with the following `model_defaults` configuration:

=== "YAML"

```yaml linenums="1"
model_defaults:
dialect: snowflake
start: 2022-01-01
physical_properties:
partition_expiration_days: 7
require_partition_filter: True
project_level_property: "value"
```

=== "Python"

```python linenums="1"
from sqlmesh.core.config import Config, ModelDefaultsConfig

config = Config(
model_defaults=ModelDefaultsConfig(
dialect="snowflake",
start="2022-01-01",
physical_properties={
"partition_expiration_days": 7,
"require_partition_filter": True,
"project_level_property": "value"
},
),
)
```

To override `partition_expiration_days`, add a new `creatable_type` property, and unset `project_level_property`, define the model as follows:

=== "SQL"

```sql linenums="1"
MODEL (
...,
physical_properties (
partition_expiration_days = 14,
creatable_type = TRANSIENT,
project_level_property = None,
)
);
```

=== "Python"

```python linenums="1"
@model(
...,
physical_properties={
"partition_expiration_days": 14,
"creatable_type": "TRANSIENT",
"project_level_property": None
},
)
```


The SQLMesh project-level `model_defaults` key supports the following options, described in the [general model properties](#general-model-properties) table above:

- kind
- dialect
- cron
- owner
- start
- table_format
- storage_format
- physical_properties (merged on a per-key basis)
- virtual_properties (merged on a per-key basis)
- session_properties (merged on a per-key basis)
- on_destructive_change (described [below](#incremental-models))
- audits (described [here](../concepts/audits.md#generic-audits))
- optimize_query
- validate_query


### Model Naming
Expand Down
5 changes: 5 additions & 0 deletions sqlmesh/core/config/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ class ModelDefaultsConfig(BaseConfig):
storage_format: The storage format used to store the physical table, only applicable in certain engines.
(eg. 'parquet', 'orc')
on_destructive_change: What should happen when a forward-only model requires a destructive schema change.
physical_properties: A key-value mapping of arbitrary properties that are applied to the model table / view in the physical layer.
virtual_properties: A key-value mapping of arbitrary properties that are applied to the model view in the virtual layer.
session_properties: A key-value mapping of properties specific to the target engine that are applied to the engine session.
audits: The audits to be applied globally to all models in the project.
optimize_query: Whether the SQL models should be optimized
"""
Expand All @@ -44,6 +47,8 @@ class ModelDefaultsConfig(BaseConfig):
table_format: t.Optional[str] = None
storage_format: t.Optional[str] = None
on_destructive_change: t.Optional[OnDestructiveChange] = None
physical_properties: t.Optional[t.Dict[str, t.Any]] = None
virtual_properties: t.Optional[t.Dict[str, t.Any]] = None
session_properties: t.Optional[t.Dict[str, t.Any]] = None
audits: t.Optional[t.List[FunctionCall]] = None
optimize_query: t.Optional[bool] = None
Expand Down
23 changes: 12 additions & 11 deletions sqlmesh/core/model/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -2113,9 +2113,8 @@ def _create_model(
) -> Model:
_validate_model_fields(klass, {"name", *kwargs} - {"grain", "table_properties"}, path)

kwargs["session_properties"] = _resolve_session_properties(
(defaults or {}).get("session_properties"), kwargs.get("session_properties")
)
for prop in ["session_properties", "physical_properties", "virtual_properties"]:
kwargs[prop] = _resolve_properties((defaults or {}).get(prop), kwargs.get(prop))

dialect = dialect or ""

Expand Down Expand Up @@ -2296,25 +2295,27 @@ def _split_sql_model_statements(
return query, sql_statements[:pos], sql_statements[pos + 1 :], on_virtual_update, inline_audits


def _resolve_session_properties(
def _resolve_properties(
default: t.Optional[t.Dict[str, t.Any]],
provided: t.Optional[exp.Expression | t.Dict[str, t.Any]],
) -> t.Optional[exp.Expression]:
if isinstance(provided, dict):
session_properties = {k: exp.Literal.string(k).eq(v) for k, v in provided.items()}
properties = {k: exp.Literal.string(k).eq(v) for k, v in provided.items()}
elif provided:
if isinstance(provided, exp.Paren):
provided = exp.Tuple(expressions=[provided.this])
session_properties = {expr.this.name: expr for expr in provided}
properties = {expr.this.name: expr for expr in provided}
else:
session_properties = {}
properties = {}

for k, v in (default or {}).items():
if k not in session_properties:
session_properties[k] = exp.Literal.string(k).eq(v)
if k not in properties:
properties[k] = exp.Literal.string(k).eq(v)
elif properties[k].expression.sql().lower() in {"none", "null"}:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is matching null here safe? Can that be an actual property value?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to match Python model properties, where `None` is rendered as `null` here.

del properties[k]

if session_properties:
return exp.Tuple(expressions=list(session_properties.values()))
if properties:
return exp.Tuple(expressions=list(properties.values()))

return None

Expand Down
61 changes: 55 additions & 6 deletions tests/core/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -1971,12 +1971,14 @@ def my_model(context, **kwargs):
assert m.depends_on == {'"foo"."table_name"'}


def test_python_model_with_session_properties():
def test_python_model_with_properties():
@model(
name="python_model_prop",
kind="full",
columns={"some_col": "int"},
session_properties={"some_string": "string_prop", "some_bool": True, "some_float": 1.0},
physical_properties={"partition_expiration_days": 7},
virtual_properties={"creatable_type": None},
)
def python_model_prop(context, **kwargs):
context.resolve_table("foo")
Expand All @@ -1989,7 +1991,14 @@ def python_model_prop(context, **kwargs):
"session_properties": {
"some_string": "default_string",
"default_value": "default_value",
}
},
"physical_properties": {
"partition_expiration_days": 13,
"creatable_type": "TRANSIENT",
},
"virtual_properties": {
"creatable_type": "SECURE",
},
},
)
assert m.session_properties == {
Expand All @@ -1999,6 +2008,13 @@ def python_model_prop(context, **kwargs):
"default_value": "default_value",
}

assert m.physical_properties == {
"partition_expiration_days": exp.convert(7),
"creatable_type": exp.convert("TRANSIENT"),
}

assert not m.virtual_properties


def test_python_models_returning_sql(assert_exp_eq) -> None:
config = Config(model_defaults=ModelDefaultsConfig(dialect="snowflake"))
Expand Down Expand Up @@ -3319,7 +3335,12 @@ def test_session_properties_on_model_and_project(sushi_context):
"some_bool": False,
"quoted_identifier": "value_you_wont_see",
"project_level_property": "project_property",
}
},
physical_properties={
"warehouse": "small",
"target_lag": "10 minutes",
},
virtual_properties={"creatable_type": "SECURE"},
)

model = load_sql_based_model(
Expand All @@ -3334,7 +3355,10 @@ def test_session_properties_on_model_and_project(sushi_context):
some_float = 0.1,
quoted_identifier = "quoted identifier",
unquoted_identifier = unquoted_identifier,
)
),
physical_properties (
target_lag = '1 hour'
),
);
SELECT a FROM tbl;
""",
Expand All @@ -3353,21 +3377,38 @@ def test_session_properties_on_model_and_project(sushi_context):
"project_level_property": "project_property",
}

assert model.physical_properties == {
"warehouse": exp.convert("small"),
"target_lag": exp.convert("1 hour"),
}

assert model.virtual_properties == {
"creatable_type": exp.convert("SECURE"),
}

def test_project_level_session_properties(sushi_context):

def test_project_level_properties(sushi_context):
model_defaults = ModelDefaultsConfig(
session_properties={
"some_bool": False,
"some_float": 0.1,
"project_level_property": "project_property",
}
},
physical_properties={
"warehouse": "small",
"target_lag": "1 hour",
},
virtual_properties={"creatable_type": "SECURE"},
)

model = load_sql_based_model(
d.parse(
"""
MODEL (
name test_schema.test_model,
virtual_properties (
creatable_type = None
)
);
SELECT a FROM tbl;
""",
Expand All @@ -3382,6 +3423,14 @@ def test_project_level_session_properties(sushi_context):
"project_level_property": "project_property",
}

assert model.physical_properties == {
"warehouse": exp.convert("small"),
"target_lag": exp.convert("1 hour"),
}

# Validate disabling global property
assert not model.virtual_properties


def test_model_session_properties(sushi_context):
assert sushi_context.models['"memory"."sushi"."items"'].session_properties == {
Expand Down