Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Meta.yaml schema (implemented with traitlets!) #137

Merged
merged 11 commits into from
Nov 17, 2023
34 changes: 23 additions & 11 deletions pangeo_forge_runner/feedstock.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from ruamel.yaml import YAML

from .meta_yaml import MetaYaml
from .recipe_rewriter import RecipeRewriter

yaml = YAML()
Expand All @@ -30,7 +31,7 @@
"""
self.feedstock_dir = feedstock_dir
with open(self.feedstock_dir / "meta.yaml") as f:
self.meta = yaml.load(f)
self.meta = MetaYaml(**yaml.load(f))

self.prune = prune
self.callable_args_injections = (
Expand Down Expand Up @@ -75,30 +76,41 @@
*Executes arbitrary code* defined in the feedstock recipes.
"""
recipes = {}
recipes_config = self.meta.get("recipes")
if isinstance(recipes_config, list):
for r in recipes_config:
if isinstance(self.meta.recipes, list):
for r in self.meta.recipes:
recipes[r["id"]] = self._import(r["object"])
elif isinstance(recipes_config, dict):
recipes = self._import(recipes_config["dict_object"])
elif isinstance(self.meta.recipes, dict):
recipes = self._import(self.meta.recipes["dict_object"])

Check warning on line 83 in pangeo_forge_runner/feedstock.py

View check run for this annotation

Codecov / codecov/patch

pangeo_forge_runner/feedstock.py#L82-L83

Added lines #L82 - L83 were not covered by tests
else:
raise ValueError("Could not parse recipes config in meta.yaml")

return recipes

def get_expanded_meta(self):
def get_expanded_meta(self, drop_none=True) -> dict:
"""
Return full meta.yaml file, expanding recipes if needed.

recipes are guaranteed to be a list of dictionaries with 'id' keys.
'object' keys *may* be present, but not guaranteed.
"""
meta_copy = deepcopy(self.meta)
if "recipes" in self.meta and "dict_object" in self.meta["recipes"]:
if self.meta.recipes and "dict_object" in self.meta.recipes:

Check warning on line 97 in pangeo_forge_runner/feedstock.py

View check run for this annotation

Codecov / codecov/patch

pangeo_forge_runner/feedstock.py#L97

Added line #L97 was not covered by tests
# We have a dict_object, so let's parse the recipes and keep just
# the ids, discarding the values - as the values do not actually
# serialize.
recipes = self.parse_recipes()
meta_copy["recipes"] = [{"id": k} for k, v in recipes.items()]

return meta_copy
meta_copy.recipes = [

Check warning on line 102 in pangeo_forge_runner/feedstock.py

View check run for this annotation

Codecov / codecov/patch

pangeo_forge_runner/feedstock.py#L102

Added line #L102 was not covered by tests
# In place of the values, we add a placeholder string, so that the
# re-assignment to the MetaYaml schema here will pass validation
# of the `recipes` field, which requires "id" and "object" fields.
{"id": k, "object": "DICT_VALUE_PLACEHOLDER"}
for k, _ in recipes.items()
]

return (

Check warning on line 110 in pangeo_forge_runner/feedstock.py

View check run for this annotation

Codecov / codecov/patch

pangeo_forge_runner/feedstock.py#L110

Added line #L110 was not covered by tests
# the traitlets MetaYaml schema will give us empty containers
# by default, but in most cases lets assume we don't want that
{k: v for k, v in meta_copy.trait_values().items() if v}
if drop_none
else meta_copy.trait_values()
)
110 changes: 110 additions & 0 deletions pangeo_forge_runner/meta_yaml.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import jsonschema
from traitlets import Dict, HasTraits, List, TraitError, Unicode, Union, validate

# JSON Schema describing a single element of a list-style ``recipes`` field:
# an object that carries both a string "id" and a string "object".
recipes_field_per_element_schema = {
    "type": "object",
    "properties": {
        "id": {"type": "string"},
        "object": {"type": "string"},
    },
    # Both keys must be present on every element.
    "required": ["id", "object"],
}


class MetaYaml(HasTraits):
    """Schema for the ``meta.yaml`` file which must be included in each feedstock directory.

    Only the ``recipes`` field is strictly required for ``pangeo-forge-runner`` to function.
    All other fields are recommended but not required.
    """

    def __init__(self, recipes=None, **kwargs):
        """Build the schema object from the top-level fields of a ``meta.yaml`` file.

        The only required field is ``recipes``, so we put it explicitly in the init
        signature to ensure it is not omitted, as demonstrated in:
        https://github.com/ipython/traitlets/issues/490#issuecomment-479716288

        :param recipes: Value of the ``recipes`` field; either a list of dicts or a dict.
        :param kwargs: Remaining fields, forwarded to ``HasTraits.__init__``.
        """
        super().__init__(**kwargs)
        # Always assign, even when the caller omitted ``recipes``, so that the
        # trait is validated on every construction instead of silently keeping
        # its default value.
        self.recipes = recipes

    @validate("recipes")
    def _validate_recipes(self, proposal):
        """Ensure the ``recipes`` trait is not passed as an empty container and that
        each element of the field contains all expected subfields.

        :param proposal: Traitlets proposal; the candidate value is ``proposal["value"]``.
        :returns: The proposed value, unchanged.
        :raises TraitError: If the value is empty, or if it is a list and any element
            does not satisfy ``recipes_field_per_element_schema``.
        """
        value = proposal["value"]
        if not value:
            raise TraitError(
                f"The ``recipes`` trait, passed as {value}, cannot be empty."
            )

        # Only the list form is checked element-by-element; a dict value is
        # accepted here as long as it is non-empty.
        if isinstance(value, list):
            for recipe_spec in value:
                try:
                    jsonschema.validate(recipe_spec, recipes_field_per_element_schema)
                except jsonschema.ValidationError as e:
                    # Chain explicitly so the jsonschema error is kept as the cause.
                    raise TraitError(e) from e
        return value

    title = Unicode(
        allow_none=True,
        help="""
        Title for this dataset.
        """,
    )
    description = Unicode(
        allow_none=True,
        help="""
        Description of the dataset.
        """,
    )
    recipes = Union(
        [List(Dict()), Dict()],
        help="""
        Specifies the deployable Python objects to run in the recipe module.
        If the recipes are assigned to their own Python variable names,
        should be of the form:

        ```yaml
        recipes:
          - id: "unique-identifier-for-recipe"
            object: "recipe:transforms"
        ```

        Alternatively, if the recipes are values in a Python dict:

        ```yaml
        recipes:
          dict_object: "recipe:name_of_recipes_dict"
        ```
        """,
    )
    provenance = Dict(
        allow_none=True,
        help="""
        Dataset provenance information including provider, license, etc.
        """,
        per_key_traits={
            "providers": List(
                Dict(
                    per_key_traits={
                        "name": Unicode(),
                        "description": Unicode(),
                        "roles": List(),  # TODO: enum
                        "url": Unicode(),
                    },
                ),
            ),
            "license": Unicode(),  # TODO: guidance on suggested formatting (enum?)
        },
    )
    maintainers = List(
        Dict(
            per_key_traits={
                "name": Unicode(help="Full name of the maintainer."),
                "orcid": Unicode(help="Maintainer's ORCID ID"),  # TODO: regex
                "github": Unicode(help="Maintainer's GitHub username."),
            },
        ),
        allow_none=True,
        help="""
        Maintainers of this Pangeo Forge feedstock.
        """,
    )
3 changes: 0 additions & 3 deletions tests/unit/test_expand_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
"meta": {
"title": "Global Precipitation Climatology Project",
"description": "Global Precipitation Climatology Project (GPCP) Daily Version 1.3 gridded, merged ty satellite/gauge precipitation Climate data Record (CDR) from 1996 to present.\n",
"pangeo_forge_version": "0.9.0",
"pangeo_notebook_version": "2022.06.02",
"recipes": [{"id": "gpcp-from-gcs", "object": "recipe:recipe"}],
"provenance": {
"providers": [
Expand All @@ -37,7 +35,6 @@
"github": "cisaacstern",
}
],
"bakery": {"id": "pangeo-ldeo-nsf-earthcube"},
},
}
]
Expand Down
81 changes: 81 additions & 0 deletions tests/unit/test_feedstock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
from textwrap import dedent

import pytest
from ruamel.yaml import YAML

from pangeo_forge_runner.feedstock import Feedstock
from pangeo_forge_runner.meta_yaml import MetaYaml

# Shared ruamel.yaml parser; test_feedstock uses it to load the fixture's meta.yaml text.
yaml = YAML()


@pytest.fixture(params=["recipe_object", "dict_object"])
def tmp_feedstock(request, tmp_path_factory: pytest.TempPathFactory):
    """Create a temporary feedstock directory holding ``meta.yaml`` and ``recipe.py``.

    Parametrized over the two supported layouts of the ``recipes`` field:

    - ``"recipe_object"``: ``recipes`` is a list of ``{id, object}`` entries.
    - ``"dict_object"``: ``recipes`` is a mapping with a ``dict_object`` key that
      points at a dict of recipes in the recipe module.

    Yields a ``(tmpdir, meta_yaml, param)`` tuple: the feedstock directory, the
    text written to ``meta.yaml``, and the name of the active parametrization.
    """
    tmpdir = tmp_path_factory.mktemp("feedstock")
    # The nesting inside the literals below is significant: it is what makes the
    # YAML a list item / mapping entry and the Python a valid class body.
    if request.param == "recipe_object":
        meta_yaml = dedent(
            """\
            recipes:
              - id: aws-noaa-sea-surface-temp-whoi
                object: 'recipe:recipe'
            """
        )
        recipe_py = dedent(
            """\
            class Recipe:
                pass

            recipe = Recipe()
            """
        )
    elif request.param == "dict_object":
        meta_yaml = dedent(
            """\
            recipes:
              dict_object: 'recipe:recipes'
            """
        )
        recipe_py = dedent(
            """\
            class Recipe:
                pass

            recipes = {"my_recipe": Recipe()}
            """
        )

    with open(tmpdir / "meta.yaml", mode="w") as f:
        f.write(meta_yaml)
    with open(tmpdir / "recipe.py", mode="w") as f:
        f.write(recipe_py)

    yield tmpdir, meta_yaml, request.param


def test_feedstock(tmp_feedstock):
    """Check that ``Feedstock`` loads ``meta.yaml`` and expands/parses recipes
    for both layouts of the ``recipes`` field provided by ``tmp_feedstock``.
    """
    tmpdir, meta_yaml, recipes_section_type = tmp_feedstock
    f = Feedstock(feedstock_dir=tmpdir)
    # equality of HasTraits instances doesn't work as I might expect,
    # so just check equality of the relevant trait (`.recipes`)
    assert f.meta.recipes == MetaYaml(**yaml.load(meta_yaml)).recipes

    expanded_meta = f.get_expanded_meta()
    recipes = f.parse_recipes()

    for recipe_metadata in expanded_meta["recipes"]:
        # the recipe_object metadata looks something like this:
        #   {'recipes': [{'id': 'aws-noaa-sea-surface-temp-whoi', 'object': 'recipe:recipe'}]}
        # and the dict_object metadata looks like this:
        #   {'recipes': [{'id': 'my_recipe', 'object': 'DICT_VALUE_PLACEHOLDER'}]}
        # both have an "id" field:
        assert "id" in recipe_metadata
        # both also have an "object" field, but only "recipe_object" keeps the
        # import spec from meta.yaml; "dict_object" carries a placeholder:
        if recipes_section_type == "recipe_object":
            assert recipe_metadata["object"] == "recipe:recipe"
        elif recipes_section_type == "dict_object":
            assert recipe_metadata["object"] == "DICT_VALUE_PLACEHOLDER"

    for r in recipes.values():
        # the values of the recipes dict should all be python objects
        # we used the mock type `Recipe` here, so this should be true:
        assert str(r).startswith("<Recipe object")
Loading
Loading