update DANRA example, README and CHANGELOG

mllam · Nov 20, 2024 · 3189341 · 3189341
1 parent a6da5f8
commit 3189341
Show file tree

Hide file tree

Showing 7 changed files with 83 additions and 7 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,7 +5,22 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## v0.4.0
+## Unreleased
+
+[All changes](https://github.com/mllam/mllam-data-prep/compare/v0.4.0...HEAD)
+
+### Added
+
+- Add optional section called `extra` to config file to allow for user-defined extra information that is ignored by `mllam-data-prep` but can be used by downstream applications. [\#18](https://github.com/mllam/mllam-data-prep/pull/18), @leifdenby
+
+### Changed
+
+- Schema version bumped to `v0.5.0` to match the next expected release, which will support the optional `extra` section in the config. [\#18](https://github.com/mllam/mllam-data-prep/pull/18)
+
+
+## [v0.4.0](https://github.com/mllam/mllam-data-prep/releases/tag/v0.4.0)
+
+[All changes](https://github.com/mllam/mllam-data-prep/compare/v0.3.0...v0.4.0)
 
 This release adds support for defining the output path in the command line
 interface and addresses bugs around optional dependencies for
@@ -25,7 +40,7 @@ interface and addresses bugs around optional dependencies for
 
 ## [v0.3.0](https://github.com/mllam/mllam-data-prep/releases/tag/v0.3.0)
 
-[All changes](https://github.com/mllam/mllam-data-prep/compare/v0.3.0...v0.2.0)
+[All changes](https://github.com/mllam/mllam-data-prep/compare/v0.2.0...v0.3.0)
 
 ### Added
 
@@ -38,7 +53,7 @@ interface and addresses bugs around optional dependencies for
 
 ## [v0.2.0](https://github.com/mllam/mllam-data-prep/releases/tag/v0.2.0)
 
-[All changes](https://github.com/mllam/mllam-data-prep/compare/v0.2.0...v0.1.0)
+[All changes](https://github.com/mllam/mllam-data-prep/compare/v0.1.0...v0.2.0)
 
 ### Added
 

diff --git a/README.md b/README.md
@@ -112,7 +112,7 @@ ds = mdp.create_dataset(config=config)
 A full example configuration file is given in [example.danra.yaml](example.danra.yaml), and reproduced here for completeness:
 
 ```yaml
-schema_version: v0.2.0+dev
+schema_version: v0.5.0
 dataset_version: v0.1.0
 
 output:
@@ -317,3 +317,10 @@ The `inputs` section defines the source datasets to extract data from. Each sour
   - `rename`: simply rename the dimension to the new name
   - `stack`: stack the listed dimension to create the dimension in the output
   - `stack_variables_by_var_name`: stack the dimension into the new dimension, and also stack the variable name into the new variable name. This is useful when you have multiple variables with the same dimensions that you want to stack into a single variable.
+
+
+### Config schema versioning
+
+The schema version of the configuration file is defined by the `schema_version` attribute at the top of the configuration file. This is used to keep track of changes to the configuration file format. The schema version is used to check that the configuration file is compatible with the version of `mllam-data-prep` that you are using. If the schema version of the configuration file is not compatible with the version of `mllam-data-prep` that you are using you will get an error message telling you that the schema version is not compatible.
+
+The schema version is updated whenever the configuration format changes, with the new schema version matching the minimum version of `mllam-data-prep` that is required to use the new configuration format. As `mllam-data-prep` is still in rapid development (and hasn't reached version `v1.0.0` yet), we unfortunately make no guarantee of backward compatibility. However, the [CHANGELOG.md](CHANGELOG.md) will always contain migration instructions when the config format changes.
diff --git a/example.danra.yaml b/example.danra.yaml
@@ -94,3 +94,6 @@ extra:
       central_longitude: 25.0
       central_latitude: 56.7
       standard_parallels: [56.7, 56.7]
+      globe:
+        semimajor_axis: 6367470.0
+        semiminor_axis: 6367470.0
diff --git a/mllam_data_prep/create_dataset.py b/mllam_data_prep/create_dataset.py
@@ -241,7 +241,7 @@ def create_dataset_zarr(fp_config, fp_zarr: str = None):
     """
     config = Config.from_yaml_file(file=fp_config)
 
-    expected_schema_version = "v0.2.0+dev"
+    expected_schema_version = "v0.5.0"
     assert (
         config.schema_version == expected_schema_version
     ), f"Expected schema version {expected_schema_version}, got {config.schema_version}"

diff --git a/tests/data.py b/tests/data.py
@@ -5,7 +5,7 @@
 import pandas as pd
 import xarray as xr
 
-SCHEMA_VERSION = "v0.2.0+dev"
+SCHEMA_VERSION = "v0.5.0"
 
 NX, NY = 10, 8
 NT_ANALYSIS, NT_FORECAST = 5, 12

diff --git a/tests/test_config.py b/tests/test_config.py
@@ -4,7 +4,7 @@
 import mllam_data_prep as mdp
 
 INVALID_EXTRA_FIELDS_CONFIG_YAML = """
-schema_version: v0.1.0
+schema_version: {schema_version}
 dataset_version: v0.1.0
 
 output:

diff --git a/tests/test_from_config.py b/tests/test_from_config.py
@@ -280,3 +280,54 @@ def test_danra_example():
     fp_config = Path(__file__).parent.parent / "example.danra.yaml"
     with tempfile.TemporaryDirectory(suffix=".zarr") as tmpdir:
         mdp.create_dataset_zarr(fp_config=fp_config, fp_zarr=tmpdir)
+
+
+@pytest.mark.parametrize("extra_content", [None, {"foobar": {"baz": 42}}])
+def test_optional_extra_section(extra_content):
+    """
+    Test to ensure that the optional `extra` section of the config can contain
+    arbitrary information and is not required for the config to be valid
+    """
+    tmpdir = tempfile.TemporaryDirectory()
+    datasets = testdata.create_data_collection(
+        data_kinds=["static"], fp_root=tmpdir.name
+    )
+
+    config_dict = dict(
+        schema_version=testdata.SCHEMA_VERSION,
+        dataset_version="v0.1.0",
+        output=dict(
+            variables=dict(
+                static=["grid_index", "static_feature"],
+            ),
+        ),
+        inputs=dict(
+            danra_static=dict(
+                path=datasets["static"],
+                dims=["x", "y"],
+                variables=testdata.DEFAULT_STATIC_VARS,
+                dim_mapping=dict(
+                    grid_index=dict(
+                        method="stack",
+                        dims=["x", "y"],
+                    ),
+                    static_feature=dict(
+                        method="stack_variables_by_var_name",
+                        name_format="{var_name}",
+                    ),
+                ),
+                target_output_variable="static",
+            ),
+        ),
+    )
+
+    if extra_content is not None:
+        config_dict["extra"] = extra_content
+
+    # write yaml config to file
+    fn_config = "config.yaml"
+    fp_config = Path(tmpdir.name) / fn_config
+    with open(fp_config, "w") as f:
+        yaml.dump(config_dict, f)
+
+    mdp.create_dataset_zarr(fp_config=fp_config)