Feature/use onestep zarr train data #207

Merged 29 commits on Apr 3, 2020
Changes from 24 commits
2 changes: 1 addition & 1 deletion external/fv3config
36 changes: 35 additions & 1 deletion external/vcm/tests/test_calc.py
@@ -10,7 +10,7 @@
dz_and_top_to_phis,
_add_coords_to_interface_variable,
)
from vcm.calc.calc import local_time
from vcm.calc.calc import local_time, apparent_source
from vcm.cubedsphere.constants import COORD_Z_CENTER, COORD_Z_OUTER


@@ -85,3 +85,37 @@ def test_solar_time():
lon = xr.DataArray([0, 180, 270, 360, 0, 270], dims=["x"], coords={"x": range(6)})
ds_solar_test = xr.Dataset({"initialization_time": t, "lon": lon})
assert np.allclose(local_time(ds_solar_test), [0, 12, 18, 0, 6, 0])


def test_apparent_source():
coords = {
"initial_time": [
cftime.DatetimeJulian(2016, 8, 1, 0, 15, 0),
cftime.DatetimeJulian(2016, 8, 1, 0, 30, 0),
],
"forecast_time": np.array([0.0, 60.0, 120.0, 180.0, 240.0]).astype(
np.dtype("<m8[s]")
),
}
T = xr.DataArray(
[[1, 2, 4, 7, 11.0], [3, 5, 5, 5, 5.0]],
dims=["initial_time", "forecast_time"],
coords=coords,
)
# check Q calculated for different forecast time steps
Q1_forecast0 = apparent_source(
T,
forecast_time_index_onestep=0,
forecast_time_index_highres=0,
t_dim="initial_time",
s_dim="forecast_time",
)
assert Q1_forecast0 == pytest.approx((2.0 / (15 * 60)) - (1.0 / 60))
Q1_forecast3 = apparent_source(
T,
forecast_time_index_onestep=3,
forecast_time_index_highres=0,
t_dim="initial_time",
s_dim="forecast_time",
)
assert Q1_forecast3 == pytest.approx((2.0 / (15 * 60)) - (4.0 / 60))
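For reference, the expected values in these asserts follow directly from the toy T array: the high-res tendency is a finite difference across the two initial times (15 minutes apart) at the chosen high-res forecast index, and the one-step tendency is a finite difference across adjacent forecast steps (60 seconds apart) at the chosen one-step index. A minimal arithmetic sketch, assuming that reading of apparent_source:

```python
import numpy as np

T = np.array([[1, 2, 4, 7, 11.0], [3, 5, 5, 5, 5.0]])
dt = 15 * 60  # seconds between the two initial times
ds = 60.0     # seconds between adjacent forecast steps

# high-res tendency at forecast index 0: difference across initial times
tend_highres = (T[1, 0] - T[0, 0]) / dt       # 2 / (15 * 60)

# one-step tendencies at forecast indices 0 and 3 for the first initial time
tend_onestep_0 = (T[0, 1] - T[0, 0]) / ds     # 1 / 60
tend_onestep_3 = (T[0, 4] - T[0, 3]) / ds     # 4 / 60

print(tend_highres - tend_onestep_0)  # expected Q1_forecast0
print(tend_highres - tend_onestep_3)  # expected Q1_forecast3
```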
25 changes: 16 additions & 9 deletions external/vcm/vcm/calc/calc.py
@@ -1,11 +1,6 @@
import numpy as np
import xarray as xr
from vcm.cubedsphere.constants import (
INIT_TIME_DIM,
FORECAST_TIME_DIM,
COORD_Z_CENTER,
VAR_LON_CENTER,
)
from vcm.cubedsphere.constants import INIT_TIME_DIM, COORD_Z_CENTER, VAR_LON_CENTER

gravity = 9.81
specific_heat = 1004
@@ -27,14 +22,24 @@ def timedelta_to_seconds(dt):


def apparent_source(
q: xr.DataArray, t_dim: str = INIT_TIME_DIM, s_dim: str = FORECAST_TIME_DIM
q: xr.DataArray,
forecast_time_index_onestep,
forecast_time_index_highres,
t_dim: str,
s_dim: str,
) -> xr.DataArray:
"""Compute the apparent source from stepped output

Args:
q: The variable to compute the source of
forecast_time_index_onestep: forecast time step to use for
calculating one step run tendency
forecast_time_index_highres: forecast time step to use for
calculating high res run tendency
t_dim: the dimension corresponding to the initial condition
s_dim: the dimension corresponding to the forecast time

Returns:
The apparent source of q. Has units [q]/s
@@ -53,10 +58,12 @@ def apparent_source(
tend_c48 = dq_c48 / ds

# restore coords
tend = tend.isel({s_dim: 0}).assign_coords(**{t_dim: t[:-1]})
tend_c48 = tend_c48.isel({s_dim: 0, t_dim: slice(0, -1)}).assign_coords(
tend = tend.isel({s_dim: forecast_time_index_highres}).assign_coords(
**{t_dim: t[:-1]}
)
tend_c48 = tend_c48.isel(
{s_dim: forecast_time_index_onestep, t_dim: slice(0, -1)}
).assign_coords(**{t_dim: t[:-1]})

return tend - tend_c48
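In words: the result is the high-res tendency (finite difference across initial times, evaluated at forecast_time_index_highres) minus the one-step model's tendency (finite difference across forecast steps, evaluated at forecast_time_index_onestep), matching the "high resolution tendency - coarse res model's one step tendency" residual described in the removed __init__.py below. A minimal usage sketch with the new required arguments; the array values, the dQ1 naming, and the index choices are illustrative, not taken from this PR:

```python
import cftime
import numpy as np
import xarray as xr

from vcm.calc.calc import apparent_source

# illustrative temperature field indexed by (initial_time, forecast_time)
T = xr.DataArray(
    np.array([[1.0, 2.0, 4.0], [3.0, 5.0, 5.0]]),
    dims=["initial_time", "forecast_time"],
    coords={
        "initial_time": [
            cftime.DatetimeJulian(2016, 8, 1, 0, 15, 0),
            cftime.DatetimeJulian(2016, 8, 1, 0, 30, 0),
        ],
        "forecast_time": np.array([0.0, 60.0, 120.0]).astype("timedelta64[s]"),
    },
)

# heating residual at the first forecast step of both runs
dQ1 = apparent_source(
    T,
    forecast_time_index_onestep=0,
    forecast_time_index_highres=0,
    t_dim="initial_time",
    s_dim="forecast_time",
)
```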

41 changes: 0 additions & 41 deletions fv3net/pipelines/create_training_data/__init__.py
@@ -1,41 +0,0 @@
# residuals that the ML is training on
# high resolution tendency - coarse res model's one step tendency
VAR_Q_HEATING_ML = "dQ1"
VAR_Q_MOISTENING_ML = "dQ2"
VAR_Q_U_WIND_ML = "dQU"
VAR_Q_V_WIND_ML = "dQV"

# suffixes denote whether diagnostic variable is from the coarsened
# high resolution prognostic run or the coarse res one step train data run
SUFFIX_HIRES_DIAG = "prog"
SUFFIX_COARSE_TRAIN_DIAG = "train"

DIAG_VARS = [
"LHTFLsfc",
"SHTFLsfc",
"PRATEsfc",
"DSWRFtoa",
"DSWRFsfc",
"USWRFtoa",
"USWRFsfc",
"DLWRFsfc",
"ULWRFtoa",
"ULWRFsfc",
]
RENAMED_PROG_DIAG_VARS = {f"{var}_coarse": f"{var}_prog" for var in DIAG_VARS}
RENAMED_TRAIN_DIAG_VARS = {var: f"{var}_train" for var in DIAG_VARS}


RESTART_VARS = [
"sphum",
"T",
"delp",
"u",
"v",
"slmsk",
"phis",
"tsea",
"slope",
"DZ",
"W",
]
20 changes: 16 additions & 4 deletions fv3net/pipelines/create_training_data/__main__.py
@@ -1,5 +1,7 @@
import argparse
from fv3net.pipelines.create_training_data.pipeline import run
import yaml

from .pipeline import run

if __name__ == "__main__":
parser = argparse.ArgumentParser()
@@ -21,6 +23,12 @@
help="Write path for train data in Google Cloud Storage bucket. "
"Don't include bucket in path.",
)
parser.add_argument(
"variable_namefile",
type=str,
default=None,
help="yaml file for providing data variable names",
)
parser.add_argument(
"--timesteps-per-output-file",
type=int,
@@ -38,7 +46,11 @@
"Output zarr files will be saved in either 'train' or 'test' subdir of "
"gcs-output-data-dir",
)

args, pipeline_args = parser.parse_known_args()
print(args)
"""Main function"""
run(args=args, pipeline_args=pipeline_args)
with open(args.variable_namefile, "r") as stream:
try:
names = yaml.safe_load(stream)
except yaml.YAMLError as exc:
raise ValueError(f"Bad yaml config: {exc}")
Reviewer comment (Contributor): This try/except seems redundant since you re-raise the error raised by yaml and don't have any special error handling logic. The traceback should make it pretty clear where the error is from.

run(args=args, pipeline_args=pipeline_args, names=names)
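The new positional variable_namefile argument points at a YAML file of data variable names, which is parsed with yaml.safe_load and passed to run() as names. The file's schema is not shown in this diff; a hypothetical file covering the kinds of names previously hard-coded in create_training_data/__init__.py (heating/moistening residuals, diagnostic suffixes, restart variables) might look like:

```yaml
# hypothetical variable_namefile contents; the keys are illustrative and
# not taken from this PR
var_q_heating_ml: dQ1
var_q_moistening_ml: dQ2
suffix_hires_diag: prog
suffix_coarse_train_diag: train
restart_vars:
  - sphum
  - T
  - delp
  - u
  - v
```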
5 changes: 5 additions & 0 deletions fv3net/pipelines/create_training_data/helpers.py
@@ -19,6 +19,11 @@
logger.setLevel(logging.INFO)


def convert_forecast_time_to_timedelta(ds, forecast_time_dim):
Reviewer comment (Contributor): is this a public function?
Author reply (Contributor): No, added _ to name

timedelta_coords = ds[forecast_time_dim].astype("timedelta64[ns]")
return ds.assign_coords({forecast_time_dim: timedelta_coords})
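A minimal, illustrative usage sketch of this helper (per the review thread above, it was later renamed with a leading underscore; the import path assumes the file's location in this repo):

```python
import numpy as np
import xarray as xr

from fv3net.pipelines.create_training_data.helpers import (
    convert_forecast_time_to_timedelta,
)

# illustrative dataset with a plain numeric forecast_time coordinate
ds = xr.Dataset(coords={"forecast_time": np.array([0, 60, 120])})

converted = convert_forecast_time_to_timedelta(ds, "forecast_time")
# the coordinate now has dtype timedelta64[ns]; note that astype reinterprets
# the bare integers as nanosecond counts
print(converted["forecast_time"].dtype)
```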


def _path_from_first_timestep(ds, train_test_labels=None):
""" Uses first init time as zarr filename, and appends a 'train'/'test' subdir
if a dict of labels is provided