Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/use onestep zarr train data #207

Merged
merged 29 commits into from
Apr 3, 2020
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion external/fv3config
36 changes: 35 additions & 1 deletion external/vcm/tests/test_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
dz_and_top_to_phis,
_add_coords_to_interface_variable,
)
from vcm.calc.calc import local_time
from vcm.calc.calc import local_time, apparent_source
from vcm.cubedsphere.constants import COORD_Z_CENTER, COORD_Z_OUTER


Expand Down Expand Up @@ -85,3 +85,37 @@ def test_solar_time():
lon = xr.DataArray([0, 180, 270, 360, 0, 270], dims=["x"], coords={"x": range(6)})
ds_solar_test = xr.Dataset({"initialization_time": t, "lon": lon})
assert np.allclose(local_time(ds_solar_test), [0, 12, 18, 0, 6, 0])


def test_apparent_source():
    """Check apparent_source against hand-computed tendencies.

    Builds a two-initial-time, five-forecast-step temperature series and
    verifies Q1 = (high-res tendency) - (one-step tendency) at two different
    one-step forecast indices.
    """
    init_times = [
        cftime.DatetimeJulian(2016, 8, 1, 0, 15, 0),
        cftime.DatetimeJulian(2016, 8, 1, 0, 30, 0),
    ]
    # forecast steps are 60 s apart, stored as timedelta64[s]
    forecast_times = np.array([0.0, 60.0, 120.0, 180.0, 240.0]).astype(
        np.dtype("<m8[s]")
    )
    temperature = xr.DataArray(
        [[1, 2, 4, 7, 11.0], [3, 5, 5, 5, 5.0]],
        dims=["initial_time", "forecast_time"],
        coords={"initial_time": init_times, "forecast_time": forecast_times},
    )
    # expected Q1 per one-step forecast index: high-res tendency across the
    # 15-minute initialization gap minus the one-step 60 s tendency
    expected = {
        0: (2.0 / (15 * 60)) - (1.0 / 60),
        3: (2.0 / (15 * 60)) - (4.0 / 60),
    }
    for onestep_index, expected_value in expected.items():
        result = apparent_source(
            temperature,
            forecast_time_index_onestep=onestep_index,
            forecast_time_index_highres=0,
            t_dim="initial_time",
            s_dim="forecast_time",
        )
        assert result == pytest.approx(expected_value)
25 changes: 16 additions & 9 deletions external/vcm/vcm/calc/calc.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
import numpy as np
import xarray as xr
from vcm.cubedsphere.constants import (
INIT_TIME_DIM,
FORECAST_TIME_DIM,
COORD_Z_CENTER,
VAR_LON_CENTER,
)
from vcm.cubedsphere.constants import INIT_TIME_DIM, COORD_Z_CENTER, VAR_LON_CENTER

gravity = 9.81
specific_heat = 1004
Expand All @@ -27,14 +22,24 @@ def timedelta_to_seconds(dt):


def apparent_source(
q: xr.DataArray, t_dim: str = INIT_TIME_DIM, s_dim: str = FORECAST_TIME_DIM
q: xr.DataArray,
forecast_time_index_onestep,
forecast_time_index_highres,
t_dim: str,
s_dim: str,
) -> xr.DataArray:
"""Compute the apparent source from stepped output

Args:
q: The variable to compute the source of
forecast_time_index_onestep: forecast time step to use for
AnnaKwa marked this conversation as resolved.
Show resolved Hide resolved
calculating one step run tendency
forecast_time_index_highres: forecast time step to use for
calculating high res run tendency
t_dim, optional: the dimension corresponding to the initial condition
s_dim, optional: the dimension corresponding to the forecast time
step_dim: dimension corresponding to the step time dimension
(begin, before physics, after physics)

Returns:
The apparent source of q. Has units [q]/s
Expand All @@ -53,10 +58,12 @@ def apparent_source(
tend_c48 = dq_c48 / ds

# restore coords
tend = tend.isel({s_dim: 0}).assign_coords(**{t_dim: t[:-1]})
tend_c48 = tend_c48.isel({s_dim: 0, t_dim: slice(0, -1)}).assign_coords(
tend = tend.isel({s_dim: forecast_time_index_highres}).assign_coords(
**{t_dim: t[:-1]}
)
tend_c48 = tend_c48.isel(
{s_dim: forecast_time_index_onestep, t_dim: slice(0, -1)}
).assign_coords(**{t_dim: t[:-1]})

return tend - tend_c48

Expand Down
41 changes: 0 additions & 41 deletions fv3net/pipelines/create_training_data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,41 +0,0 @@
# Names of the residual ("apparent source") variables the ML trains on:
# coarsened high-resolution tendency minus the coarse model's one-step tendency.
VAR_Q_HEATING_ML = "dQ1"
VAR_Q_MOISTENING_ML = "dQ2"
VAR_Q_U_WIND_ML = "dQU"
VAR_Q_V_WIND_ML = "dQV"

# Suffix tags distinguishing the origin of a diagnostic variable:
# "prog" = coarsened high-res prognostic run, "train" = one-step training run.
SUFFIX_HIRES_DIAG = "prog"
SUFFIX_COARSE_TRAIN_DIAG = "train"

# Surface/TOA flux and precipitation diagnostics shared by both runs.
DIAG_VARS = [
    "LHTFLsfc",
    "SHTFLsfc",
    "PRATEsfc",
    "DSWRFtoa",
    "DSWRFsfc",
    "USWRFtoa",
    "USWRFsfc",
    "DLWRFsfc",
    "ULWRFtoa",
    "ULWRFsfc",
]
# Rename maps: strip/replace source-specific suffixes so the two runs'
# diagnostics can sit side by side in one dataset.
RENAMED_PROG_DIAG_VARS = {
    name + "_coarse": name + "_" + SUFFIX_HIRES_DIAG for name in DIAG_VARS
}
RENAMED_TRAIN_DIAG_VARS = {
    name: name + "_" + SUFFIX_COARSE_TRAIN_DIAG for name in DIAG_VARS
}


# Model state (restart) variables carried through the training pipeline.
RESTART_VARS = [
    "sphum",
    "T",
    "delp",
    "u",
    "v",
    "slmsk",
    "phis",
    "tsea",
    "slope",
    "DZ",
    "W",
]
8 changes: 6 additions & 2 deletions fv3net/pipelines/create_training_data/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,11 @@
"Output zarr files will be saved in either 'train' or 'test' subdir of "
"gcs-output-data-dir",
)
parser.add_argument(
"--var-names-yaml",
AnnaKwa marked this conversation as resolved.
Show resolved Hide resolved
type=str,
default=None,
help="optional yaml for providing data variable names",
)
args, pipeline_args = parser.parse_known_args()
print(args)
"""Main function"""
run(args=args, pipeline_args=pipeline_args)
69 changes: 69 additions & 0 deletions fv3net/pipelines/create_training_data/names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# suffixes that denote whether diagnostic variable is from the coarsened
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty much exactly the list of var names that the one step diags will use. If we're in agreement that fv3net.pipelines.common is a good idea for sharing/consistency across workflow steps, then this file (and the yaml if it's being used) seems like a good candidate to reside there

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

referencing this comment from @nbren12 's review: #207 (comment)

Since in a previous discussion we decided against doing the "import names from common .py" route to avoid linking the workflows in that manner, if we're using a lot of common var names across the workflows then I think we should go with the (2) and pass the variable name information to the workflows' respective main/run functions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@brianhenn As discussed offline, I'll change the source of the var names to be read in and passed to the run function so that a common list can be provided to both workflows

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In particular this commit should address your comment: 354d675

# Suffixes marking whether a diagnostic came from the coarsened high-res
# prognostic run ("prog") or the coarse-res one-step training run ("train").
suffix_hires = "prog"
suffix_coarse_train = "train"

# Dimension and coordinate names used by the one-step run output and the
# coarsened high-res output.
init_time_dim = "initial_time"
forecast_time_dim = "forecast_time"
step_time_dim = "step"
coord_begin_step = "begin"
var_lon_center = "lon"
var_lat_center = "lat"
var_lon_outer = "lonb"
var_lat_outer = "latb"
coord_x_center = "x"
coord_y_center = "y"
coord_z_center = "z"
var_x_wind = "x_wind"
var_y_wind = "y_wind"
var_temp = "air_temperature"
var_sphum = "specific_humidity"

# Radiative flux diagnostics present in both data sources.
radiation_vars = [
    "DSWRFtoa",
    "DSWRFsfc",
    "USWRFtoa",
    "USWRFsfc",
    "DLWRFsfc",
    "ULWRFtoa",
    "ULWRFsfc",
]

# Full set of variables taken from the one-step run output.
one_step_vars = radiation_vars + [
    "total_precipitation",
    "surface_temperature",
    "land_sea_mask",
    "latent_heat_flux",
    "sensible_heat_flux",
    "mean_cos_zenith_angle",
    "surface_geopotential",
    "vertical_thickness_of_atmospheric_layer",
    "vertical_wind",
    "pressure_thickness_of_atmospheric_layer",
    var_temp,
    var_sphum,
    var_x_wind,
    var_y_wind,
]

# ML training targets (apparent sources): high-resolution tendency minus the
# coarse-res model's one-step tendency, keyed by the source state variable.
var_source_name_map = {
    var_x_wind: "dQU",
    var_y_wind: "dQV",
    var_temp: "dQ1",
    var_sphum: "dQ2",
}
target_vars = list(var_source_name_map.values())

# Rename maps applied when writing the training data output.
renamed_high_res_vars = {
    rad_var + "_coarse": rad_var + "_" + suffix_hires for rad_var in radiation_vars
}
renamed_high_res_vars["lhtflsfc_coarse"] = "latent_heat_flux_" + suffix_hires
renamed_high_res_vars["shtflsfc_coarse"] = "sensible_heat_flux_" + suffix_hires
renamed_one_step_vars = {
    rad_var: rad_var + "_" + suffix_coarse_train for rad_var in radiation_vars
}
renamed_dims = {
    "grid_xt": "x",
    "grid_yt": "y",
    "grid_x": "x_interface",
    "grid_y": "y_interface",
}
Loading