Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/use onestep zarr train data #207

Merged
merged 29 commits into from
Apr 3, 2020
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion external/fv3config
21 changes: 20 additions & 1 deletion external/vcm/tests/test_calc.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
dz_and_top_to_phis,
_add_coords_to_interface_variable,
)
from vcm.calc.calc import local_time
from vcm.calc.calc import local_time, apparent_source
from vcm.cubedsphere.constants import COORD_Z_CENTER, COORD_Z_OUTER


Expand Down Expand Up @@ -85,3 +85,22 @@ def test_solar_time():
lon = xr.DataArray([0, 180, 270, 360, 0, 270], dims=["x"], coords={"x": range(6)})
ds_solar_test = xr.Dataset({"initialization_time": t, "lon": lon})
assert np.allclose(local_time(ds_solar_test), [0, 12, 18, 0, 6, 0])


def test_apparent_source():
    """Check ``apparent_source`` against hand-computed tendencies.

    The input has two initial times 15 minutes apart and five forecast times
    60 seconds apart. Each expected value is written as
    (high-res tendency) - (one-step tendency):

    * the high-res term ``2.0 / (15 * 60)`` is the change between the two
      initial times at forecast index 0, i.e. ``(3 - 1)`` over 15 minutes;
    * the one-step term is the change between consecutive forecast times of
      the first initial time over 60 seconds: ``(2 - 1)`` at index 0 and
      ``(11 - 7)`` at index 3.

    ``apparent_source`` takes separate forecast indices for the one-step and
    high-res tendencies, so both are passed explicitly by keyword.
    """
    coords = {
        "initial_time": [
            cftime.DatetimeJulian(2016, 8, 1, 0, 15, 0),
            cftime.DatetimeJulian(2016, 8, 1, 0, 30, 0),
        ],
        "forecast_time": np.array([0.0, 60.0, 120.0, 180.0, 240.0]).astype(
            np.dtype("<m8[s]")
        ),
    }
    T = xr.DataArray(
        [[1, 2, 4, 7, 11.0], [3, 5, 5, 5, 5.0]],
        dims=["initial_time", "forecast_time"],
        coords=coords,
    )

    # Both tendencies evaluated at the first forecast step.
    Q1_forecast0 = apparent_source(
        T,
        forecast_time_index_onestep=0,
        forecast_time_index_highres=0,
        t_dim="initial_time",
        s_dim="forecast_time",
    )
    assert Q1_forecast0 == pytest.approx((2.0 / (15 * 60)) - (1.0 / 60))

    # One-step tendency taken at forecast step 3; the expected high-res term
    # is unchanged, so the high-res index stays at 0.
    Q1_forecast3 = apparent_source(
        T,
        forecast_time_index_onestep=3,
        forecast_time_index_highres=0,
        t_dim="initial_time",
        s_dim="forecast_time",
    )
    assert Q1_forecast3 == pytest.approx((2.0 / (15 * 60)) - (4.0 / 60))
25 changes: 16 additions & 9 deletions external/vcm/vcm/calc/calc.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
import numpy as np
import xarray as xr
from vcm.cubedsphere.constants import (
INIT_TIME_DIM,
FORECAST_TIME_DIM,
COORD_Z_CENTER,
VAR_LON_CENTER,
)
from vcm.cubedsphere.constants import INIT_TIME_DIM, COORD_Z_CENTER, VAR_LON_CENTER

gravity = 9.81
specific_heat = 1004
Expand All @@ -27,14 +22,24 @@ def timedelta_to_seconds(dt):


def apparent_source(
q: xr.DataArray, t_dim: str = INIT_TIME_DIM, s_dim: str = FORECAST_TIME_DIM
q: xr.DataArray,
forecast_time_index_onestep,
forecast_time_index_highres,
t_dim: str,
s_dim: str,
) -> xr.DataArray:
"""Compute the apparent source from stepped output

Args:
q: The variable to compute the source of
forecast_time_index_onestep: forecast time step to use for
AnnaKwa marked this conversation as resolved.
Show resolved Hide resolved
calculating one step run tendency
forecast_time_index_highres: forecast time step to use for
calculating high res run tendency
t_dim: the dimension corresponding to the initial condition
s_dim: the dimension corresponding to the forecast time
step_dim: dimension corresponding to the step time dimension
(begin, before physics, after physics)

Returns:
The apparent source of q. Has units [q]/s
Expand All @@ -53,10 +58,12 @@ def apparent_source(
tend_c48 = dq_c48 / ds

# restore coords
tend = tend.isel({s_dim: 0}).assign_coords(**{t_dim: t[:-1]})
tend_c48 = tend_c48.isel({s_dim: 0, t_dim: slice(0, -1)}).assign_coords(
tend = tend.isel({s_dim: forecast_time_index_highres}).assign_coords(
**{t_dim: t[:-1]}
)
tend_c48 = tend_c48.isel(
{s_dim: forecast_time_index_onestep, t_dim: slice(0, -1)}
).assign_coords(**{t_dim: t[:-1]})

return tend - tend_c48

Expand Down
41 changes: 0 additions & 41 deletions fv3net/pipelines/create_training_data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,41 +0,0 @@
# Names of the residuals the ML model is trained on, i.e.
# (high resolution tendency) - (coarse res model's one step tendency).
VAR_Q_HEATING_ML, VAR_Q_MOISTENING_ML = "dQ1", "dQ2"
VAR_Q_U_WIND_ML, VAR_Q_V_WIND_ML = "dQU", "dQV"

# Suffixes telling apart a diagnostic taken from the coarsened high
# resolution prognostic run and one taken from the coarse res one step
# train data run.
SUFFIX_HIRES_DIAG, SUFFIX_COARSE_TRAIN_DIAG = "prog", "train"

# Diagnostic variables that get renamed with one of the suffixes above.
DIAG_VARS = [
    "LHTFLsfc", "SHTFLsfc", "PRATEsfc",
    "DSWRFtoa", "DSWRFsfc",
    "USWRFtoa", "USWRFsfc",
    "DLWRFsfc",
    "ULWRFtoa", "ULWRFsfc",
]

# "<name>_coarse" -> "<name>_prog" for the high resolution diagnostics.
RENAMED_PROG_DIAG_VARS = {
    f"{name}_coarse": f"{name}_{SUFFIX_HIRES_DIAG}" for name in DIAG_VARS
}
# "<name>" -> "<name>_train" for the one step run diagnostics.
RENAMED_TRAIN_DIAG_VARS = {
    name: f"{name}_{SUFFIX_COARSE_TRAIN_DIAG}" for name in DIAG_VARS
}


# Variable names listed under RESTART_VARS.
RESTART_VARS = [
    "sphum", "T", "delp",
    "u", "v",
    "slmsk", "phis", "tsea", "slope",
    "DZ", "W",
]
8 changes: 6 additions & 2 deletions fv3net/pipelines/create_training_data/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,11 @@
"Output zarr files will be saved in either 'train' or 'test' subdir of "
"gcs-output-data-dir",
)
parser.add_argument(
"--var-names-yaml",
AnnaKwa marked this conversation as resolved.
Show resolved Hide resolved
type=str,
default=None,
help="optional yaml for providing data variable names",
)
args, pipeline_args = parser.parse_known_args()
print(args)
"""Main function"""
run(args=args, pipeline_args=pipeline_args)
69 changes: 69 additions & 0 deletions fv3net/pipelines/create_training_data/names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# suffixes that denote whether diagnostic variable is from the coarsened
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty much exactly the list of var names that the one step diags will use. If we're in agreement that fv3net.pipelines.common is a good idea for sharing/consistency across workflow steps, then this file (and the yaml if it's being used) seems like a good candidate to reside there

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

referencing this comment from @nbren12 's review: #207 (comment)

Since in a previous discussion we decided against the "import names from a common .py" route, to avoid linking the workflows in that manner, if we're using a lot of common var names across the workflows then I think we should go with option (2) and pass the variable name information to the workflows' respective main/run functions.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@brianhenn As discussed offline, I'll change the source of the var names to be read in and passed to the run function so that a common list can be provided to both workflows

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In particular this commit should address your comment: 354d675

# high resolution prognostic run or the coarse res one step train data run
suffix_hires = "prog"  # coarsened high resolution prognostic run
suffix_coarse_train = "train"  # coarse res one step train data run

# Dimension, coordinate and variable names used for the one step run output
# and the coarsened high res output.
init_time_dim = "initial_time"
forecast_time_dim = "forecast_time"
step_time_dim = "step"
coord_begin_step = "begin"

var_lon_center = "lon"
var_lat_center = "lat"
var_lon_outer = "lonb"
var_lat_outer = "latb"

coord_x_center = "x"
coord_y_center = "y"
coord_z_center = "z"

var_x_wind = "x_wind"
var_y_wind = "y_wind"
var_temp = "air_temperature"
var_sphum = "specific_humidity"

radiation_vars = [
    "DSWRFtoa", "DSWRFsfc",
    "USWRFtoa", "USWRFsfc",
    "DLWRFsfc",
    "ULWRFtoa", "ULWRFsfc",
]

# Radiation variables first, then the remaining one step variables.
one_step_vars = [
    *radiation_vars,
    "total_precipitation",
    "surface_temperature",
    "land_sea_mask",
    "latent_heat_flux",
    "sensible_heat_flux",
    "mean_cos_zenith_angle",
    "surface_geopotential",
    "vertical_thickness_of_atmospheric_layer",
    "vertical_wind",
    "pressure_thickness_of_atmospheric_layer",
    var_temp,
    var_sphum,
    var_x_wind,
    var_y_wind,
]

# Names of the residuals the ML trains on, keyed by the source variable:
# (high resolution tendency) - (coarse res model's one step tendency).
var_source_name_map = dict(
    zip(
        (var_x_wind, var_y_wind, var_temp, var_sphum),
        ("dQU", "dQV", "dQ1", "dQ2"),
    )
)
target_vars = [*var_source_name_map.values()]

# Rename mappings applied to variables in the training data output.
renamed_high_res_vars = {
    f"{name}_coarse": f"{name}_{suffix_hires}" for name in radiation_vars
}
# The heat fluxes use lower-case source names, so they are added explicitly.
renamed_high_res_vars["lhtflsfc_coarse"] = f"latent_heat_flux_{suffix_hires}"
renamed_high_res_vars["shtflsfc_coarse"] = f"sensible_heat_flux_{suffix_hires}"

renamed_one_step_vars = {
    name: f"{name}_{suffix_coarse_train}" for name in radiation_vars
}
renamed_dims = dict(
    grid_xt="x",
    grid_yt="y",
    grid_x="x_interface",
    grid_y="y_interface",
)
Loading