Feature/use onestep zarr train data #207

Merged 29 commits on Apr 3, 2020
Changes from all commits
2 changes: 1 addition & 1 deletion external/fv3config
36 changes: 35 additions & 1 deletion external/vcm/tests/test_calc.py
@@ -10,7 +10,7 @@
dz_and_top_to_phis,
_add_coords_to_interface_variable,
)
from vcm.calc.calc import local_time
from vcm.calc.calc import local_time, apparent_source
from vcm.cubedsphere.constants import COORD_Z_CENTER, COORD_Z_OUTER


@@ -85,3 +85,37 @@ def test_solar_time():
lon = xr.DataArray([0, 180, 270, 360, 0, 270], dims=["x"], coords={"x": range(6)})
ds_solar_test = xr.Dataset({"initialization_time": t, "lon": lon})
assert np.allclose(local_time(ds_solar_test), [0, 12, 18, 0, 6, 0])


def test_apparent_source():
coords = {
"initial_time": [
cftime.DatetimeJulian(2016, 8, 1, 0, 15, 0),
cftime.DatetimeJulian(2016, 8, 1, 0, 30, 0),
],
"forecast_time": np.array([0.0, 60.0, 120.0, 180.0, 240.0]).astype(
np.dtype("<m8[s]")
),
}
T = xr.DataArray(
[[1, 2, 4, 7, 11.0], [3, 5, 5, 5, 5.0]],
dims=["initial_time", "forecast_time"],
coords=coords,
)
# check Q calculated for different forecast time steps
Q1_forecast0 = apparent_source(
T,
coarse_tstep_idx=0,
highres_tstep_idx=0,
t_dim="initial_time",
s_dim="forecast_time",
)
assert Q1_forecast0 == pytest.approx((2.0 / (15 * 60)) - (1.0 / 60))
Q1_forecast3 = apparent_source(
T,
coarse_tstep_idx=3,
highres_tstep_idx=0,
t_dim="initial_time",
s_dim="forecast_time",
)
assert Q1_forecast3 == pytest.approx((2.0 / (15 * 60)) - (4.0 / 60))
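
For reference, the expected values in these assertions follow from the test data: the high-res tendency is the change in T between the two initial times at the chosen forecast step, divided by their 15-minute spacing, and the coarse tendency is the forward difference of T along forecast_time at the chosen step, divided by the 60 s step. A minimal sketch of that arithmetic (not part of this diff), in plain Python on the array values above:

# Sketch of the arithmetic behind the expected values (not part of this diff).
T = [[1, 2, 4, 7, 11.0],   # initial_time 00:15
     [3, 5, 5, 5, 5.0]]    # initial_time 00:30
dt_init = 15 * 60          # seconds between initial times
ds_forecast = 60           # seconds between forecast steps

# high-res tendency at forecast step 0: difference across initial times
tend_hires = (T[1][0] - T[0][0]) / dt_init            # 2 / 900

# coarse tendencies: forward difference along forecast_time at steps 0 and 3
tend_coarse_0 = (T[0][1] - T[0][0]) / ds_forecast     # 1 / 60
tend_coarse_3 = (T[0][4] - T[0][3]) / ds_forecast     # 4 / 60

assert tend_hires - tend_coarse_0 == (2.0 / (15 * 60)) - (1.0 / 60)
assert tend_hires - tend_coarse_3 == (2.0 / (15 * 60)) - (4.0 / 60)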
24 changes: 12 additions & 12 deletions external/vcm/vcm/calc/calc.py
@@ -1,11 +1,6 @@
import numpy as np
import xarray as xr
from vcm.cubedsphere.constants import (
INIT_TIME_DIM,
FORECAST_TIME_DIM,
COORD_Z_CENTER,
VAR_LON_CENTER,
)
from vcm.cubedsphere.constants import INIT_TIME_DIM, COORD_Z_CENTER, VAR_LON_CENTER

gravity = 9.81
specific_heat = 1004
@@ -27,15 +22,20 @@ def timedelta_to_seconds(dt):


def apparent_source(
q: xr.DataArray, t_dim: str = INIT_TIME_DIM, s_dim: str = FORECAST_TIME_DIM
q: xr.DataArray, t_dim: str, s_dim: str, coarse_tstep_idx=0, highres_tstep_idx=0
) -> xr.DataArray:
"""Compute the apparent source from stepped output

Args:
q: The variable to compute the source of
t_dim, optional: the dimension corresponding to the initial condition
s_dim, optional: the dimension corresponding to the forecast time

step_dim: dimension corresponding to the step time dimension
(begin, before physics, after physics)
coarse_tstep_idx: (default=0) forecast time step to use for
calculating one step run tendency
highres_tstep_idx: (default=0) forecast time step to use for
calculating high res run tendency
Returns:
The apparent source of q. Has units [q]/s

@@ -53,10 +53,10 @@ def apparent_source(
tend_c48 = dq_c48 / ds

# restore coords
tend = tend.isel({s_dim: 0}).assign_coords(**{t_dim: t[:-1]})
tend_c48 = tend_c48.isel({s_dim: 0, t_dim: slice(0, -1)}).assign_coords(
**{t_dim: t[:-1]}
)
tend = tend.isel({s_dim: highres_tstep_idx}).assign_coords(**{t_dim: t[:-1]})
tend_c48 = tend_c48.isel(
{s_dim: coarse_tstep_idx, t_dim: slice(0, -1)}
).assign_coords(**{t_dim: t[:-1]})

return tend - tend_c48

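Since the default dimension names were dropped from the signature, callers now name the initial-condition and forecast dimensions explicitly. A minimal usage sketch (not part of the diff; it mirrors the test above, and the random data is illustrative only):

import cftime
import numpy as np
import xarray as xr
from vcm.calc.calc import apparent_source

# Two initial conditions 15 minutes apart, five forecast steps 60 s apart.
T = xr.DataArray(
    np.random.rand(2, 5),
    dims=["initial_time", "forecast_time"],
    coords={
        "initial_time": [
            cftime.DatetimeJulian(2016, 8, 1, 0, 15, 0),
            cftime.DatetimeJulian(2016, 8, 1, 0, 30, 0),
        ],
        "forecast_time": np.arange(5) * np.timedelta64(60, "s"),
    },
)

# dQ1-style residual: high-res tendency at step 0 minus coarse tendency at step 0.
Q1 = apparent_source(
    T,
    t_dim="initial_time",
    s_dim="forecast_time",
    coarse_tstep_idx=0,
    highres_tstep_idx=0,
)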
41 changes: 0 additions & 41 deletions fv3net/pipelines/create_training_data/__init__.py
@@ -1,41 +0,0 @@
# residuals that the ML is training on
# high resolution tendency - coarse res model's one step tendency
VAR_Q_HEATING_ML = "dQ1"
VAR_Q_MOISTENING_ML = "dQ2"
VAR_Q_U_WIND_ML = "dQU"
VAR_Q_V_WIND_ML = "dQV"

# suffixes denote whether diagnostic variable is from the coarsened
# high resolution prognostic run or the coarse res one step train data run
SUFFIX_HIRES_DIAG = "prog"
SUFFIX_COARSE_TRAIN_DIAG = "train"

DIAG_VARS = [
"LHTFLsfc",
"SHTFLsfc",
"PRATEsfc",
"DSWRFtoa",
"DSWRFsfc",
"USWRFtoa",
"USWRFsfc",
"DLWRFsfc",
"ULWRFtoa",
"ULWRFsfc",
]
RENAMED_PROG_DIAG_VARS = {f"{var}_coarse": f"{var}_prog" for var in DIAG_VARS}
RENAMED_TRAIN_DIAG_VARS = {var: f"{var}_train" for var in DIAG_VARS}


RESTART_VARS = [
"sphum",
"T",
"delp",
"u",
"v",
"slmsk",
"phis",
"tsea",
"slope",
"DZ",
"W",
]
17 changes: 13 additions & 4 deletions fv3net/pipelines/create_training_data/__main__.py
@@ -1,5 +1,7 @@
import argparse
from fv3net.pipelines.create_training_data.pipeline import run
import yaml

from .pipeline import run

if __name__ == "__main__":
parser = argparse.ArgumentParser()
@@ -21,6 +23,12 @@
help="Write path for train data in Google Cloud Storage bucket. "
"Don't include bucket in path.",
)
parser.add_argument(
"variable_namefile",
type=str,
default=None,
help="yaml file for providing data variable names",
)
parser.add_argument(
"--timesteps-per-output-file",
type=int,
@@ -38,7 +46,8 @@
"Output zarr files will be saved in either 'train' or 'test' subdir of "
"gcs-output-data-dir",
)

args, pipeline_args = parser.parse_known_args()
print(args)
"""Main function"""
run(args=args, pipeline_args=pipeline_args)
with open(args.variable_namefile, "r") as stream:
names = yaml.safe_load(stream)
run(args=args, pipeline_args=pipeline_args, names=names)
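
The pipeline now reads variable and dimension names from the required variable_namefile YAML instead of the hard-coded constants deleted from create_training_data/__init__.py. The exact keys it expects are defined by pipeline.run (not shown in this diff); the sketch below is illustrative only and every key name in it is an assumption:

# Illustrative only: how a namefile would be loaded (key names are assumptions,
# not taken from this diff; pipeline.run defines what it actually expects).
import yaml

example_namefile = """
init_time_dim: initial_time
forecast_time_dim: forecast_time
restart_vars: [sphum, T, delp, u, v]
"""
names = yaml.safe_load(example_namefile)
print(names["init_time_dim"])  # -> "initial_time"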
49 changes: 47 additions & 2 deletions fv3net/pipelines/create_training_data/helpers.py
@@ -19,7 +19,12 @@
logger.setLevel(logging.INFO)


def _path_from_first_timestep(ds, train_test_labels=None):
def _convert_forecast_time_to_timedelta(ds, forecast_time_dim):
timedelta_coords = ds[forecast_time_dim].astype("timedelta64[ns]")
return ds.assign_coords({forecast_time_dim: timedelta_coords})


def _path_from_first_timestep(ds, init_time_dim, time_fmt, train_test_labels=None):
""" Uses first init time as zarr filename, and appends a 'train'/'test' subdir
if a dict of labels is provided

@@ -31,7 +36,7 @@ def _path_from_first_timestep(ds, train_test_labels=None):
Returns:
path in args.gcs_output_dir to write the zarr to
"""
timestep = min(ds[INIT_TIME_DIM].values).strftime(TIME_FMT)
timestep = min(ds[init_time_dim].values).strftime(time_fmt)
if isinstance(train_test_labels, dict):
try:
if timestep in train_test_labels["train"]:
@@ -102,3 +107,43 @@ def load_train_diag(top_level_dir, init_times):
ds_diag = open_diagnostic(run_dir, "sfc_dt_atmos").isel(time=0)
one_step_diags.append(ds_diag.squeeze().drop("time"))
return xr.concat([ds for ds in one_step_diags], time_dim_index)


def _rename_centered_xy_coords(cell_centered_da, edge_dim, center_dim):
"""
Args:
cell_centered_da: data array that got shifted from edges to cell centers
Returns:
same input array with dims renamed to corresponding cell center dims
"""
# cell_centered_da[edge_dim] = cell_centered_da[edge_dim] - 1
cell_centered_da = cell_centered_da.rename({edge_dim: center_dim})
return cell_centered_da


def _shift_edge_var_to_center(edge_var: xr.DataArray, edge_to_center_dims):
"""
Args:
edge_var: variable that is defined on edges of grid, e.g. u, v

Returns:
data array with the original variable at cell center
"""
edge_dims = list(edge_to_center_dims.keys())
for dim_to_recenter in [
edge_dim for edge_dim in edge_dims if edge_dim in edge_var.dims
]:

return _rename_centered_xy_coords(
0.5
* (edge_var + edge_var.shift({dim_to_recenter: 1})).isel(
{dim_to_recenter: slice(1, None)}
),
edge_dim=dim_to_recenter,
center_dim=edge_to_center_dims[dim_to_recenter],
)
else:
raise ValueError(
"Variable to shift to center must be centered on one horizontal axis and "
"edge-valued on the other."
)
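
For orientation, _shift_edge_var_to_center averages each edge value with its neighbor, drops the first (undefined) element, and renames the edge dimension to its cell-center counterpart. A self-contained sketch of that transformation (not part of the diff; the dimension names here are placeholders, not the ones the pipeline uses):

import numpy as np
import xarray as xr

# Toy edge-valued variable: 5 edges bounding 4 cells (dim names are placeholders).
u_edge = xr.DataArray(np.array([0.0, 1.0, 2.0, 3.0, 4.0]), dims=["x_edge"])

# Same recipe as _shift_edge_var_to_center: average each edge with its neighbor,
# drop the first element (NaN after the shift), then rename to the center dim.
u_center = (
    (0.5 * (u_edge + u_edge.shift({"x_edge": 1})))
    .isel({"x_edge": slice(1, None)})
    .rename({"x_edge": "x_center"})
)

print(u_center.values)  # [0.5 1.5 2.5 3.5] -> 4 cell-center values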