Feature/use onestep zarr train data #207

Merged 29 commits on Apr 3, 2020
Changes from 24 commits
2 changes: 1 addition & 1 deletion external/fv3config
36 changes: 35 additions & 1 deletion external/vcm/tests/test_calc.py
@@ -10,7 +10,7 @@
dz_and_top_to_phis,
_add_coords_to_interface_variable,
)
from vcm.calc.calc import local_time
from vcm.calc.calc import local_time, apparent_source
from vcm.cubedsphere.constants import COORD_Z_CENTER, COORD_Z_OUTER


@@ -85,3 +85,37 @@ def test_solar_time():
lon = xr.DataArray([0, 180, 270, 360, 0, 270], dims=["x"], coords={"x": range(6)})
ds_solar_test = xr.Dataset({"initialization_time": t, "lon": lon})
assert np.allclose(local_time(ds_solar_test), [0, 12, 18, 0, 6, 0])


def test_apparent_source():
coords = {
"initial_time": [
cftime.DatetimeJulian(2016, 8, 1, 0, 15, 0),
cftime.DatetimeJulian(2016, 8, 1, 0, 30, 0),
],
"forecast_time": np.array([0.0, 60.0, 120.0, 180.0, 240.0]).astype(
np.dtype("<m8[s]")
),
}
T = xr.DataArray(
[[1, 2, 4, 7, 11.0], [3, 5, 5, 5, 5.0]],
dims=["initial_time", "forecast_time"],
coords=coords,
)
# check Q calculated for different forecast time steps
Q1_forecast0 = apparent_source(
T,
forecast_time_index_onestep=0,
forecast_time_index_highres=0,
t_dim="initial_time",
s_dim="forecast_time",
)
assert Q1_forecast0 == pytest.approx((2.0 / (15 * 60)) - (1.0 / 60))
Q1_forecast3 = apparent_source(
T,
forecast_time_index_onestep=3,
forecast_time_index_highres=0,
t_dim="initial_time",
s_dim="forecast_time",
)
assert Q1_forecast3 == pytest.approx((2.0 / (15 * 60)) - (4.0 / 60))
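For reference, the expected values in these asserts follow directly from the toy T array: the high-res tendency is a finite difference across the two initial times (15 minutes apart) at the chosen high-res forecast index, and the one-step tendency is a finite difference across adjacent forecast steps (60 seconds apart) at the chosen one-step index. A minimal arithmetic sketch, assuming that reading of apparent_source:

```python
import numpy as np

T = np.array([[1, 2, 4, 7, 11.0], [3, 5, 5, 5, 5.0]])
dt = 15 * 60  # seconds between the two initial times
ds = 60.0     # seconds between adjacent forecast steps

# high-res tendency at forecast index 0: difference across initial times
tend_highres = (T[1, 0] - T[0, 0]) / dt       # 2 / (15 * 60)

# one-step tendencies at forecast indices 0 and 3 for the first initial time
tend_onestep_0 = (T[0, 1] - T[0, 0]) / ds     # 1 / 60
tend_onestep_3 = (T[0, 4] - T[0, 3]) / ds     # 4 / 60

print(tend_highres - tend_onestep_0)  # expected Q1_forecast0
print(tend_highres - tend_onestep_3)  # expected Q1_forecast3
```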
25 changes: 16 additions & 9 deletions external/vcm/vcm/calc/calc.py
@@ -1,11 +1,6 @@
import numpy as np
import xarray as xr
from vcm.cubedsphere.constants import (
INIT_TIME_DIM,
FORECAST_TIME_DIM,
COORD_Z_CENTER,
VAR_LON_CENTER,
)
from vcm.cubedsphere.constants import INIT_TIME_DIM, COORD_Z_CENTER, VAR_LON_CENTER

gravity = 9.81
specific_heat = 1004
@@ -27,14 +22,24 @@ def timedelta_to_seconds(dt):


def apparent_source(
q: xr.DataArray, t_dim: str = INIT_TIME_DIM, s_dim: str = FORECAST_TIME_DIM
q: xr.DataArray,
forecast_time_index_onestep,
forecast_time_index_highres,
t_dim: str,
s_dim: str,
) -> xr.DataArray:
"""Compute the apparent source from stepped output

Args:
q: The variable to compute the source of
forecast_time_index_onestep: forecast time step to use for
calculating one step run tendency
forecast_time_index_highres: forecast time step to use for
calculating high res run tendency
t_dim: the dimension corresponding to the initial condition
s_dim: the dimension corresponding to the forecast time

Returns:
The apparent source of q. Has units [q]/s
@@ -53,10 +58,12 @@ def apparent_source(
tend_c48 = dq_c48 / ds

# restore coords
tend = tend.isel({s_dim: 0}).assign_coords(**{t_dim: t[:-1]})
tend_c48 = tend_c48.isel({s_dim: 0, t_dim: slice(0, -1)}).assign_coords(
tend = tend.isel({s_dim: forecast_time_index_highres}).assign_coords(
**{t_dim: t[:-1]}
)
tend_c48 = tend_c48.isel(
{s_dim: forecast_time_index_onestep, t_dim: slice(0, -1)}
).assign_coords(**{t_dim: t[:-1]})

return tend - tend_c48
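In words: the result is the high-res tendency (finite difference across initial times, evaluated at forecast_time_index_highres) minus the one-step model's tendency (finite difference across forecast steps, evaluated at forecast_time_index_onestep), matching the "high resolution tendency - coarse res model's one step tendency" residual described in the removed __init__.py below. A minimal usage sketch with the new required arguments; the array values, the dQ1 naming, and the index choices are illustrative, not taken from this PR:

```python
import cftime
import numpy as np
import xarray as xr

from vcm.calc.calc import apparent_source

# illustrative temperature field indexed by (initial_time, forecast_time)
T = xr.DataArray(
    np.array([[1.0, 2.0, 4.0], [3.0, 5.0, 5.0]]),
    dims=["initial_time", "forecast_time"],
    coords={
        "initial_time": [
            cftime.DatetimeJulian(2016, 8, 1, 0, 15, 0),
            cftime.DatetimeJulian(2016, 8, 1, 0, 30, 0),
        ],
        "forecast_time": np.array([0.0, 60.0, 120.0]).astype("timedelta64[s]"),
    },
)

# heating residual at the first forecast step of both runs
dQ1 = apparent_source(
    T,
    forecast_time_index_onestep=0,
    forecast_time_index_highres=0,
    t_dim="initial_time",
    s_dim="forecast_time",
)
```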

41 changes: 0 additions & 41 deletions fv3net/pipelines/create_training_data/__init__.py
@@ -1,41 +0,0 @@
# residuals that the ML is training on
# high resolution tendency - coarse res model's one step tendency
VAR_Q_HEATING_ML = "dQ1"
VAR_Q_MOISTENING_ML = "dQ2"
VAR_Q_U_WIND_ML = "dQU"
VAR_Q_V_WIND_ML = "dQV"

# suffixes denote whether diagnostic variable is from the coarsened
# high resolution prognostic run or the coarse res one step train data run
SUFFIX_HIRES_DIAG = "prog"
SUFFIX_COARSE_TRAIN_DIAG = "train"

DIAG_VARS = [
"LHTFLsfc",
"SHTFLsfc",
"PRATEsfc",
"DSWRFtoa",
"DSWRFsfc",
"USWRFtoa",
"USWRFsfc",
"DLWRFsfc",
"ULWRFtoa",
"ULWRFsfc",
]
RENAMED_PROG_DIAG_VARS = {f"{var}_coarse": f"{var}_prog" for var in DIAG_VARS}
RENAMED_TRAIN_DIAG_VARS = {var: f"{var}_train" for var in DIAG_VARS}


RESTART_VARS = [
"sphum",
"T",
"delp",
"u",
"v",
"slmsk",
"phis",
"tsea",
"slope",
"DZ",
"W",
]
20 changes: 16 additions & 4 deletions fv3net/pipelines/create_training_data/__main__.py
@@ -1,5 +1,7 @@
import argparse
from fv3net.pipelines.create_training_data.pipeline import run
import yaml

from .pipeline import run

if __name__ == "__main__":
parser = argparse.ArgumentParser()
@@ -21,6 +23,12 @@
help="Write path for train data in Google Cloud Storage bucket. "
"Don't include bucket in path.",
)
parser.add_argument(
"variable_namefile",
type=str,
default=None,
help="yaml file for providing data variable names",
)
parser.add_argument(
"--timesteps-per-output-file",
type=int,
@@ -38,7 +46,11 @@
"Output zarr files will be saved in either 'train' or 'test' subdir of "
"gcs-output-data-dir",
)

args, pipeline_args = parser.parse_known_args()
print(args)
"""Main function"""
run(args=args, pipeline_args=pipeline_args)
with open(args.variable_namefile, "r") as stream:
try:
names = yaml.safe_load(stream)
except yaml.YAMLError as exc:
raise ValueError(f"Bad yaml config: {exc}")
Reviewer comment (Contributor): This try/except seems redundant since you re-raise the error raised by yaml and don't have any special error handling logic. The traceback should make it pretty clear where the error is from.

run(args=args, pipeline_args=pipeline_args, names=names)
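The new positional variable_namefile argument points at a YAML file of data variable names, which is parsed with yaml.safe_load and passed to run() as names. The file's schema is not shown in this diff; a hypothetical file covering the kinds of names previously hard-coded in create_training_data/__init__.py (heating/moistening residuals, diagnostic suffixes, restart variables) might look like:

```yaml
# hypothetical variable_namefile contents; the keys are illustrative and
# not taken from this PR
var_q_heating_ml: dQ1
var_q_moistening_ml: dQ2
suffix_hires_diag: prog
suffix_coarse_train_diag: train
restart_vars:
  - sphum
  - T
  - delp
  - u
  - v
```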
5 changes: 5 additions & 0 deletions fv3net/pipelines/create_training_data/helpers.py
@@ -19,6 +19,11 @@
logger.setLevel(logging.INFO)


def convert_forecast_time_to_timedelta(ds, forecast_time_dim):
Reviewer comment (Contributor): is this a public function?
Author reply (Contributor): No, added _ to name

timedelta_coords = ds[forecast_time_dim].astype("timedelta64[ns]")
return ds.assign_coords({forecast_time_dim: timedelta_coords})
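A minimal, illustrative usage sketch of this helper (per the review thread above, it was later renamed with a leading underscore; the import path assumes the file's location in this repo):

```python
import numpy as np
import xarray as xr

from fv3net.pipelines.create_training_data.helpers import (
    convert_forecast_time_to_timedelta,
)

# illustrative dataset with a plain numeric forecast_time coordinate
ds = xr.Dataset(coords={"forecast_time": np.array([0, 60, 120])})

converted = convert_forecast_time_to_timedelta(ds, "forecast_time")
# the coordinate now has dtype timedelta64[ns]; note that astype reinterprets
# the bare integers as nanosecond counts
print(converted["forecast_time"].dtype)
```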


def _path_from_first_timestep(ds, train_test_labels=None):
""" Uses first init time as zarr filename, and appends a 'train'/'test' subdir
if a dict of labels is provided