added timerange check in open_dataset_extra including test (#708)
* added timerange check in open_dataset_extra including test

* updated whatsnew

* fixed testcases

* updated notebook
veenstrajelmer authored Dec 8, 2023
1 parent dc51ba3 commit 141e68f
Showing 6 changed files with 118 additions and 122 deletions.
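As a quick orientation before the diff, a minimal sketch of the behaviour this commit adds (hypothetical in-memory dataset; the import locations are taken from the changed files below):

import numpy as np
import pandas as pd
import xarray as xr

from dfm_tools.errors import OutOfRangeError
from dfm_tools.interpolate_grid2bnd import check_time_extent

# four daily, midday-centered timestamps, similar to the cmems_dataset_4times test fixture
times = pd.date_range("2019-12-31 12:00", periods=4, freq="D")
ds = xr.Dataset({"so": ("time", np.arange(4.0))}, coords={"time": times})

# requested period inside the available range: passes silently
check_time_extent(ds, tstart="2020-01-01 12:00:00", tstop="2020-01-02 12:00:00")

# requested period beyond the available range: raises OutOfRangeError
try:
    check_time_extent(ds, tstart="2021-01-01", tstop="2021-01-02")
except OutOfRangeError as e:
    print(e)  # requested tstop ... outside of available range ...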
30 changes: 20 additions & 10 deletions dfm_tools/interpolate_grid2bnd.py
@@ -246,6 +246,21 @@ def extract_component(ds):
return data_interp


def check_time_extent(data_xr, tstart, tstop):
tstart = pd.Timestamp(tstart)
tstop = pd.Timestamp(tstop)

#get timevar and compare requested dates
timevar = data_xr['time']
xr_tstartstop = pd.to_datetime(timevar.isel(time=[0,-1]).to_series())
nc_tstart = xr_tstartstop.index[0]
nc_tstop = xr_tstartstop.index[-1]
if tstart < nc_tstart:
raise OutOfRangeError(f'requested tstart {tstart} outside of available range {nc_tstart} to {nc_tstop}.')
if tstop > nc_tstop:
raise OutOfRangeError(f'requested tstop {tstop} outside of available range {nc_tstart} to {nc_tstop}.')


def open_dataset_extra(dir_pattern, quantity, tstart, tstop, conversion_dict=None, refdate_str=None, reverse_depth=False, chunks=None):
"""
empty docstring
@@ -310,16 +325,8 @@ def open_dataset_extra(dir_pattern, quantity, tstart, tstop, conversion_dict=Non
print(f'WARNING: calendar different than proleptic_gregorian found ({data_xr_calendar}), convert_calendar is called so check output carefully. It should be no issue for datasets with a monthly interval.')
data_xr = data_xr.convert_calendar('standard') #TODO: does this not result in 29feb nan values in e.g. GFDL model? Check missing argument at https://docs.xarray.dev/en/stable/generated/xarray.Dataset.convert_calendar.html
data_xr['time'].encoding['units'] = units_copy #put back dropped units

#get timevar and compare requested dates
timevar = data_xr['time']
xr_tstartstop = pd.to_datetime(timevar.isel(time=[0,-1]).to_series())
nc_tstart = xr_tstartstop.index[0]
nc_tstop = xr_tstartstop.index[-1]
if tstart < nc_tstart:
raise OutOfRangeError(f'requested tstart {tstart} outside of available range {nc_tstart} to {nc_tstop}')
if tstop > nc_tstop:
raise OutOfRangeError(f'requested tstop {tstop} outside of available range {nc_tstart} to {nc_tstop}')

check_time_extent(data_xr, tstart, tstop)

#360 to 180 conversion
convert_360to180 = (data_xr['longitude'].to_numpy()>180).any() #TODO: replace to_numpy() with load()
@@ -334,6 +341,9 @@ def open_dataset_extra(dir_pattern, quantity, tstart, tstop, conversion_dict=Non
quantity_list_notavailable = pd.Series(quantity_list).loc[~bool_quanavailable].tolist()
raise KeyError(f'quantity {quantity_list_notavailable} not found in netcdf, available are: {data_vars}. Try updating conversion_dict to rename these variables.')
data_xr_vars = data_xr[quantity_list].sel(time=slice(tstart,tstop))
# check time extent again to avoid issues with e.g. midday data being
# sliced to midnight times: https://github.com/Deltares/dfm_tools/issues/707
check_time_extent(data_xr_vars, tstart, tstop)

#optional conversion of units. Multiplications or other simple operators do not affect performance (dask.array(getitem) becomes dask.array(mul)). With more complex operations it is better to do it on the interpolated array.
for quan in quantity_list: #TODO: maybe do unit conversion before interp or is that computationally heavy?
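The re-check after the time slice guards against the situation described in Deltares/dfm_tools#707: daily CMEMS files are now timestamped at midday, so a midnight-based request can end up half a day outside the sliced data. A small sketch of that situation (hypothetical dataset, not part of the diff):

import numpy as np
import pandas as pd
import xarray as xr

# daily values timestamped at 12:00, as in the midday-centered CMEMS downloads
times = pd.date_range("2019-12-31 12:00", periods=4, freq="D")
ds = xr.Dataset({"so": ("time", np.arange(4.0))}, coords={"time": times})

# slicing with midnight-based bounds keeps only the midday timestamps inside them
ds_sliced = ds.sel(time=slice("2020-01-01", "2020-01-03"))
print(ds_sliced["time"].values[0])  # 2020-01-01T12:00, later than the requested tstart

# calling check_time_extent() on the sliced dataset therefore raises OutOfRangeError,
# which is exactly the safeguard added after the .sel(time=slice(...)) call above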
4 changes: 0 additions & 4 deletions dfm_tools/modelbuilder.py
@@ -123,10 +123,6 @@ def cmems_nc_to_ini(ext_old, dir_output, list_quantities, tstart, dir_pattern, c

print('writing file')
file_output = os.path.join(dir_output,f"{quantity}_{tstart_str}.nc")
if len(data_xr.time) < 2:
raise ValueError(f"your initial field contains less than two timesteps ({data_xr.time.to_pandas().index.tolist()}), "
"this is not accepted by FM. CMEMS moved to midday to midnight based daily times. "
"Re-download your CMEMS data and try again.")
data_xr.to_netcdf(file_output)

#append forcings to ext
144 changes: 69 additions & 75 deletions docs/notebooks/modelbuilder_example.ipynb

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions docs/whats-new.md
@@ -3,6 +3,7 @@
### Feat
- replaced `is_geographic` with `crs` argument in `dfmt.make_basegrid()` by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#696](https://github.com/Deltares/dfm_tools/pull/696)
- moved from `OPeNDAP` to `copernicus-marine-client` in `dfmt.download_CMEMS()` including safer authentication by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#691](https://github.com/Deltares/dfm_tools/pull/691)
- added timerange check of resulting dataset of `dfmt.open_dataset_extra()` by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#708](https://github.com/Deltares/dfm_tools/pull/708)


## 0.17.0 (2023-11-17)
30 changes: 28 additions & 2 deletions tests/test_interpolate_grid2bnd.py
@@ -19,9 +19,11 @@
components_translate_upper,
get_ncbnd_construct,
interp_regularnc_to_plipointsDataset,
check_time_extent,
)
from dfm_tools.hydrolib_helpers import PolyFile_to_geodataframe_points
import hydrolib.core.dflowfm as hcdfm
from dfm_tools.errors import OutOfRangeError


def data_dcsm_gdf():
@@ -75,6 +77,30 @@ def cmems_dataset_4times():
return ds


def test_check_time_extent():
ds = cmems_dataset_4times()

# prior to ds timerange
try:
check_time_extent(ds, tstart="2011-01-01", tstop="2011-01-01")
except OutOfRangeError as e:
assert "tstart" in str(e)

# after ds timerange
try:
check_time_extent(ds, tstart="2021-01-01", tstop="2021-01-01")
except OutOfRangeError as e:
assert "tstop" in str(e)

# in ds timerange
check_time_extent(ds, tstart="2020-01-01", tstop="2020-01-01")

# on ds timerange
check_time_extent(ds, tstart="2019-12-31 12:00:00", tstop="2020-01-03 12:00:00")

del ds


@pytest.mark.unittest
def test_conversion_dict():
"""
@@ -222,7 +248,7 @@ def test_open_dataset_extra_correctdepths():
file_nc = 'temp_cmems_dummydata.nc'
ds_moretime.to_netcdf(file_nc)

ds_moretime_import = dfmt.open_dataset_extra(dir_pattern=file_nc, quantity='salinitybnd', tstart='2020-01-01', tstop='2020-01-03')
ds_moretime_import = dfmt.open_dataset_extra(dir_pattern=file_nc, quantity='salinitybnd', tstart='2020-01-01 12:00:00', tstop='2020-01-02 12:00:00')

ncbnd_construct = get_ncbnd_construct()
varn_depth = ncbnd_construct['varn_depth']
@@ -258,7 +284,7 @@ def test_open_dataset_extra_slightly_different_latlons():
ds2.to_netcdf(file_nc2)

try:
ds = dfmt.open_dataset_extra('temp_cmems_2day_*.nc', quantity='salinitybnd', tstart='2020-01-01', tstop='2020-01-03')
ds = dfmt.open_dataset_extra('temp_cmems_2day_*.nc', quantity='salinitybnd', tstart='2020-01-01 12:00:00', tstop='2020-01-02 12:00:00')

# add assertion just to be safe, but the code will not reach here
assert ds.dims['longitude'] == ds1.dims['longitude']
31 changes: 0 additions & 31 deletions tests/test_modelbuilder.py
@@ -14,37 +14,6 @@
from dfm_tools.hydrolib_helpers import get_ncbnd_construct


@pytest.mark.systemtest
def test_cmems_nc_to_ini_midday_centered():

# TODO: create fixture
from tests.test_interpolate_grid2bnd import cmems_dataset_4times
ds1 = cmems_dataset_4times().isel(time=slice(None,2))
ds2 = cmems_dataset_4times().isel(time=slice(2,None))

dir_pattern = "./temp_cmems_2day_*.nc"
file_nc1 = dir_pattern.replace("*","sal_p1")
file_nc2 = dir_pattern.replace("*","sal_p2")
file_nc3 = dir_pattern.replace("*","tem_p1")
file_nc4 = dir_pattern.replace("*","tem_p2")
ds1.to_netcdf(file_nc1)
ds2.to_netcdf(file_nc2)
ds1.rename({"so":"thetao"}).to_netcdf(file_nc3)
ds2.rename({"so":"thetao"}).to_netcdf(file_nc4)

ext_old = hcdfm.ExtOldModel()
try:
ext_old = dfmt.cmems_nc_to_ini(ext_old=ext_old,
dir_output='.',
list_quantities=["salinitybnd"],
tstart="2020-01-01",
dir_pattern=dir_pattern)
except ValueError as e:
assert "less than two timesteps" in str(e)

# times_expected were '2019-12-31 12:00:00' and '2020-01-01 12:00:00'


@pytest.mark.systemtest
def test_cmems_nc_to_ini_midnight_centered():

