Skip to content

Commit

Permalink
added observation functions (#711)
Browse files Browse the repository at this point in the history
* implemented ssh_catalog_subset and ssh_retrieve_data and ssh_catalog_toxynfile

* added testcases

* add erddapy dependency

* added slev subset and retrieve notebook

* first alignments of catalogs and netcdf files

* updated whatsnew
  • Loading branch information
veenstrajelmer authored Dec 10, 2023
1 parent 2fd43b4 commit 271857e
Show file tree
Hide file tree
Showing 7 changed files with 1,148 additions and 7 deletions.
1 change: 1 addition & 0 deletions dfm_tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from dfm_tools.coastlines import *
from dfm_tools import data
from dfm_tools.modelbuilder import *
from dfm_tools.observations import *

import warnings
warnings.filterwarnings(action="always", category=DeprecationWarning)
Expand Down
743 changes: 743 additions & 0 deletions dfm_tools/observations.py

Large diffs are not rendered by default.

272 changes: 272 additions & 0 deletions docs/notebooks/subset_retrieve_sealevel_observations.ipynb

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions docs/whats-new.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
## UNRELEASED

### Feat
- added spatial/temporal subsetting and retrieval of in-situ observation data with `dfmt.ssh_catalog_subset()` and `dfmt.ssh_retrieve_data()` by [@veenstrajelmer](https://github.com/veenstrajelmer) in [#711](https://github.com/Deltares/dfm_tools/pull/711)


## 0.18.0 (2023-12-08)

### Feat
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ dependencies = [
"cdsapi>=0.6.1",
#pydap<3.4.0 is from May 2017 and does not support newer python versions
"pydap>=3.4.0",
#erddapy<2.0.0 does not support pandas>=2.0.0
"erddapy>=2.0.0",
#copernicus-marine-client<0.10.0 expects different metadata catalog keys
"copernicus-marine-client>=0.10.0",
#pooch<1.1.0 does not have attribute retrieve
Expand Down
20 changes: 13 additions & 7 deletions tests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
@author: veenstra
"""

import shutil
import os
import pytest
import pandas as pd
Expand Down Expand Up @@ -38,15 +39,16 @@ def test_download_era5():
date_max = '2010-01-02'
longitude_min, longitude_max, latitude_min, latitude_max = 2, 3, 51, 52 #test domain
variables_era5 = ['msl']#'v10n'] # check variables_dict in dfmt.download_ERA5() for valid names
dir_output = './tests/tests_output/era5_temp'
dir_output = 'era5_temp'
for varkey in variables_era5:
os.makedirs(dir_output, exist_ok=True)

dfmt.download_ERA5(varkey,
longitude_min=longitude_min, longitude_max=longitude_max, latitude_min=latitude_min, latitude_max=latitude_max,
date_min=date_min, date_max=date_max,
dir_output=dir_output, overwrite=True)
# os.rmdir(dir_output)
#clean up
shutil.rmtree(dir_output)


#TODO: properly set environment variables in github would prevent localness
Expand All @@ -57,14 +59,15 @@ def test_download_cmems_my():
date_max = '2010-01-02'
longitude_min, longitude_max, latitude_min, latitude_max = 2, 3, 51, 52 #test domain
varlist_cmems = ['bottomT','no3'] # avaliable variables differ per product, examples are ['bottomT','mlotst','siconc','sithick','so','thetao','uo','vo','usi','vsi','zos','no3']. More info on https://data.marine.copernicus.eu/products
dir_output = './tests/tests_output/cmems_temp_my'
dir_output = 'cmems_temp_my'
for varkey in varlist_cmems:
file_prefix = 'cmems_'
dfmt.download_CMEMS(varkey=varkey,
longitude_min=longitude_min, longitude_max=longitude_max, latitude_min=latitude_min, latitude_max=latitude_max,
date_min=date_min, date_max=date_max,
dir_output=dir_output, file_prefix=file_prefix, overwrite=True)
# os.rmdir(dir_output)
#clean up
shutil.rmtree(dir_output)


#TODO: properly set environment variables in github would prevent localness
Expand All @@ -75,14 +78,15 @@ def test_download_cmems_forecast():
date_max = pd.Timestamp.today() + pd.Timedelta(days=1)
longitude_min, longitude_max, latitude_min, latitude_max = 2, 3, 51, 52 #test domain
varlist_cmems = ['tob','no3'] # avaliable variables differ per product, examples are ['bottomT','mlotst','siconc','sithick','so','thetao','uo','vo','usi','vsi','zos','no3']. More info on https://data.marine.copernicus.eu/products
dir_output = './tests/tests_output/cmems_temp_forecast'
dir_output = 'cmems_temp_forecast'
for varkey in varlist_cmems:
file_prefix = 'cmems_'
dfmt.download_CMEMS(varkey=varkey,
longitude_min=longitude_min, longitude_max=longitude_max, latitude_min=latitude_min, latitude_max=latitude_max,
date_min=date_min, date_max=date_max,
dir_output=dir_output, file_prefix=file_prefix, overwrite=True)
# os.rmdir(dir_output)
#clean up
shutil.rmtree(dir_output)


@pytest.mark.unittest
Expand All @@ -93,7 +97,7 @@ def test_download_hycom():
date_max = '2010-01-02'
varlist_hycom = ['surf_el']#'water_temp'] #['tau','water_u','water_v','water_temp','salinity','surf_el']

dir_output = './tests/tests_output/hycom_temp'
dir_output = 'hycom_temp'
os.makedirs(dir_output, exist_ok=True)
for varkey in varlist_hycom:
# Path(dir_output).mkdir(parents=True, exist_ok=True)
Expand All @@ -105,3 +109,5 @@ def test_download_hycom():
longitude_min=longitude_min, longitude_max=longitude_max, latitude_min=latitude_min, latitude_max=latitude_max,
date_min=date_min, date_max=date_max,
dir_output=dir_output, file_prefix=file_prefix, overwrite=True)
#clean up
shutil.rmtree(dir_output)
111 changes: 111 additions & 0 deletions tests/test_observations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# -*- coding: utf-8 -*-
"""
Created on Sat Dec 9 17:46:57 2023
@author: veenstra
"""

import shutil
import os
import pytest
import dfm_tools as dfmt
from dfm_tools.observations import (ssc_sscid_from_otherid,
ssc_ssh_subset_groups,
)


@pytest.mark.unittest
def test_ssh_catalog_subset_expected_fields():
    """Check that each available catalog exposes the shared set of columns.

    Every source must provide the columns "geometry", "source", "country"
    and "station_name_unique". All sources except "ssc" and "psmsl-gnssir"
    must additionally provide "time_ndays".

    The "gesla3" source is only included when the GESLA3 folder on the
    p-drive exists, and "cmems" only when running on Windows.
    """
    required_columns = ("geometry", "source", "country", "station_name_unique")
    # sources for which the "time_ndays" column is not asserted
    sources_without_ndays = ("ssc", "psmsl-gnssir")

    sources = ["uhslc-fast", "uhslc-rqds", "psmsl-gnssir", "ssc", "ioc"]
    if os.path.exists(r"p:\1230882-emodnet_hrsm\data\GESLA3"):
        # not possible without p-drive connection
        sources.append("gesla3")
    if os.name == "nt":
        # TODO: not possible on Github, due to missing credentials
        sources.append("cmems")

    for source in sources:
        catalog_columns = dfmt.ssh_catalog_subset(source=source).columns
        for column in required_columns:
            assert column in catalog_columns
        if source in sources_without_ndays:
            continue
        assert "time_ndays" in catalog_columns


@pytest.mark.unittest
def test_ssh_catalog_subset():
    """Check that subsetting a catalog yields fewer rows than the full catalog.

    For every available source the full catalog is compared with a subset
    restricted to a bounding box (and, for all sources except "ssc", also to
    a time range). The subset must be strictly smaller.

    The "gesla3" source is only included when the GESLA3 folder on the
    p-drive exists, and "cmems" only when running on Windows.
    """
    # spatial extent: france
    # alternatives: japan (123, 148, 23, 47) or (-20, 40, 25, 72)
    extent_kwargs = dict(lon_min=-6, lon_max=5, lat_min=48, lat_max=50.5)
    # alternative period: '2016-01-01' to '2016-06-01'
    period_kwargs = dict(time_min='2020-01-01', time_max='2020-06-01')

    sources_with_time = ["uhslc-fast", "uhslc-rqds", "psmsl-gnssir", "ioc"]
    if os.path.exists(r"p:\1230882-emodnet_hrsm\data\GESLA3"):
        # not possible without p-drive connection
        sources_with_time.append("gesla3")
    if os.name == "nt":
        # TODO: not possible on Github, due to missing credentials
        sources_with_time.append("cmems")
    # these sources are subsetted on extent only, no time range is passed
    sources_without_time = ["ssc"]

    for source in sources_with_time + sources_without_time:
        subset_kwargs = dict(extent_kwargs)
        if source not in sources_without_time:
            subset_kwargs.update(period_kwargs)

        catalog_full = dfmt.ssh_catalog_subset(source=source)
        catalog_subset = dfmt.ssh_catalog_subset(source=source, **subset_kwargs)
        assert len(catalog_full) > len(catalog_subset)


@pytest.mark.unittest
def test_ssh_retrieve_data():
    """Retrieve data for the first station of each available catalog.

    For every source the first catalog row is passed to
    ``dfmt.ssh_retrieve_data()``, which receives a temporary output
    directory. The test passes when no retrieval raises.

    The temporary directory is removed in a ``finally`` clause, so a failing
    retrieval does not leave files behind for later test runs.

    The "gesla3" source is only included when the GESLA3 folder on the
    p-drive exists (the same condition the other catalog tests in this file
    use), and "cmems" only when running on Windows.
    """
    dir_output = "./temp_ssh_data"
    time_min, time_max = '2020-01-01', '2020-02-01'

    source_list = ["ioc", "uhslc-fast", "uhslc-rqds", "psmsl-gnssir"]
    if os.path.exists(r"p:\1230882-emodnet_hrsm\data\GESLA3"):
        # not possible without p-drive connection
        source_list += ["gesla3"]
    if os.name == "nt":
        # TODO: not possible on Github, due to missing credentials
        source_list += ["cmems"]

    os.makedirs(dir_output, exist_ok=True)
    try:
        for source in source_list:
            ssc_catalog_gpd = dfmt.ssh_catalog_subset(source=source)
            # a single station is enough to exercise the retrieval
            ssc_catalog_gpd_sel = ssc_catalog_gpd.iloc[:1]
            if source == "cmems":
                # TODO: remove this exception when the cmems API works for insitu data
                dfmt.ssh_retrieve_data(ssc_catalog_gpd_sel, dir_output)
            else:
                dfmt.ssh_retrieve_data(ssc_catalog_gpd_sel, dir_output,
                                       time_min=time_min, time_max=time_max)
    finally:
        # clean up, also when a retrieval raised
        shutil.rmtree(dir_output)


@pytest.mark.unittest
def test_ssc_sscid_from_otherid():
    """Check that id 347 of group 'uhslc' maps to the SSC id "SSC-abas"."""
    expected_sscid = "SSC-abas"
    found_sscid = ssc_sscid_from_otherid(group_id=347, groupname='uhslc')
    assert found_sscid == expected_sscid


@pytest.mark.unittest
def test_ssc_ssh_subset_groups():
    """Check the sizes of single-group and two-group subsets of the SSC catalog.

    The combined 'ioc' + 'uhslc' subset must be larger than each single-group
    subset, but smaller than the sum of both.
    """
    # presumably some stations belong to both groups and are only listed once
    # in the combined subset; verify against ssc_ssh_subset_groups()
    count_uhslc = len(ssc_ssh_subset_groups(groups='uhslc'))
    count_ioc = len(ssc_ssh_subset_groups(groups='ioc'))
    count_combined = len(ssc_ssh_subset_groups(groups=['ioc','uhslc']))

    assert count_uhslc < count_combined
    assert count_ioc < count_combined
    assert count_ioc + count_uhslc > count_combined


@pytest.mark.unittest
def test_ssh_catalog_toxynfile():
    """Check that writing the SSC catalog to an xyn-file creates that file.

    The output file is removed in a ``finally`` clause, so it does not stay
    behind in the working directory when writing raises after the file was
    created.
    """
    ssc_catalog_gpd = dfmt.ssh_catalog_subset(source="ssc")
    file_xyn = 'test_ssc_obs.xyn'
    try:
        dfmt.ssh_catalog_toxynfile(ssc_catalog_gpd, file_xyn)
        assert os.path.isfile(file_xyn)
    finally:
        # clean up; the file may be missing if writing failed early
        if os.path.isfile(file_xyn):
            os.remove(file_xyn)

0 comments on commit 271857e

Please sign in to comment.