Skip to content

Commit

Permalink
Merge pull request #236 from jarq6c/add-azure-client
Browse files Browse the repository at this point in the history
Add interface to `nwm_client_new` for Azure Blob Storage
  • Loading branch information
jarq6c authored Sep 19, 2023
2 parents 1f23cd3 + e75693e commit ef6d0b6
Show file tree
Hide file tree
Showing 5 changed files with 153 additions and 2 deletions.
37 changes: 37 additions & 0 deletions python/nwm_client_new/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,43 @@ print(forecast_data.head())

</details>

<details><summary><b>Retrieving data from Azure Blob Storage</b></summary>

```python
# Import the NWM file client, the Azure catalog it will read from, and pandas
from hydrotools.nwm_client_new.NWMFileClient import NWMFileClient
from hydrotools.nwm_client_new.AzureFileCatalog import AzureFileCatalog
import pandas as pd

# Instantiate the model data client with an Azure Blob Storage catalog
catalog = AzureFileCatalog()
model_data_client = NWMFileClient(catalog=catalog)

# Set the reference time to one day before the current UTC time
yesterday = pd.Timestamp.utcnow() - pd.Timedelta("1D")

# Retrieve short range forecast data for a single NWM feature
forecast_data = model_data_client.get(
configurations = ["short_range"],
reference_times = [yesterday],
nwm_feature_ids = [724696]
)

# Look at the first few rows of the returned data
print(forecast_data.head())
```
### Example output
```console
reference_time nwm_feature_id value_time value measurement_unit variable_name configuration usgs_site_code
0 2022-08-07 18:00:00 724696 2022-08-07 19:00:00 20.369999 m3 s-1 streamflow short_range 01013500
1 2022-08-07 18:00:00 724696 2022-08-08 10:00:00 24.439999 m3 s-1 streamflow short_range 01013500
2 2022-08-07 18:00:00 724696 2022-08-08 09:00:00 24.469999 m3 s-1 streamflow short_range 01013500
3 2022-08-07 18:00:00 724696 2022-08-08 08:00:00 24.490000 m3 s-1 streamflow short_range 01013500
4 2022-08-07 18:00:00 724696 2022-08-08 07:00:00 24.510000 m3 s-1 streamflow short_range 01013500
```

</details>

<details><summary><b>Retrieving data from Nomads</b></summary>

```python
Expand Down
4 changes: 4 additions & 0 deletions python/nwm_client_new/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ install_requires =
netcdf4
tables
pint
azure-storage-blob
planetary-computer
adlfs
hydrotools._restclient
python_requires = >=3.8
include_package_data = True

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
"""
=============================================
NWM Azure Blob Storage Container File Catalog
=============================================
Concrete implementation of a National Water Model file catalog for discovering
files on Microsoft Azure Blob Storage.
https://planetarycomputer.microsoft.com/dataset/storage/noaa-nwm
Classes
-------
AzureFileCatalog
"""
from .NWMFileCatalog import NWMFileCatalog
from hydrotools._restclient.urllib import Url
import azure.storage.blob
import planetary_computer
import adlfs
from typing import List

class AzureFileCatalog(NWMFileCatalog):
    """An Azure Cloud catalog class for NWM data.

    This AzureFileCatalog class provides various methods for discovering NWM
    files on Azure Blob Storage.

    Note that the storage account ("noaanwm") and container ("nwm") used to
    list blobs are fixed inside ``list_blobs``. The ``server`` attribute is
    only used as the prefix of the blob URLs that are returned.
    """

    def __init__(
        self,
        server: str = 'https://noaanwm.blob.core.windows.net/'
    ) -> None:
        """Initialize catalog of NWM data source on Azure Blob Storage.

        Parameters
        ----------
        server : str, optional, default 'https://noaanwm.blob.core.windows.net/'
            Fully qualified path to Azure Cloud endpoint. Prepended to every
            blob name returned by ``list_blobs``.

        Returns
        -------
        None
        """
        super().__init__()

        # Goes through the property setter, which wraps the value in a Url
        self.server = server

    def list_blobs(
        self,
        configuration: str,
        reference_time: str,
        must_contain: str = 'channel_rt'
    ) -> List[str]:
        """List available blobs with provided parameters.

        Parameters
        ----------
        configuration : str, required
            Particular model simulation or forecast configuration. For a list
            of available configurations see NWMDataService.configurations
        reference_time : str, required
            Model simulation or forecast issuance/reference time in
            YYYYmmddTHHZ format.
            NOTE(review): this value is handed unchanged to
            ``self.separate_datetime``, which is defined outside this class.
            The package tests pass a pandas.Timestamp here; confirm the
            accepted type against that method.
        must_contain : str, optional, default 'channel_rt'
            Optional substring found in each blob name.

        Returns
        -------
        A list of blob URLs (as str) that satisfy the criteria set by the
        parameters. Each URL is ``server`` joined with the blob path.
        """
        # Validate configuration (delegated to the inherited helper)
        self.raise_invalid_configuration(configuration)

        # Break-up reference time into the date and time parts of the path
        issue_date, issue_time = self.separate_datetime(reference_time)

        # Request a SAS token for the "noaanwm" account and "nwm" container,
        # then open the account as a file system
        fs = adlfs.AzureBlobFileSystem(
            "noaanwm",
            credential=planetary_computer.sas.get_token("noaanwm", "nwm").token
        )

        # Get blob paths for this configuration and issue time
        blobs = fs.glob(
            f"nwm/nwm.{issue_date}/{configuration}/nwm.t{issue_time}*"
        )

        # Return blob URLs; iterate the glob result directly (no extra copy)
        return [
            str(self.server / suffix)
            for suffix in blobs
            if must_contain in suffix
        ]

    @property
    def server(self) -> str:
        """Azure endpoint used as the prefix of returned blob URLs.

        The stored value is the ``Url`` object created by the setter.
        """
        return self._server

    @server.setter
    def server(self, server: str) -> None:
        """Store the endpoint wrapped in a ``Url``.

        ``list_blobs`` relies on the ``/`` operator of ``Url`` to append blob
        paths to this endpoint.
        """
        self._server = Url(server)

Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "7.2.1"
__version__ = "7.3.0"
18 changes: 17 additions & 1 deletion python/nwm_client_new/tests/test_NWMFileCatalog.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
from hydrotools.nwm_client_new.GCPFileCatalog import GCPFileCatalog
from hydrotools.nwm_client_new.HTTPFileCatalog import HTTPFileCatalog
from hydrotools.nwm_client_new.AzureFileCatalog import AzureFileCatalog
import pandas as pd

# Set reference time
Expand All @@ -17,15 +18,22 @@ def setup_http():
server="https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/"
)

def test_parameters(setup_gcp, setup_http):
@pytest.fixture
def setup_azure():
    """Provide an AzureFileCatalog built with its default server."""
    azure_catalog = AzureFileCatalog()
    return azure_catalog

def test_parameters(setup_gcp, setup_http, setup_azure):
    """Check the configurations and connection attributes of each catalog."""
    # Every catalog must expose a non-empty collection of configurations
    for catalog in (setup_gcp, setup_http, setup_azure):
        assert catalog.configurations

    # HTTP catalog: server URL and SSL context
    expected_http_server = (
        "https://nomads.ncep.noaa.gov/pub/data/nccf/com/nwm/prod/"
    )
    assert setup_http.server == expected_http_server
    assert setup_http.ssl_context

    # GCP catalog: bucket name
    expected_bucket = "national-water-model"
    assert setup_gcp.bucket_name == expected_bucket

    # Azure catalog: default endpoint
    expected_azure_server = "https://noaanwm.blob.core.windows.net/"
    assert setup_azure.server == expected_azure_server

@pytest.mark.slow
def test_gcp_list_blobs(setup_gcp):
blobs = setup_gcp.list_blobs(
Expand All @@ -41,3 +49,11 @@ def test_http_list_blobs(setup_http):
reference_time=pd.Timestamp(reference_time)
)
assert len(blobs) == 3

@pytest.mark.slow
def test_azure_list_blobs(setup_azure):
    """List analysis_assim blobs on Azure and check how many are found."""
    # reference_time is the module-level value shared by the list_blobs tests
    issue_time = pd.Timestamp(reference_time)

    found = setup_azure.list_blobs(
        configuration="analysis_assim",
        reference_time=issue_time,
    )
    assert len(found) == 3

0 comments on commit ef6d0b6

Please sign in to comment.