Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Testing time decoding #10

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
@@ -1 +1,41 @@
# CI workflow: run the test suite on pushes to main, on all pull requests,
# and on a weekly schedule (Mondays 13:00 UTC) to catch upstream breakage.
name: CI

on:
  push:
    branches:
      - "main"
  pull_request:
    branches:
      - "*"
  schedule:
    - cron: "0 13 * * 1"

jobs:
  build:
    defaults:
      run:
        # Login shell so any environment setup (e.g. conda) is picked up.
        shell: bash -l {0}
    strategy:
      fail-fast: false
      matrix:
        os: ["ubuntu-latest"]
        python-version: ["3.10", "3.11", "3.12"]
    runs-on: ${{ matrix.os }}
    steps:
      - name: 🫙 Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # checkout tags (which is not done by default)
      - name: 🔁 Setup Python
        id: setup-python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: 🌈 Install dependencies
        shell: bash -l {0}
        run: |
          python -m pip install -r requirements.txt
      - name: 🏄‍♂️ Run Tests
        shell: bash -l {0}
        run: |
          pytest tests -v
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
git+https://github.com/TomAugspurger/VirtualiZarr.git@user/tom/feature/filesystems
git+https://github.com/jsignell/VirtualiZarr.git@cftime
kerchunk
xarray
requests
aiohttp
tqdm
dask
cftime
pytest
76 changes: 76 additions & 0 deletions tests/test_script.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import pytest
import xarray as xr
import fsspec
from virtualizarr import open_virtual_dataset

# Remote CMIP6 NetCDF files (MPI-ESM1-2-HR, ssp126, monthly near-surface air
# temperature) served over HTTP by the LLNL THREDDS server. Each file covers
# a 5-year span; the trailing two are commented out to keep the tests fast.
urls = [
"http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp126/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp126_r1i1p1f1_gn_201501-201912.nc",
"http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp126/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp126_r1i1p1f1_gn_202001-202412.nc",
# "http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp126/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp126_r1i1p1f1_gn_202501-202912.nc",
# "http://aims3.llnl.gov/thredds/fileServer/css03_data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp126/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp126_r1i1p1f1_gn_203001-203412.nc",
]

@pytest.fixture()
def ds_combined():
    """Reference dataset: every source file downloaded and opened eagerly,
    then concatenated along the ``time`` dimension."""
    datasets = []
    for url in urls:
        with fsspec.open(url) as f:
            # Eager .load() is a workaround from
            # https://github.com/fsspec/s3fs/issues/337
            datasets.append(xr.open_dataset(f, use_cftime=True).load())
    combined = xr.combine_nested(
        datasets,
        concat_dim=["time"],
        coords="minimal",
        compat="override",
        combine_attrs="drop_conflicts",
    )
    return combined

@pytest.fixture(scope="module")
def vds():
    """Virtual dataset: each source file referenced (not downloaded), with
    only ``time`` loaded/decoded, then concatenated along ``time``."""
    # Open each remote file as a virtual dataset, one at a time (serial).
    virtual_datasets = [
        open_virtual_dataset(
            url,
            indexes={},
            reader_options={},  # needed for now to circumvent a bug in https://github.com/TomNicholas/VirtualiZarr/pull/126
            cftime_variables=["time"],
            loadable_variables=["time"],
        )
        for url in urls
    ]
    return xr.combine_nested(
        virtual_datasets,
        concat_dim=["time"],
        coords="minimal",
        compat="override",
        combine_attrs="drop_conflicts",
    )

@pytest.fixture(scope="module")
def vds_json(vds, tmp_path_factory):
    """Serialize the combined virtual dataset to kerchunk JSON.

    Returns the path of the written reference file as a ``str``.
    """
    # tmp_path_factory is pytest's modern, pathlib-based replacement for the
    # legacy py.path-based tmpdir_factory; behavior is otherwise identical.
    json_filename = str(tmp_path_factory.mktemp("data") / "combined_full.json")
    vds.virtualize.to_kerchunk(json_filename, format="json")
    return json_filename

def ds_from_json(json_filename, **kwargs):
    """Open a kerchunk reference JSON file as an :class:`xarray.Dataset`.

    Any extra keyword arguments are forwarded to ``xr.open_dataset``.
    """
    return xr.open_dataset(json_filename, engine="kerchunk", **kwargs)

def test_time(vds_json, ds_combined):
    """The decoded ``time`` coordinate round-trips through the kerchunk JSON."""

    def clean_time(ds: xr.Dataset) -> xr.DataArray:
        # Drop auxiliary coords so only the time values themselves compare.
        return ds.time.reset_coords(drop=True).load()

    ds = ds_from_json(vds_json, chunks={}, use_cftime=True)
    xr.testing.assert_identical(clean_time(ds), clean_time(ds_combined))

@pytest.mark.parametrize("chunks", [None, {}])
def test_equal(vds_json, ds_combined, chunks):
    """The full dataset round-trips through the kerchunk JSON, both without
    dask (``chunks=None``) and with dask chunking (``chunks={}``)."""
    # Debug prints removed; assert_identical reports any mismatch in detail.
    ds = ds_from_json(vds_json, chunks=chunks, use_cftime=True).load()
    ds_combined = ds_combined.load()
    xr.testing.assert_identical(ds, ds_combined)
11 changes: 7 additions & 4 deletions virtual-zarr-script.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
vds_list = []
for url in tqdm(urls):
vds = open_virtual_dataset(
url, indexes={}, reader_options={}
) # reader_options={} is needed for now to circumvent a bug in https://github.com/TomNicholas/VirtualiZarr/pull/126
url, indexes={},
reader_options={}, # needed for now to circumvent a bug in https://github.com/TomNicholas/VirtualiZarr/pull/126
cftime_variables=["time"],
loadable_variables=["time"],
)
vds_list.append(vds)

combined_vds = xr.combine_nested(
Expand All @@ -29,7 +32,7 @@
)
combined_vds.virtualize.to_kerchunk(json_filename, format="json")

## test load and print the the mean of the output
## load and test the output
print(f"Loading the mean of the virtual dataset from {json_filename=}")

ds = xr.open_dataset(
Expand All @@ -40,4 +43,4 @@
print(f"Dataset before mean: {ds}")
with ProgressBar():
ds_mean = ds.mean().load()
print(ds_mean)
print(ds_mean)
Loading