From d4cfcb4658fe1517ff5bfc1739eb38c3b7c2a7fa Mon Sep 17 00:00:00 2001
From: Hauke Schulz <43613877+observingClouds@users.noreply.github.com>
Date: Fri, 13 Jan 2023 15:49:19 -0700
Subject: [PATCH] Fix #554: check filesystem url is a string (#558)

* check url is string

* add test for containerized netcdf file

* make dask single-threaded

Co-authored-by: Max Grover

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* remove auto pytest

Co-authored-by: Max Grover
---
 intake_esm/source.py |  2 +-
 setup.cfg            |  2 +-
 tests/test_source.py | 36 ++++++++++++++++++++++++++++++++----
 3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/intake_esm/source.py b/intake_esm/source.py
index 074de0aa..b40ad149 100644
--- a/intake_esm/source.py
+++ b/intake_esm/source.py
@@ -64,7 +64,7 @@ def _open_dataset(
         url = fsspec.open(urlpath, **storage_options).open()
 
     # Handle multi-file datasets with `xr.open_mfdataset()`
-    if '*' in url or isinstance(url, list):
+    if (isinstance(url, str) and '*' in url) or isinstance(url, list):
         # How should we handle concat_dim, and other xr.open_mfdataset kwargs?
         xarray_open_kwargs.update(preprocess=preprocess)
         xarray_open_kwargs.update(parallel=True)
diff --git a/setup.cfg b/setup.cfg
index 3f781f96..416254c7 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -13,6 +13,6 @@ skip=
 
 [tool:pytest]
 console_output_style = count
-addopts = -n auto --cov=./ --cov-report=xml --verbose
+addopts = --cov=./ --cov-report=xml --verbose
 markers =
     network: tests requiring a network connection
diff --git a/tests/test_source.py b/tests/test_source.py
index ea438bd0..2c762fa4 100644
--- a/tests/test_source.py
+++ b/tests/test_source.py
@@ -1,8 +1,13 @@
 import os
+import tarfile
+import tempfile
 
+import dask
 import pytest
 import xarray
 
+dask.config.set(scheduler='single-threaded')
+
 from intake_esm.source import _get_xarray_open_kwargs, _open_dataset, _update_attrs
 
 here = os.path.abspath(os.path.dirname(__file__))
@@ -25,14 +30,37 @@
 multi_path = f'{os.path.dirname(f1)}/*.nc'
 
 
-def _common_open(fpath, varname='tasmax'):
+def _create_tmp_folder():
+    tmpdir = tempfile.mkdtemp()
+    return tmpdir
+
+
+def _create_tar_file(ipath):
+    tmp_folder = _create_tmp_folder()
+    tar_fn = tmp_folder + '/test.tar'
+    basename = os.path.basename(ipath)
+    with tarfile.open(tar_fn, 'w') as tar:
+        tar.add(ipath, arcname=basename)
+    return tar_fn
+
+
+tar_path = _create_tar_file(f1)
+tar_url = f'tar://{os.path.basename(f1)}::{tar_path}'
+
+
+def _common_open(fpath, varname='tasmax', engine=None):
     _xarray_open_kwargs = _get_xarray_open_kwargs('netcdf')
+    if engine is not None:
+        _xarray_open_kwargs['engine'] = engine
     return _open_dataset(fpath, varname, xarray_open_kwargs=_xarray_open_kwargs).compute()
 
 
-@pytest.mark.parametrize('fpath,expected_time_size', [(f1, 2), (f2, 2), (multi_path, 4)])
-def test_open_dataset(fpath, expected_time_size):
-    ds = _common_open(fpath)
+@pytest.mark.parametrize(
+    'fpath,expected_time_size,engine',
+    [(f1, 2, None), (f2, 2, None), (multi_path, 4, None), (tar_url, 2, 'scipy')],
+)
+def test_open_dataset(fpath, expected_time_size, engine):
+    ds = _common_open(fpath, engine=engine)
     assert isinstance(ds, xarray.Dataset)
     assert len(ds.time) == expected_time_size
 
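
Note (illustration, not part of the patch): the sketch below shows the case the new test exercises and why the isinstance(url, str) guard is needed. For a chained URL such as tar://member.nc::/path/to/archive.tar, fsspec.open(urlpath, **storage_options).open() returns a file-like object rather than a string, so the old `'*' in url` glob check could not assume a string. The file name sample.nc is a hypothetical placeholder, and the 'scipy' engine is assumed because it accepts file objects (it reads NETCDF3-classic files only), mirroring the engine used in the added test case.

# Sketch only: 'sample.nc' is a hypothetical local NETCDF3-classic file;
# the 'scipy' engine is assumed since it accepts file-like objects.
import os
import tarfile
import tempfile

import fsspec
import xarray as xr

src = 'sample.nc'  # hypothetical local netCDF file
tmpdir = tempfile.mkdtemp()
tar_fn = os.path.join(tmpdir, 'test.tar')
with tarfile.open(tar_fn, 'w') as tar:
    tar.add(src, arcname=os.path.basename(src))

# Chained fsspec URL: read 'sample.nc' out of the tar archive.
url = f'tar://{os.path.basename(src)}::{tar_fn}'

fobj = fsspec.open(url).open()  # file-like object, not a str
ds = xr.open_dataset(fobj, engine='scipy')
print(ds)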