From ffad7d92ed78cac38a7f2f060a1b7797473a7be3 Mon Sep 17 00:00:00 2001
From: Julia Signell
Date: Wed, 23 Jan 2019 11:39:45 -0500
Subject: [PATCH] Added version checking and new tests

---
 intake_xarray/netcdf.py        | 36 ++++++++++++++++++++------
 tests/{util.py => conftest.py} |  6 ++---
 tests/data/example_2.nc        | Bin 0 -> 1736 bytes
 tests/test_catalog.py          |  1 -
 tests/test_intake_xarray.py    | 46 +++++++++++++++++++++++----------
 5 files changed, 64 insertions(+), 25 deletions(-)
 rename tests/{util.py => conftest.py} (87%)
 create mode 100644 tests/data/example_2.nc

diff --git a/intake_xarray/netcdf.py b/intake_xarray/netcdf.py
index 36c64bf..3786881 100644
--- a/intake_xarray/netcdf.py
+++ b/intake_xarray/netcdf.py
@@ -1,5 +1,10 @@
 # -*- coding: utf-8 -*-
-import xarray as xr
+from distutils.version import LooseVersion
+try:
+    import xarray as xr
+    XARRAY_VERSION = LooseVersion(xr.__version__)
+except ImportError:
+    XARRAY_VERSION = None
 from intake.source.base import PatternMixin
 from intake.source.utils import reverse_format
 from .base import DataSourceMixin
@@ -10,38 +15,47 @@ class NetCDFSource(DataSourceMixin, PatternMixin):
 
     Parameters
     ----------
-    urlpath: str
+    urlpath : str
         Path to source file. May include glob "*" characters, format
         pattern strings, or list.
         Some examples:
-            - ``{{ CATALOG_DIR }}data/air.nc``
-            - ``{{ CATALOG_DIR }}data/*.nc``
-            - ``{{ CATALOG_DIR }}data/air_{year}.nc``
-    chunks: int or dict
+            - ``{{ CATALOG_DIR }}/data/air.nc``
+            - ``{{ CATALOG_DIR }}/data/*.nc``
+            - ``{{ CATALOG_DIR }}/data/air_{year}.nc``
+    chunks : int or dict, optional
         Chunks is used to load the new dataset into dask
         arrays. ``chunks={}`` loads the dataset with dask using a single
         chunk for all arrays.
-    path_as_pattern: bool or str, optional
+    concat_dim : str, optional
+        Name of dimension along which to concatenate the files. Can
+        be new or pre-existing. Default is 'concat_dim'.
+    path_as_pattern : bool or str, optional
         Whether to treat the path as a pattern (i.e. ``data_{field}.nc``)
         and create new coordinates in the output corresponding to pattern
         fields. If str, is treated as pattern to match on. Default is True.
     """
     name = 'netcdf'
 
-    def __init__(self, urlpath, chunks, xarray_kwargs=None, metadata=None,
+    def __init__(self, urlpath, chunks=None, concat_dim='concat_dim',
+                 xarray_kwargs=None, metadata=None,
                  path_as_pattern=True, **kwargs):
         self.path_as_pattern = path_as_pattern
         self.urlpath = urlpath
         self.chunks = chunks
+        self.concat_dim = concat_dim
         self._kwargs = xarray_kwargs or kwargs
         self._ds = None
         super(NetCDFSource, self).__init__(metadata=metadata)
 
     def _open_dataset(self):
+        if not XARRAY_VERSION:
+            raise ImportError("xarray not available")
         url = self.urlpath
         kwargs = self._kwargs
         if "*" in url or isinstance(url, list):
             _open_dataset = xr.open_mfdataset
+            if 'concat_dim' not in kwargs:
+                kwargs.update(concat_dim=self.concat_dim)
             if self.pattern:
                 kwargs.update(preprocess=self._add_path_to_ds)
         else:
@@ -52,6 +66,12 @@ def _open_dataset(self):
     def _add_path_to_ds(self, ds):
         """Adding path info to a coord for a particular file
         """
+        if not (XARRAY_VERSION > '0.11.1'):
+            raise ImportError("Your version of xarray is '{}'. "
+                              "The guarantee that the source path is available "
+                              "on the output of open_dataset was added in 0.11.2, "
+                              "so pattern urlpaths are not supported.".format(XARRAY_VERSION))
+
         var = next(var for var in ds)
         new_coords = reverse_format(self.pattern, ds[var].encoding['source'])
         return ds.assign_coords(**new_coords)
diff --git a/tests/util.py b/tests/conftest.py
similarity index 87%
rename from tests/util.py
rename to tests/conftest.py
index b59c906..9d044a9 100644
--- a/tests/util.py
+++ b/tests/conftest.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-import os
+import posixpath
 import pytest
 import shutil
 import tempfile
@@ -11,11 +11,11 @@
 TEST_DATA_DIR = 'tests/data'
 TEST_DATA = 'example_1.nc'
-TEST_URLPATH = os.path.join(TEST_DATA_DIR, TEST_DATA)
+TEST_URLPATH = posixpath.join(TEST_DATA_DIR, TEST_DATA)
 
 
 @pytest.fixture
-def cdf_source():
+def netcdf_source():
     return NetCDFSource(TEST_URLPATH, {})
diff --git a/tests/data/example_2.nc b/tests/data/example_2.nc
new file mode 100644
index 0000000000000000000000000000000000000000..5775622d0ef85828b436dffcd21366f7538fc55c
GIT binary patch
literal 1736
zcmeHGF-s#s6dp~Yc*(&DBG?>TUSkmhf^g>+CsvB!AIR+`lVo7BE3>l!!NS_gHda?y
z`V%ZIEY`|aYp=Y6D~0%dvl+>W;vdMsmp5IGu&RyscVRC2^#K-J~sbu$S3`%+ZS~^MSIJ
z{R3K{1h9^aeB`CSpp&@Uj3eKWXI0io6WPqThLtQsDTW6Szoo4JuK>~gGj4((?oIC&A|Jxw_D*KzjIn%L&8U#czxNE
z%WL;?{*P_hHBzR{I5D;u*=e+d7N@8yK@--K$InmulBec*WRy~Q>ih*9=ggh>rmW@c
zZ_TsNS6Zu|kr``Do=+&rVf|Ym2Q__*W2uMtkp`)XTQaC`Y^<-=SL@>%@XcLekNb(w
z0A>_xz}L}e^aHTK3GfTJ0CM0KxCb79zi9m%*vCwlkmTPH^q*p?!SoHh{suz)xi)w5
zo6Q`c9S+aD&sexJJPR9*#y7?hqn(7yK|Dk);qqV|$JqXt)9G|V;5m=|Ccs{fYn+D>
R#B**A-U+x|pI1Acc>;h`|0@6h

literal 0
HcmV?d00001

diff --git a/tests/test_catalog.py b/tests/test_catalog.py
index 5a76f4a..38b031e 100644
--- a/tests/test_catalog.py
+++ b/tests/test_catalog.py
@@ -4,7 +4,6 @@
 import pytest
 
 from intake import open_catalog
-from .util import dataset  # noqa
 
 
 @pytest.fixture
diff --git a/tests/test_intake_xarray.py b/tests/test_intake_xarray.py
index a0d9f44..277f9bf 100644
--- a/tests/test_intake_xarray.py
+++ b/tests/test_intake_xarray.py
@@ -7,12 +7,10 @@
 
 here = os.path.dirname(__file__)
 
-from .util import TEST_URLPATH, cdf_source, zarr_source, dataset  # noqa
-
-@pytest.mark.parametrize('source', ['cdf', 'zarr'])
-def test_discover(source, cdf_source, zarr_source, dataset):
-    source = {'cdf': cdf_source, 'zarr': zarr_source}[source]
+@pytest.mark.parametrize('source', ['netcdf', 'zarr'])
+def test_discover(source, netcdf_source, zarr_source, dataset):
+    source = {'netcdf': netcdf_source, 'zarr': zarr_source}[source]
     r = source.discover()
     assert r['datashape'] is None
@@ -25,9 +23,9 @@ def test_discover(source, cdf_source, zarr_source, dataset):
     assert set(source.metadata['coords']) == set(dataset.coords.keys())
 
 
-@pytest.mark.parametrize('source', ['cdf', 'zarr'])
-def test_read(source, cdf_source, zarr_source, dataset):
-    source = {'cdf': cdf_source, 'zarr': zarr_source}[source]
+@pytest.mark.parametrize('source', ['netcdf', 'zarr'])
+def test_read(source, netcdf_source, zarr_source, dataset):
+    source = {'netcdf': netcdf_source, 'zarr': zarr_source}[source]
     ds = source.read_chunked()
     assert ds.temp.chunks
@@ -38,8 +36,8 @@ def test_read(source, cdf_source, zarr_source, dataset):
     assert np.all(ds.rh == dataset.rh)
 
 
-def test_read_partition_cdf(cdf_source):
-    source = cdf_source
+def test_read_partition_netcdf(netcdf_source):
+    source = netcdf_source
     with pytest.raises(TypeError):
         source.read_partition(None)
     out = source.read_partition(('temp', 0, 0, 0, 0))
@@ -48,6 +46,28 @@ def 
test_read_partition_cdf(cdf_source): assert np.all(out == expected) +def test_read_list_of_netcdf_files(): + from intake_xarray.netcdf import NetCDFSource + source = NetCDFSource([ + os.path.join(here, 'data', 'example_1.nc'), + os.path.join(here, 'data', 'example_2.nc'), + ]) + d = source.to_dask() + assert d.dims == {'lat': 5, 'lon': 10, 'level': 4, 'time': 1, + 'concat_dim': 2} + + +def test_read_glob_pattern_of_netcdf_files(): + from intake_xarray.netcdf import NetCDFSource + + source = NetCDFSource(os.path.join(here, 'data', 'example_{num: d}.nc'), + concat_dim='num') + d = source.to_dask() + assert d.dims == {'lat': 5, 'lon': 10, 'level': 4, 'time': 1, + 'num': 2} + assert (d.num.data == np.array([1, 2])).all() + + def test_read_partition_zarr(zarr_source): source = zarr_source with pytest.raises(TypeError): @@ -57,9 +77,9 @@ def test_read_partition_zarr(zarr_source): assert np.all(out == expected) -@pytest.mark.parametrize('source', ['cdf', 'zarr']) -def test_to_dask(source, cdf_source, zarr_source, dataset): - source = {'cdf': cdf_source, 'zarr': zarr_source}[source] +@pytest.mark.parametrize('source', ['netcdf', 'zarr']) +def test_to_dask(source, netcdf_source, zarr_source, dataset): + source = {'netcdf': netcdf_source, 'zarr': zarr_source}[source] ds = source.to_dask() assert ds.dims == dataset.dims
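
For reviewers, a minimal usage sketch of the behavior this patch adds. It is
not part of the patch: the file paths and the 'member'/'year' dimension names
are hypothetical, and it assumes a checkout with this patch applied (plus
xarray >= 0.11.2 for the pattern case).

from intake_xarray.netcdf import NetCDFSource

# A list urlpath goes through xr.open_mfdataset; concat_dim names the
# dimension the files are concatenated along (new or pre-existing).
source = NetCDFSource(['data/air_1.nc', 'data/air_2.nc'],  # hypothetical paths
                      concat_dim='member')
ds = source.to_dask()
assert 'member' in ds.dims

# A format-pattern urlpath is globbed, and each pattern field ('year' here)
# comes back as a coordinate on the output; this is the branch guarded by
# the new version check.
source = NetCDFSource('data/air_{year:d}.nc', concat_dim='year')
print(source.to_dask().year.values)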