From a1a794fd8bb1589c1bbb9d2113bfed0639538c56 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 25 Sep 2020 13:54:53 -0400 Subject: [PATCH 01/19] Work for fsspec integration with zarr --- xarray/backends/api.py | 32 ++++++++++++++++++++++++-------- xarray/backends/zarr.py | 5 +++-- xarray/core/utils.py | 2 +- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index 9049db5d602..af4f848c938 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -303,6 +303,8 @@ def open_dataset( backend_kwargs=None, use_cftime=None, decode_timedelta=None, + storage_options=None, + fs=None ): """Open and decode a dataset from a file or file-like object. @@ -545,7 +547,8 @@ def maybe_decode_store(store, chunks, lock=False): ) elif isinstance(filename_or_obj, str): - filename_or_obj = _normalize_path(filename_or_obj) + if fs is None: + filename_or_obj = _normalize_path(filename_or_obj) if engine is None: engine = _get_default_engine(filename_or_obj, allow_remote=True) @@ -578,8 +581,10 @@ def maybe_decode_store(store, chunks, lock=False): overwrite_encoded_chunks = _backend_kwargs.pop( "overwrite_encoded_chunks", None ) + if fs is not None: + filename_or_obj = fs.get_mapper(filename_or_obj) store = backends.ZarrStore.open_group( - filename_or_obj, group=group, **_backend_kwargs + filename_or_obj, group=group, storage_options=storage_options, **_backend_kwargs ) else: if engine not in [None, "scipy", "h5netcdf"]: @@ -931,13 +936,24 @@ def open_mfdataset( """ if isinstance(paths, str): if is_remote_uri(paths): - raise ValueError( - "cannot do wild-card matching for paths that are remote URLs: " - "{!r}. Instead, supply paths as an explicit list of strings.".format( - paths + if engine != "zarr": + raise ValueError( + "cannot do wild-card matching for paths that are remote URLs: " + "{!r}. 
Instead, supply paths as an explicit list of strings.".format( + paths + ) ) - ) - paths = sorted(glob(paths)) + else: + import fsspec # + storage_options = kwargs.get('storage_options', None) + fs, _, _ = fsspec.core.get_fs_token_paths( + paths, storage_options=storage_options + ) + paths = fs.expand_path(paths) + kwargs['fs'] = fs + + else: + paths = sorted(glob(paths)) else: paths = [str(p) if isinstance(p, Path) else p for p in paths] diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 2651f3148fd..8d2ddcd5665 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -279,6 +279,7 @@ def open_group( consolidated=False, consolidate_on_close=False, chunk_store=None, + storage_options=None ): import zarr @@ -288,9 +289,9 @@ def open_group( if consolidated: # TODO: an option to pass the metadata_key keyword - zarr_group = zarr.open_consolidated(store, **open_kwargs) + zarr_group = zarr.open_consolidated(store, storage_options=storage_options, **open_kwargs) else: - zarr_group = zarr.open_group(store, **open_kwargs) + zarr_group = zarr.open_group(store, storage_options=storage_options, **open_kwargs) return cls(zarr_group, consolidate_on_close) def __init__(self, zarr_group, consolidate_on_close=False): diff --git a/xarray/core/utils.py b/xarray/core/utils.py index cfb627f7af5..cf1f682c091 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -607,7 +607,7 @@ def close_on_error(f): def is_remote_uri(path: str) -> bool: - return bool(re.search(r"^https?\://", path)) + return bool(re.search(r"^[a-z][a-z0-9]*(\://|\:\:)", path)) def is_grib_path(path: str) -> bool: From ca029ea6208da5bf2387196fc85fdc65e7427ea4 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 25 Sep 2020 21:04:47 -0400 Subject: [PATCH 02/19] Remove explicit storage_options kwarg This can probably be cleaned up... 
--- xarray/backends/api.py | 14 ++++++++------ xarray/backends/zarr.py | 6 ++++-- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index c6115c0f347..c832ad66230 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -324,8 +324,6 @@ def open_dataset( backend_kwargs=None, use_cftime=None, decode_timedelta=None, - storage_options=None, - fs=None ): """Open and decode a dataset from a file or file-like object. @@ -540,6 +538,8 @@ def maybe_decode_store(store, chunks, lock=False): if isinstance(filename_or_obj, AbstractDataStore): store = filename_or_obj else: + backend_kwargs = backend_kwargs.copy() + fs = backend_kwargs.pop("fs", None) if isinstance(filename_or_obj, str) and fs is None: filename_or_obj = _normalize_path(filename_or_obj) @@ -565,9 +565,9 @@ def maybe_decode_store(store, chunks, lock=False): ) extra_kwargs["mode"] = "r" extra_kwargs["group"] = group - extra_kwargs['storage_options'] = storage_options if fs is not None: filename_or_obj = fs.get_mapper(filename_or_obj) + backend_kwargs.pop("storage_options", None) opener = _get_backend_cls(engine) store = opener(filename_or_obj, **extra_kwargs, **backend_kwargs) @@ -917,13 +917,15 @@ def open_mfdataset( ) else: import fsspec # - storage_options = kwargs.get('storage_options', None) + backend_kwargs = kwargs.get('backend_kwargs', {}) + storage_options = backend_kwargs.get('storage_options', None) + fs, _, _ = fsspec.core.get_fs_token_paths( paths, storage_options=storage_options ) paths = fs.expand_path(paths) - kwargs['fs'] = fs - + backend_kwargs['fs'] = fs + kwargs['backend_kwargs'] = backend_kwargs else: paths = sorted(glob(paths)) else: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 29e0459bf92..5d7a7cba33f 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -286,11 +286,13 @@ def open_group( if chunk_store: open_kwargs["chunk_store"] = chunk_store + if storage_options: + 
open_kwargs["storage_options"] = storage_options if consolidated: # TODO: an option to pass the metadata_key keyword - zarr_group = zarr.open_consolidated(store, storage_options=storage_options, **open_kwargs) + zarr_group = zarr.open_consolidated(store, **open_kwargs) else: - zarr_group = zarr.open_group(store, storage_options=storage_options, **open_kwargs) + zarr_group = zarr.open_group(store, **open_kwargs) return cls(zarr_group, consolidate_on_close) def __init__(self, zarr_group, consolidate_on_close=False): From 67a86f74535bf4c18d7862a3b571a5ca4b3920ab Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Fri, 25 Sep 2020 21:05:58 -0400 Subject: [PATCH 03/19] lint --- xarray/backends/api.py | 9 +++++---- xarray/backends/zarr.py | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index c832ad66230..aefa9247a25 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -917,15 +917,16 @@ def open_mfdataset( ) else: import fsspec # - backend_kwargs = kwargs.get('backend_kwargs', {}) - storage_options = backend_kwargs.get('storage_options', None) + + backend_kwargs = kwargs.get("backend_kwargs", {}) + storage_options = backend_kwargs.get("storage_options", None) fs, _, _ = fsspec.core.get_fs_token_paths( paths, storage_options=storage_options ) paths = fs.expand_path(paths) - backend_kwargs['fs'] = fs - kwargs['backend_kwargs'] = backend_kwargs + backend_kwargs["fs"] = fs + kwargs["backend_kwargs"] = backend_kwargs else: paths = sorted(glob(paths)) else: diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 5d7a7cba33f..59e61049312 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -278,7 +278,7 @@ def open_group( consolidated=False, consolidate_on_close=False, chunk_store=None, - storage_options=None + storage_options=None, ): import zarr From 3fe984db19ae04c2ac12e626af7915aefb220e16 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 28 Sep 2020 
10:28:51 -0400 Subject: [PATCH 04/19] extra lint --- xarray/backends/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/backends/api.py b/xarray/backends/api.py index aefa9247a25..113d610e089 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -916,7 +916,7 @@ def open_mfdataset( ) ) else: - import fsspec # + import fsspec # type: ignore backend_kwargs = kwargs.get("backend_kwargs", {}) storage_options = backend_kwargs.get("storage_options", None) From ee48ae27a679e3cd2dabfc3a18289eae7231441f Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 29 Sep 2020 15:13:32 -0400 Subject: [PATCH 05/19] Add a test --- xarray/tests/__init__.py | 1 + xarray/tests/test_backends.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/xarray/tests/__init__.py b/xarray/tests/__init__.py index 9e1fdc0df33..efd79d5e549 100644 --- a/xarray/tests/__init__.py +++ b/xarray/tests/__init__.py @@ -72,6 +72,7 @@ def LooseVersion(vstring): has_nc_time_axis, requires_nc_time_axis = _importorskip("nc_time_axis") has_rasterio, requires_rasterio = _importorskip("rasterio") has_zarr, requires_zarr = _importorskip("zarr") +has_fsspec, requires_fsspec = _importorskip("fsspec") has_iris, requires_iris = _importorskip("iris") has_cfgrib, requires_cfgrib = _importorskip("cfgrib") has_numbagg, requires_numbagg = _importorskip("numbagg") diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index c9030e31a9e..ed8f9ff6f21 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -54,6 +54,7 @@ requires_cfgrib, requires_cftime, requires_dask, + requires_fsspec, requires_h5netcdf, requires_netCDF4, requires_pseudonetcdf, @@ -4656,3 +4657,31 @@ def test_extract_zarr_variable_encoding(): actual = backends.zarr.extract_zarr_variable_encoding( var, raise_on_invalid=True ) + + +@requires_zarr +@requires_fsspec +def test_open_fsspec(): + import fsspec + import zarr + + if not 
hasattr(zarr.storage.FSStore, "getitems"): + pytest.skip("zarr too old") + + ds = open_dataset(os.path.join(os.path.dirname(__file__), "data", "example_1.nc")) + + m = fsspec.filesystem("memory") + mm = m.get_mapper("out1.zarr") + ds.to_zarr(mm) # old interface + ds0 = ds.copy() + ds0["time"] = ds.time + pd.to_timedelta("1 day") + mm = m.get_mapper("out2.zarr") + ds0.to_zarr(mm) # old interface + + url = "memory://out2.zarr" + ds2 = open_dataset(url, engine="zarr") + assert ds0 == ds2 + + url = "memory://out*.zarr" + ds2 = open_mfdataset(url, engine="zarr") + assert xr.concat([ds, ds0], dim="time") == ds2 From 46e068a08510262ae06a67e4368d2966445b8cbe Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 29 Sep 2020 16:12:39 -0400 Subject: [PATCH 06/19] for old zarr --- xarray/tests/test_backends.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 8aa2a0b0d9e..73db28d0270 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4674,10 +4674,12 @@ def test_extract_zarr_variable_encoding(): @requires_zarr @requires_fsspec def test_open_fsspec(): - import fsspec + import fsspec # type: ignore import zarr - if not hasattr(zarr.storage.FSStore, "getitems"): + if not hasattr(zarr.storage, "FSStore") or not hasattr( + zarr.storage.FSStore, "getitems" + ): pytest.skip("zarr too old") ds = open_dataset(os.path.join(os.path.dirname(__file__), "data", "example_1.nc")) From 65d3862d3a7358328d15df6a470fed0459942e95 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 30 Sep 2020 11:07:25 -0400 Subject: [PATCH 07/19] Update docstrings and whatsnew --- doc/whats-new.rst | 5 +++++ xarray/backends/api.py | 9 +++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index b4ef3c4c28c..609e18720d1 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -31,6 +31,11 @@ New Features now works with 
``engine="zarr"`` (:issue:`3668`, :pull:`4003`, :pull:`4187`). By `Miguel Jimenez `_ and `Wei Ji Leong `_. +- :py:func:`open_dataset` and :py:func:`open_mfdataset` now accept ``fsspec`` URLs + (including globs for the latter) for ``engine="zarr"``, and so allow reading from + many remote and other file systems (:pull:`4461`) + By `Martin Durant `_ + Bug fixes ~~~~~~~~~ diff --git a/xarray/backends/api.py b/xarray/backends/api.py index a13a5040108..ef80eac6fa3 100644 --- a/xarray/backends/api.py +++ b/xarray/backends/api.py @@ -338,6 +338,7 @@ def open_dataset( ends with .gz, in which case the file is gunzipped and opened with scipy.io.netcdf (only netCDF3 supported). Byte-strings or file-like objects are opened by scipy.io.netcdf (netCDF3) or h5py (netCDF4/HDF). + Also supports arbitrary ``fsspec`` URLs, only for the "zarr" backend. group : str, optional Path to the netCDF4 group in the given file to open (only works for netCDF4 files). @@ -398,7 +399,10 @@ def open_dataset( backend_kwargs: dict, optional A dictionary of keyword arguments to pass on to the backend. This may be useful when backend options would improve performance or - allow user control of dataset processing. + allow user control of dataset processing. When using an ``fsspec`` + path for the filename, the key ``storage_options`` can be used + here to configure the backend storage instance. Alternatively, a + pre-configured file system instance can be supplied with key ``fs``. use_cftime: bool, optional Only relevant if encoded dates come from a standard calendar (e.g. "gregorian", "proleptic_gregorian", "standard", or not @@ -781,7 +785,8 @@ def open_mfdataset( files to open. Paths can be given as strings or as pathlib Paths. If concatenation along more than one dimension is desired, then ``paths`` must be a nested list-of-lists (see ``combine_nested`` for details). (A string glob will - be expanded to a 1-dimensional list.) + be expanded to a 1-dimensional list.) 
When engine=="zarr", the path(s) can + be of any type understood by ``fsspec``. chunks : int or dict, optional Dictionary with keys given by dimension names and values given by chunk sizes. In general, these should divide the dimensions of each dataset. If int, chunk From 05960881021999fee14156b6ac53b1f457a7b373 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 30 Sep 2020 11:17:26 -0400 Subject: [PATCH 08/19] Update IO doc --- doc/io.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/doc/io.rst b/doc/io.rst index 956d9394653..53889f69561 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -913,6 +913,9 @@ storage buckets using zarr. This example uses the `gcsfs`_ package to provide a ``MutableMapping`` interface to `Google Cloud Storage`_, which we can then pass to xarray:: + +.. ipython:: python + import gcsfs fs = gcsfs.GCSFileSystem(project='', token=None) gcsmap = gcsfs.mapping.GCSMap('', gcs=fs, check=True, create=False) @@ -921,6 +924,22 @@ pass to xarray:: # read it back ds_gcs = xr.open_zarr(gcsmap) +New in v0.16.2: general `fsspec`_ URLs are now parsed and the store set up for you +automatically when reading, such that the read part of the above code can +be replaced with + +.. ipython:: python + + ds_gcs = xr.open_dataset( + "gcs:///path.zarr", + backend_kwargs={"storage_options": {"project": '', token=None}}, + engine="zarr" + ) + +This also works with ``open_mfdataset``, allowing you to pass a list of paths or +a URL to be interpreted as a glob string. + +.. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/ .. _Zarr: http://zarr.readthedocs.io/ .. _Amazon S3: https://aws.amazon.com/s3/ .. 
_Google Cloud Storage: https://cloud.google.com/storage/ From d34423bf3b1ec2561122a8796a1b17faaa82ab3e Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 30 Sep 2020 13:35:23 -0400 Subject: [PATCH 09/19] doc syntax --- doc/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/io.rst b/doc/io.rst index 53889f69561..31ce627edd3 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -932,7 +932,7 @@ be replaced with ds_gcs = xr.open_dataset( "gcs:///path.zarr", - backend_kwargs={"storage_options": {"project": '', token=None}}, + backend_kwargs={"storage_options": {"project": '', "token": None}}, engine="zarr" ) From 0b8e25d443a9cf5bda6b65ef32d1c4118eaeb474 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 6 Oct 2020 14:19:40 -0400 Subject: [PATCH 10/19] Reorder IO zarr doc --- doc/io.rst | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/doc/io.rst b/doc/io.rst index 31ce627edd3..55d6e08f613 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -876,6 +876,7 @@ can be omitted as it will internally be set to ``'a'``. .. ipython:: python + ds1 = xr.Dataset( ds1 = xr.Dataset( {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, coords={ @@ -910,25 +911,15 @@ Cloud Storage Buckets It is possible to read and write xarray datasets directly from / to cloud storage buckets using zarr. This example uses the `gcsfs`_ package to provide -a ``MutableMapping`` interface to `Google Cloud Storage`_, which we can then -pass to xarray:: - - -.. ipython:: python - - import gcsfs - fs = gcsfs.GCSFileSystem(project='', token=None) - gcsmap = gcsfs.mapping.GCSMap('', gcs=fs, check=True, create=False) - # write to the bucket - ds.to_zarr(store=gcsmap) - # read it back - ds_gcs = xr.open_zarr(gcsmap) +an interface to `Google Cloud Storage`_. 
-New in v0.16.2: general `fsspec`_ URLs are now parsed and the store set up for you -automatically when reading, such that the read part of the above code can -be replaced with +From v0.16.2: general `fsspec`_ URLs are parsed and the store set up for you +automatically when reading, such that you can open a dataset ina single +call. You should include any arguments to the storage backend as the +key ``storage_options``, part of ``backend_kwargs``. .. ipython:: python + :okexcept: ds_gcs = xr.open_dataset( "gcs:///path.zarr", @@ -939,6 +930,21 @@ be replaced with This also works with ``open_mfdataset``, allowing you to pass a list of paths or a URL to be interpreted as a glob string. +For older versions, and for writing, you must explicitly set up a ``MutibleMapping`` +instance and pass this, as follows: + +.. ipython:: python + :okexcept: + + import gcsfs + fs = gcsfs.GCSFileSystem(project='', token=None) + gcsmap = gcsfs.mapping.GCSMap('', gcs=fs, check=True, create=False) + # write to the bucket + ds.to_zarr(store=gcsmap) + # read it back + ds_gcs = xr.open_zarr(gcsmap) + + .. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/ .. _Zarr: http://zarr.readthedocs.io/ .. _Amazon S3: https://aws.amazon.com/s3/ From 13855ff6937ec977260f39c5d0e72bf4dbef0ece Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 6 Oct 2020 16:32:42 -0400 Subject: [PATCH 11/19] don't execute code --- doc/io.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/io.rst b/doc/io.rst index 55d6e08f613..bb2338f77f9 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -918,8 +918,7 @@ automatically when reading, such that you can open a dataset ina single call. You should include any arguments to the storage backend as the key ``storage_options``, part of ``backend_kwargs``. -.. ipython:: python - :okexcept: +.. code:: python ds_gcs = xr.open_dataset( "gcs:///path.zarr", @@ -933,8 +932,7 @@ a URL to be interpreted as a glob string. 
For older versions, and for writing, you must explicitly set up a ``MutibleMapping`` instance and pass this, as follows: -.. ipython:: python - :okexcept: +.. code:: python import gcsfs fs = gcsfs.GCSFileSystem(project='', token=None) From 2da9b4d8787134d98fb9de0cdffb376c1e654918 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Wed, 7 Oct 2020 10:34:25 -0400 Subject: [PATCH 12/19] Add line to kick CI --- doc/io.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/io.rst b/doc/io.rst index bb2338f77f9..5d7b06c0512 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -942,6 +942,7 @@ instance and pass this, as follows: # read it back ds_gcs = xr.open_zarr(gcsmap) +(or use the utility function ``fsspec.get_mapper()``). .. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/ .. _Zarr: http://zarr.readthedocs.io/ From db4a84ef7f0713d26d327af5681cf15d4a744fa4 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Sun, 18 Oct 2020 11:29:53 -0400 Subject: [PATCH 13/19] Remove stray line in io.rst --- doc/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/io.rst b/doc/io.rst index 5d7b06c0512..9439f5e767a 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -875,8 +875,8 @@ can be omitted as it will internally be set to ``'a'``. ! rm -rf path/to/directory.zarr .. ipython:: python + :okexcept: - ds1 = xr.Dataset( ds1 = xr.Dataset( {"foo": (("x", "y", "t"), np.random.rand(4, 5, 2))}, coords={ From cce42e2f70983d8111e90d8fbf4613c96bcbdad3 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Sun, 18 Oct 2020 15:15:59 -0400 Subject: [PATCH 14/19] Update doc/io.rst Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- doc/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/io.rst b/doc/io.rst index 9439f5e767a..081c8a7b5d5 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -929,7 +929,7 @@ key ``storage_options``, part of ``backend_kwargs``. 
This also works with ``open_mfdataset``, allowing you to pass a list of paths or a URL to be interpreted as a glob string. -For older versions, and for writing, you must explicitly set up a ``MutibleMapping`` +For older versions, and for writing, you must explicitly set up a ``MutableMapping`` instance and pass this, as follows: .. code:: python From bb4d2a9aee22fbeffc01b9132801f8c81e7ff310 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 19 Oct 2020 09:10:07 -0400 Subject: [PATCH 15/19] merge fail --- xarray/tests/test_backends.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index 1ffc6c325cb..6cd7242f823 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -4722,4 +4722,3 @@ def test_load_single_value_h5netcdf(tmp_path): ds.to_netcdf(tmp_path / "test.nc") with xr.open_dataset(tmp_path / "test.nc", engine="h5netcdf") as ds2: ds2["test"][0].load() ->>>>>>> master From 3ad14486a6bcbbbe9c9585d8ef81ba2d42b0f8d4 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 3 Nov 2020 13:20:44 -0500 Subject: [PATCH 16/19] fix --- xarray/backends/apiv2.py | 4 +++- xarray/backends/zarr.py | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 2f34cc285ff..19b3f2399f0 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -197,7 +197,9 @@ def open_dataset( if backend_kwargs is None: backend_kwargs = {} - filename_or_obj = _normalize_path(filename_or_obj) + if 'fs' not in backend_kwargs: + # do *not* mange paths meant for a specific file system made in open_mfdataset + filename_or_obj = _normalize_path(filename_or_obj) if engine is None: engine = _autodetect_engine(filename_or_obj) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 7eb48834baf..c381a544655 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -701,6 +701,7 @@ def open_backend_dataset_zarr( 
consolidated=False, consolidate_on_close=False, chunk_store=None, + fs=None ): if not decode_cf: @@ -709,6 +710,8 @@ def open_backend_dataset_zarr( concat_characters = False decode_coords = False decode_timedelta = False + if fs is not None: + filename_or_obj = fs.get_mapper(filename_or_obj) store = ZarrStore.open_group( filename_or_obj, From c5f62492e62f94713b633377ea5d6c96b5e44730 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 3 Nov 2020 13:21:19 -0500 Subject: [PATCH 17/19] format --- xarray/backends/apiv2.py | 2 +- xarray/backends/zarr.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/backends/apiv2.py b/xarray/backends/apiv2.py index 19b3f2399f0..3303d7ec845 100644 --- a/xarray/backends/apiv2.py +++ b/xarray/backends/apiv2.py @@ -197,7 +197,7 @@ def open_dataset( if backend_kwargs is None: backend_kwargs = {} - if 'fs' not in backend_kwargs: + if "fs" not in backend_kwargs: # do *not* mange paths meant for a specific file system made in open_mfdataset filename_or_obj = _normalize_path(filename_or_obj) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index c381a544655..e9402858d18 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -701,7 +701,7 @@ def open_backend_dataset_zarr( consolidated=False, consolidate_on_close=False, chunk_store=None, - fs=None + fs=None, ): if not decode_cf: From 64a61509ea56ce1c7efb42e0b12a5de40fad16a8 Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Mon, 30 Nov 2020 09:25:49 -0500 Subject: [PATCH 18/19] remove decode_cf block --- xarray/backends/zarr.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/xarray/backends/zarr.py b/xarray/backends/zarr.py index 2c016a69b62..e8dd44216d4 100644 --- a/xarray/backends/zarr.py +++ b/xarray/backends/zarr.py @@ -712,12 +712,6 @@ def open_backend_dataset_zarr( fs=None, ): - if not decode_cf: - mask_and_scale = False - decode_times = False - concat_characters = False - decode_coords = False - decode_timedelta = False 
if fs is not None: filename_or_obj = fs.get_mapper(filename_or_obj) From ef857407de081ef62797af6603dd85708c78f450 Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Mon, 30 Nov 2020 09:39:06 -0500 Subject: [PATCH 19/19] Update doc/io.rst Co-authored-by: Ryan Abernathey --- doc/io.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/io.rst b/doc/io.rst index 3b59e5e572a..bc6e2ecd2d2 100644 --- a/doc/io.rst +++ b/doc/io.rst @@ -925,7 +925,7 @@ storage buckets using zarr. This example uses the `gcsfs`_ package to provide an interface to `Google Cloud Storage`_. From v0.16.2: general `fsspec`_ URLs are parsed and the store set up for you -automatically when reading, such that you can open a dataset ina single +automatically when reading, such that you can open a dataset in a single call. You should include any arguments to the storage backend as the key ``storage_options``, part of ``backend_kwargs``.