Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

APIv2: move all _autodetect_engine logic to the plugins #4709

Merged
merged 11 commits into from
Dec 22, 2020
1 change: 1 addition & 0 deletions xarray/backends/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ def _get_engine_from_magic_number(filename_or_obj):
engine = "h5netcdf"
else:
raise ValueError(
"cannot guess the engine, "
f"{magic_number} is not the signature of any supported file format "
"did you mean to pass a string for a path instead?"
)
Expand Down
9 changes: 2 additions & 7 deletions xarray/backends/apiv2.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,7 @@
from ..core.dataset import _get_chunk, _maybe_chunk
from ..core.utils import is_remote_uri
from . import plugins
from .api import (
_autodetect_engine,
_get_backend_cls,
_normalize_path,
_protect_dataset_variables_inplace,
)
from .api import _get_backend_cls, _normalize_path, _protect_dataset_variables_inplace


def _get_mtime(filename_or_obj):
Expand Down Expand Up @@ -248,7 +243,7 @@ def open_dataset(
filename_or_obj = _normalize_path(filename_or_obj)

if engine is None:
engine = _autodetect_engine(filename_or_obj)
engine = plugins.guess_engine(filename_or_obj)

engines = plugins.list_engines()
backend = _get_backend_cls(engine, engines=engines)
Expand Down
14 changes: 13 additions & 1 deletion xarray/backends/cfgrib_.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os

import numpy as np

from ..core import indexing
Expand Down Expand Up @@ -73,6 +75,14 @@ def get_encoding(self):
return encoding


def guess_can_open_cfgrib(store_spec):
    """Guess from the file extension whether cfgrib can open ``store_spec``.

    Returns ``True`` only when ``store_spec`` is accepted by
    ``os.path.splitext`` and its extension is one of the GRIB suffixes
    listed below. Anything that makes ``os.path.splitext`` raise
    ``TypeError`` yields ``False``.
    """
    grib_suffixes = {".grib", ".grib2", ".grb", ".grb2"}
    try:
        suffix = os.path.splitext(store_spec)[1]
    except TypeError:
        # not path-like, so there is no extension to inspect
        return False
    return suffix in grib_suffixes


def open_backend_dataset_cfgrib(
filename_or_obj,
*,
Expand Down Expand Up @@ -116,4 +126,6 @@ def open_backend_dataset_cfgrib(
return ds


cfgrib_backend = BackendEntrypoint(open_dataset=open_backend_dataset_cfgrib)
cfgrib_backend = BackendEntrypoint(
open_dataset=open_backend_dataset_cfgrib, guess_can_open=guess_can_open_cfgrib
)
29 changes: 28 additions & 1 deletion xarray/backends/h5netcdf_.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import functools
import io
import os
from distutils.version import LooseVersion

import numpy as np
Expand Down Expand Up @@ -325,6 +327,29 @@ def close(self, **kwargs):
self._manager.close(**kwargs)


def guess_can_open_h5netcdf(store_spec):
    """Guess whether h5netcdf can open ``store_spec``.

    ``bytes`` and ``io.IOBase`` inputs are identified by their first eight
    bytes, which must start with the HDF5 signature. Any other input is
    judged by file extension alone (``.nc``, ``.nc4`` or ``.cdf``).

    Raises
    ------
    ValueError
        If a file-like object is passed whose position is not zero.
    """
    hdf5_signature = b"\211HDF\r\n\032\n"

    # in-memory bytes: the header is simply the leading bytes
    if isinstance(store_spec, bytes):
        return store_spec[:8].startswith(hdf5_signature)

    # file-like object: read the header, then rewind for the real reader
    if isinstance(store_spec, io.IOBase):
        if store_spec.tell() != 0:
            raise ValueError(
                "file-like object read/write pointer not at zero "
                "please close and reopen, or use a context manager"
            )
        header = store_spec.read(8)
        store_spec.seek(0)
        return header.startswith(hdf5_signature)

    try:
        suffix = os.path.splitext(store_spec)[1]
    except TypeError:
        # not path-like, so there is no extension to inspect
        return False
    return suffix in {".nc", ".nc4", ".cdf"}


def open_backend_dataset_h5netcdf(
filename_or_obj,
*,
Expand Down Expand Up @@ -364,4 +389,6 @@ def open_backend_dataset_h5netcdf(
return ds


h5netcdf_backend = BackendEntrypoint(open_dataset=open_backend_dataset_h5netcdf)
h5netcdf_backend = BackendEntrypoint(
open_dataset=open_backend_dataset_h5netcdf, guess_can_open=guess_can_open_h5netcdf
)
14 changes: 13 additions & 1 deletion xarray/backends/netCDF4_.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,16 @@ def close(self, **kwargs):
self._manager.close(**kwargs)


def guess_can_open_netcdf4(store_spec):
    """Guess whether the netCDF4 backend can open ``store_spec``.

    A string that ``is_remote_uri`` accepts is claimed regardless of its
    extension. Otherwise the decision rests on the file extension
    (``.nc``, ``.nc4`` or ``.cdf``); inputs that ``os.path.splitext``
    rejects with ``TypeError`` yield ``False``.
    """
    if isinstance(store_spec, str):
        if is_remote_uri(store_spec):
            return True

    try:
        suffix = os.path.splitext(store_spec)[1]
    except TypeError:
        # not path-like, so there is no extension to inspect
        return False
    return suffix in {".nc", ".nc4", ".cdf"}


def open_backend_dataset_netcdf4(
filename_or_obj,
mask_and_scale=True,
Expand Down Expand Up @@ -549,4 +559,6 @@ def open_backend_dataset_netcdf4(
return ds


netcdf4_backend = BackendEntrypoint(open_dataset=open_backend_dataset_netcdf4)
netcdf4_backend = BackendEntrypoint(
open_dataset=open_backend_dataset_netcdf4, guess_can_open=guess_can_open_netcdf4
)
24 changes: 22 additions & 2 deletions xarray/backends/plugins.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,19 @@
import inspect
import itertools
import logging
import warnings
from functools import lru_cache

import pkg_resources


class BackendEntrypoint:
__slots__ = ("open_dataset", "open_dataset_parameters")
__slots__ = ("guess_can_open", "open_dataset", "open_dataset_parameters")

def __init__(self, open_dataset, open_dataset_parameters=None):
def __init__(self, open_dataset, open_dataset_parameters=None, guess_can_open=None):
self.open_dataset = open_dataset
self.open_dataset_parameters = open_dataset_parameters
self.guess_can_open = guess_can_open


def remove_duplicates(backend_entrypoints):
Expand Down Expand Up @@ -76,3 +78,21 @@ def list_engines():
engines = create_engines_dict(backend_entrypoints)
set_missing_parameters(engines)
return engines


def guess_engine(store_spec):
    """Return the name of the first installed engine that claims ``store_spec``.

    The netCDF engines are probed first, in the fixed order ``netcdf4``,
    ``h5netcdf``, ``scipy``; an exception raised by one of their guessers
    propagates to the caller. Every remaining engine returned by
    ``list_engines()`` is then probed in iteration order, and a guesser
    that raises there is logged and skipped.

    Engines whose ``guess_can_open`` is unset are never selected.

    Raises
    ------
    ValueError
        If no engine claims ``store_spec``.
    """
    engines = list_engines()

    # use the pre-defined selection order for netCDF files
    netcdf_engines = ("netcdf4", "h5netcdf", "scipy")
    for engine in netcdf_engines:
        if engine not in engines:
            continue
        backend = engines[engine]
        # ``guess_can_open`` may be unset on an entrypoint, so guard the call
        # exactly as the generic loop below does
        if backend.guess_can_open and backend.guess_can_open(store_spec):
            return engine

    for engine, backend in engines.items():
        if engine in netcdf_engines:
            # already probed (and rejected) above
            continue
        try:
            if backend.guess_can_open and backend.guess_can_open(store_spec):
                return engine
        except Exception:
            # a broken plugin must not prevent the remaining engines from
            # being tried
            logging.exception(f"{engine!r} fails while guessing")

    raise ValueError("cannot guess the engine, try passing one explicitly")
6 changes: 5 additions & 1 deletion xarray/backends/pydap_.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from ..core import indexing
from ..core.pycompat import integer_types
from ..core.utils import Frozen, FrozenDict, is_dict_like
from ..core.utils import Frozen, FrozenDict, is_dict_like, is_remote_uri
from ..core.variable import Variable
from .common import AbstractDataStore, BackendArray, robust_getitem
from .plugins import BackendEntrypoint
Expand Down Expand Up @@ -96,6 +96,10 @@ def get_dimensions(self):
return Frozen(self.ds.dimensions)


def can_open_pydap(store_spec):
    """Report whether ``store_spec`` is a string that ``is_remote_uri`` accepts.

    Non-string inputs are rejected without consulting ``is_remote_uri``.
    """
    if not isinstance(store_spec, str):
        return False
    return is_remote_uri(store_spec)


def open_backend_dataset_pydap(
filename_or_obj,
mask_and_scale=True,
Expand Down
32 changes: 29 additions & 3 deletions xarray/backends/scipy_.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from io import BytesIO
import io
import os

import numpy as np

Expand Down Expand Up @@ -78,7 +79,7 @@ def _open_scipy_netcdf(filename, mode, mmap, version):

if isinstance(filename, bytes) and filename.startswith(b"CDF"):
# it's a NetCDF3 bytestring
filename = BytesIO(filename)
filename = io.BytesIO(filename)

try:
return scipy.io.netcdf_file(filename, mode=mode, mmap=mmap, version=version)
Expand Down Expand Up @@ -222,6 +223,29 @@ def close(self):
self._manager.close()


def guess_can_open_scipy(store_spec):
    """Guess whether the scipy backend can open ``store_spec``.

    ``bytes`` and ``io.IOBase`` inputs are identified by their first eight
    bytes, which must start with ``b"CDF"``. Any other input is judged by
    file extension alone (``.nc``, ``.nc4``, ``.cdf`` or ``.gz``).

    Raises
    ------
    ValueError
        If a file-like object is passed whose position is not zero.
    """
    netcdf3_prefix = b"CDF"

    # in-memory bytes: the header is simply the leading bytes
    if isinstance(store_spec, bytes):
        return store_spec[:8].startswith(netcdf3_prefix)

    # file-like object: read the header, then rewind for the real reader
    if isinstance(store_spec, io.IOBase):
        if store_spec.tell() != 0:
            raise ValueError(
                "file-like object read/write pointer not at zero "
                "please close and reopen, or use a context manager"
            )
        header = store_spec.read(8)
        store_spec.seek(0)
        return header.startswith(netcdf3_prefix)

    try:
        suffix = os.path.splitext(store_spec)[1]
    except TypeError:
        # not path-like, so there is no extension to inspect
        return False
    return suffix in {".nc", ".nc4", ".cdf", ".gz"}


def open_backend_dataset_scipy(
filename_or_obj,
mask_and_scale=True,
Expand Down Expand Up @@ -255,4 +279,6 @@ def open_backend_dataset_scipy(
return ds


scipy_backend = BackendEntrypoint(open_dataset=open_backend_dataset_scipy)
scipy_backend = BackendEntrypoint(
open_dataset=open_backend_dataset_scipy, guess_can_open=guess_can_open_scipy
)
9 changes: 8 additions & 1 deletion xarray/backends/store.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from .. import conventions
from ..core.dataset import Dataset
from ..core.utils import close_on_error
from .common import AbstractDataStore
from .plugins import BackendEntrypoint


def guess_can_open_store(store_spec):
    """Report whether ``store_spec`` is an ``AbstractDataStore`` instance."""
    is_data_store = isinstance(store_spec, AbstractDataStore)
    return is_data_store


def open_backend_dataset_store(
store,
*,
Expand Down Expand Up @@ -40,4 +45,6 @@ def open_backend_dataset_store(
return ds


store_backend = BackendEntrypoint(open_dataset=open_backend_dataset_store)
store_backend = BackendEntrypoint(
open_dataset=open_backend_dataset_store, guess_can_open=guess_can_open_store
)
2 changes: 1 addition & 1 deletion xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -2623,7 +2623,7 @@ def test_open_badbytes(self):
with raises_regex(ValueError, "HDF5 as bytes"):
with open_dataset(b"\211HDF\r\n\032\n", engine="h5netcdf"):
pass
with raises_regex(ValueError, "not the signature of any supported file"):
with raises_regex(ValueError, "cannot guess the engine"):
with open_dataset(b"garbage"):
pass
with raises_regex(ValueError, "can only read bytes"):
Expand Down