Merge pull request #141 from amoodie/io_testing

Io testing

amoodie authored May 10, 2024
2 parents ff5df0f + d9ff762 commit 999bbe9
Showing 7 changed files with 230 additions and 249 deletions.
110 changes: 55 additions & 55 deletions deltametrics/io.py
@@ -1,8 +1,7 @@

import abc
import os
import copy
from warnings import warn
import warnings

import xarray as xr
import numpy as np
@@ -28,21 +27,18 @@ class BaseIO(abc.ABC):
"""

def __init__(self, io_type):
"""Initialize the base IO.
"""
"""Initialize the base IO."""
self.io_type = io_type

@abc.abstractmethod
def __getitem__(self):
"""Should slice the data from file.
"""
"""Should slice the data from file."""
return

@property
@abc.abstractmethod
def keys(self):
"""Should link to all key _names_ stored in file.
"""
"""Should link to all key _names_ stored in file."""
return


@@ -108,7 +104,7 @@ def data_path(self, var):
if os.path.exists(var):
self._data_path = var
else:
raise FileNotFoundError('File not found at supplied path: %s' % var)
raise FileNotFoundError("File not found at supplied path: %s" % var)

@abc.abstractmethod
def connect(self):
@@ -139,8 +135,7 @@ def get_known_coords(self):

@abc.abstractmethod
def read(self):
"""Should read data into memory.
"""
"""Should read data into memory."""
return

@abc.abstractmethod
@@ -207,51 +202,58 @@ def connect(self):
"""
if not os.path.isfile(self.data_path):
_tempdataset = netCDF4.Dataset(
self.data_path, "w", format="NETCDF4")
_tempdataset = netCDF4.Dataset(self.data_path, "w", format="NETCDF4")
_tempdataset.close()

_ext = os.path.splitext(self.data_path)[-1]
if _ext == '.nc':
_engine = 'netcdf4'
elif _ext == '.hdf5':
_engine = 'h5netcdf'
if _ext == ".nc":
_engine = "netcdf4"
elif _ext == ".hdf5":
_engine = "h5netcdf"
else:
TypeError('File format is not supported '
'by DeltaMetrics: {0}'.format(_ext))
TypeError(
"File format is not supported " "by DeltaMetrics: {0}".format(_ext)
)

try:
# open the dataset
_dataset = xr.open_dataset(self.data_path, engine=_engine)
except Exception as e:
raise TypeError(
f'File format out of scope for DeltaMetrics: {e}')
raise TypeError(f"File format out of scope for DeltaMetrics: {e}")

# try to find if coordinates have been preconfigured
_coords_list = list(_dataset.coords)
with warnings.catch_warnings():
# filter warning about Dataset.dims changing return, we use the correct use already
warnings.filterwarnings("ignore", category=FutureWarning)
_dims_set = set(_dataset.dims.keys())
if len(_coords_list) == 3:
# the coordinates are preconfigured
self.dataset = _dataset.set_coords(_coords_list)
self.coords = list(self.dataset.coords)
self.dims = copy.deepcopy(self.coords)
elif set(['total_time', 'length', 'width']).issubset(set(_dataset.dims.keys())):
elif set(["total_time", "length", "width"]).issubset(_dims_set):
# the coordinates are not set, but there are matching arrays
# this is a legacy option, so issue a warning here
self.dataset = _dataset.set_coords(['x', 'y', 'time'])
self.dims = ['time', 'length', 'width']
self.coords = ['total_time', 'x', 'y']
warn('Coordinates for "time", and ("y", "x") were found as '
'variables in the underlying data file, '
'but are not specified as coordinates in the undelying '
'data file. Please reformat the data file for use '
'with DeltaMetrics. This warning may be replaced '
'with an Error in a future version.', UserWarning)
self.dataset = _dataset.set_coords(["x", "y", "time"])
self.dims = ["time", "length", "width"]
self.coords = ["total_time", "x", "y"]
warnings.warn(
'Coordinates for "time", and ("y", "x") were found as '
"variables in the underlying data file, "
"but are not specified as coordinates in the undelying "
"data file. Please reformat the data file for use "
"with DeltaMetrics. This warning may be replaced "
"with an Error in a future version.",
UserWarning,
)
else:
# coordinates were not found and are not being set
raise NotImplementedError(
'Underlying NetCDF datasets without any specified coordinates '
'are not supported. See source for additional notes about '
'how to implement this feature.')
"Underlying NetCDF datasets without any specified coordinates "
"are not supported. See source for additional notes about "
"how to implement this feature."
)
# DEVELOPER NOTE: it may be possible to support a netcdf file that
# does not have specified coordinates, but we need a test case to
# make it work. It may work to just pass everything along to the
@@ -264,12 +266,12 @@ def connect(self):
# given data file.', UserWarning)

try:
_meta = xr.open_dataset(self.data_path, group='meta',
engine=_engine)
_meta = xr.open_dataset(self.data_path, group="meta", engine=_engine)
self.meta = _meta
except OSError:
warn('No associated metadata was found in the given data file.',
UserWarning)
warnings.warn(
"No associated metadata was found in the given data file.", UserWarning
)
self.meta = None
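Aside from the quote-style reformatting, the substantive change in `connect()` is the switch from `from warnings import warn` to `import warnings`, which lets the `Dataset.dims` access be wrapped in a `warnings.catch_warnings()` context so that only xarray's FutureWarning about the changing return type of `dims` is silenced, and only for that call. A minimal sketch of the same pattern, with a hypothetical helper name and a generic `xarray.Dataset` (illustrative, not part of this diff):

    import warnings

    import xarray as xr


    def dims_as_set(dataset: xr.Dataset) -> set:
        """Collect dimension names as a set, suppressing the FutureWarning
        that newer xarray versions emit for mapping-style access to
        ``Dataset.dims``."""
        with warnings.catch_warnings():
            # ignore only FutureWarning, and only within this block
            warnings.filterwarnings("ignore", category=FutureWarning)
            return set(dataset.dims.keys())

In `connect()`, the resulting set is then tested with `set(["total_time", "length", "width"]).issubset(...)` to detect the legacy variable layout.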

def get_known_variables(self):
@@ -279,8 +281,8 @@ def get_known_variables(self):
"""
_vars = list(self.dataset.variables)
_coords = list(self.dataset.coords)
if ('strata_age' in _vars) or ('strata_depth' in _vars):
_coords += ['strata_age', 'strata_depth']
if ("strata_age" in _vars) or ("strata_depth" in _vars):
_coords += ["strata_age", "strata_depth"]
self.known_variables = [item for item in _vars if item not in _coords]

def get_known_coords(self):
@@ -327,8 +329,7 @@ def __getitem__(self, var):

@property
def keys(self):
"""Variable names in file.
"""
"""Variable names in file."""
return [var for var in self.dataset.variables]


@@ -349,7 +350,7 @@ def __init__(self, data_dictionary, dimensions=None):
each variable.
"""

super().__init__(io_type='dictionary')
super().__init__(io_type="dictionary")

self.dataset = data_dictionary
self._in_memory_data = self.dataset
@@ -386,27 +387,28 @@ def get_known_coords(self, dimensions):
# if dimensions was passed, it must be a dictionary
if not isinstance(dimensions, dict):
raise TypeError(
'Input type for `dimensions` must be '
'`dict` but was {0}'.format(type(dimensions)))
"Input type for `dimensions` must be "
"`dict` but was {0}".format(type(dimensions))
)
# there should be exactly 3 keys
if not (len(dimensions.keys()) == 3):
raise ValueError(
'`dimensions` must contain three dimensions!')
raise ValueError("`dimensions` must contain three dimensions!")
# use the dimensions keys as dims and the vals as coords
# note, we check the size against the underlying a we go
for i, (k, v) in enumerate(dimensions.items()):
if not (len(dimensions[k]) == under_shp[i]):
raise ValueError(
'Shape of `dimensions` at position {0} was {1}, '
'which does not match the variables dimensions '
'{2}.'.format(i, len(dimensions[k]), under_shp))
"Shape of `dimensions` at position {0} was {1}, "
"which does not match the variables dimensions "
"{2}.".format(i, len(dimensions[k]), under_shp)
)
# make the assignment
self.dims = list(dimensions.keys())
self.coords = list(dimensions.values())
self.dimensions = dimensions
# otherwise, fill with np.arange(shape)
else:
self.dims = ['dim0', 'dim1', 'dim2']
self.dims = ["dim0", "dim1", "dim2"]
coords = []
for i in range(3):
coords.append(np.arange(under_shp[i]))
Expand Down Expand Up @@ -455,11 +457,9 @@ def __getitem__(self, var):
elif var in self.known_coords:
return self.dimensions[var]
else:
raise ValueError(
'No variable named {0} found.'.format(var))
raise ValueError("No variable named {0} found.".format(var))

@property
def keys(self):
"""Variable names in file.
"""
"""Variable names in file."""
return [var for var in self.dataset.variables]
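In the `DictionaryIO.get_known_coords` hunk, a user-supplied `dimensions` mapping is checked against the shape of the underlying arrays: it must be a `dict`, it must hold exactly three entries, and each coordinate vector must match the length of the corresponding array dimension; when no mapping is given, the dims default to `dim0`/`dim1`/`dim2` with `np.arange` coordinates. A standalone sketch of that validation, with a hypothetical helper name and toy shapes (illustrative only, not part of the module):

    import numpy as np


    def validate_dimensions(dimensions, under_shp):
        """Mirror the checks applied to a ``dimensions`` mapping in the diff."""
        if not isinstance(dimensions, dict):
            raise TypeError(
                "Input type for `dimensions` must be "
                "`dict` but was {0}".format(type(dimensions))
            )
        if not (len(dimensions.keys()) == 3):
            raise ValueError("`dimensions` must contain three dimensions!")
        for i, (_key, coord) in enumerate(dimensions.items()):
            # each coordinate vector must match the underlying array shape
            if not (len(coord) == under_shp[i]):
                raise ValueError(
                    "Shape of `dimensions` at position {0} was {1}, "
                    "which does not match the variables dimensions "
                    "{2}.".format(i, len(coord), under_shp)
                )


    # passes silently for a consistent (time, length, width) layout
    validate_dimensions(
        {"time": np.arange(3), "length": np.arange(10), "width": np.arange(20)},
        (3, 10, 20),
    )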
2 changes: 1 addition & 1 deletion deltametrics/plot.py
@@ -52,7 +52,7 @@ class VariableInfo(object):
.. doctest::
>>> veg3 = VariableInfo('vegetation_density',
... cmap=mpl.colormaps['Greens'].resampled(3))
... cmap=matplotlib.colormaps['Greens'].resampled(3))
>>> veg3.cmap.N
3
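The single change to `deltametrics/plot.py` rewrites the `VariableInfo` doctest to use `matplotlib.colormaps` rather than the `mpl` alias, which is also why `import matplotlib` is added to the docs configuration in the next file. A minimal sketch of the resampling call the doctest relies on, assuming matplotlib >= 3.6 (where the `matplotlib.colormaps` registry and `Colormap.resampled` are available):

    import matplotlib

    # resample the continuous "Greens" colormap down to three discrete levels,
    # as in the updated VariableInfo doctest
    greens3 = matplotlib.colormaps["Greens"].resampled(3)
    print(greens3.N)  # 3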
1 change: 1 addition & 0 deletions docs/source/conf.py
@@ -75,6 +75,7 @@
import deltametrics as dm
import numpy as np
from matplotlib import pyplot as plt
import matplotlib
'''

# empty string disables testing all code in any docstring
7 changes: 4 additions & 3 deletions docs/source/guides/10min.rst
@@ -67,11 +67,12 @@ The underlying xarray object can be directly accessed by using a ``.data`` attri
.. doctest::

>>> np.mean( golfcube['eta'][1:,43,123] - golfcube['eta'][:-1,43,123] )
<xarray.DataArray 'eta' ()>
<xarray.DataArray 'eta' ()> Size: 4B
array(0., dtype=float32)
Coordinates:
x float32 2.15e+03
y float32 6.15e+03
x float32 4B 2.15e+03
y float32 4B 6.15e+03




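The `10min.rst` edit only refreshes expected doctest output: newer xarray releases print a `Size: ...` byte count in the `DataArray` repr, for the array and for each coordinate. A short sketch of the computation that doctest exercises, loading the `golf` sample data as the guide does (cell indices taken from the doctest itself):

    import numpy as np

    import deltametrics as dm

    golfcube = dm.sample_data.golf()

    # mean change in bed elevation between successive outputs at one (x, y) cell;
    # the repr of the result now includes "Size: 4B" under recent xarray releases
    deta = golfcube["eta"][1:, 43, 123] - golfcube["eta"][:-1, 43, 123]
    print(np.mean(deta))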
13 changes: 7 additions & 6 deletions docs/source/guides/subject_guides/section.rst
@@ -21,7 +21,7 @@ The data that make up the section can view the section as a `spacetime` section
>>> rcm8cube = dm.sample_data.golf()
>>> strike = dm.section.StrikeSection(rcm8cube, distance_idx=10)
>>> strike['velocity']
<xarray.DataArray 'velocity' (time: 101, s: 200)>
<xarray.DataArray 'velocity' (time: 101, s: 200)> Size: 81kB
array([[0.2 , 0.2 , 0.2 , ..., 0.2 , 0.2 , 0.2 ],
[0. , 0. , 0. , ..., 0. , 0. , 0. ],
[0. , 0.0025, 0. , ..., 0. , 0. , 0. ],
@@ -31,22 +31,23 @@ The data that make up the section can view the section as a `spacetime` section
[0. , 0. , 0. , ..., 0.0025, 0. , 0. ]],
dtype=float32)
Coordinates:
* s (s) float64 0.0 50.0 100.0 150.0 ... 9.85e+03 9.9e+03 9.95e+03
* time (time) float32 0.0 5e+05 1e+06 1.5e+06 ... 4.9e+07 4.95e+07 5e+07
* s (s) float64 2kB 0.0 50.0 100.0 150.0 ... 9.85e+03 9.9e+03 9.95e+03
* time (time) float32 404B 0.0 5e+05 1e+06 ... 4.9e+07 4.95e+07 5e+07
Attributes:
slicetype: data_section
knows_stratigraphy: False
knows_spacetime: True



If a `DataCube` has preservation information (i.e., if the :meth:`~deltametrics.cube.DataCube.stratigraphy_from()` method has been called), then the `xarray` object that is returned has this information too.
The same `spacetime` data can be requested in the "preserved" form, where non-preserved t-x-y points are masked with ``np.nan``.

.. doctest::

>>> rcm8cube.stratigraphy_from('eta')
>>> strike['velocity'].strat.as_preserved()
<xarray.DataArray 'velocity' (time: 101, s: 200)>
<xarray.DataArray 'velocity' (time: 101, s: 200)> Size: 81kB
array([[0.2, 0.2, 0.2, ..., 0.2, 0.2, 0.2],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
@@ -55,8 +56,8 @@ The same `spacetime` data can be requested in the "preserved" form, where non-pr
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]], dtype=float32)
Coordinates:
* s (s) float64 0.0 50.0 100.0 150.0 ... 9.85e+03 9.9e+03 9.95e+03
* time (time) float32 0.0 5e+05 1e+06 1.5e+06 ... 4.9e+07 4.95e+07 5e+07
* s (s) float64 2kB 0.0 50.0 100.0 150.0 ... 9.85e+03 9.9e+03 9.95e+03
* time (time) float32 404B 0.0 5e+05 1e+06 ... 4.9e+07 4.95e+07 5e+07
Attributes:
slicetype: data_section
knows_stratigraphy: True
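The `section.rst` edits likewise track the new xarray repr (the `Size:` annotations and reflowed coordinate lines). A condensed sketch of the workflow the two doctests walk through, assembled from the calls shown above (variable names are illustrative):

    import deltametrics as dm

    rcm8cube = dm.sample_data.golf()
    strike = dm.section.StrikeSection(rcm8cube, distance_idx=10)

    # raw spacetime slice of velocity along the strike section
    velocity = strike["velocity"]

    # after computing preservation from bed elevation, the same slice can be
    # returned with non-preserved time-space points masked as NaN
    rcm8cube.stratigraphy_from("eta")
    preserved = strike["velocity"].strat.as_preserved()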
21 changes: 11 additions & 10 deletions docs/source/guides/userguide.rst
@@ -150,7 +150,7 @@ The data returned from the planform are an `xarray` `DataArray`, so you can cont
>>> final.shape
(100, 200)
>>> final['eta']
<xarray.DataArray 'eta' (x: 100, y: 200)>
<xarray.DataArray 'eta' (x: 100, y: 200)> Size: 80kB
array([[ 0.015 , 0.015 , 0.015 , ..., 0.015 , 0.015 , 0.015 ],
[ 0.0075, 0.0075, 0.0075, ..., 0.0075, 0.0075, 0.0075],
[ 0. , 0. , 0. , ..., 0. , 0. , 0. ],
@@ -160,9 +160,9 @@ The data returned from the planform are an `xarray` `DataArray`, so you can cont
[-2. , -2. , -2. , ..., -2. , -2. , -2. ]],
dtype=float32)
Coordinates:
time float32 5e+07
* x (x) float32 0.0 50.0 100.0 150.0 ... 4.85e+03 4.9e+03 4.95e+03
* y (y) float32 0.0 50.0 100.0 150.0 ... 9.85e+03 9.9e+03 9.95e+03
time float32 4B 5e+07
* x (x) float32 400B 0.0 50.0 100.0 150.0 ... 4.85e+03 4.9e+03 4.95e+03
* y (y) float32 800B 0.0 50.0 100.0 150.0 ... 9.85e+03 9.9e+03 9.95e+03
Attributes:
slicetype: data_planform
knows_stratigraphy: False
@@ -257,7 +257,7 @@ are sliced themselves, similarly to the cube.

>>> golfcube.register_section('demo', dm.section.StrikeSection(distance_idx=10))
>>> golfcube.sections['demo']['velocity']
<xarray.DataArray 'velocity' (time: 101, s: 200)>
<xarray.DataArray 'velocity' (time: 101, s: 200)> Size: 81kB
array([[0.2 , 0.2 , 0.2 , ..., 0.2 , 0.2 , 0.2 ],
[0. , 0. , 0. , ..., 0. , 0. , 0. ],
[0. , 0.0025, 0. , ..., 0. , 0. , 0. ],
@@ -267,8 +267,8 @@ are sliced themselves, similarly to the cube.
[0. , 0. , 0. , ..., 0.0025, 0. , 0. ]],
dtype=float32)
Coordinates:
* s (s) float64 0.0 50.0 100.0 150.0 ... 9.85e+03 9.9e+03 9.95e+03
* time (time) float32 0.0 5e+05 1e+06 1.5e+06 ... 4.9e+07 4.95e+07 5e+07
* s (s) float64 2kB 0.0 50.0 100.0 150.0 ... 9.85e+03 9.9e+03 9.95e+03
* time (time) float32 404B 0.0 5e+05 1e+06 ... 4.9e+07 4.95e+07 5e+07
Attributes:
slicetype: data_section
knows_stratigraphy: False
@@ -321,7 +321,7 @@ Now, the ``DataCube`` has knowledge of stratigraphy, which we can further use to
.. doctest::

>>> golfcube.sections['demo']['velocity'].strat.as_preserved()
<xarray.DataArray 'velocity' (time: 101, s: 200)>
<xarray.DataArray 'velocity' (time: 101, s: 200)> Size: 81kB
array([[0.2, 0.2, 0.2, ..., 0.2, 0.2, 0.2],
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan],
@@ -330,14 +330,15 @@ Now, the ``DataCube`` has knowledge of stratigraphy, which we can further use to
[nan, nan, nan, ..., nan, nan, nan],
[nan, nan, nan, ..., nan, nan, nan]], dtype=float32)
Coordinates:
* s (s) float64 0.0 50.0 100.0 150.0 ... 9.85e+03 9.9e+03 9.95e+03
* time (time) float32 0.0 5e+05 1e+06 1.5e+06 ... 4.9e+07 4.95e+07 5e+07
* s (s) float64 2kB 0.0 50.0 100.0 150.0 ... 9.85e+03 9.9e+03 9.95e+03
* time (time) float32 404B 0.0 5e+05 1e+06 ... 4.9e+07 4.95e+07 5e+07
Attributes:
slicetype: data_section
knows_stratigraphy: True
knows_spacetime: True



.. doctest::

>>> fig, ax = plt.subplots(3, 1, sharex=True, figsize=(12, 8))
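The `userguide.rst` changes are the same repr refresh. For completeness, a sketch of the registered-section workflow those doctests demonstrate, pieced together from the calls visible in the hunks above (the final figure call only marks where plotting would pick up):

    import matplotlib.pyplot as plt

    import deltametrics as dm

    golfcube = dm.sample_data.golf()

    # register a strike section on the cube, then slice it like the cube itself
    golfcube.register_section("demo", dm.section.StrikeSection(distance_idx=10))
    velocity = golfcube.sections["demo"]["velocity"]

    # once stratigraphy has been computed, the same slice has a preserved view
    golfcube.stratigraphy_from("eta")
    preserved = golfcube.sections["demo"]["velocity"].strat.as_preserved()

    fig, ax = plt.subplots(3, 1, sharex=True, figsize=(12, 8))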