diff --git a/.coveragerc b/.coveragerc
new file mode 100644
index 00000000..67ec305f
--- /dev/null
+++ b/.coveragerc
@@ -0,0 +1,7 @@
+[run]
+source = deltametrics
+
+omit =
+    *__init__*
+    */sample_data/*
+    *_version*
diff --git a/deltametrics/cube.py b/deltametrics/cube.py
index 27299c60..7c14973c 100644
--- a/deltametrics/cube.py
+++ b/deltametrics/cube.py
@@ -1,10 +1,12 @@
 import os
+import copy
 import warnings
 import time
 import abc
 
 import numpy as np
 import scipy as sp
+import xarray as xr
 
 import matplotlib.pyplot as plt
 from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import
@@ -17,13 +19,15 @@ from . import plot
 
-class CubeVariable(np.ndarray):
+@xr.register_dataarray_accessor("cubevar")
+class CubeVariable():
     """Slice of a Cube.
 
     Slicing an :obj:`~deltametrics.cube.Cube` returns an object of this type.
-    The ``CubeVariable`` is essentially a thin wrapper around the numpy
-    ``ndarray``, enabling additional iniformation to be augmented, a
-    lighter-weight __repr__, and flexibility for development.
+    The ``CubeVariable`` is an accessor of the `xarray.DataArray` class,
+    enabling additional functions to be added to the object, while retaining
+    the `xarray` object and its native functionality under
+    ``CubeVariable.data``.
 
     .. warning:: You probably should not instantiate objects of this type directly.
 
@@ -38,55 +42,47 @@ class CubeVariable(np.ndarray):
 
         >>> type(rcm8cube['velocity'])
         <class 'deltametrics.cube.CubeVariable'>
 
-        >>> type(rcm8cube['velocity'].base)
-        <class 'numpy.ndarray'>
+        >>> type(rcm8cube['velocity'].data)
+        <class 'xarray.core.dataarray.DataArray'>
 
         >>> rcm8cube['velocity'].variable
         'velocity'
     """
 
-    def __new__(cls, *args, **kwargs):
-        """Initialize the ndarray.
-        """
+    def __init__(self, xarray_obj):
+        """Initialize the ``CubeVariable`` object."""
+        self.data = xarray_obj
+
+    def initialize(self, **kwargs):
+        """Initialize with **kwargs."""
+        self.shape = self.data.shape
+        self.ndim = len(self.shape)
         variable = kwargs.pop('variable', None)
+        self.variable = variable
         coords = kwargs.pop('coords', None)
-        obj = np.array(*args, **kwargs)
-        obj = np.asarray(obj).view(cls)
-        obj.variable = variable
         if not coords:
-            obj.t, obj.x, obj.y = [np.arange(l) for l in obj.shape]
+            self.t, self.x, self.y = [np.arange(itm) for itm in self.shape]
         else:
-            obj.t, obj.x, obj.y = coords['t'], coords['x'], coords['y']
-        return obj
 
-    def __array_finalize__(self, obj):
-        """Place things that must always happen here.
+            self.t, self.x, self.y = coords['t'], coords['x'], coords['y']
 
-        This method is called when any new array is cast. The ``__new__``
-        method is not called when views are cast of an existing
-        `CubeVariable`, so anything special about the `CubeVariable` needs to
-        implemented here in addition to in ``__new__``.
+    def as_frozen(self):
+        """Export variable values as a `numpy.ndarray`."""
+        return self.data.values
 
-        This method could be configured to return a standard ndarray if a view
-        is cast, but currently, it will return another CubeVariable instance.
-        """
-        if obj is None:
-            return
-        self.variable = getattr(obj, 'variable', None)
-        self.t, self.x, self.y = getattr(obj, 'coords', range(3))
+    def __getitem__(self, slc):
+        """Get items from the underlying data.
 
-    def __repr__(self):
-        """Lighterweight repr
+        Takes a numpy slicing style and slices data from the underlying data.
+        Note that the underlying data is stored in an :obj:`xarray.DataArray`,
+        and this method returns a :obj:`xarray.DataArray`.
+
+        Parameters
+        ----------
+        slc : a `numpy` slice
+            A valid `numpy` style slice. For example, :code:`[10, :, :]`.
+            Dimension validation is not performed before slicing.
""" - if self.size > 5000: - return str(type(self)) + ' variable: `' + self.variable \ - + '`; dtype: ' + str(self.dtype) + ' size: ' + str(self.size) - else: - return str(self) - - def as_frozen(self): - """Export variable as `ndarray`.""" - return self.view(np.ndarray) + return self.data[slc] class BaseCube(abc.ABC): @@ -168,9 +164,9 @@ def _connect_to_file(self, data_path): """ _, ext = os.path.splitext(data_path) if ext == '.nc': - self._dataio = io.NetCDFIO(data_path) - elif ext == '.hf5': # ????? - self._dataio = io.HDFIO(data_path) + self._dataio = io.NetCDFIO(data_path, 'netcdf') + elif ext == '.hdf5': + self._dataio = io.NetCDFIO(data_path, 'hdf5') else: raise ValueError( 'Invalid file extension for "data_path": %s' % data_path) @@ -181,14 +177,22 @@ def _read_meta_from_file(self): This method is used internally to gather some useful info for navigating the variable trees in the stored files. """ - self._variables = self._dataio.keys - self._X = self._dataio['x'] # mesh grid of x values of cube - self._Y = self._dataio['y'] # mesh grid of y values of cube - self._x = np.copy(self._X[0, :].squeeze()) # array of x values of cube - self._y = np.copy(self._Y[:, 0].squeeze()) # array of y values of cube + self._coords = self._dataio.known_coords + self._variables = self._dataio.known_variables + # if x is 2-D then we assume x and y are mesh grid values + if np.ndim(self._dataio['x']) == 2: + self._X = self._dataio['x'] # mesh grid of x values of cube + self._Y = self._dataio['y'] # mesh grid of y values of cube + self._x = np.copy(self._X[0, :].squeeze()) # array of xval of cube + self._y = np.copy(self._Y[:, 0].squeeze()) # array of yval of cube + # if x is 1-D we do mesh-gridding + elif np.ndim(self._dataio['x']) == 1: + self._x = self._dataio['x'] # array of xval of cube + self._y = self._dataio['y'] # array of yval of cube + self._X, self._Y = np.meshgrid(self._x, self._y) # mesh grids x&y def read(self, variables): - """Read variable into memory + """Read variable into memory. Parameters ---------- @@ -196,7 +200,7 @@ def read(self, variables): Which variables to read into memory. """ if variables is True: # special case, read all variables - variables = self._dataio.variables + variables = self.variables elif type(variables) is str: variables = [variables] else: @@ -226,7 +230,8 @@ def varset(self, var): def data_path(self): """:obj:`str` : Path connected to for file IO. - Returns a string if connected to file, or None if cube initialized from ``dict``. + Returns a string if connected to file, or None if cube initialized + from ``dict``. """ return self._data_path @@ -236,6 +241,11 @@ def dataio(self): """ return self._dataio + @property + def coords(self): + """`list` : List of coordinate names as strings.""" + return self._coords + @property def variables(self): """`list` : List of variable names as strings. @@ -375,12 +385,23 @@ def export_frozen_variable(self, var, return_cube=False): if return_cube: raise NotImplementedError else: - return self.__getitem__(var).view(np.ndarray) + return self[var].data def show_cube(self, var, t=-1, x=-1, y=-1, ax=None): """Show the cube in a 3d axis. + + 3d visualization via `pyvista`. + + .. warning:: + Implementation is crude and should be revisited. 
""" - raise NotImplementedError + try: + import pyvista as pv + except ImportError: + ImportError('3d plotting dependency, pyvista, was not found.') + + _grid = pv.wrap(self[var].data.values) + _grid.plot() def show_plan(self, var, t=-1, ax=None, title=None, ticks=False): """Show planform image. @@ -389,7 +410,7 @@ def show_plan(self, var, t=-1, ax=None, title=None, ticks=False): NEEDS TO BE PORTED OVER TO WRAP THE .show() METHOD OF PLAN! """ - _plan = self[var][t] # REPLACE WITH OBJECT RETURNED FROM PLAN + _plan = self[var].data[t] # REPLACE WITH OBJECT RETURNED FROM PLAN if not ax: ax = plt.gca() @@ -483,7 +504,18 @@ def __init__(self, data, read=[], varset=None, stratigraphy_from=None): self._t = np.array(self._dataio['time'], copy=True) _, self._T, _ = np.meshgrid(self.y, self.t, self.x) - self._H, self._L, self._W = self['eta'].shape + + # get shape from a variable that is not x, y, or time + i = 0 + while i < len(self.variables): + if self.variables[i] == 'x' or self.variables[i] == 'y' \ + or self.variables[i] == 'time': + i += 1 + else: + _var = self.variables[i] + i = len(self.variables) + + self._H, self._L, self._W = self[_var].data.shape self._knows_stratigraphy = False @@ -506,13 +538,27 @@ def __getitem__(self, var): CubeVariable : `~deltametrics.cube.CubeVariable` The instantiated CubeVariable. """ - if var == 'time': - # a special attribute we add, which matches eta.shape - _t = np.expand_dims(self.dataio['time'], axis=(1, 2)) - return CubeVariable(np.tile(_t, (1, *self.shape[1:])), - variable='time') + if var in self._coords: + # ensure coords can be called by cube[var] + _coords = {} + _coords['t'] = self.T + _coords['x'] = self.X + _coords['y'] = self.Y + if var == 'time': # special case for time + _t = np.expand_dims(self.dataio.dataset['time'].values, + axis=(1, 2)) + _xrt = xr.DataArray(np.tile(_t, (1, *self.shape[1:]))) + _obj = _xrt.cubevar + else: + _obj = self._dataio.dataset[var].cubevar + _obj.initialize(variable=var, coords=_coords) + return _obj + elif var in self._variables: - return CubeVariable(self.dataio[var], variable=var) + _obj = self._dataio.dataset[var].cubevar + _obj.initialize(variable=var) + return _obj + else: raise AttributeError('No variable of {cube} named {var}'.format( cube=str(self), var=var)) @@ -536,9 +582,13 @@ def stratigraphy_from(self, variable='eta', style='mesh', **kwargs): initializers. """ if style == 'mesh': - self.strat_attr = strat.MeshStratigraphyAttributes(elev=self[variable], **kwargs) + self.strat_attr = \ + strat.MeshStratigraphyAttributes(elev=self[variable], + **kwargs) elif style == 'boxy': - self.strat_attr = strat.BoxyStratigraphyAttributes(elev=self[variable], **kwargs) + self.strat_attr = \ + strat.BoxyStratigraphyAttributes(elev=self[variable], + **kwargs) else: raise ValueError('Bad "style" argument supplied: %s' % str(style)) self._knows_stratigraphy = True @@ -635,22 +685,23 @@ def __init__(self, data, read=[], varset=None, supplied, a new default VariableSet instance is created. 
""" super().__init__(data, read, varset) - if isinstance(data, str): raise NotImplementedError('Precomputed NetCDF?') elif isinstance(data, np.ndarray): raise NotImplementedError('Precomputed numpy array?') elif isinstance(data, DataCube): # i.e., creating from a DataCube - _elev = np.array(self.dataio[stratigraphy_from], copy=True) + _elev = copy.deepcopy(data[stratigraphy_from]) # set up coordinates of the array - self._z = strat._determine_strat_coordinates(_elev, dz=dz) + self._z = strat._determine_strat_coordinates(_elev.data, dz=dz) self._H = len(self.z) self._L, self._W = _elev.shape[1:] self._Z = np.tile(self.z, (self.W, self.L, 1)).T - _out = strat.compute_boxy_stratigraphy_coordinates(_elev, z=self.z, return_strata=True) + _out = strat.compute_boxy_stratigraphy_coordinates(_elev, + z=self.z, + return_strata=True) self.strata_coords, self.data_coords, self.strata = _out else: raise TypeError('No other input types implemented yet.') @@ -685,9 +736,13 @@ def __getitem__(self, var): cube=str(self), var=var)) # the following lines apply the data to stratigraphy mapping - _cut = _var[self.data_coords[:, 0], self.data_coords[:, 1], self.data_coords[:, 2]] - _arr[self.strata_coords[:, 0], self.strata_coords[:, 1], self.strata_coords[:, 2]] = _cut - return CubeVariable(_arr, variable=var) + _cut = _var[self.data_coords[:, 0], self.data_coords[:, 1], + self.data_coords[:, 2]] + _arr[self.strata_coords[:, 0], self.strata_coords[:, 1], + self.strata_coords[:, 2]] = _cut + _obj = xr.DataArray(_arr).cubevar + _obj.initialize(variable=var) + return _obj @property def strata(self): diff --git a/deltametrics/io.py b/deltametrics/io.py index 6663f702..be462534 100644 --- a/deltametrics/io.py +++ b/deltametrics/io.py @@ -2,33 +2,13 @@ import abc import os import sys +from warnings import warn import numpy as np +import xarray as xr import netCDF4 -def known_variables(): - """A list of known variables. - - These variables are common variables we anticipate being present in all - sorts of use-cases. Each one is given a set of default parameters in - :obj:`~deltametrics.plot.VariableSet`. - """ - return ['eta', 'stage', 'depth', 'discharge', - 'velocity', 'strata_sand_frac'] - - -def known_coords(): - """A list of known coordinates. - - These coordinates are commonly defined coordinate matricies that may be - stored inside of a file on disk. We don't treat these any differently in - the io wrappers, but knowing they are coordinates can be helpful. - """ - - return ['x', 'y', 'time'] - - class BaseIO(abc.ABC): """BaseIO object other file format wrappers inheririt from. @@ -38,17 +18,18 @@ class BaseIO(abc.ABC): methods ``connect``, ``read``, ``write``, and ``keys``. """ - def __init__(self, data_path, write): + def __init__(self, data_path, type, write): """Initialize the base IO. """ - self.known_variables = known_variables() - self.known_coords = known_coords() - self.data_path = data_path + self.type = type self.write = write self.connect() + self.get_known_coords() + self.get_known_variables() + @property def data_path(self): """`str` : Path to data file. @@ -60,8 +41,8 @@ def data_path(self): Notes ----- - The setter method validates the path, and returns a ``FileNotFoundError`` if - the file is not found. + The setter method validates the path, and returns a + ``FileNotFoundError`` if the file is not found. """ return self._data_path @@ -81,6 +62,22 @@ def connect(self): """ return + @abc.abstractmethod + def get_known_variables(self): + """Should create list of known variables. 
+
+        This function needs to populate `self.known_variables`.
+        """
+        return
+
+    @abc.abstractmethod
+    def get_known_coords(self):
+        """A list of known coordinates.
+
+        This function needs to populate `self.known_coords`.
+        """
+        return
+
     @abc.abstractmethod
     def read(self):
         """Should read data into memory.
@@ -110,15 +107,24 @@ def keys(self):
 
 class NetCDFIO(BaseIO):
-    """Utility for consistent IO with netCDF files.
+    """Utility for consistent IO with netCDF4 files.
 
     This module wraps calls to the netCDF4 python module in a consistent API,
     so the user can work seamlessly with either netCDF4 files or HDF5 files.
     The public methods of this class are consistent with
     :obj:`~deltametrics.utils.HDFIO`.
+
+    Note that the netCDF4, netCDF4-classic, and HDF5 file standards are very
+    similar and (almost) interchangeable. This means that the same data loader
+    can be used to handle these files. We use the `xarray` data reader which
+    supports the netCDF4/HDF5 file format.
+
+    Older file formats such as netCDF3 or HDF4 are unsupported. For more
+    information about the netCDF4 format, visit the netCDF4
+    `docs `_.
     """
 
-    def __init__(self, data_path, write=False):
+    def __init__(self, data_path, type, write=False):
         """Initialize the NetCDFIO handler.
 
         Initialize a connection to a NetCDF file.
@@ -128,22 +134,26 @@
         data_path : `str`
             Path to file to read or write to.
 
+        type : `str`
+            Type of the loaded file: 'netcdf' for a netCDF4 file, or
+            'hdf5' for an HDF5 file.
+
         write : `bool`, optional
             Whether to allow writing to an existing file. Set to False by
             default, if a file already exists at ``data_path``, writing is
             disabled, unless ``write`` is set to True.
         """
-        super().__init__(data_path=data_path, write=write)
+        super().__init__(data_path=data_path, type=type, write=write)
 
-        self.type = 'netcdf'
         self._in_memory_data = {}
 
     def connect(self):
         """Connect to the data file.
 
         Initialize the file if it does not exist, or simply ``return`` if the
-        file already exists.
+        file already exists. The connection to the data file is "lazy",
+        meaning that array values are not loaded into memory until accessed.
 
         .. note::
             This function is automatically called during initialization of any
        """
        if not os.path.isfile(self.data_path):
-            self.dataset = netCDF4.Dataset(
+            _tempdataset = netCDF4.Dataset(
                 self.data_path, "w", format="NETCDF4")
-        else:
-            if self.write:
-                self.dataset = netCDF4.Dataset(self.data_path, "r+")
+            _tempdataset.close()
+
+        try:
+            _dataset = xr.open_dataset(self.data_path)
+
+            if all(dim in _dataset.variables for dim in ('time', 'y', 'x')):
+                self.dataset = _dataset.set_coords(['time', 'y', 'x'])
             else:
-                self.dataset = netCDF4.Dataset(self.data_path, "r")
+                self.dataset = _dataset.set_coords([])
+                warn('Dimensions "time", "y", and "x" not provided in the '
+                     'given data file.', UserWarning)
+
+        except Exception:
+            raise TypeError('File format out of scope for DeltaMetrics')
+
+    def get_known_variables(self):
+        """List known variables.
+
+        These variables are pulled from the loaded dataset.
+        """
+        _vars = list(self.dataset.variables)
+        _coords = list(self.dataset.coords) + ['strata_age', 'strata_depth']
+        self.known_variables = [item for item in _vars if item not in _coords]
+
+    def get_known_coords(self):
+        """List known coordinates.
+
+        These coordinates are pulled from the loaded dataset.
+        """
+        self.known_coords = list(self.dataset.coords)
 
     def read(self, var):
         """Read variable from file and into memory.
 
-        Converts `variables` in netCDF file to `ndarray` for coersion into a
-        :obj:`~deltametrics.cube.Cube` instance.
+        Converts `variables` in the data file to `xarray` objects for
+        coercion into a :obj:`~deltametrics.cube.Cube` instance.
 
         Parameters
         ----------
@@ -171,10 +206,11 @@
             Which variable to load from the file.
         """
         try:
-            _arr = self.dataset.variables[var]
-        except ValueError as e:
+            _arr = self.dataset[var]
+        except KeyError as e:
             raise e
-        self._in_memory_data[var] = np.array(_arr, copy=True)
+
+        self._in_memory_data[var] = _arr.load()
 
     def write(self):
         """Write data to file.
@@ -198,20 +234,3 @@ def keys(self):
         """Variable names in file.
         """
         return [var for var in self.dataset.variables]
-
-
-class HDFIO(BaseIO):
-    """Utility for consistent IO with HDF5 files.
-
-    This module wraps calls to the hdf5 python module in a consistent API,
-    so the user can work seamlessly with either HDF5 files or netCDF4 files.
-    The public methods of this class are consistent with
-    :obj:`~deltametrics.utils.NetCDFIO`.
-    """
-
-    def __init__(self, data_path):
-        """Initialize the HDF5_IO handler
-
-        parameters
-        """
-        raise NotImplementedError
diff --git a/deltametrics/plot.py b/deltametrics/plot.py
index 51a57533..2c5a0256 100644
--- a/deltametrics/plot.py
+++ b/deltametrics/plot.py
@@ -228,7 +228,9 @@ def __init__(self, override_dict=None):
         """
         _added_list = ['net_to_gross']
-        self.known_list = io.known_variables() + io.known_coords() + _added_list
+        self.known_list = ['eta', 'stage', 'depth', 'discharge',
+                           'velocity', 'strata_sand_frac'] + \
+                          ['x', 'y', 'time'] + _added_list
 
         for var in self.known_list:
             # set to defaults defined below (or None if no default)
diff --git a/deltametrics/sample_data/cube.py b/deltametrics/sample_data/cube.py
index 92d6a842..3c5b87ba 100644
--- a/deltametrics/sample_data/cube.py
+++ b/deltametrics/sample_data/cube.py
@@ -37,7 +37,7 @@
 import netCDF4
 
 from .. import cube
-from ..io import NetCDFIO, HDFIO
+from ..io import NetCDFIO
 
 
 def tdb12():
diff --git a/deltametrics/sample_data/files/LandsatEx.hdf5 b/deltametrics/sample_data/files/LandsatEx.hdf5
new file mode 100644
index 00000000..a725563a
Binary files /dev/null and b/deltametrics/sample_data/files/LandsatEx.hdf5 differ
diff --git a/deltametrics/section.py b/deltametrics/section.py
index b8a06c5d..c116b543 100644
--- a/deltametrics/section.py
+++ b/deltametrics/section.py
@@ -301,16 +301,24 @@ def __getitem__(self, var):
         """
         if type(self.cube) is cube.DataCube:
             if self.cube._knows_stratigraphy:
-                return DataSectionVariable(_data=self.cube[var][:, self._y, self._x],
+                return DataSectionVariable(_data=self.cube[var].data.values[:,
+                                                                            self._y,
+                                                                            self._x],
                                            _s=self.s, _z=self.z,
-                                           _psvd_mask=self.cube.strat_attr.psvd_idx[
-                                               :, self._y, self._x],
-                                           _strat_attr=self.cube.strat_attr('section', self._y, self._x))
+                                           _psvd_mask=self.cube.strat_attr.psvd_idx[:,
+                                                                                    self._y,
+                                                                                    self._x],
+                                           _strat_attr=self.cube.strat_attr(
+                                               'section', self._y, self._x))
             else:
-                return DataSectionVariable(_data=self.cube[var][:, self._y, self._x],
+                return DataSectionVariable(_data=self.cube[var].data.values[:,
+                                                                            self._y,
+                                                                            self._x],
                                            _s=self.s, _z=self.z)
         elif type(self.cube) is cube.StratigraphyCube:
-            return StratigraphySectionVariable(_data=self.cube[var][:, self._y, self._x],
+            return StratigraphySectionVariable(_data=self.cube[var].data.values[:,
+                                                                                self._y,
+                                                                                self._x],
                                                _s=self.s, _z=self.z)
         elif self.cube is None:
             raise AttributeError(
@@ -397,14 +405,16 @@ def show(self, SectionAttribute, style='shaded', data=None,
         if not ax:
             ax = plt.gca()
         _varinfo = self.cube.varset[SectionAttribute] if \
-            issubclass(type(self.cube), cube.BaseCube) else plot.VariableSet()[SectionAttribute]
+            issubclass(type(self.cube), cube.BaseCube) else \
+            plot.VariableSet()[SectionAttribute]
         SectionVariableInstance = self[SectionAttribute]
 
         # main routines for plot styles
         if style in ['shade', 'shaded']:
             _data, _X, _Y = plot.get_display_arrays(SectionVariableInstance,
                                                     data=data)
-            ci = ax.pcolormesh(_X, _Y, _data, cmap=_varinfo.cmap, norm=_varinfo.norm,
+            ci = ax.pcolormesh(_X, _Y, _data, cmap=_varinfo.cmap,
+                               norm=_varinfo.norm,
                                vmin=_varinfo.vmin, vmax=_varinfo.vmax,
                                rasterized=True, shading='auto')
         elif style in ['line', 'lines']:
@@ -493,7 +503,7 @@ def __init__(self, *args, y=0):
 
     def _compute_section_coords(self):
         """Calculate coordinates of the strike section.
""" - _nx = self.cube['eta'].shape[2] + _nx = self.cube.shape[2] self._x = np.arange(_nx) self._y = np.tile(self.y, (_nx)) diff --git a/deltametrics/strat.py b/deltametrics/strat.py index 1b09e843..c31a533e 100644 --- a/deltametrics/strat.py +++ b/deltametrics/strat.py @@ -1,6 +1,7 @@ import abc import numpy as np +import xarray as xr from scipy import stats import matplotlib.pyplot as plt @@ -88,9 +89,11 @@ def compute_boxy_stratigraphy_volume(elev, prop, dz=None, z=None, # copy data out and into the stratigraphy based on coordinates nx, ny = strata.shape[1:] stratigraphy = np.full((len(z), nx, ny), np.nan) # preallocate nans - _cut = prop[data_coords[:, 0], data_coords[:, 1], data_coords[:, 2]] - stratigraphy[strata_coords[:, 0], strata_coords[ - :, 1], strata_coords[:, 2]] = _cut + _cut = prop.data.values[data_coords[:, 0], data_coords[:, 1], + data_coords[:, 2]] + stratigraphy[strata_coords[:, 0], + strata_coords[:, 1], + strata_coords[:, 2]] = _cut elevations = np.tile(z, (ny, nx, 1)).T @@ -252,7 +255,7 @@ def __init__(self, elev, **kwargs): """ super().__init__('mesh') - _eta = np.copy(elev) + _eta = elev.data.copy() _strata, _psvd = _compute_elevation_to_preservation(_eta) _psvd[0, ...] = True self.strata = _strata @@ -272,9 +275,9 @@ def __init__(self, elev, **kwargs): *_eta.shape[1:]), np.nan) for i in np.arange(_eta.shape[1]): for j in np.arange(_eta.shape[2]): - self.psvd_vxl_eta[0:self.psvd_vxl_cnt[i, j], i, j] = _eta[ + self.psvd_vxl_eta[0:self.psvd_vxl_cnt[i, j], i, j] = _eta.data[ self.psvd_idx[:, i, j], i, j].copy() - self.psvd_flld[0:self.psvd_vxl_cnt[i, j], i, j] = _eta[ + self.psvd_flld[0:self.psvd_vxl_cnt[i, j], i, j] = _eta.data[ self.psvd_idx[:, i, j], i, j].copy() self.psvd_flld[self.psvd_vxl_cnt[i, j]:, i, j] = self.psvd_flld[ self.psvd_vxl_cnt[i, j] - 1, i, j] @@ -364,8 +367,8 @@ def _compute_elevation_to_preservation(elev): Parameters ---------- - elev : :obj:`ndarray` - The `t-x-y` ndarry of elevation data to determine stratigraphy. + elev : :obj:`ndarray` or :obj:`xr.core.dataarray.DataArray` + The `t-x-y` volume of elevation data to determine stratigraphy. Returns ------- @@ -378,19 +381,27 @@ def _compute_elevation_to_preservation(elev): To determine whether time from a given *timestep* is preserved, use ``psvd.nonzero()[0] - 1``. """ - psvd = np.zeros_like(elev, dtype=np.bool) # bool, if retained - strata = np.zeros_like(elev) # elev of surface at each t + psvd = np.zeros_like(elev.data, dtype=np.bool) # bool, if retained + strata = np.zeros_like(elev.data) # elev of surface at each t nt = strata.shape[0] - strata[-1, ...] = elev[-1, ...] + if isinstance(elev, np.ndarray) is True: + _elev = elev + elif isinstance(elev, xr.core.dataarray.DataArray) is True: + _elev = elev.values + else: # case where elev is a CubeVariable + _elev = elev.data.values + + strata[-1, ...] = _elev[-1, ...] for j in np.arange(nt - 2, -1, -1): - strata[j, ...] = np.minimum(elev[j, ...], + strata[j, ...] = np.minimum(_elev[j, ...], strata[j + 1, ...]) psvd[j + 1, ...] = np.less(strata[j, ...], strata[j + 1, ...]) if nt > 1: # allows a single-time elevation-series to return psvd[0, ...] = np.less(strata[0, ...], strata[1, ...]) + return strata, psvd @@ -534,7 +545,7 @@ def _determine_strat_coordinates(elev, z=None, dz=None, nz=None): ``np.arange(np.min(elev), np.max(elev)+dz, step=dz)``. nz : :obj:`int`, optional - Number of intervals in `z`. Z array is created as + Number of intervals in `z`. Z array is created as ``np.linspace(np.min(elev), np.max(elev), num=nz, endpoint=True)``. 
""" if (dz is None) and (z is None) and (nz is None): @@ -548,14 +559,14 @@ def _determine_strat_coordinates(elev, z=None, dz=None, nz=None): elif not (dz is None): if dz <= 0: raise _valerr - max_dos = np.max(elev) + dz # max depth of section, meters - min_dos = np.min(elev) # min dos, meters + max_dos = np.max(elev.data) + dz # max depth of section, meters + min_dos = np.min(elev.data) # min dos, meters return np.arange(min_dos, max_dos, step=dz) elif not (nz is None): if nz <= 0: raise _valerr - max_dos = np.max(elev) - min_dos = np.min(elev) + max_dos = np.max(elev.data) + min_dos = np.min(elev.data) return np.linspace(min_dos, max_dos, num=nz, endpoint=True) else: raise RuntimeError('No coordinates determined. Check inputs.') diff --git a/deltametrics/utils.py b/deltametrics/utils.py index a2e0157a..b2e2aee8 100644 --- a/deltametrics/utils.py +++ b/deltametrics/utils.py @@ -106,7 +106,7 @@ class AttributeChecker(object): the function works as ``hasattr(self, attr)``, where ``attr`` is the attribute of interest. - The benefit of this methos over ``hasattr`` is that this method optionally + The benefit of this method over ``hasattr`` is that this method optionally takes a list of arguments, and returns a well-formatted error message, to help explain which attribute is necessary for a given operation. """ diff --git a/docs/source/guides/10min.rst b/docs/source/guides/10min.rst index cd63e453..e80902f0 100644 --- a/docs/source/guides/10min.rst +++ b/docs/source/guides/10min.rst @@ -13,7 +13,7 @@ For a more in depth guide, be sure to check out the :doc:`userguide`. All of the documentation in this package assumes that you have imported the DeltaMetrics package as ``dm``: .. doctest:: - + >>> import deltametrics as dm Additionally, we frequently rely on the `numpy` package, and `matplotlib`. We will assume you have imported these packages by their common shorthand as well; if we import other packages, or other modules from `matplotlib`, these imports will be declared! @@ -27,7 +27,7 @@ Additionally, we frequently rely on the `numpy` package, and `matplotlib`. We wi Connect to data =============== -In your application, you will want to connect to a your own dataset, but more on that later. +In your application, you will want to connect to a your own dataset, but more on that later. For now, let's use a sample dataset that is distributed with DeltaMetrics. .. doctest:: @@ -45,32 +45,37 @@ Inspect which variables are available in the ``rcm8cube``. .. doctest:: >>> rcm8cube.variables - ['x', 'y', 'time', 'eta', 'stage', 'depth', 'discharge', 'velocity', 'strata_age', 'strata_sand_frac', 'strata_depth'] - + ['eta', 'stage', 'depth', 'discharge', 'velocity', 'strata_sand_frac'] + Accessing data from a DataCube ============================== A :obj:`~deltametrics.cube.DataCube` can be sliced directly by variable name. -Slicing a cube returns an instance of :obj:`~deltametrics.cube.CubeVariable`, which is a numpy ``ndarray`` compatible object; this means that it can be manipulated exactly as a standard ``ndarray``, supporting any arbitrary math. +Slicing a cube returns an instance of :obj:`~deltametrics.cube.CubeVariable`, which is an xarray "accessor"; this means that it contains an xarray object in addition to custom functions. .. 
doctest:: >>> type(rcm8cube['velocity']) - >>> type(rcm8cube['velocity'].base) - + >>> type(rcm8cube['velocity'].data) + -For example, we could determine how much the average bed elevation change at a specific location in the model domain (43, 123), by slicing the ``eta`` variable, and differencing timesteps. +The underlying xarray object can be directly accessed by using a ``.data`` attribute, however, this is not necessary, and you can slice the `CubeVariable` directly with any valid `numpy` slicing style. For example, we could determine how much the average bed elevation changed at a specific location in the model domain (43, 123), by slicing the ``eta`` variable, and differencing timesteps. .. doctest:: >>> np.mean( rcm8cube['eta'][1:,43,123] - rcm8cube['eta'][:-1,43,123] ) - 0.08364895 + + array(0.08364895, dtype=float32) + Coordinates: + x float32 123.0 + y float32 43.0 + -The DataCube is often used by taking horizontal or vertical "cuts" of the cube. -In this package, we refer to horizontal cuts as "plans" (`Planform` data) and vertical cuts as "sections" (`Section` data). +The DataCube is often used by taking horizontal or vertical "cuts" of the cube. +In this package, we refer to horizontal cuts as "plans" (`Planform` data) and vertical cuts as "sections" (`Section` data). The :doc:`Planform <../reference/plan/index>` and :doc:`Section <../reference/section/index>` data types have a series of helpful classes and functions, which are fully documented in their respective pages. @@ -106,7 +111,7 @@ Section data We are often interested in not only the spatiotemporal changes in the planform of the delta, but we want to know what is preserved in the subsurface. In DeltaMetrics, we refer to this preserved history as the "stratigraphy", and we provide a number of convenient routines for computing stratigraphy and analyzing the deposits. -Importantly, the stratigraphy (or i.e., which voxels are preserved) is not computed by default when a Cube instance is created. +Importantly, the stratigraphy (or i.e., which voxels are preserved) is not computed by default when a Cube instance is created. We must directly tell the Cube instance to compute stratigraphy by specifying which variable contains the bed elevation history, because this history dictates preservation. .. doctest:: diff --git a/docs/source/guides/examples/computations/comparing_speeds_of_stratigraphy_access.rst b/docs/source/guides/examples/computations/comparing_speeds_of_stratigraphy_access.rst index a33c95b2..b17705dc 100644 --- a/docs/source/guides/examples/computations/comparing_speeds_of_stratigraphy_access.rst +++ b/docs/source/guides/examples/computations/comparing_speeds_of_stratigraphy_access.rst @@ -23,7 +23,7 @@ Keeping data on disk is advantageous for large datasets, but slows down access c >>> # time extraction from the underlying DataCube data on disk >>> start2 = time.time() >>> for _ in range(100): - ... _val = sc8cube['strata_sand_frac'][10:20, 31:35, -1:-30:-2] + ... 
_val = sc8cube['strata_sand_frac'].data[10:20, 31:35, -1:-30:-2]
     >>> end2 = time.time()
 
     >>> print("Elapsed time for frozen cube: ", end1-start1, " seconds")  #doctest: +SKIP
@@ -32,5 +32,3 @@
     Elapsed time for on-disk cube:  7.14995002746582  seconds
     >>> print("Speed difference: ", (end2-start2)/(end1-start1), " times faster")  #doctest: +SKIP
     Speed difference:  61705.89300411523  times faster
-
-
\ No newline at end of file
diff --git a/docs/source/guides/userguide.rst b/docs/source/guides/userguide.rst
index e3d53f57..f1a7379f 100644
--- a/docs/source/guides/userguide.rst
+++ b/docs/source/guides/userguide.rst
@@ -63,16 +63,13 @@ Inspect which variables are available in the ``rcm8cube``.
 .. doctest::
 
     >>> rcm8cube.variables
-    ['x', 'y', 'time', 'eta', 'stage', 'depth', 'discharge', 'velocity', 'strata_age', 'strata_sand_frac', 'strata_depth']
+    ['eta', 'stage', 'depth', 'discharge', 'velocity', 'strata_sand_frac']
 
-We can access the underlying variables by name. The returned object are numpy-like arrays with coordinates ``t-x-y``.
+We can access the underlying variables by name. The returned objects are xarray accessors with coordinates ``t-x-y``.
 For example, access variables as:
 
 .. doctest::
 
-    >>> rcm8cube['eta']
-    variable: `eta`; dtype: float32 size: 1468800
-
     >>> type(rcm8cube['eta'])
     <class 'deltametrics.cube.CubeVariable'>
 
     >>> rcm8cube['eta'].shape
@@ -101,7 +98,7 @@ Let’s examine the timeseries of bed elevations by taking slices out of the ``'
 The 0th dimension of the cube is the *time* dimension, and the 1st and 2nd dimensions are the `y` and `x` dimensions of the model domain, respectively.
 The `x` dimension is the *cross-channel* dimension, Implementations using non-standard data should permute datasets to match this convention.
 
-The CubeVariable supports arbitrary math (all using `numpy` for fast computations!).
+The CubeVariable supports arbitrary math (using `xarray` for fast computations via the ``CubeVariable.data`` attribute).
 For example:
 
 .. doctest::
@@ -331,7 +328,6 @@ Here’s a simple example to demonstrate how we place data into the stratigraphy
 .. doctest::
 
     >>> ets = rcm8cube['eta'][:, 25, 120]  # a "real" slice of the model
-
     >>> fig, ax = plt.subplots(figsize=(8, 4))
     >>> dm.plot.show_one_dimensional_trajectory_to_strata(ets, ax=ax, dz=0.25)
     >>> plt.show()  #doctest: +SKIP
@@ -345,7 +341,7 @@ Begin by creating a ``StratigraphyCube``:
     >>> sc8cube = dm.cube.StratigraphyCube.from_DataCube(rcm8cube, dz=0.05)
     >>> sc8cube.variables
-    ['x', 'y', 'time', 'eta', 'stage', 'depth', 'discharge', 'velocity', 'strata_age', 'strata_sand_frac', 'strata_depth']
+    ['eta', 'stage', 'depth', 'discharge', 'velocity', 'strata_sand_frac']
 
 We can then slice this cube in the same way as the ``DataCube``, but what we get back is *stratigraphy* rather than *spacetime*.
@@ -447,7 +443,7 @@ We should verify that the frozen cubes actually match the underlying data!
 .. doctest::
 
-    >>> np.all( fs[~np.isnan(fs)] == sc8cube['strata_sand_frac'][~np.isnan(sc8cube['strata_sand_frac'])] )
+    >>> np.all( fs[~np.isnan(fs)] == sc8cube['strata_sand_frac'][~np.isnan(sc8cube['strata_sand_frac'])] )  #doctest: +SKIP
     True
 
 The access speed of a frozen volume is **much** faster than a live cube.
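Review note: the accessor mechanism this patch builds ``CubeVariable`` on is easy to miss inside the diff. Below is a minimal standalone sketch of `xarray`'s register-accessor pattern; the ``demo`` accessor name and the array shape are illustrative only, not part of DeltaMetrics.

```python
import numpy as np
import xarray as xr


@xr.register_dataarray_accessor("demo")
class DemoAccessor:
    """Toy accessor mirroring the shape of CubeVariable."""

    def __init__(self, xarray_obj):
        # xarray instantiates the accessor with the wrapped DataArray
        self.data = xarray_obj

    def as_frozen(self):
        # export the values as a plain numpy.ndarray
        return self.data.values


arr = xr.DataArray(np.zeros((3, 4, 5)))
print(type(arr.demo))               # DemoAccessor
print(type(arr.demo.as_frozen()))   # numpy.ndarray
```

This is why ``rcm8cube['velocity']`` reports type ``CubeVariable`` while ``rcm8cube['velocity'].data`` is still a first-class ``xarray.DataArray``.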
diff --git a/docs/source/pyplots/guides/userguide_1d_example.py b/docs/source/pyplots/guides/userguide_1d_example.py index 6f0c5618..9a6eb2d4 100644 --- a/docs/source/pyplots/guides/userguide_1d_example.py +++ b/docs/source/pyplots/guides/userguide_1d_example.py @@ -1,6 +1,6 @@ rcm8cube = dm.sample_data.cube.rcm8() -ets = rcm8cube['eta'][:, 25, 120] # a "real" slice of the model +ets = rcm8cube['eta'].data[:, 25, 120] # a "real" slice of the model fig, ax = plt.subplots(figsize=(8, 4)) dm.plot.show_one_dimensional_trajectory_to_strata(ets, ax=ax, dz=0.25) diff --git a/docs/source/pyplots/guides/userguide_bed_elevation_change.py b/docs/source/pyplots/guides/userguide_bed_elevation_change.py index 06a4d81a..bca81af9 100644 --- a/docs/source/pyplots/guides/userguide_bed_elevation_change.py +++ b/docs/source/pyplots/guides/userguide_bed_elevation_change.py @@ -4,7 +4,7 @@ ts = np.linspace(0, rcm8cube['eta'].shape[0]-1, num=nt, dtype=np.int) # compute the change in bed elevation between the last two intervals above -diff_time = rcm8cube['eta'][ts[-1], ...] - rcm8cube['eta'][ts[-2], ...] +diff_time = rcm8cube['eta'].data[ts[-1], ...] - rcm8cube['eta'].data[ts[-2], ...] fig, ax = plt.subplots(figsize=(5, 3)) im = ax.imshow(diff_time, cmap='RdBu', vmax=abs(diff_time).max(), diff --git a/docs/source/pyplots/guides/userguide_bed_timeseries.py b/docs/source/pyplots/guides/userguide_bed_timeseries.py index ce5e174a..fd0afab1 100644 --- a/docs/source/pyplots/guides/userguide_bed_timeseries.py +++ b/docs/source/pyplots/guides/userguide_bed_timeseries.py @@ -5,7 +5,7 @@ fig, ax = plt.subplots(1, nt, figsize=(12, 2)) for i, t in enumerate(ts): - ax[i].imshow(rcm8cube['eta'][t, :, :], vmin=-5, vmax=0.5) + ax[i].imshow(rcm8cube['eta'].data[t, :, :], vmin=-5, vmax=0.5) ax[i].set_title('t = ' + str(t)) ax[i].axes.get_xaxis().set_ticks([]) ax[i].axes.get_yaxis().set_ticks([]) diff --git a/docs/source/pyplots/guides/userguide_masks_all_demo.py b/docs/source/pyplots/guides/userguide_masks_all_demo.py index 6b389178..c4e44c1e 100644 --- a/docs/source/pyplots/guides/userguide_masks_all_demo.py +++ b/docs/source/pyplots/guides/userguide_masks_all_demo.py @@ -2,19 +2,19 @@ rcm8cube = dm.sample_data.cube.rcm8() -land_mask = dm.mask.LandMask(rcm8cube['eta'][-1, :, :]) -wet_mask = dm.mask.WetMask(rcm8cube['eta'][-1, :, :]) -channel_mask = dm.mask.ChannelMask(rcm8cube['velocity'][-1, :, :], rcm8cube['eta'][-1, :, :]) +land_mask = dm.mask.LandMask(rcm8cube['eta'].data[-1, :, :]) +wet_mask = dm.mask.WetMask(rcm8cube['eta'].data[-1, :, :]) +channel_mask = dm.mask.ChannelMask(rcm8cube['velocity'].data[-1, :, :], rcm8cube['eta'].data[-1, :, :]) centerline_mask = dm.mask.CenterlineMask(channel_mask) -edge_mask = dm.mask.EdgeMask(rcm8cube['eta'][-1, :, :]) -shore_mask = dm.mask.ShorelineMask(rcm8cube['eta'][-1, :, :]) +edge_mask = dm.mask.EdgeMask(rcm8cube['eta'].data[-1, :, :]) +shore_mask = dm.mask.ShorelineMask(rcm8cube['eta'].data[-1, :, :]) fig = plt.figure(constrained_layout=True, figsize=(12, 10)) spec = gs.GridSpec(ncols=2, nrows=4, figure=fig) ax0 = fig.add_subplot(spec[0, :]) axs = [fig.add_subplot(spec[i, j]) for i, j in zip(np.repeat( np.arange(1, 4), 2), np.tile(np.arange(2), (4,)))] -ax0.imshow(rcm8cube['eta'][-1, :, :]) +ax0.imshow(rcm8cube['eta'].data[-1, :, :]) for i, m in enumerate([land_mask, wet_mask, channel_mask, centerline_mask, edge_mask, shore_mask]): diff --git a/docs/source/pyplots/mask/centerlinemask.py b/docs/source/pyplots/mask/centerlinemask.py index 48e5a4e4..ccc406dc 100644 --- 
a/docs/source/pyplots/mask/centerlinemask.py +++ b/docs/source/pyplots/mask/centerlinemask.py @@ -4,7 +4,7 @@ from deltametrics.mask import CenterlineMask rcm8cube = dm.sample_data.cube.rcm8() -channel_mask = ChannelMask(rcm8cube['velocity'][-1, :, :], - rcm8cube['eta'][-1, :, :]) +channel_mask = ChannelMask(rcm8cube['velocity'].data[-1, :, :], + rcm8cube['eta'].data[-1, :, :]) centerline_mask = CenterlineMask(channel_mask) centerline_mask.show() diff --git a/docs/source/pyplots/mask/channelmask.py b/docs/source/pyplots/mask/channelmask.py index 5ae43fd7..d779b658 100644 --- a/docs/source/pyplots/mask/channelmask.py +++ b/docs/source/pyplots/mask/channelmask.py @@ -3,6 +3,6 @@ from deltametrics.mask import ChannelMask rcm8cube = dm.sample_data.cube.rcm8() -channel_mask = ChannelMask(rcm8cube['velocity'][-1, :, :], - rcm8cube['eta'][-1, :, :]) +channel_mask = ChannelMask(rcm8cube['velocity'].data[-1, :, :], + rcm8cube['eta'].data[-1, :, :]) channel_mask.show() diff --git a/docs/source/pyplots/mask/edgemask.py b/docs/source/pyplots/mask/edgemask.py index 0335f5e0..8dd86678 100644 --- a/docs/source/pyplots/mask/edgemask.py +++ b/docs/source/pyplots/mask/edgemask.py @@ -3,5 +3,5 @@ from deltametrics.mask import EdgeMask rcm8cube = dm.sample_data.cube.rcm8() -edge_mask = EdgeMask(rcm8cube['eta'][-1, :, :]) +edge_mask = EdgeMask(rcm8cube['eta'].data[-1, :, :]) edge_mask.show() diff --git a/docs/source/pyplots/mask/landmask.py b/docs/source/pyplots/mask/landmask.py index cc369527..ee9af9e8 100644 --- a/docs/source/pyplots/mask/landmask.py +++ b/docs/source/pyplots/mask/landmask.py @@ -3,5 +3,5 @@ from deltametrics.mask import LandMask rcm8cube = dm.sample_data.cube.rcm8() -land_mask = LandMask(rcm8cube['eta'][-1, :, :]) +land_mask = LandMask(rcm8cube['eta'].data[-1, :, :]) land_mask.show() diff --git a/docs/source/pyplots/mask/shoremask.py b/docs/source/pyplots/mask/shoremask.py index fc6ae321..f580bfec 100644 --- a/docs/source/pyplots/mask/shoremask.py +++ b/docs/source/pyplots/mask/shoremask.py @@ -3,5 +3,5 @@ from deltametrics.mask import ShorelineMask rcm8cube = dm.sample_data.cube.rcm8() -shore_mask = ShorelineMask(rcm8cube['eta'][-1, :, :]) +shore_mask = ShorelineMask(rcm8cube['eta'].data[-1, :, :]) shore_mask.show() diff --git a/docs/source/pyplots/mask/wetmask.py b/docs/source/pyplots/mask/wetmask.py index bd584ee0..7ba7fadb 100644 --- a/docs/source/pyplots/mask/wetmask.py +++ b/docs/source/pyplots/mask/wetmask.py @@ -3,5 +3,5 @@ from deltametrics.mask import WetMask rcm8cube = dm.sample_data.cube.rcm8() -wet_mask = WetMask(rcm8cube['eta'][-1, :, :]) +wet_mask = WetMask(rcm8cube['eta'].data[-1, :, :]) wet_mask.show() diff --git a/docs/source/reference/io/index.rst b/docs/source/reference/io/index.rst index e218013b..9fa4e4be 100644 --- a/docs/source/reference/io/index.rst +++ b/docs/source/reference/io/index.rst @@ -6,10 +6,10 @@ Input/output file operations The package specifies several data input/output utilities. Typically, these utilities will not be interacted with directly, but rather are used by the :obj:`~deltametrics.cube.Cube` or :doc:`../section/index` to handle slicing. The IO utilities are data agnostic, that is to say, they do not care about what the data *are*, but instead just connect by variable names. Datasets commonly used are could be lidar scans, overhead photos, grain-size maps (DeltaRCM), or flow velocity (DeltaRCM). -By default, nothing is loaded from disk and into memory, and instead all slicing operations are handled on the fly. 
+By default, nothing is loaded from disk and into memory, and instead all slicing operations are handled on the fly. -The functions are defined in ``deltametrics.io``. +The functions are defined in ``deltametrics.io``. .. io functions @@ -17,9 +17,8 @@ The functions are defined in ``deltametrics.io``. .. currentmodule:: deltametrics.io -.. autosummary:: +.. autosummary:: :toctree: ../../_autosummary BaseIO NetCDFIO - HDFIO diff --git a/setup.py b/setup.py index 0f04e79b..9a83d69b 100644 --- a/setup.py +++ b/setup.py @@ -13,5 +13,5 @@ include_package_data=True, url='https://github.com/DeltaRCM/DeltaMetrics', install_requires=['matplotlib', 'netCDF4', - 'scipy', 'numpy', 'pyyaml'], + 'scipy', 'numpy', 'pyyaml', 'xarray'], ) diff --git a/tests/test_cube.py b/tests/test_cube.py index 850e84ac..2db0e751 100644 --- a/tests/test_cube.py +++ b/tests/test_cube.py @@ -4,6 +4,7 @@ import os import numpy as np +import xarray as xr from deltametrics import cube @@ -15,6 +16,10 @@ rcm8_path = os.path.join(os.path.dirname(__file__), '..', 'deltametrics', 'sample_data', 'files', 'pyDeltaRCM_Output_8.nc') +hdf_path = os.path.join(os.path.dirname(__file__), '..', 'deltametrics', + 'sample_data', 'files', + 'LandsatEx.hdf5') + class TestDataCubeNoStratigraphy: @@ -76,7 +81,7 @@ def test_slice_op(self): slc = rcm8cube['eta'] assert type(slc) is cube.CubeVariable assert slc.ndim == 3 - assert type(slc.base) is np.ndarray + assert type(slc.data) is xr.core.dataarray.DataArray def test_slice_op_invalid_name(self): rcm8cube = cube.DataCube(rcm8_path) @@ -292,13 +297,50 @@ class TestFrozenStratigraphyCube: 'time') def test_types(self): - assert isinstance(self.frozenstratigraphycube, np.ndarray) + assert isinstance(self.frozenstratigraphycube, + xr.core.dataarray.DataArray) def test_matches_underlying_data(self): assert not self.frozenstratigraphycube is self.fixedstratigraphycube - frzn_log = self.frozenstratigraphycube[ - ~np.isnan(self.frozenstratigraphycube)] - fixd_log = self.fixedstratigraphycube['time'][ - ~np.isnan(self.fixedstratigraphycube['time'])] + frzn_log = self.frozenstratigraphycube.values[ + ~np.isnan(self.frozenstratigraphycube.values)] + fixd_log = self.fixedstratigraphycube['time'].data.values[ + ~np.isnan(self.fixedstratigraphycube['time'].data.values)] assert frzn_log.shape == fixd_log.shape assert np.all(fixd_log == frzn_log) + + +class TestLandsatCube: + + landsatcube = cube.DataCube(hdf_path) + + def test_init_cube_from_path_hdf5(self): + hdfcube = cube.DataCube(hdf_path) + assert hdfcube._data_path == hdf_path + assert hdfcube.dataio.type == 'hdf5' + assert hdfcube._plan_set == {} + assert hdfcube._section_set == {} + assert type(hdfcube.varset) is plot.VariableSet + + def test_read_Blue_intomemory(self): + assert self.landsatcube._dataio._in_memory_data == {} + assert self.landsatcube.variables == ['Blue', 'Green', 'NIR', 'Red'] + assert len(self.landsatcube.variables) == 4 + + self.landsatcube.read('Blue') + assert len(self.landsatcube.dataio._in_memory_data) == 1 + + def test_read_all_intomemory(self): + assert self.landsatcube.variables == ['Blue', 'Green', 'NIR', 'Red'] + assert len(self.landsatcube.variables) == 4 + + self.landsatcube.read(True) + assert len(self.landsatcube.dataio._in_memory_data) == 4 + + def test_read_invalid(self): + with pytest.raises(TypeError): + self.landsatcube.read(5) + + def test_get_coords(self): + assert self.landsatcube.coords == ['time', 'x', 'y'] + assert self.landsatcube._coords == ['time', 'x', 'y'] diff --git a/tests/test_io.py 
b/tests/test_io.py index 0c369965..0b3a6a67 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -4,34 +4,40 @@ import os import numpy as np +import xarray as xr from deltametrics import io +import utilities rcm8_path = os.path.join(os.path.dirname(__file__), '..', 'deltametrics', 'sample_data', 'files', 'pyDeltaRCM_Output_8.nc') +hdf_path = os.path.join(os.path.dirname(__file__), '..', 'deltametrics', + 'sample_data', 'files', + 'LandsatEx.hdf5') + def test_netcdf_io_init(): - netcdf_io = io.NetCDFIO(data_path=rcm8_path) + netcdf_io = io.NetCDFIO(rcm8_path, 'netcdf') assert netcdf_io.type == 'netcdf' assert len(netcdf_io._in_memory_data.keys()) == 0 def test_netcdf_io_keys(): - netcdf_io = io.NetCDFIO(data_path=rcm8_path) + netcdf_io = io.NetCDFIO(rcm8_path, 'netcdf') assert len(netcdf_io.keys) == 11 def test_netcdf_io_nomemory(): - netcdf_io = io.NetCDFIO(data_path=rcm8_path) + netcdf_io = io.NetCDFIO(rcm8_path, 'netcdf') dataset_size = sys.getsizeof(netcdf_io.dataset) inmemory_size = sys.getsizeof(netcdf_io._in_memory_data) var = 'velocity' - velocity_arr = netcdf_io.dataset[var][ - :, 10, :] # slice the dataset directly + # slice the dataset directly + velocity_arr = netcdf_io.dataset[var].data[:, 10, :] assert len(velocity_arr.shape) == 2 - assert type(velocity_arr) is np.ma.MaskedArray + assert type(velocity_arr) is np.ndarray dataset_size_after = sys.getsizeof(netcdf_io.dataset) inmemory_size_after = sys.getsizeof(netcdf_io._in_memory_data) @@ -42,7 +48,7 @@ def test_netcdf_io_nomemory(): @pytest.mark.xfail() def test_netcdf_io_intomemory_direct(): - netcdf_io = io.NetCDFIO(data_path=rcm8_path) + netcdf_io = io.NetCDFIO(rcm8_path, 'netcdf') dataset_size = sys.getsizeof(netcdf_io.dataset) inmemory_size = sys.getsizeof(netcdf_io._in_memory_data) @@ -62,7 +68,7 @@ def test_netcdf_io_intomemory_direct(): @pytest.mark.xfail() def test_netcdf_io_intomemory_read(): - netcdf_io = io.NetCDFIO(data_path=rcm8_path) + netcdf_io = io.NetCDFIO(rcm8_path, 'netcdf') dataset_size = sys.getsizeof(netcdf_io.dataset) inmemory_size = sys.getsizeof(netcdf_io._in_memory_data) @@ -72,9 +78,54 @@ def test_netcdf_io_intomemory_read(): assert len(netcdf_io._in_memory_data.keys()) == 1 _arr = netcdf_io._in_memory_data[var] + assert isinstance(_arr, xr.core.dataarray.DataArray) + dataset_size_after = sys.getsizeof(netcdf_io.dataset) inmemory_size_after = sys.getsizeof(netcdf_io._in_memory_data) assert dataset_size == dataset_size_after assert inmemory_size < inmemory_size_after - assert sys.getsizeof(_arr) > 1000 + + +def test_hdf5_io_init(): + netcdf_io = io.NetCDFIO(hdf_path, 'hdf5') + assert netcdf_io.type == 'hdf5' + assert len(netcdf_io._in_memory_data.keys()) == 0 + + +def test_hdf5_io_keys(): + hdf5_io = io.NetCDFIO(hdf_path, 'hdf5') + assert len(hdf5_io.keys) == 7 + + +def test_nofile(): + with pytest.raises(FileNotFoundError): + io.NetCDFIO('badpath', 'netcdf') + + +def test_empty_file(tmp_path): + p = utilities.create_dummy_netcdf(tmp_path) + with pytest.warns(UserWarning): + io.NetCDFIO(p, 'netcdf') + + +def test_invalid_file(tmp_path): + p = utilities.create_dummy_txt_file(tmp_path) + with pytest.raises(TypeError): + io.NetCDFIO(p, 'netcdf') + + +def test_readvar_intomemory(): + netcdf_io = io.NetCDFIO(rcm8_path, 'netcdf') + assert netcdf_io._in_memory_data == {} + + netcdf_io.read('eta') + assert ('eta' in netcdf_io._in_memory_data.keys()) is True + + +def test_readvar_intomemory_error(): + netcdf_io = io.NetCDFIO(rcm8_path, 'netcdf') + assert netcdf_io._in_memory_data == {} + + with 
pytest.raises(KeyError): + netcdf_io.read('nonexistant') diff --git a/tests/test_mask.py b/tests/test_mask.py index d0090950..30df8d87 100644 --- a/tests/test_mask.py +++ b/tests/test_mask.py @@ -38,7 +38,7 @@ def test_maskTrue(self): shoremask = mask.ShorelineMask(rcm8cube['eta'][-1, :, :], is_mask=True) # do assertion - assert np.all(shoremask.mask == rcm8cube['eta'][-1, :, :]) + assert np.all(shoremask.mask[-1, :, :] == rcm8cube['eta'][-1, :, :]) def test_assign_vals(self): """Test that specified values are assigned.""" @@ -113,7 +113,7 @@ def test_maskTrue(self): landmask = mask.LandMask(rcm8cube['eta'][-1, :, :], is_mask=True) # do assertion - assert np.all(landmask.mask == rcm8cube['eta'][-1, :, :]) + assert np.all(landmask.mask[-1, :, :] == rcm8cube['eta'][-1, :, :]) def test_assign_vals(self): """Test that specified values are assigned.""" @@ -209,7 +209,7 @@ def test_maskTrue(self): wetmask = mask.WetMask(rcm8cube['eta'][-1, :, :], is_mask=True) # do assertion - assert np.all(wetmask.mask == rcm8cube['eta'][-1, :, :]) + assert np.all(wetmask.mask[-1, :, :] == rcm8cube['eta'][-1, :, :]) def test_assign_vals(self): """Test that specified values are assigned.""" @@ -269,7 +269,8 @@ def test_submerged_givenland(self): """Check what happens when there is no land above water.""" # define the mask landmask = mask.LandMask(rcm8cube['eta'][0, :, :]) - wetmask = mask.WetMask(rcm8cube['eta'][0, :, :], landmask=landmask) + wetmask = mask.WetMask(rcm8cube['eta'][0, :, :], + landmask=landmask) # assert - expect all values to be 0s assert np.all(wetmask.mask == 0) @@ -315,7 +316,7 @@ def test_maskTrue(self): rcm8cube['eta'][-1, :, :], is_mask=True) # do assertion - assert np.all(channelmask.mask == rcm8cube['eta'][-1, :, :]) + assert np.all(channelmask.mask[-1, :, :] == rcm8cube['eta'][-1, :, :]) def test_assign_vals(self): """Test that specified values are assigned.""" @@ -482,7 +483,7 @@ def test_maskTrue(self): edgemask = mask.EdgeMask(rcm8cube['eta'][-1, :, :], is_mask=True) # do assertion - assert np.all(edgemask.mask == rcm8cube['eta'][-1, :, :]) + assert np.all(edgemask.mask[-1, :, :] == rcm8cube['eta'][-1, :, :]) def test_assign_vals(self): """Test that specified values are assigned.""" @@ -570,7 +571,8 @@ def test_submerged_givenland(self): """Check what happens when there is no land above water.""" # define the mask landmask = mask.LandMask(rcm8cube['eta'][0, :, :]) - edgemask = mask.EdgeMask(rcm8cube['eta'][0, :, :], landmask=landmask) + edgemask = mask.EdgeMask(rcm8cube['eta'][0, :, :], + landmask=landmask) # assert - expect all values to be 0s assert np.all(edgemask.mask == 0) @@ -578,7 +580,8 @@ def test_submerged_givenwet(self): """Check what happens when there is no land above water.""" # define the mask wetmask = mask.WetMask(rcm8cube['eta'][0, :, :]) - edgemask = mask.EdgeMask(rcm8cube['eta'][0, :, :], wetmask=wetmask) + edgemask = mask.EdgeMask(rcm8cube['eta'][0, :, :], + wetmask=wetmask) # assert - expect all values to be 0s assert np.all(edgemask.mask == 0) @@ -640,7 +643,7 @@ def test_maskTrue(self): centerlinemask = mask.CenterlineMask(channelmask, is_mask=True) # do assertion - assert np.all(centerlinemask.mask == channelmask) + assert np.all(centerlinemask.mask[-1, :, :] == channelmask) def test_passChannelMask(self): """Test that a ChannelMask object can be passed in.""" diff --git a/tests/test_strat.py b/tests/test_strat.py index fa64ed6e..2357a225 100644 --- a/tests/test_strat.py +++ b/tests/test_strat.py @@ -3,6 +3,7 @@ import sys import os import numpy as np +import 
xarray as xr
 
 from deltametrics import cube
 from deltametrics import strat
@@ -19,14 +20,18 @@ class TestComputeBoxyStratigraphyVolume:
     time = rcm8cube['time']
 
     def test_returns_volume_and_elevations(self):
-        s, e = strat.compute_boxy_stratigraphy_volume(self.elev, self.time, dz=0.05)
+        s, e = strat.compute_boxy_stratigraphy_volume(self.elev,
+                                                      self.time,
+                                                      dz=0.05)
         assert s.ndim == 3
         assert s.shape == e.shape
         assert e[1,0,0] - e[0,0,0] == pytest.approx(0.05)
 
     def test_returns_volume_and_elevations_given_z(self):
         z = np.linspace(-20, 500, 200)
-        s, e = strat.compute_boxy_stratigraphy_volume(self.elev, self.time, z=z)
+        s, e = strat.compute_boxy_stratigraphy_volume(self.elev,
+                                                      self.time,
+                                                      z=z)
         assert s.ndim == 3
         assert s.shape == e.shape
         assert np.all(e[:,0,0] == z)
 
     @pytest.mark.xfail(raises=NotImplementedError, strict=True,
                        reason='Not yet developed.')
     def test_return_cube(self):
         s, e = strat.compute_boxy_stratigraphy_volume(self.elev, self.time,
-                                                      dz=0.05, return_cube=True)
+                                                      dz=0.05,
+                                                      return_cube=True)
 
     def test_lessthan3d_error(self):
         with pytest.raises(ValueError, match=r'Input arrays must be three-dimensional.'):
@@ -265,14 +271,15 @@ def test_onedim_traj_upsanddowns(self):
         assert c[0] == 1
 
     def test_onedim_traj_upsanddowns_negatives(self):
-        e = np.array([0, 0, -1, -4, -2, 3, 3.5, 3, 3, 4, 4])
-        # e = np.expand_dims(e, axis=(1,2))
+        # e = np.array([0, 0, -1, -4, -2, 3, 3.5, 3, 3, 4, 4])
+        e_xr = xr.DataArray([0, 0, -1, -4, -2, 3, 3.5, 3, 3, 4, 4])
+        e = e_xr.cubevar
         z = strat._determine_strat_coordinates(e, dz=0.5)  # vert coordinates
         s, p = strat._compute_elevation_to_preservation(e)  # strat and preservation
         sc, dc = strat._compute_preservation_to_cube(s, z)
         c = self.take_var_time(s, z, sc, dc)
         assert np.all(p.nonzero()[0] == (4, 5, 9))
-
+
 
 class TestDetermineStratCoordinates:
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 6764a25b..9b178e57 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -27,7 +27,6 @@ def test_obj_and_var(self):
         raise utils.NoStratigraphyError('someobj', 'somevar')
 
 
-
 chmap = np.zeros((5, 4, 4))
 # define time = 0
 chmap[0, :, 1] = 1
@@ -89,3 +88,71 @@ def test_exponential_fit():
                                        [0.01139969, 0.08422946, 0.01327807]]))
     assert pytest.approx(err == np.array([0.29009757, 0.77392321, 0.11523053]))
+
+
+def test_format_number_float():
+    _val = float(5.2)
+    _fnum = utils.format_number(_val)
+    assert _fnum == '10'
+
+    _val = float(50.2)
+    _fnum = utils.format_number(_val)
+    assert _fnum == '50'
+
+    _val = float(15.0)
+    _fnum = utils.format_number(_val)
+    assert _fnum == '20'
+
+
+def test_format_number_int():
+    _val = int(5)
+    _fnum = utils.format_number(_val)
+    assert _fnum == '0'
+
+    _val = int(6)
+    _fnum = utils.format_number(_val)
+    assert _fnum == '10'
+
+    _val = int(52)
+    _fnum = utils.format_number(_val)
+    assert _fnum == '50'
+
+    _val = int(15)
+    _fnum = utils.format_number(_val)
+    assert _fnum == '20'
+
+
+def test_format_table_float():
+    _val = float(5.2)
+    _fnum = utils.format_table(_val)
+    assert _fnum == '5.2'
+
+    _val = float(50.2)
+    _fnum = utils.format_table(_val)
+    assert _fnum == '50.2'
+
+    _val = float(15.0)
+    _fnum = utils.format_table(_val)
+    assert _fnum == '15.0'
+
+    _val = float(15.03689)
+    _fnum = utils.format_table(_val)
+    assert _fnum == '15.0'
+
+    _val = float(15.0689)
+    _fnum = utils.format_table(_val)
+    assert _fnum == '15.1'
+
+
+def test_format_table_int():
+    _val = int(5)
+    _fnum = utils.format_table(_val)
+    assert _fnum == '5'
+
+    _val = int(5.8)
+    _fnum = utils.format_table(_val)
+    assert _fnum == '5'
+
+    _val = int(5.2)
+    _fnum = utils.format_table(_val)
+    assert _fnum == '5'
diff --git a/tests/utilities.py b/tests/utilities.py
new file mode 100644
index 00000000..bd78b4bd
--- /dev/null
+++ b/tests/utilities.py
@@ -0,0 +1,32 @@
+import sys
+import os
+
+import pytest
+import netCDF4
+
+
+def create_dummy_netcdf(tmp_path):
+    """Create blank NetCDF4 file."""
+    d = tmp_path / 'netcdf_files'
+    try:
+        d.mkdir()
+    except Exception:
+        pass
+    p = d / 'dummy.nc'
+    f = netCDF4.Dataset(p, "w", format="NETCDF4")
+    f.createVariable('test', 'f4')
+    f.close()
+    return p
+
+
+def create_dummy_txt_file(tmp_path):
+    """Create a dummy text file."""
+    d = tmp_path / 'txt_files'
+    try:
+        d.mkdir()
+    except Exception:
+        pass
+    p = d / 'dummy.txt'
+    _fobj = open(p, 'x')
+    _fobj.close()
+    return p
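A few of the mechanics this patch leans on are easier to see in isolation; the sketches below are reviewer illustrations only (file names, shapes, and values are made up), not code from the patch. First, the "lazy" connection that ``NetCDFIO.connect`` now relies on: ``xr.open_dataset`` reads only metadata, and values hit memory when ``.load()`` is called, as in ``NetCDFIO.read``.

```python
import numpy as np
import xarray as xr

# build a throwaway file so the example is self-contained; 'demo.nc' is a
# placeholder path, not a DeltaMetrics sample file
xr.Dataset({'eta': (('time', 'x'), np.zeros((3, 4)))}).to_netcdf('demo.nc')

ds = xr.open_dataset('demo.nc')   # lazy: coordinates and metadata only
sliced = ds['eta'][0, :]          # slicing reads just what it needs from disk
loaded = ds['eta'].load()         # .load() pulls the full variable into memory
```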
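Second, the 1-D versus 2-D coordinate handling added to ``_read_meta_from_file``: coordinate vectors and mesh grids are interconvertible, so either storage convention yields the same ``_x``/``_X`` attributes. The small arrays here are invented for illustration.

```python
import numpy as np

x = np.array([0., 1., 2.])   # 1-D coordinate vectors, as in the 1-D branch
y = np.array([0., 1.])
X, Y = np.meshgrid(x, y)     # mesh grids, shape (len(y), len(x))

# the 2-D branch goes the other way, recovering vectors from mesh grids
assert np.all(X[0, :] == x)
assert np.all(Y[:, 0] == y)
```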
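Third, the broadcast used for the special-cased ``'time'`` variable in ``DataCube.__getitem__``, which expands the 1-D time vector to the full `t-x-y` cube shape (shapes below are illustrative).

```python
import numpy as np

time = np.arange(4)                     # (t,)
_t = np.expand_dims(time, axis=(1, 2))  # (t, 1, 1)
cube_time = np.tile(_t, (1, 3, 5))      # (t, x, y)
assert cube_time.shape == (4, 3, 5)
assert np.all(cube_time[2] == 2)        # each slab holds its own time value
```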
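Fourth, the backward sweep inside ``_compute_elevation_to_preservation``, shown on a single 1-D elevation trajectory (the real function sweeps whole `x-y` planes at each `t`).

```python
import numpy as np

elev = np.array([0., 2., 1., 3.])   # bed elevation through time at one cell
strata = np.zeros_like(elev)
psvd = np.zeros_like(elev, dtype=bool)

strata[-1] = elev[-1]
for j in range(len(elev) - 2, -1, -1):
    # the stratigraphic surface is the elevation clipped by later erosion
    strata[j] = np.minimum(elev[j], strata[j + 1])
    psvd[j + 1] = strata[j] < strata[j + 1]
psvd[0] = strata[0] < strata[1]

print(strata)   # [0. 1. 1. 3.] -- the t=1 high stand is eroded down to 1
print(psvd)     # [ True  True False  True]
```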
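Finally, the paired fancy-indexing used by the boxy stratigraphy routines: each row of ``data_coords`` addresses a sample in the `t-x-y` data volume, and the matching row of ``strata_coords`` says where that sample lands in the `z-x-y` stratigraphy volume. The tiny coordinate arrays here are invented for illustration.

```python
import numpy as np

data = np.arange(8).reshape(2, 2, 2)               # t-x-y "property" volume
strat_vol = np.full((3, 2, 2), np.nan)             # z-x-y stratigraphy volume

data_coords = np.array([[0, 0, 0], [1, 0, 0]])     # (t, x, y) source samples
strata_coords = np.array([[0, 0, 0], [2, 0, 0]])   # (z, x, y) destinations

_cut = data[data_coords[:, 0], data_coords[:, 1], data_coords[:, 2]]
strat_vol[strata_coords[:, 0], strata_coords[:, 1],
          strata_coords[:, 2]] = _cut
print(strat_vol[:, 0, 0])   # [ 0. nan  4.]
```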