Merge pull request #79 from amoodie/integrate_xarray
decouple the coordinates and dimensions of the dataset from x and y
amoodie authored Nov 17, 2021
2 parents fd53b5d + e68c494 commit 7d3210d
Showing 13 changed files with 417 additions and 339 deletions.
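This commit replaces the hard-coded 'x' and 'y' coordinate names with dimensions discovered from the underlying xarray dataset, exposed on the cube as dim0/dim1/dim2 coordinates. As a rough sketch of the xarray pattern involved (the toy dataset and names here are illustrative, not from the commit):

import numpy as np
import xarray as xr

# hypothetical stand-in for a DeltaMetrics data file
ds = xr.Dataset(
    {'eta': (('time', 'length', 'width'), np.zeros((3, 4, 5)))},
    coords={'time': np.arange(3),
            'length': np.arange(4) * 50.0,   # dim1 coordinate
            'width': np.arange(5) * 50.0})   # dim2 coordinate

# discover dimension names from a variable, instead of assuming 'x'/'y'
d0, d1, d2 = ds['eta'].dims
print(d0, d1, d2)     # time length width
print(ds[d1].values)  # coordinate values along the second dimension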
89 changes: 56 additions & 33 deletions deltametrics/cube.py
@@ -126,17 +126,28 @@ def _read_meta_from_file(self, coordinates):
self.coordinates = copy.deepcopy(default_coordinates)
self._coords = self._dataio.known_coords
self._variables = self._dataio.known_variables

# process the dimensions into attributes
if len(self._dataio.dims) > 0:
d0, d1, d2 = self._dataio.dims
else:
# try taking a slice of the dataset to get the coordinates
d0, d1, d2 = self.dataio[self._dataio.known_variables[0]].dims
self._dim0_idx = self._dataio.dataset[d0]

# if x is 2-D then we assume x and y are mesh grid values
if np.ndim(self._dataio['x']) == 2:
self._X = self._dataio['x'] # mesh grid of x values of cube
self._Y = self._dataio['y'] # mesh grid of y values of cube
self._x = np.copy(self._X[0, :].squeeze()) # array of xval of cube
self._y = np.copy(self._Y[:, 0].squeeze()) # array of yval of cube
if np.ndim(self._dataio[d1]) == 2:
self._dim1_idx = self._dataio.dataset[d1][:, 0].squeeze()
self._dim2_idx = self._dataio.dataset[d2][0, :].squeeze()
# if x is 1-D we do mesh-gridding
elif np.ndim(self._dataio['x']) == 1:
self._x = self._dataio[default_coordinates['x']] # array of xval of cube
self._y = self._dataio[default_coordinates['y']] # array of yval of cube
self._X, self._Y = np.meshgrid(self._x, self._y) # mesh grids x&y
elif np.ndim(self._dataio[d1]) == 1:
self._dim1_idx = self._dataio.dataset[d1]
self._dim2_idx = self._dataio.dataset[d2]

# assign values to dimensions of cube
self._dim0_coords = self._t = self._dim0_idx
self._dim1_coords = self._dim1_idx
self._dim2_coords = self._dim2_idx

def read(self, variables):
"""Read variable into memory.
@@ -277,24 +288,22 @@ def register_section(self, name, SectionInstance, return_section=False):
return self._section_set[name]

@property
def x(self):
"""x-direction coordinate."""
return self._x

@property
def X(self):
"""x-direction mesh."""
return self._X
def dim0_coords(self):
"""Coordinates along the first dimension of `cube`.
"""
return self.z

@property
def y(self):
"""y-direction coordinate."""
return self._y
def dim1_coords(self):
"""Coordinates along the second dimension of `cube`.
"""
return self._dim1_coords

@property
def Y(self):
"""y-direction mesh."""
return self._Y
def dim2_coords(self):
"""Coordinates along the third dimension of `cube`.
"""
return self._dim2_coords

@property
@abc.abstractmethod
@@ -471,8 +480,9 @@ def __init__(self, data, read=[], varset=None, stratigraphy_from=None,
"""
super().__init__(data, read, varset, coordinates)

self._t = np.array(self._dataio['time'], copy=True)
_, self._T, _ = np.meshgrid(self.y, self.t, self.x)
# set up the grid for time
_, self._T, _ = np.meshgrid(
self.dim1_coords, self.dim0_coords, self.dim2_coords)

# get shape from a variable that is not x, y, or time
i = 0
@@ -486,6 +496,14 @@ def __init__(self, data, read=[], varset=None, stratigraphy_from=None,

self._H, self._L, self._W = self[_var].data.shape

# set up dimension and coordinate fields for when slices of the cube
# are made
self._view_dimensions = self._dataio.dims
self._view_coordinates = copy.deepcopy({
self._view_dimensions[0]: self.dim0_coords,
self._view_dimensions[1]: self.dim1_coords,
self._view_dimensions[2]: self.dim2_coords})

self._knows_stratigraphy = False

if stratigraphy_from:
@@ -514,10 +532,8 @@ def __getitem__(self, var):
axis=(1, 2))
_xrt = xr.DataArray(
np.tile(_t, (1, *self.shape[1:])),
coords={"t": self._t,
"y": self.dataio[self.coordinates['x']],
"x": self.dataio[self.coordinates['y']]},
dims=['t', 'x', 'y'])
coords=self._view_coordinates,
dims=self._view_dimensions)
_obj = _xrt
else:
_obj = self._dataio.dataset[var]
@@ -665,18 +681,25 @@ def __init__(self, data, read=[], varset=None,
_elev = copy.deepcopy(data[stratigraphy_from])

# set up coordinates of the array
self._z = strat._determine_strat_coordinates(
_z = strat._determine_strat_coordinates(
_elev.data, dz=dz, z=z, nz=nz)
self._z = xr.DataArray(_z, name='z', dims=['z'], coords={'z': _z})
self._H = len(self.z)
self._L, self._W = _elev.shape[1:]
self._Z = np.tile(self.z, (self.W, self.L, 1)).T

_out = strat.compute_boxy_stratigraphy_coordinates(
_elev, z=self.z, return_strata=True)
_elev, z=_z, return_strata=True)
self.strata_coords, self.data_coords, self.strata = _out
else:
raise TypeError('No other input types implemented yet.')

self._view_dimensions = ['z', *data._dataio.dims[1:]]
self._view_coordinates = copy.deepcopy({
self._view_dimensions[0]: self._z,
self._view_dimensions[1]: self.dim1_coords,
self._view_dimensions[2]: self.dim2_coords})

def __getitem__(self, var):
"""Return the variable.
@@ -718,8 +741,8 @@ def __getitem__(self, var):
self.strata_coords[:, 2]] = _cut
_obj = xr.DataArray(
_arr,
coords={"z": self._z, "y": self._x, "x": self._y},
dims=['z', 'x', 'y'])
coords=self._view_coordinates,
dims=self._view_dimensions)
return _obj

@property
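For reference, the meshgrid call in DataCube.__init__ above passes the coordinates in (dim1, dim0, dim2) order because numpy's default indexing='xy' swaps the first two axes. A minimal sketch with assumed toy coordinates:

import numpy as np

t = np.arange(3)          # dim0 (time)
y = np.arange(4) * 50.0   # dim1
x = np.arange(5) * 50.0   # dim2

# with indexing='xy', outputs are shaped (len(arg2), len(arg1), len(arg3)),
# so the (dim1, dim0, dim2) input order yields the cube shape
_, T, _ = np.meshgrid(y, t, x)
print(T.shape)     # (3, 4, 5)
print(T[1, 0, 0])  # 1 -- the time value, broadcast over the spatial grid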
51 changes: 42 additions & 9 deletions deltametrics/io.py
@@ -163,25 +163,58 @@ def connect(self):
self.data_path, "w", format="NETCDF4")
_tempdataset.close()

if os.path.splitext(self.data_path)[-1] == '.nc':
_ext = os.path.splitext(self.data_path)[-1]
if _ext == '.nc':
_engine = 'netcdf4'
elif os.path.splitext(self.data_path)[-1] == '.hdf5':
elif _ext == '.hdf5':
_engine = 'h5netcdf'
else:
TypeError('File format current unsupported by DeltaMetrics.')
raise TypeError('File format is not supported '
'by DeltaMetrics: {0}'.format(_ext))

try:
# open the dataset
_dataset = xr.open_dataset(self.data_path, engine=_engine)
if set(['time', 'x', 'y']).issubset(set(_dataset.variables)):
self.dataset = _dataset.set_coords(['time', 'y', 'x'])
else:
self.dataset = _dataset.set_coords([])
warn('Dimensions "time", "y", and "x" not provided in the \
given data file.', UserWarning)
except Exception as e:
raise TypeError(
f'File format out of scope for DeltaMetrics: {e}')

# try to find if coordinates have been preconfigured
_coords_list = list(_dataset.coords)
if set(['time', 'x', 'y']).issubset(set(_coords_list)):
# the coordinates are preconfigured
self.dataset = _dataset.set_coords(_coords_list)
self.dims = list(self.dataset.dims)
self.coords = list(self.dataset.coords)
elif set(['total_time', 'length', 'width']).issubset(set(_dataset.dims.keys())):
# the coordinates are not set, but there are matching arrays
# this is a legacy option, so issue a warning here
self.dataset = _dataset.set_coords(['x', 'y', 'time'])
self.dims = ['time', 'length', 'width']
self.coords = ['total_time', 'x', 'y']
warn('Coordinates for "time" and ("y", "x") were found as '
'variables in the underlying data file, '
'but are not specified as coordinates in the underlying '
'data file. Please reformat the data file for use '
'with DeltaMetrics. This warning may be replaced '
'with an Error in a future version.', UserWarning)
else:
# coordinates were not found and are not being set
raise NotImplementedError(
'Underlying NetCDF datasets without any specified coordinates '
'are not supported. See source for additional notes about '
'how to implement this feature.')
# DEVELOPER NOTE: it may be possible to support a netcdf file that
# does not have specified coordinates, but we need a test case to
# make it work. It may work to just pass everything along to the
# cube, and let xarray automatically handle the naming of
# coordinates, but I have not tested this.

# self.dataset = _dataset.set_coords([])
# self.dims = []
# warn('Coordinates for "time", and set("x", "y") not provided in the \
# given data file.', UserWarning)

try:
_meta = xr.open_dataset(self.data_path, group='meta',
engine=_engine)
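The new connect() branches on whether the data file already declares coordinates. A rough reproduction of the two cases with an in-memory dataset (names assumed for illustration):

import numpy as np
import xarray as xr

arr = np.zeros((2, 3, 4))

# preconfigured: 'time', 'y', 'x' are already coordinates
ds1 = xr.Dataset(
    {'eta': (('time', 'y', 'x'), arr)},
    coords={'time': [0, 1], 'y': np.arange(3.0), 'x': np.arange(4.0)})
print(set(['time', 'x', 'y']).issubset(set(ds1.coords)))  # True

# legacy: coordinate arrays stored as plain variables over dims named
# total_time/length/width, so they must be promoted with set_coords
ds2 = xr.Dataset(
    {'eta': (('total_time', 'length', 'width'), arr),
     'time': ('total_time', np.array([0.0, 1.0])),
     'y': ('length', np.arange(3.0)),
     'x': ('width', np.arange(4.0))})
ds2 = ds2.set_coords(['x', 'y', 'time'])
print(list(ds2.coords))  # ['time', 'y', 'x'] -- now coordinates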
24 changes: 12 additions & 12 deletions deltametrics/plan.py
@@ -1264,9 +1264,9 @@ def compute_channel_width(channelmask, section=None, return_widths=False):
section : :obj:`~deltametrics.section.BaseSection` subclass, or :obj:`ndarray`
The section along which to compute channel widths. If a `Section` type
is passed, the `.trace` attribute will be used to query the
is passed, the `.idx_trace` attribute will be used to query the
`ChannelMask` and determine widths. Otherwise, an `Nx2` array can be
passed, which specified the x-y coordinate pairs to use as the
passed, which specifies the `dim1-dim2` coordinate pairs to use as the
trace.
return_widths : bool, optional
@@ -1311,8 +1311,8 @@ def compute_channel_width(channelmask, section=None, return_widths=False):
"""
if not (section is None):
if issubclass(type(section), dm_section.BaseSection):
section_trace = section._xytrace
section_coord = section._s
section_trace = section.idx_trace
section_coord = section._s.data
elif isinstance(section, np.ndarray):
section_trace = section
section_coord = np.arange(len(section))
@@ -1365,11 +1365,11 @@ def _get_channel_starts_and_ends(channelmask, section_trace):
.. important::
section_trace must be the index coordinates of the section trace, and
not the coordinate values that are returned from `section.trace`.
not the coordinate values that are returned from `section.idx_trace`.
"""
_channelseries = channelmask[section_trace[:, 1],
section_trace[:, 0]].astype(int)
_channelseries = channelmask[section_trace[:, 0],
section_trace[:, 1]].astype(int)
_padchannelseries = np.pad(_channelseries, (1,), 'constant',
constant_values=(False)).astype(int)
_channelseries_diff = _padchannelseries[1:] - _padchannelseries[:-1]
@@ -1407,9 +1407,9 @@ def compute_channel_depth(channelmask, depth, section=None,
section : :obj:`~deltametrics.section.BaseSection` subclass, or :obj:`ndarray`
The section along which to compute channel depths. If a `Section` type
is passed, the `.trace` attribute will be used to query the
is passed, the `.idx_trace` attribute will be used to query the
`ChannelMask` and determine depths. Otherwise, an `Nx2` array can be
passed, which specified the x-y coordinate pairs to use as the
passed, which specifies the `dim1-dim2` coordinate pairs to use as the
trace.
depth_type : :obj:`str`
Expand All @@ -1434,8 +1434,8 @@ def compute_channel_depth(channelmask, depth, section=None,
"""
if not (section is None):
if issubclass(type(section), dm_section.BaseSection):
section_trace = section._xytrace
section_coord = section._s
section_trace = section.idx_trace
section_coord = section._s.data
elif isinstance(section, np.ndarray):
section_trace = section
section_coord = np.arange(len(section))
Expand Down Expand Up @@ -1464,7 +1464,7 @@ def compute_channel_depth(channelmask, depth, section=None,

# get the depth array along the section
_depthslice = np.copy(depth)
_depthseries = _depthslice[section_trace[:, 1], section_trace[:, 0]]
_depthseries = _depthslice[section_trace[:, 0], section_trace[:, 1]]

# for depth and area of channels, we loop through each discrete channel
_channel_depth_means = np.full(len(_channelwidths), np.nan)
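The indexing fix above matters because the first column of the index trace indexes the first array axis (dim1) and the second column the second (dim2). A small numpy sketch of that convention, plus the padded-diff trick used to delimit channels (toy mask, not from the commit):

import numpy as np

channelmask = np.array([[0, 1, 1, 0, 1],
                        [0, 1, 1, 0, 1]])

# an Nx2 trace of (dim1, dim2) index pairs along the first row
trace = np.column_stack((np.zeros(5, dtype=int), np.arange(5)))
_channelseries = channelmask[trace[:, 0], trace[:, 1]].astype(int)

# pad with zeros so channels touching either end are still delimited
_pad = np.pad(_channelseries, (1,), 'constant', constant_values=(0))
_diff = _pad[1:] - _pad[:-1]
starts = np.where(_diff == 1)[0]   # [1 4]
ends = np.where(_diff == -1)[0]    # [3 5]
print(ends - starts)               # per-channel widths in pixels: [2 1]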
12 changes: 6 additions & 6 deletions deltametrics/plot.py
@@ -634,11 +634,11 @@ def get_display_arrays(VarInst, data=None):
# # DataSection # #
data = data or 'spacetime'
if data in VarInst.strat._spacetime_names:
_S, _Z = np.meshgrid(VarInst['s'], VarInst['z'])
_S, _Z = np.meshgrid(VarInst['s'], VarInst[VarInst.dims[0]])
return VarInst.values, _S, _Z
elif data in VarInst.strat._preserved_names:
VarInst.strat._check_knows_spacetime()
_S, _Z = np.meshgrid(VarInst['s'], VarInst['z'])
_S, _Z = np.meshgrid(VarInst['s'], VarInst[VarInst.dims[0]])
return VarInst.strat.as_preserved(), _S, _Z
elif data in VarInst.strat._stratigraphy_names:
_sp = VarInst.strat.as_stratigraphy()
@@ -657,7 +657,7 @@
elif data in VarInst.strat._preserved_names:
VarInst.strat._check_knows_spacetime() # always False
elif data in VarInst.strat._stratigraphy_names:
_S, _Z = np.meshgrid(VarInst['s'], VarInst['z'])
_S, _Z = np.meshgrid(VarInst['s'], VarInst[VarInst.dims[0]])
return VarInst, _S, _Z
else:
raise ValueError('Bad data argument: %s' % str(data))
@@ -709,7 +709,7 @@ def _reshape_long(X):
return np.vstack((X[:, :-1].flatten(),
X[:, 1:].flatten())).T.reshape(-1, 2, 1)
data = data or 'spacetime'
_S, _Z = np.meshgrid(VarInst['s'], VarInst['z'])
_S, _Z = np.meshgrid(VarInst['s'], VarInst[VarInst.dims[0]])
if data in VarInst.strat._spacetime_names:
z = _reshape_long(_Z)
vals = VarInst[:, :-1]
@@ -782,7 +782,7 @@ def get_display_limits(VarInst, data=None, factor=1.5):
if VarInst.slicetype == 'data_section':
# # DataSection # #
data = data or 'spacetime'
_S, _Z = np.meshgrid(VarInst['s'], VarInst['z'])
_S, _Z = np.meshgrid(VarInst['s'], VarInst[VarInst.dims[0]])
if data in VarInst.strat._spacetime_names:
return np.min(_S), np.max(_S), \
np.min(_Z), np.max(_Z)
@@ -801,7 +801,7 @@
elif VarInst.slicetype == 'stratigraphy_section':
# # StratigraphySection # #
data = data or 'stratigraphy'
_S, _Z = np.meshgrid(VarInst['s'], VarInst['z'])
_S, _Z = np.meshgrid(VarInst['s'], VarInst[VarInst.dims[0]])
if data in VarInst.strat._spacetime_names:
VarInst.strat._check_knows_spacetime() # always False
elif data in VarInst.strat._preserved_names:
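The repeated plot.py change swaps the hard-coded 'z' lookup for VarInst[VarInst.dims[0]], which works for any first-dimension name. A short sketch with an assumed DataArray:

import numpy as np
import xarray as xr

da = xr.DataArray(
    np.zeros((3, 4)), dims=['z', 's'],
    coords={'z': np.linspace(0.0, 1.0, 3), 's': np.arange(4) * 10.0})

# da.dims[0] is 'z' here, but would be 't' for a spacetime section
_S, _Z = np.meshgrid(da['s'], da[da.dims[0]])
print(_S.shape, _Z.shape)  # (3, 4) (3, 4)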