Skip to content

Commit

Permalink
Add a TileDB Array Proxy (#22)
Browse files Browse the repository at this point in the history
* Add a TileDB data proxy

* Usage testing changes
  • Loading branch information
DPeterK authored Mar 2, 2020
1 parent 82de21c commit f953112
Showing 1 changed file with 42 additions and 9 deletions.
51 changes: 42 additions & 9 deletions nctotdb/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,36 @@
"_FillValue",
])


# Inspired by https://github.com/SciTools/iris/blob/master/lib/iris/fileformats/netcdf.py#L418.
class TileDBDataProxy:
    """
    A proxy to the data of a single TileDB array attribute.

    The proxy holds only a description of the data (``shape`` and ``dtype``)
    and where to find it (``path`` and ``var_name``). No array handle is kept
    on the instance: the array at ``path`` is opened each time the proxy is
    indexed and closed again before the result is returned.

    Attributes:
        shape: Sequence of dimension lengths of the proxied data.
        dtype: Data type of the proxied attribute.
        path: Location of the TileDB array, as passed to ``tiledb.open``.
        var_name: Name of the attribute to read from the array.

    """

    __slots__ = ("shape", "dtype", "path", "var_name")

    def __init__(self, shape, dtype, path, var_name):
        self.shape = shape
        self.dtype = dtype
        self.path = path
        self.var_name = var_name

    @property
    def ndim(self):
        """The number of dimensions, i.e. the length of ``shape``."""
        return len(self.shape)

    def __getitem__(self, keys):
        """Open the array read-only and return ``var_name``'s data at ``keys``."""
        # Opening per call means there is no open handle to carry around
        # (or to serialise) between reads.
        with tiledb.open(self.path, 'r') as A:
            data = A[keys][self.var_name]
        return data

    def __repr__(self):
        template = ("<{name} shape={shape!r} dtype={dtype!r} "
                    "path={path!r} var_name={var_name!r}>")
        return template.format(name=type(self).__name__,
                               shape=self.shape,
                               dtype=self.dtype,
                               path=self.path,
                               var_name=self.var_name)

    def __getstate__(self):
        """Return the slot values as a dict so that the proxy can be pickled."""
        return {attr: getattr(self, attr) for attr in self.__slots__}

    def __setstate__(self, state):
        """Restore the slot values from a dict produced by ``__getstate__``."""
        for key, value in state.items():
            setattr(self, key, value)


class Reader(object):
"""
Abstract reader class that defines the API.
Expand Down Expand Up @@ -223,16 +253,20 @@ def _extract(self, array_name):

return named_array_path, dim_paths

def _array_shape(self, nonempty_domain):
def _array_shape(self, nonempty_domain, slices=False):
"""
Use the TileDB array's nonempty domain (i.e. the domain that encapsulates
all written cells) to set the shape of the data to be read out of the
TileDB array.
"""
# We need to include the stop index, not exclude it.
slices = [slice(start, stop+1, 1) for (start, stop) in nonempty_domain]
return tuple(slices) # Can only index with tuple, not list.
if slices:
slices = [slice(start, stop+1, 1) for (start, stop) in nonempty_domain]
return tuple(slices) # Can only index with tuple, not list.
else:
# TileDB describes the nonempty domain quite annoyingly!
return [filled[1]+1 for filled in nonempty_domain]

def _handle_attributes(self, attrs, exclude_keys=None):
"""
Expand All @@ -256,17 +290,16 @@ def _from_tdb_array(self, array_path, naming_key, array_name=None, to_dask=False
metadata = {k: v for k, v in A.meta.items()}
if array_name is None:
array_name = metadata[naming_key]
array_inds = self._array_shape(A.nonempty_domain())
# This may well not maintain lazy data...
if to_dask:
# Borrowed from:
# https://github.com/dask/dask/blob/master/dask/array/tiledb_io.py#L4-L6
schema = A.schema
dtype = schema.attr(array_name).dtype
chunks = [schema.domain.dim(i).tile for i in range(schema.ndim)]
points = da.from_array(A[array_inds][array_name],
chunks,
name=naming_key)
array_shape = self._array_shape(A.nonempty_domain())
proxy = TileDBDataProxy(array_shape, dtype, array_path, array_name)
points = da.from_array(proxy, chunks, name=naming_key)
else:
array_inds = self._array_shape(A.nonempty_domain(), slices=True)
points = A[array_inds][array_name]
return metadata, points

Expand Down

0 comments on commit f953112

Please sign in to comment.