unpin xarray, numpy, pandas, netcdf4 (#25)
* unpin xarray, numpy, pandas, netcdf4

* Fix the deprecation "TypeError: Using a DataArray object to construct a variable is ambiguous, please extract the data using the .data property."  See pydata/xarray#6508.  Other errors remain, however.

* See Unidata/netcdf4-python#1175 "Regression in createVariable between 1.5.8 and 1.6.0".

* Testing on Python 3.7 only covers through xarray 0.20.2.  This is an experiment.

* Fix get_metadata to account for the internal restructuring of indexes in xarray 2022.06, and update the tests to match.

* Fix a bug where attrs were inadvertently stripped when writing netCDF.

* TestPlainGroupby.test_on_data_array fails with Python 3.8.13, numpy 1.23.2, and xarray 2022.06.0, so the failure has nothing to do with BrainIO.

* test_on_data_array deliberately involves no BrainIO classes, so it tests for bugs in xarray itself.  With xarray==2022.06.0, this test fails.

* xarray 2022.06.0 has a bug which breaks BrainIO: pydata/xarray#6836.  A minimal reproduction is sketched below, ahead of the file diffs.

* Adapt get_metadata to the change in the index API between 2022.03.0 and 2022.06.0.  Now test_get_metadata passes under both 2022.03.0 and 2022.06.0.

* Getting an error from tests on Travis (but not locally):  RuntimeError: NetCDF: Filter error: bad id or parameters or duplicate filter.  This might fix it?

* Compression test failed:  assert 614732 > 615186.  This might fix it.

* Travis doesn't offer Python 3.10 yet.  Make the sample assembly bigger so compression has an effect.

* Bump minor version.

Authored-by: Jonathan Prescott-Roy <[email protected]> and Martin Schrimpf <[email protected]>
mschrimpf authored Sep 27, 2022
1 parent 21f215f commit f913068
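
A minimal reproduction of the groupby failure referenced in the message (pydata/xarray#6836), sketched here with plain xarray and no BrainIO helpers; the set_index call stands in for BrainIO's gather_indexes, and the behavior notes come from the test added in this commit rather than from checking every release:

import xarray as xr

da = xr.DataArray(
    data=[[0, 1, 2], [3, 4, 5], [6, 7, 8]],
    coords={
        "greek": ("a", ["alpha", "beta", "gamma"]),
        "colors": ("a", ["red", "green", "blue"]),
        "compass": ("b", ["north", "south", "east"]),
        "integer": ("b", [0, 1, 2]),
    },
    dims=("a", "b"),
)
# build MultiIndexes on both dims, roughly what gather_indexes does
da = da.set_index(a=["greek", "colors"], b=["compass", "integer"])
# with xarray==2022.06.0 the next line reportedly raises
# "ValueError: conflicting multi-index level name 'greek' with dimension 'greek'";
# on unaffected releases it returns the per-group means
means = da.groupby("greek").mean(...)
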
Showing 7 changed files with 190 additions and 61 deletions.
8 changes: 6 additions & 2 deletions .travis.yml
@@ -3,8 +3,12 @@ matrix:
include:
- name: 3.7 public
python: '3.7'
- name: 3.7 private
python: '3.7'
- name: 3.8 public
python: '3.8'
- name: 3.9 public
python: '3.9'
- name: 3.9 private
python: '3.9'
env:
- PRIVATE_ACCESS=1
- secure: "CzOQmNMkHXavXihZWYL+G5sbdYq8KLrBWnorZEPhvsKDIKy1hhORCc+pAMXg+bjrPRXfRqZnX0XRRCoZbD9Mo9VvA1hIsV7i5bBbjMoyBTUn3vED0CQNBCgjaA2rLsHlJMtYdLoCOOAiaU+rTu2xxf0grjgKARzLpVNENmPgP0YqiXPEc7rdY3cifalCBpHTQgvu7Z6FR1yAdRsMfskTIwPa/GlTCNF8ZR+efuobQJrtApfzBgiH7+NJI5Aq6u8PWD6LqONCm2ut0NKL7BMNRMgwS3pjERr2spRWrLiCz05Y4icaUmhajPjCl3kMIjuHdw1OgvwQHuSW9hcgt0AXZoIC8qJqg5V39LrsYYPd5/sg7vcTZ+VRhWF5zDBMvTO0PFt36tpj9xnr2ATIPlp1ACXwi+fGPkPAJp3ZIHbl36lji6sB4WLwIISongseizqTAHKowmpCGqEL6TZB65/MThWBeccRNB1N4a3wG34Eu7n1XXqecK1c+68JO98fOQxwmQ/utOkQRcVQzmGyARUk7WyupoqMmAZbWxOJ5AzyXPiK2OGXmiVJSwlMQKtF7eqkLs8wWeQD+zQj2qoSqF45LdFQsww19W2wC0wHuTV6nDBaKB59lY5qFufDWT+Gh06jLk8UpgYANh9f3fH5ZgUKfnH7I17StuDEpxCZ1kxVKcA="
50 changes: 47 additions & 3 deletions brainio/assemblies.py
@@ -328,8 +328,8 @@ def array_is_element(arr, element):
return len(arr) == 1 and arr[0] == element


def get_metadata(assembly, dims=None, names_only=False, include_coords=True,
include_indexes=True, include_multi_indexes=False, include_levels=True):
def get_metadata_before_2022_06(assembly, dims=None, names_only=False, include_coords=True,
include_indexes=True, include_multi_indexes=False, include_levels=True):
"""
Return coords and/or indexes or index levels from an assembly, yielding either `name` or `(name, dims, values)`.
"""
@@ -362,6 +362,50 @@ def what(name, dims, values, names_only):
yield what(name, values.dims, values.values, names_only)


def get_metadata_after_2022_06(assembly, dims=None, names_only=False, include_coords=True,
include_indexes=True, include_multi_indexes=False, include_levels=True):
"""
Return coords and/or indexes or index levels from an assembly, yielding either `name` or `(name, dims, values)`.
"""
def what(name, dims, values, names_only):
if names_only:
return name
else:
return name, dims, values
if dims is None:
dims = assembly.dims + (None,) # all dims plus dimensionless coords
for name, values in assembly.coords.items():
none_but_keep = (not values.dims) and None in dims
shared = not (set(values.dims).isdisjoint(set(dims)))
if none_but_keep or shared:
if name in assembly.indexes: # it's an index
index = assembly.indexes[name]
if len(index.names) > 1: # it's a MultiIndex or level
if name in index.names: # it's a level
if include_levels:
yield what(name, values.dims, values.values, names_only)
else: # it's a MultiIndex
if include_multi_indexes:
yield what(name, values.dims, values.values, names_only)
else: # it's a single Index
if include_indexes:
yield what(name, values.dims, values.values, names_only)
else: # it's a coord
if include_coords:
yield what(name, values.dims, values.values, names_only)


def get_metadata(assembly, dims=None, names_only=False, include_coords=True,
include_indexes=True, include_multi_indexes=False, include_levels=True):
try:
xr.DataArray().stack(create_index=True)
yield from get_metadata_after_2022_06(assembly, dims, names_only, include_coords,
include_indexes, include_multi_indexes, include_levels)
except TypeError as e:
yield from get_metadata_before_2022_06(assembly, dims, names_only, include_coords,
include_indexes, include_multi_indexes, include_levels)


def coords_for_dim(assembly, dim):
result = OrderedDict()
meta = get_metadata(assembly, dims=(dim,), include_indexes=False, include_levels=False)
@@ -415,7 +459,7 @@ def correct_stimulus_id_name(cls, assembly):
names = list(get_metadata(assembly, dims=('presentation',), names_only=True))
if 'image_id' in names and 'stimulus_id' not in names:
assembly = assembly.assign_coords(
stimulus_id=('presentation', assembly['image_id']),
stimulus_id=('presentation', assembly['image_id'].data),
)
return assembly
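
The before/after split in get_metadata earlier in this file hinges on how MultiIndex levels show up in an assembly's .indexes mapping; a small sketch of the difference, with a toy DataArray assumed here purely for illustration:

import xarray as xr

da = xr.DataArray(
    data=[1, 2, 3, 4],
    coords={"up": ("a", ["x", "x", "y", "y"]), "down": ("a", [1, 2, 1, 2])},
    dims=["a"],
)
da = da.set_index(a=["up", "down"])  # "a" becomes a MultiIndex with levels "up" and "down"

# Before the 2022.06 index refactor, only the dimension name is a key:
#     list(da.indexes) -> ["a"]
# From 2022.06 on, every level name is also a key, mapping to the same MultiIndex:
#     list(da.indexes) -> ["a", "up", "down"]
# get_metadata probes for the newer API by calling stack(create_index=True), which
# older releases reject with a TypeError, and dispatches to the matching implementation.
print(list(da.indexes))
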

6 changes: 3 additions & 3 deletions brainio/packaging.py
@@ -208,9 +208,9 @@ def write_netcdf(assembly, target_netcdf_file, append=False, group=None, compres
mode = "a" if append else "w"
target_netcdf_file.parent.mkdir(parents=True, exist_ok=True)
if compress:
ds = assembly.to_dataset(name="data")
compression = dict(zlib=True, complevel=1)
encoding = {var: compression for var in ds.variables}
ds = assembly.to_dataset(name="data", promote_attrs=True)
compression = dict(zlib=True, complevel=9)
encoding = {var: compression for var in ds.data_vars}
ds.to_netcdf(target_netcdf_file, mode=mode, group=group, encoding=encoding)
else:
assembly.to_netcdf(target_netcdf_file, mode=mode, group=group)
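
A rough sketch of what the two write_netcdf changes buy, with the attrs and file name made up for illustration: promote_attrs=True carries the assembly's attrs onto the Dataset (previously they were silently dropped on the compressed path), and keying the encoding off ds.data_vars instead of ds.variables keeps the zlib filter settings away from coordinate/index variables, which the commit message suspects triggered the Travis-only "NetCDF: Filter error":

import xarray as xr

da = xr.DataArray([1.0, 2.0, 3.0], dims=["x"], coords={"x": [10, 20, 30]},
                  attrs={"identifier": "demo"})  # hypothetical attrs

ds = da.to_dataset(name="data", promote_attrs=True)
assert ds.attrs["identifier"] == "demo"  # attrs survive the DataArray -> Dataset conversion

compression = dict(zlib=True, complevel=9)
encoding = {var: compression for var in ds.data_vars}  # only "data", not the "x" coordinate
ds.to_netcdf("demo.nc", mode="w", encoding=encoding)
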
10 changes: 5 additions & 5 deletions setup.py
@@ -12,15 +12,15 @@
"tqdm",
"Pillow",
"entrypoints",
"numpy>=1.16.5, !=1.21.*",
"pandas>=1.2.0, !=1.3.0",
"xarray==0.17.0",
"netcdf4==1.5.8",
"numpy",
"pandas",
"xarray!=2022.06.0", # 2022.06.0 has a bug which breaks BrainIO: https://github.com/pydata/xarray/issues/6836
"netcdf4!=1.6.0", # https://github.com/Unidata/netcdf4-python/issues/1175,
]

setup(
name='brainio',
version='0.1.0',
version='0.2.0',
description="Data management for quantitative comparison of brains and brain-inspired systems",
long_description=readme,
author="Jon Prescott-Roy, Martin Schrimpf",
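
The hard pins above become exclusions; as a quick check of what such specifiers admit (the packaging library used here is not a dependency of this repo, just a convenient illustration):

from packaging.specifiers import SpecifierSet

assert not SpecifierSet("!=2022.06.0").contains("2022.6.0")  # 2022.06.0 normalizes to 2022.6.0 and is excluded
assert SpecifierSet("!=2022.06.0").contains("2022.3.0")      # every other xarray release is allowed
assert SpecifierSet("!=1.6.0").contains("1.5.8")             # netcdf4 1.5.8 remains installable
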
39 changes: 19 additions & 20 deletions tests/conftest.py
@@ -55,11 +55,8 @@ def make_proto_assembly():


def scattered_floats(lo, hi, num):
# a kludge: looks stochastic, but deterministic
mid = (hi + lo) / 2
half = mid - lo
jump = 8
return [mid + np.sin(x) * half for x in range(2, num * (jump + 1), jump)][:num]
rng = np.random.default_rng(12345)
return rng.random(num) * (hi - lo) + lo


# taken from values in /braintree/data2/active/users/sachis/projects/oasis900/monkeys/oleo/mworksproc/oleo_oasis900_210216_113846_mwk.csv
@@ -90,23 +87,25 @@ def make_meta_assembly():
return a


def make_spk_assembly():
def make_spk_assembly(magnitude=3):
size = 10**magnitude
half = int((10**magnitude) / 2)
coords = {
"neuroid_id": ("event", ["A-019", "D-009"]*500),
"project": ("event", ["test"]*1000),
"datetime": ("event", np.repeat(np.datetime64('2021-02-16T11:41:55.000000000'), 1000)),
"animal": ("event", ["testo"]*1000),
"hemisphere": ("event", ["L", "R"]*500),
"region": ("event", ["V4", "IT"]*500),
"subregion": ("event", ["V4", "aIT"]*500),
"array": ("event", ["6250-002416", "4865-233455"]*500),
"bank": ("event", ["A", "D"]*500),
"electrode": ("event", ["019", "009"]*500),
"column": ("event", [5, 2]*500),
"row": ("event", [4, 8]*500),
"label": ("event", ["elec46", "elec123"]*500),
"neuroid_id": ("event", ["A-019", "D-009"]*half),
"project": ("event", ["test"]*size),
"datetime": ("event", np.repeat(np.datetime64('2021-02-16T11:41:55.000000000'), size)),
"animal": ("event", ["testo"]*size),
"hemisphere": ("event", ["L", "R"]*half),
"region": ("event", ["V4", "IT"]*half),
"subregion": ("event", ["V4", "aIT"]*half),
"array": ("event", ["6250-002416", "4865-233455"]*half),
"bank": ("event", ["A", "D"]*half),
"electrode": ("event", ["019", "009"]*half),
"column": ("event", [5, 2]*half),
"row": ("event", [4, 8]*half),
"label": ("event", ["elec46", "elec123"]*half),
}
data = sorted(scattered_floats(67.7, 21116.2, 1000))
data = sorted(scattered_floats(67.7, 21116.2, size))
a = SpikeTimesAssembly(
data=data,
coords=coords,
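
A brief usage sketch of the revised fixtures above (the calls here are illustrative only): the seeded default_rng makes scattered_floats deterministic without the old sine-wave kludge, and the new magnitude parameter lets test_compression ask for 10**6 events so zlib actually has something to compress:

import numpy as np

def scattered_floats(lo, hi, num):
    rng = np.random.default_rng(12345)   # fixed seed: looks stochastic, reproduces exactly
    return rng.random(num) * (hi - lo) + lo

a = scattered_floats(67.7, 21116.2, 10**6)
b = scattered_floats(67.7, 21116.2, 10**6)
assert np.array_equal(a, b)              # deterministic across runs
assert a.min() >= 67.7 and a.max() < 21116.2
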
134 changes: 108 additions & 26 deletions tests/test_assemblies.py
@@ -15,6 +15,48 @@
SpikeTimesAssembly, get_metadata


def test_get_metadata():
xr.show_versions()
# assembly, dims, names_only, include_coords, include_indexes, include_multi_indexes, include_levels
assy = DataAssembly(
data=[[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]],
coords={
'up': ("a", ['alpha', 'alpha', 'beta', 'beta', 'beta', 'beta']),
'down': ("a", [1, 1, 1, 1, 2, 2]),
'why': ("a", ['yes', 'yes', 'yes', 'yes', 'yes', 'no']),
'b': ('b', ['x', 'y', 'z']),
},
dims=['a', 'b']
)
assy = assy.reset_index('why')
assert set(get_metadata(assy, None, True, True, True, True, True)) == {'a', 'up', 'down', 'why', 'b'}
assert set(get_metadata(assy, None, True, True, True, True, False)) == {'a', 'why', 'b'}
assert set(get_metadata(assy, None, True, True, True, False, True)) == {'up', 'down', 'why', 'b'}
assert set(get_metadata(assy, None, True, True, True, False, False)) == {'why', 'b'}
assert set(get_metadata(assy, None, True, True, False, True, True)) == {'a', 'up', 'down', 'why'}
assert set(get_metadata(assy, None, True, True, False, True, False)) == {'a', 'why'}
assert set(get_metadata(assy, None, True, True, False, False, True)) == {'up', 'down', 'why'}
assert set(get_metadata(assy, None, True, True, False, False, False)) == {'why'}
assert set(get_metadata(assy, None, True, False, True, True, True)) == {'a', 'up', 'down', 'b'}
assert set(get_metadata(assy, None, True, False, True, True, False)) == {'a', 'b'}
assert set(get_metadata(assy, None, True, False, True, False, True)) == {'up', 'down', 'b'}
assert set(get_metadata(assy, None, True, False, True, False, False)) == {'b'}
assert set(get_metadata(assy, None, True, False, False, True, True)) == {'a', 'up', 'down'}
assert set(get_metadata(assy, None, True, False, False, True, False)) == {'a'}
assert set(get_metadata(assy, None, True, False, False, False, True)) == {'up', 'down'}
assert set(get_metadata(assy, None, True, False, False, False, False)) == set()

a = make_proto_assembly()
md_all = list(get_metadata(a))
assert len(md_all) == 4
md_coo = list(get_metadata(a, include_indexes=False, include_levels=False))
assert len(md_coo) == 0
md_ind = list(get_metadata(a, include_coords=False, include_indexes=True, include_multi_indexes=True, include_levels=False))
assert len(md_ind) == 2
md_lev = list(get_metadata(a, include_coords=False, include_indexes=False))
assert len(md_lev) == 4


def test_get_levels():
assy = DataAssembly(
data=[[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]],
@@ -40,12 +82,11 @@ def test_wrap_dataarray(self):
dims=['a', 'b']
)
assert "up" in da.coords
assert da["a"].variable.level_names is None
assert "a" not in da.indexes
da = gather_indexes(da)
assert da.coords.variables["a"].level_names == ["up", "down"]
assert da["a"].variable.level_names == ["up", "down"]
assert da.indexes["a"].names == ["up", "down"]
da = DataArray(da)
assert da.coords.variables["a"].level_names == ["up", "down"]
assert da.indexes["a"].names == ["up", "down"]
assert da["up"] is not None

def test_wrap_dataassembly(self):
@@ -58,11 +99,9 @@ def test_wrap_dataassembly(self):
},
dims=['a', 'b']
)
assert assy.coords.variables["a"].level_names == ["up", "down"]
assert assy["a"].variable.level_names == ["up", "down"]
assert assy.indexes["a"].names == ["up", "down"]
da = DataArray(assy)
assert da.coords.variables["a"].level_names == ["up", "down"]
assert da["a"].variable.level_names == ["up", "down"]
assert assy.indexes["a"].names == ["up", "down"]
assert da["up"] is not None

def test_reset_index(self):
@@ -109,6 +148,7 @@ def test_getitem(self):
)
single = assy[0, 0]
assert type(single) is type(assy)
assert single == 1

def test_is_fastpath(self):
"""In DataAssembly.__init__ we have to check whether fastpath is present in a set of arguments and true
@@ -164,17 +204,14 @@ def test_align(self):
dims=['a', 'b']
)
assert hasattr(da1, "up")
assert da1.coords.variables["a"].level_names == ["up", "down"]
assert da1["a"].variable.level_names == ["up", "down"]
assert da1.indexes["a"].names == ["up", "down"]
assert da1["up"] is not None
aligned1, aligned2 = xr.align(da1, da2, join="outer")
assert hasattr(aligned1, "up")
assert aligned1.coords.variables["a"].level_names == ["up", "down"]
assert aligned1["a"].variable.level_names == ["up", "down"]
assert aligned1.indexes["a"].names == ["up", "down"]
assert aligned1["up"] is not None
assert hasattr(aligned2, "up")
assert aligned2.coords.variables["a"].level_names == ["up", "down"]
assert aligned2["a"].variable.level_names == ["up", "down"]
assert aligned2.indexes["a"].names == ["up", "down"]
assert aligned2["up"] is not None


@@ -202,6 +239,60 @@ def test_incorrect_coord(self):
d.sel(coordB=0)


class TestPlainGroupby:

def test_on_data_array(self):
d = DataArray(
data=[
[0, 1, 2, 3, 4, 5, 6],
[7, 8, 9, 10, 11, 12, 13],
[14, 15, 16, 17, 18, 19, 20]
],
coords={
"greek": ("a", ['alpha', 'beta', 'gamma']),
"colors": ("a", ['red', 'green', 'blue']),
"compass": ("b", ['north', 'south', 'east', 'west', 'northeast', 'southeast', 'southwest']),
"integer": ("b", [0, 1, 2, 3, 4, 5, 6]),
},
dims=("a", "b")
)
d = gather_indexes(d)
g = d.groupby('greek')
# with xarray==2022.06.0, the following line fails with:
# ValueError: conflicting multi-index level name 'greek' with dimension 'greek'
m = g.mean(...)
c = DataArray(
data=[3, 10, 17],
coords={'greek': ('greek', ['alpha', 'beta', 'gamma'])},
dims=['greek']
)
assert m.equals(c)

def test_on_data_assembly(self):
d = DataAssembly(
data=[
[0, 1, 2, 3, 4, 5, 6],
[7, 8, 9, 10, 11, 12, 13],
[14, 15, 16, 17, 18, 19, 20]
],
coords={
"greek": ("a", ['alpha', 'beta', 'gamma']),
"colors": ("a", ['red', 'green', 'blue']),
"compass": ("b", ['north', 'south', 'east', 'west', 'northeast', 'southeast', 'southwest']),
"integer": ("b", [0, 1, 2, 3, 4, 5, 6]),
},
dims=("a", "b")
)
g = d.groupby('greek')
m = g.mean(...)
c = DataAssembly(
data=[3, 10, 17],
coords={'greek': ('greek', ['alpha', 'beta', 'gamma'])},
dims=['greek']
)
assert m.equals(c)


class TestMultiGroupby:
def test_single_dimension(self):
d = DataAssembly([[1, 2, 3], [4, 5, 6]], coords={'a': ['a', 'b'], 'b': ['x', 'y', 'z']}, dims=['a', 'b'])
Expand All @@ -228,13 +319,14 @@ def test_single_coord(self):
},
dims=("a", "b")
)
g = d.multi_groupby(['greek']).mean(...)
g = d.multi_groupby(['greek'])
m = g.mean(...)
c = DataAssembly(
data=[3, 10, 17],
coords={'greek': ('greek', ['alpha', 'beta', 'gamma'])},
dims=['greek']
)
assert g.equals(c)
assert m.equals(c)

def test_single_dim_multi_coord(self):
d = DataAssembly([1, 2, 3, 4, 5, 6],
@@ -452,15 +544,5 @@ def test_load_extras(self, test_stimulus_set_identifier):
assert extra.shape == (40,)


def test_get_metadata():
a = make_proto_assembly()
md_all = list(get_metadata(a))
assert len(md_all) == 4
md_coo = list(get_metadata(a, include_indexes=False, include_levels=False))
assert len(md_coo) == 0
md_ind = list(get_metadata(a, include_coords=False, include_indexes=True, include_multi_indexes=True, include_levels=False))
assert len(md_ind) == 2
md_lev = list(get_metadata(a, include_coords=False, include_indexes=False))
assert len(md_lev) == 4


4 changes: 2 additions & 2 deletions tests/test_packaging.py
@@ -176,9 +176,9 @@ def test_package_extras(test_stimulus_set_identifier, test_catalog_identifier, b


def test_compression(test_write_netcdf_path):
write_netcdf(make_spk_assembly(), test_write_netcdf_path, compress=False)
write_netcdf(make_spk_assembly(6), test_write_netcdf_path, compress=False)
uncompressed = test_write_netcdf_path.stat().st_size
write_netcdf(make_spk_assembly(), test_write_netcdf_path, compress=True)
write_netcdf(make_spk_assembly(6), test_write_netcdf_path, compress=True)
compressed = test_write_netcdf_path.stat().st_size
assert uncompressed > compressed

