diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7df2e81
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__/
+*.n5
+*.ome.zarr
diff --git a/create_data.py b/create_data.py
new file mode 100644
index 0000000..051e126
--- /dev/null
+++ b/create_data.py
@@ -0,0 +1,120 @@
+import json
+import os
+
+import z5py
+from data_conversion import convert_bdv_n5
+from pybdv.metadata import (get_size, get_resolution,
+ write_size_and_resolution,
+ write_affine)
+
+from mobie.xml_utils import copy_xml_as_n5_s3
+from mobie.metadata.image_dict import default_layer_setting
+
+IMAGE_DICT = './data/images.json'
+
+
+def write_metadata(in_xml, out_xml, out_path):
+ bucket_name = 'i2k-2020'
+ path_in_bucket = os.path.split(out_path)[1]
+ copy_xml_as_n5_s3(in_xml, out_xml,
+ service_endpoint='https://s3.embl.de',
+ bucket_name=bucket_name,
+ path_in_bucket=path_in_bucket,
+ authentication='Anonymous',
+ bdv_type='bdv.zarr.s3')
+
+ with z5py.File(out_path, 'r') as f:
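+        # the converted ome.zarr dataset is 5d (t, c, z, y, x); drop the leading t and c axes to get the spatial shape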
+ shape = f['setup0/timepoint0/s0'].shape[2:]
+
+ # check if we need to update the shape and resolution
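+    # (if the conversion used start_scale > 0, the data starts at a lower scale level and is smaller than the size in the xml)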
+ exp_shape = get_size(out_xml, setup_id=0)
+    if list(shape) != list(exp_shape):
+ resolution = get_resolution(out_xml, setup_id=0)
+ scale_factor = [float(esh) / sh for sh, esh in zip(shape, exp_shape)]
+ resolution = [round(res * sf, 2) for res, sf in zip(resolution, scale_factor)]
+ print("Updating shape and resolution to:")
+ print(shape)
+ print(resolution)
+
+ write_size_and_resolution(out_xml, setup_id=0,
+ size=shape, resolution=resolution)
+
+ # make transformation the hacky way ...
+ dz, dy, dx = resolution
+ oz, oy, ox = 0., 0., 0.
+ trafo = '{} 0.0 0.0 {} 0.0 {} 0.0 {} 0.0 0.0 {} {}'.format(dx, ox,
+ dy, oy,
+ dz, oz)
+ trafo = list(map(float, trafo.split(' ')))
+ write_affine(out_xml, setup_id=0, affine=trafo, overwrite=True)
+
+
+def add_to_image_dict(name, layer_type, xml_path):
+ settings = default_layer_setting(layer_type)
+ storage = {"remote": os.path.split(xml_path)[1]}
+ settings.update({"storage": storage})
+
+ if os.path.exists(IMAGE_DICT):
+ with open(IMAGE_DICT) as f:
+ image_dict = json.load(f)
+ else:
+ image_dict = {}
+
+ image_dict[name] = settings
+ with open(IMAGE_DICT, 'w') as f:
+ json.dump(image_dict, f, indent=2, sort_keys=True)
+
+
+def add_volume(in_path, vol_name, layer_type, start_scale=0):
+ out_path = os.path.join('data', f'{vol_name}.ome.zarr')
+
+ # convert to ome zarr
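+    # the output keeps the bdv.n5 group layout (setup0/timepoint0)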
+ convert_bdv_n5(in_path=in_path,
+ out_path=out_path,
+ out_key='setup0/timepoint0',
+ vol_name=vol_name,
+ use_nested_store=False,
+ n_threads=8,
+ start_scale=start_scale)
+
+ # create the bdv.xml
+ in_xml = in_path.replace('.n5', '.xml')
+ out_xml = os.path.join('data', f'{vol_name}.xml')
+ write_metadata(in_xml, out_xml, out_path)
+
+ add_to_image_dict(vol_name, layer_type, out_xml)
+
+
+# add the myosin prospr data
+def add_myosin():
+ print("Add myosin")
+ in_path = os.path.join('/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/0.6.3',
+ 'images/local/prospr-6dpf-1-whole-mhcl4.n5')
+ add_volume(in_path, vol_name='prospr-myosin', layer_type='image')
+
+
+# add the em raw data
+def add_raw():
+ print("Add raw")
+ in_path = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/rawdata/sbem-6dpf-1-whole-raw.n5'
+ add_volume(in_path, vol_name='em-raw', layer_type='image', start_scale=3)
+
+
+# add the em cell segmentation
+def add_seg():
+ print("Add cells")
+ in_path = os.path.join('/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/1.0.1',
+ 'images/local/sbem-6dpf-1-whole-segmented-cells.n5')
+ add_volume(in_path, vol_name='em-cells', layer_type='segmentation', start_scale=2)
+
+
+def add_all_volumes():
+ os.makedirs('./data', exist_ok=True)
+ add_myosin()
+ add_raw()
+ add_seg()
+
+
+if __name__ == '__main__':
+ add_all_volumes()
diff --git a/data/em-cells.xml b/data/em-cells.xml
new file mode 100644
index 0000000..c6cdff1
--- /dev/null
+++ b/data/em-cells.xml
@@ -0,0 +1,43 @@
+<SpimData version="0.2">
+  <BasePath type="relative">.</BasePath>
+  <SequenceDescription>
+    <ViewSetups>
+      <Attributes name="channel">
+        <Channel>
+          <id>0</id>
+          <name>0</name>
+        </Channel>
+      </Attributes>
+      <ViewSetup>
+        <id>0</id>
+        <name>Setup0</name>
+        <size>3438 3240 2854</size>
+        <voxelSize>
+          <unit>micrometer</unit>
+          <size>0.08 0.08 0.1</size>
+        </voxelSize>
+        <attributes>
+          <channel>0</channel>
+        </attributes>
+      </ViewSetup>
+    </ViewSetups>
+    <Timepoints type="range">
+      <first>0</first>
+      <last>0</last>
+    </Timepoints>
+    <ImageLoader format="bdv.zarr.s3">
+      <Key>em-cells.ome.zarr</Key>
+      <SigningRegion>us-west-2</SigningRegion>
+      <ServiceEndpoint>https://s3.embl.de</ServiceEndpoint>
+      <BucketName>i2k-2020</BucketName>
+      <Authentication>Anonymous</Authentication>
+    </ImageLoader>
+  </SequenceDescription>
+  <ViewRegistrations>
+    <ViewRegistration timepoint="0" setup="0">
+      <ViewTransform type="affine">
+        <affine>0.08 0.0 0.0 0.0 0.0 0.08 0.0 0.0 0.0 0.0 0.1 0.0</affine>
+      </ViewTransform>
+    </ViewRegistration>
+  </ViewRegistrations>
+</SpimData>
diff --git a/data/em-raw.xml b/data/em-raw.xml
new file mode 100644
index 0000000..572e728
--- /dev/null
+++ b/data/em-raw.xml
@@ -0,0 +1,43 @@
+<SpimData version="0.2">
+  <BasePath type="relative">.</BasePath>
+  <SequenceDescription>
+    <ViewSetups>
+      <Attributes name="channel">
+        <Channel>
+          <id>1</id>
+          <name>1</name>
+        </Channel>
+      </Attributes>
+      <ViewSetup>
+        <id>0</id>
+        <name>channel 1</name>
+        <size>3438 3240 2854</size>
+        <voxelSize>
+          <unit>micrometer</unit>
+          <size>0.08 0.08 0.1</size>
+        </voxelSize>
+        <attributes>
+          <channel>1</channel>
+        </attributes>
+      </ViewSetup>
+    </ViewSetups>
+    <Timepoints type="range">
+      <first>0</first>
+      <last>0</last>
+    </Timepoints>
+    <ImageLoader format="bdv.zarr.s3">
+      <Key>em-raw.ome.zarr</Key>
+      <SigningRegion>us-west-2</SigningRegion>
+      <ServiceEndpoint>https://s3.embl.de</ServiceEndpoint>
+      <BucketName>i2k-2020</BucketName>
+      <Authentication>Anonymous</Authentication>
+    </ImageLoader>
+  </SequenceDescription>
+  <ViewRegistrations>
+    <ViewRegistration timepoint="0" setup="0">
+      <ViewTransform type="affine">
+        <affine>0.08 0.0 0.0 0.0 0.0 0.08 0.0 0.0 0.0 0.0 0.1 0.0</affine>
+      </ViewTransform>
+    </ViewRegistration>
+  </ViewRegistrations>
+</SpimData>
diff --git a/data/images.json b/data/images.json
new file mode 100644
index 0000000..6254a0b
--- /dev/null
+++ b/data/images.json
@@ -0,0 +1,35 @@
+{
+ "em-cells": {
+ "color": "randomFromGlasbey",
+ "contrastLimits": [
+ 0.0,
+ 1000.0
+ ],
+ "storage": {
+ "remote": "em-cells.xml"
+ },
+ "type": "segmentation"
+ },
+ "em-raw": {
+ "color": "white",
+ "contrastLimits": [
+ 0.0,
+ 255.0
+ ],
+ "storage": {
+ "remote": "em-raw.xml"
+ },
+ "type": "image"
+ },
+ "prospr-myosin": {
+ "color": "white",
+ "contrastLimits": [
+ 0.0,
+ 255.0
+ ],
+ "storage": {
+ "remote": "prospr-myosin.xml"
+ },
+ "type": "image"
+ }
+}
\ No newline at end of file
diff --git a/data/prospr-myosin.xml b/data/prospr-myosin.xml
new file mode 100644
index 0000000..94a5cd0
--- /dev/null
+++ b/data/prospr-myosin.xml
@@ -0,0 +1,43 @@
+<SpimData version="0.2">
+  <BasePath type="relative">.</BasePath>
+  <SequenceDescription>
+    <ViewSetups>
+      <Attributes name="channel">
+        <Channel>
+          <id>0</id>
+          <name>0</name>
+        </Channel>
+      </Attributes>
+      <ViewSetup>
+        <id>0</id>
+        <name>Setup0</name>
+        <size>500 471 519</size>
+        <voxelSize>
+          <unit>micrometer</unit>
+          <size>0.55 0.55 0.55</size>
+        </voxelSize>
+        <attributes>
+          <channel>0</channel>
+        </attributes>
+      </ViewSetup>
+    </ViewSetups>
+    <Timepoints type="range">
+      <first>0</first>
+      <last>0</last>
+    </Timepoints>
+    <ImageLoader format="bdv.zarr.s3">
+      <Key>prospr-myosin.ome.zarr</Key>
+      <SigningRegion>us-west-2</SigningRegion>
+      <ServiceEndpoint>https://s3.embl.de</ServiceEndpoint>
+      <BucketName>i2k-2020</BucketName>
+      <Authentication>Anonymous</Authentication>
+    </ImageLoader>
+  </SequenceDescription>
+  <ViewRegistrations>
+    <ViewRegistration timepoint="0" setup="0">
+      <ViewTransform type="affine">
+        <affine>0.55 0.0 0.0 0.0 0.0 0.55 0.0 0.0 0.0 0.0 0.55 0.0</affine>
+      </ViewTransform>
+    </ViewRegistration>
+  </ViewRegistrations>
+</SpimData>
diff --git a/data_conversion/__init__.py b/data_conversion/__init__.py
new file mode 100644
index 0000000..e28a454
--- /dev/null
+++ b/data_conversion/__init__.py
@@ -0,0 +1 @@
+from .to_ome_zarr import convert_bdv_n5
diff --git a/data_conversion/check_result.py b/data_conversion/check_result.py
new file mode 100644
index 0000000..1924db7
--- /dev/null
+++ b/data_conversion/check_result.py
@@ -0,0 +1,22 @@
+import sys
+import zarr
+
+
+def check_result(path, check_data):
+ with zarr.open(path, mode='r') as f:
+ for name, ds in f.items():
+ shape = ds.shape
+ chunks = ds.chunks
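+            # every scale level should be 5d (t, c, z, y, x) after the conversion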
+ assert len(shape) == len(chunks) == 5
+ print(name, shape, chunks)
+
+ if check_data:
+ data = ds[:]
+ # print(data[0, 0, :10, :10, :10])
+ assert data.shape == shape
+
+ print("All tests passed")
+
+
+if __name__ == '__main__':
+    path = sys.argv[1]
+    check_result(path, True)
diff --git a/data_conversion/joshs_script.py b/data_conversion/joshs_script.py
new file mode 100644
index 0000000..32cebe9
--- /dev/null
+++ b/data_conversion/joshs_script.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+
+# This assumes that n5-copy has already been used
+
+import argparse
+import zarr
+
+parser = argparse.ArgumentParser()
+parser.add_argument("input")
+parser.add_argument("output")
+ns = parser.parse_args()
+
+zin = zarr.open(ns.input)
+
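+# per resolution level: (name, shape, chunks, dtype)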
+sizes = []
+
+def groups(z):
+ rv = sorted(list(z.groups()))
+ assert rv
+ assert not list(z.arrays())
+ return rv
+
+def arrays(z):
+ rv = sorted(list(z.arrays()))
+ assert rv
+ assert not list(z.groups())
+ return rv
+
+setups = groups(zin)
+assert len(setups) == 1 # TODO: multiple channels?
+for sname, setup in setups:
+ timepoints = groups(setup)
+ for tname, timepoint in timepoints:
+ resolutions = arrays(timepoint)
+ for idx, rtuple in enumerate(resolutions):
+ rname, resolution = rtuple
+ try:
+ expected = sizes[idx]
+ assert expected[0] == rname
+ assert expected[1] == resolution.shape
+ assert expected[2] == resolution.chunks
+ assert expected[3] == resolution.dtype
+            except IndexError:
+ sizes.append((rname,
+ resolution.shape,
+ resolution.chunks,
+ resolution.dtype))
+
+
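+# write one 5d dataset per resolution level, with leading (timepoint, setup) axes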
+datasets = []
+out = zarr.open(ns.output, mode="w")
+
+for idx, size in enumerate(sizes):
+ name, shape, chunks, dtype = size
+ shape = tuple([len(timepoints), len(setups)] + list(shape))
+ chunks = tuple([1, 1] + list(chunks))
+ a = out.create_dataset(name, shape=shape, chunks=chunks, dtype=dtype)
+ datasets.append({"path": name})
+ for sidx, stuple in enumerate(groups(zin)):
+ for tidx, ttuple in enumerate(groups(stuple[1])):
+ resolutions = arrays(ttuple[1])
+ a[tidx, sidx, :, :, :] = resolutions[idx][1]
+out.attrs["multiscales"] = [
+ {
+ "version": "0.1",
+ "datasets": datasets,
+ }
+]
+
diff --git a/data_conversion/to_ome_zarr.py b/data_conversion/to_ome_zarr.py
new file mode 100644
index 0000000..9e07117
--- /dev/null
+++ b/data_conversion/to_ome_zarr.py
@@ -0,0 +1,184 @@
+import argparse
+import os
+import json
+import shutil
+from concurrent import futures
+
+import zarr
+import z5py # NOTE: once the issue with zarr opening n5 groups is resolved, we can also use zarr for reading the n5s
+from tqdm import tqdm
+from z5py.util import blocking
+from pybdv.util import get_scale_factors, relative_to_absolute_scale_factors
+
+
+def copy_dataset(ds_in, ds_out, n_threads, desc):
+ """ Copy input to output dataset in parallel.
+
+ Arguments:
+ ds_in [dataset] - input dataset (h5py, z5py or zarr dataset)
+ ds_out [dataset] - output dataset (h5py, z5py or zarr dataset)
+        n_threads [int] - number of threads used for copying
+        desc [str] - description shown in the progress bar
+ """
+
+ assert ds_in.shape == ds_out.shape
+ # only thread-safe for same chunk sizes !
+ assert ds_in.chunks == ds_out.chunks
+
+ blocks = blocking(ds_in.shape, ds_in.chunks)
+    blocks = list(blocks)
+ n_blocks = len(blocks)
+
+ def _copy_chunk(block):
+ # make sure we don't copy empty blocks; I don't know
+ # if zarr makes sure not to write them out
+ data_in = ds_in[block]
+ if data_in.sum() == 0:
+ return
+ ds_out[block] = data_in
+
+ with futures.ThreadPoolExecutor(n_threads) as tp:
+ list(tqdm(tp.map(_copy_chunk, blocks), total=n_blocks, desc=desc))
+
+
+def is_int(some_string):
+ try:
+ int(some_string)
+ return True
+ except ValueError:
+ return False
+
+
+def expand_chunks_nested(ds_path):
+
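+    # move the existing chunk folders one level down, below two new leading unit dimensions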
+ chunk_files = os.listdir(ds_path)
+ chunk_files = [cf for cf in chunk_files if is_int(cf)]
+
+ dim0 = os.path.join(ds_path, 'tmp0')
+ dim1 = os.path.join(dim0, '0')
+
+ os.makedirs(dim1)
+ for cf in chunk_files:
+ shutil.move(os.path.join(ds_path, cf), os.path.join(dim1, cf))
+
+ shutil.move(dim0, os.path.join(ds_path, '0'))
+
+
+def expand_chunks_flat(ds_path):
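+    # rename the flat chunk files so they gain two leading unit dimensions ("0.0." prefix)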
+ def is_chunk(some_name):
+ chunk_idx = some_name.split('.')
+ return all(map(is_int, chunk_idx)) and len(chunk_idx) > 0
+
+ chunk_files = os.listdir(ds_path)
+ chunk_files = [cf for cf in chunk_files if is_chunk(cf)]
+
+ for cf in chunk_files:
+ shutil.move(os.path.join(ds_path, cf),
+ os.path.join(ds_path, '0.0.' + cf))
+
+
+# NOTE this works because zarr doesn't have a chunk header
+# expand the 2 leading dimensions of the zarr dataset
+def expand_dims(ds_path, use_nested_store):
+ attrs_file = os.path.join(ds_path, '.zarray')
+ assert os.path.exists(attrs_file), attrs_file
+
+ if use_nested_store:
+ expand_chunks_nested(ds_path)
+ else:
+ expand_chunks_flat(ds_path)
+
+ with open(attrs_file) as f:
+ attrs = json.load(f)
+
+ shape = attrs['shape']
+ shape = [1, 1] + shape
+ attrs['shape'] = shape
+
+ chunks = attrs['chunks']
+ chunks = [1, 1] + chunks
+ attrs['chunks'] = chunks
+
+ with open(attrs_file, 'w') as f:
+ json.dump(attrs, f, indent=2, sort_keys=True)
+
+
+def normalize_scales(scales, start_scale):
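+    # drop the first start_scale levels and recompute the absolute scale factors relative to the new s0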
+ scales = scales[start_scale:]
+ normalized_scales = []
+
+ # to relative scale factors
+ s_prev = scales[0]
+ for scale in scales:
+ norm_scale = [s / sp for s, sp in zip(scale, s_prev)]
+ normalized_scales.append(norm_scale)
+ s_prev = scale
+
+ # to absolute scale factors
+ normalized_scales = relative_to_absolute_scale_factors(normalized_scales)
+ return normalized_scales
+
+
+def convert_bdv_n5(in_path, out_path, out_key, vol_name,
+ use_nested_store, n_threads, start_scale=0):
+
+ scales = get_scale_factors(in_path, setup_id=0)
+ if start_scale > 0:
+ scales = normalize_scales(scales, start_scale)
+
+ with z5py.File(in_path, mode='r') as f_in, zarr.open(out_path, mode='w') as f_out:
+ # we assume bdv.n5 file format and only a single channel
+ scale_group = f_in['setup0/timepoint0']
+ scale_names = [elem for elem in scale_group]
+ scale_names.sort()
+
+ if start_scale > 0:
+ scale_names = scale_names[start_scale:]
+
+ g_out = f_out.create_group(out_key)
+ out_names = []
+
+ for sid, name in enumerate(scale_names):
+ ds_in = scale_group[name]
+ out_name = f"s{sid}"
+
+ if use_nested_store:
+ store = zarr.NestedDirectoryStore(os.path.join(out_path, out_key, out_name))
+ else:
+ store = zarr.DirectoryStore(os.path.join(out_path, out_key, out_name))
+ ds_out = zarr.zeros(store=store,
+ shape=ds_in.shape,
+ chunks=ds_in.chunks,
+ dtype=ds_in.dtype)
+
+ desc = f"Copy {name} to {out_name}"
+ copy_dataset(ds_in, ds_out, n_threads, desc)
+
+ # this invalidates the shape and chunk attributes of our dataset,
+ # so we can't use it after that (but we also don't need to)
+ expand_dims(os.path.join(out_path, out_key, out_name), use_nested_store)
+ out_names.append(out_name)
+
+ assert len(out_names) == len(scales)
+
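+    # write the multiscales metadata; the scale factors are stored in reversed axis order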
+ g_out.attrs['multiscales'] = [
+ {
+ "name": vol_name,
+ "version": "0.1",
+ "datasets": [{"path": name} for name in out_names],
+ "scales": [scale[::-1] for scale in scales]
+ }
+ ]
+
+
+if __name__ == '__main__':
+ parser = argparse.ArgumentParser()
+ parser.add_argument('inp', type=str)
+ parser.add_argument('outp', type=str)
+ parser.add_argument('outk', type=str)
+    parser.add_argument('name', type=str)  # volume name written to the multiscales metadata
+    parser.add_argument('--use_nested_store', type=int, default=0)
+    parser.add_argument('--n_threads', type=int, default=8)
+    parser.add_argument('--start_scale', type=int, default=0)
+
+    args = parser.parse_args()
+    convert_bdv_n5(args.inp, args.outp, args.outk, args.name,
+                   bool(args.use_nested_store), args.n_threads, args.start_scale)