From b40706bb1e90a1e5beadecfb09a37479536ddfa3 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 12 Nov 2020 22:09:01 +0100 Subject: [PATCH 1/5] Add script to convert from n5.bdv to zarr.ome format --- data-conversion/check_result.py | 17 +++++ data-conversion/to_ome_zarr.py | 115 ++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 data-conversion/check_result.py create mode 100644 data-conversion/to_ome_zarr.py diff --git a/data-conversion/check_result.py b/data-conversion/check_result.py new file mode 100644 index 0000000..294f97c --- /dev/null +++ b/data-conversion/check_result.py @@ -0,0 +1,17 @@ +import sys +import zarr + + +def check_result(path): + with zarr.open(path, mode='r') as f: + for name, ds in f.items(): + shape = ds.shape + chunks = ds.chunks + assert len(shape) == len(chunks) == 5 + print(name, shape, chunks) + + print("All tests passed") + + +path = sys.argv[1] +check_result(path) diff --git a/data-conversion/to_ome_zarr.py b/data-conversion/to_ome_zarr.py new file mode 100644 index 0000000..eb29aef --- /dev/null +++ b/data-conversion/to_ome_zarr.py @@ -0,0 +1,115 @@ +import argparse +import os +import json +import shutil +from concurrent import futures + +import zarr +import z5py +from tqdm import tqdm +from z5py.util import blocking + + +def copy_dataset(ds_in, ds_out, n_threads): + """ Copy input to output dataset in parallel. + + Arguments: + ds_in [dataset] - input dataset (h5py, z5py or zarr dataset) + ds_out [dataset] - output dataset (h5py, z5py or zarr dataset) + n_threads [int] - number of threads, by default all are used (default: None) + Returns: + array_like - output + """ + + assert ds_in.shape == ds_out.shape + # only thread-safe for same chunk sizes ! + assert ds_in.chunks == ds_out.chunks + + blocks = blocking(ds_in.shape, ds_in.chunks) + blocks = [block for block in blocks] + n_blocks = len(blocks) + + def _copy_chunk(block): + ds_out[block] = ds_in[block] + + with futures.ThreadPoolExecutor(n_threads) as tp: + list(tqdm(tp.map(_copy_chunk, blocks), total=n_blocks)) + + +# expand the 2 leading dimensions of the zarr dataset +def expand_dims(ds_path): + attrs_file = os.path.join(ds_path, '.zarray') + assert os.path.exists(attrs_file) + + def is_int(some_string): + try: + int(some_string) + return True + except ValueError: + return False + + chunk_files = os.listdir(ds_path) + chunk_files = [cf for cf in chunk_files if is_int(cf)] + + dim0 = os.path.join(ds_path, 'tmp0') + dim1 = os.path.join(dim0, '0') + + os.makedirs(dim1) + for cf in chunk_files: + shutil.move(os.path.join(ds_path, cf), os.path.join(dim1, cf)) + + shutil.move(dim0, os.path.join(ds_path, '0')) + + with open(attrs_file) as f: + attrs = json.load(f) + + shape = attrs['shape'] + shape = [1, 1] + shape + attrs['shape'] = shape + + chunks = attrs['chunks'] + chunks = [1, 1] + chunks + attrs['chunks'] = chunks + + with open(attrs_file, 'w') as f: + json.dump(attrs, f, indent=2, sort_keys=True) + + +def convert_bdv_n5(in_path, out_path, n_threads): + with z5py.File(in_path, mode='r') as f_in, zarr.open(out_path, mode='w') as f_out: + # we assume bdv.n5 file format and only a single channel + scale_group = f_in['setup0/timepoint0'] + scale_names = [elem for elem in scale_group] + scale_names.sort() + + for name in scale_names: + ds_in = scale_group[name] + + store = zarr.NestedDirectoryStore(os.path.join(out_path, name)) + ds_out = zarr.zeros(store=store, + shape=ds_in.shape, + chunks=ds_in.chunks, + dtype=ds_in.dtype) + + copy_dataset(ds_in, ds_out, 
n_threads) + + # this invalidates the shape and chunk attributes of our dataset, + # so we can't use it after that (but we also don't need to) + expand_dims(os.path.join(out_path, name)) + + f_out.attrs['multiscalles'] = [ + { + "version": "0.1", + "datasets": [{"path": name} for name in scale_names] + } + ] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('inp', type=str) + parser.add_argument('outp', type=str) + parser.add_argument('--n_threads', type=int, default=8) + + args = parser.parse_args() + convert_bdv_n5(args.inp, args.outp, args.n_threads) From c7b6782f3d03fdd7303bb230b35e6a105476ccad Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Thu, 12 Nov 2020 22:26:56 +0100 Subject: [PATCH 2/5] Update ome zarr script to support flat chunk hierarchy --- data-conversion/check_result.py | 9 ++++-- data-conversion/to_ome_zarr.py | 55 ++++++++++++++++++++++++--------- 2 files changed, 48 insertions(+), 16 deletions(-) diff --git a/data-conversion/check_result.py b/data-conversion/check_result.py index 294f97c..1924db7 100644 --- a/data-conversion/check_result.py +++ b/data-conversion/check_result.py @@ -2,7 +2,7 @@ import zarr -def check_result(path): +def check_result(path, check_data): with zarr.open(path, mode='r') as f: for name, ds in f.items(): shape = ds.shape @@ -10,8 +10,13 @@ def check_result(path): assert len(shape) == len(chunks) == 5 print(name, shape, chunks) + if check_data: + data = ds[:] + # print(data[0, 0, :10, :10, :10]) + assert data.shape == shape + print("All tests passed") path = sys.argv[1] -check_result(path) +check_result(path, True) diff --git a/data-conversion/to_ome_zarr.py b/data-conversion/to_ome_zarr.py index eb29aef..3f36ef4 100644 --- a/data-conversion/to_ome_zarr.py +++ b/data-conversion/to_ome_zarr.py @@ -36,17 +36,15 @@ def _copy_chunk(block): list(tqdm(tp.map(_copy_chunk, blocks), total=n_blocks)) -# expand the 2 leading dimensions of the zarr dataset -def expand_dims(ds_path): - attrs_file = os.path.join(ds_path, '.zarray') - assert os.path.exists(attrs_file) +def is_int(some_string): + try: + int(some_string) + return True + except ValueError: + return False + - def is_int(some_string): - try: - int(some_string) - return True - except ValueError: - return False +def expand_chunks_nested(ds_path): chunk_files = os.listdir(ds_path) chunk_files = [cf for cf in chunk_files if is_int(cf)] @@ -60,6 +58,31 @@ def is_int(some_string): shutil.move(dim0, os.path.join(ds_path, '0')) + +def expand_chunks_flat(ds_path): + def is_chunk(some_name): + chunk_idx = some_name.split('.') + return all(map(is_int, chunk_idx)) and len(chunk_idx) > 0 + + chunk_files = os.listdir(ds_path) + chunk_files = [cf for cf in chunk_files if is_chunk(cf)] + + for cf in chunk_files: + shutil.move(os.path.join(ds_path, cf), + os.path.join(ds_path, '0.0.' 
+ cf)) + + +# NOTE this works because zarr doesn't have a chunk header +# expand the 2 leading dimensions of the zarr dataset +def expand_dims(ds_path, use_nested_store): + attrs_file = os.path.join(ds_path, '.zarray') + assert os.path.exists(attrs_file) + + if use_nested_store: + expand_chunks_nested(ds_path) + else: + expand_chunks_flat(ds_path) + with open(attrs_file) as f: attrs = json.load(f) @@ -75,7 +98,7 @@ def is_int(some_string): json.dump(attrs, f, indent=2, sort_keys=True) -def convert_bdv_n5(in_path, out_path, n_threads): +def convert_bdv_n5(in_path, out_path, use_nested_store, n_threads): with z5py.File(in_path, mode='r') as f_in, zarr.open(out_path, mode='w') as f_out: # we assume bdv.n5 file format and only a single channel scale_group = f_in['setup0/timepoint0'] @@ -85,7 +108,10 @@ def convert_bdv_n5(in_path, out_path, n_threads): for name in scale_names: ds_in = scale_group[name] - store = zarr.NestedDirectoryStore(os.path.join(out_path, name)) + if use_nested_store: + store = zarr.NestedDirectoryStore(os.path.join(out_path, name)) + else: + store = zarr.DirectoryStore(os.path.join(out_path, name)) ds_out = zarr.zeros(store=store, shape=ds_in.shape, chunks=ds_in.chunks, @@ -95,7 +121,7 @@ def convert_bdv_n5(in_path, out_path, n_threads): # this invalidates the shape and chunk attributes of our dataset, # so we can't use it after that (but we also don't need to) - expand_dims(os.path.join(out_path, name)) + expand_dims(os.path.join(out_path, name), use_nested_store) f_out.attrs['multiscalles'] = [ { @@ -109,7 +135,8 @@ def convert_bdv_n5(in_path, out_path, n_threads): parser = argparse.ArgumentParser() parser.add_argument('inp', type=str) parser.add_argument('outp', type=str) + parser.add_argument('--use_nested_store', type=int, default=0) parser.add_argument('--n_threads', type=int, default=8) args = parser.parse_args() - convert_bdv_n5(args.inp, args.outp, args.n_threads) + convert_bdv_n5(args.inp, args.outp, bool(args.use_nested_store), args.n_threads) From 5cddf277da06bb90c43b4f22bf27872158e9fbe6 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Fri, 13 Nov 2020 09:21:00 +0100 Subject: [PATCH 3/5] Avoid writing empty chunks --- data-conversion/to_ome_zarr.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/data-conversion/to_ome_zarr.py b/data-conversion/to_ome_zarr.py index 3f36ef4..4727309 100644 --- a/data-conversion/to_ome_zarr.py +++ b/data-conversion/to_ome_zarr.py @@ -30,7 +30,12 @@ def copy_dataset(ds_in, ds_out, n_threads): n_blocks = len(blocks) def _copy_chunk(block): - ds_out[block] = ds_in[block] + # make sure we don't copy empty blocks; I don't know + # if zarr makes sure not to write them out + data_in = ds_in[block] + if data_in.sum() == 0: + return + ds_out[block] = data_in with futures.ThreadPoolExecutor(n_threads) as tp: list(tqdm(tp.map(_copy_chunk, blocks), total=n_blocks)) From 208300c9ee60290cab20a6dee3dd805248f42fe5 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Wed, 18 Nov 2020 21:16:14 +0100 Subject: [PATCH 4/5] Add platy data conversion script --- .gitignore | 1 + create_data.py | 28 ++++++++ data_conversion/__init__.py | 1 + .../check_result.py | 0 data_conversion/joshs_script.py | 69 +++++++++++++++++++ .../to_ome_zarr.py | 16 +++-- upload_to_s3.py | 1 + 7 files changed, 109 insertions(+), 7 deletions(-) create mode 100644 .gitignore create mode 100644 create_data.py create mode 100644 data_conversion/__init__.py rename {data-conversion => data_conversion}/check_result.py (100%) create mode 100644 
data_conversion/joshs_script.py rename {data-conversion => data_conversion}/to_ome_zarr.py (88%) create mode 100644 upload_to_s3.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c18dd8d --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__/ diff --git a/create_data.py b/create_data.py new file mode 100644 index 0000000..23b9462 --- /dev/null +++ b/create_data.py @@ -0,0 +1,28 @@ +import os +from data_conversion import convert_bdv_n5 + + +# def convert_bdv_n5(in_path, out_path, use_nested_store, n_threads): +# add the myosin prospr data +def add_myosin(): + in_path = os.path.join('/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/0.6.3', + 'images/local/prospr-6dpf-1-whole-non-muscle-mhc.n5') + convert_bdv_n5(in_path=in_path, + out_path='platy.ome.zarr', + out_key='prospr-myosin', + use_nested_store=False, + n_threads=4) + + +# add the em raw data +def add_raw(): + pass + + +# add the em cell segmentation +def add_seg(): + pass + + +if __name__ == '__main__': + add_myosin() diff --git a/data_conversion/__init__.py b/data_conversion/__init__.py new file mode 100644 index 0000000..e28a454 --- /dev/null +++ b/data_conversion/__init__.py @@ -0,0 +1 @@ +from .to_ome_zarr import convert_bdv_n5 diff --git a/data-conversion/check_result.py b/data_conversion/check_result.py similarity index 100% rename from data-conversion/check_result.py rename to data_conversion/check_result.py diff --git a/data_conversion/joshs_script.py b/data_conversion/joshs_script.py new file mode 100644 index 0000000..32cebe9 --- /dev/null +++ b/data_conversion/joshs_script.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +# This assumes that n5-copy has already been used + +import argparse +import zarr + +parser = argparse.ArgumentParser() +parser.add_argument("input") +parser.add_argument("output") +ns = parser.parse_args() + +zin = zarr.open(ns.input) + +sizes = [] + +def groups(z): + rv = sorted(list(z.groups())) + assert rv + assert not list(z.arrays()) + return rv + +def arrays(z): + rv = sorted(list(z.arrays())) + assert rv + assert not list(z.groups()) + return rv + +setups = groups(zin) +assert len(setups) == 1 # TODO: multiple channels? 
+for sname, setup in setups: + timepoints = groups(setup) + for tname, timepoint in timepoints: + resolutions = arrays(timepoint) + for idx, rtuple in enumerate(resolutions): + rname, resolution = rtuple + try: + expected = sizes[idx] + assert expected[0] == rname + assert expected[1] == resolution.shape + assert expected[2] == resolution.chunks + assert expected[3] == resolution.dtype + except: + sizes.append((rname, + resolution.shape, + resolution.chunks, + resolution.dtype)) + + +datasets = [] +out = zarr.open(ns.output, mode="w") + +for idx, size in enumerate(sizes): + name, shape, chunks, dtype = size + shape = tuple([len(timepoints), len(setups)] + list(shape)) + chunks = tuple([1, 1] + list(chunks)) + a = out.create_dataset(name, shape=shape, chunks=chunks, dtype=dtype) + datasets.append({"path": name}) + for sidx, stuple in enumerate(groups(zin)): + for tidx, ttuple in enumerate(groups(stuple[1])): + resolutions = arrays(ttuple[1]) + a[tidx, sidx, :, :, :] = resolutions[idx][1] +out.attrs["multiscales"] = [ + { + "version": "0.1", + "datasets": datasets, + } +] + diff --git a/data-conversion/to_ome_zarr.py b/data_conversion/to_ome_zarr.py similarity index 88% rename from data-conversion/to_ome_zarr.py rename to data_conversion/to_ome_zarr.py index 4727309..3466178 100644 --- a/data-conversion/to_ome_zarr.py +++ b/data_conversion/to_ome_zarr.py @@ -5,7 +5,7 @@ from concurrent import futures import zarr -import z5py +import z5py # NOTE: once the issue with zarr opening n5 groups is resolved, we can also use zarr for reading the n5s from tqdm import tqdm from z5py.util import blocking @@ -81,7 +81,7 @@ def is_chunk(some_name): # expand the 2 leading dimensions of the zarr dataset def expand_dims(ds_path, use_nested_store): attrs_file = os.path.join(ds_path, '.zarray') - assert os.path.exists(attrs_file) + assert os.path.exists(attrs_file), attrs_file if use_nested_store: expand_chunks_nested(ds_path) @@ -103,7 +103,8 @@ def expand_dims(ds_path, use_nested_store): json.dump(attrs, f, indent=2, sort_keys=True) -def convert_bdv_n5(in_path, out_path, use_nested_store, n_threads): +def convert_bdv_n5(in_path, out_path, out_key, + use_nested_store, n_threads): with z5py.File(in_path, mode='r') as f_in, zarr.open(out_path, mode='w') as f_out: # we assume bdv.n5 file format and only a single channel scale_group = f_in['setup0/timepoint0'] @@ -114,9 +115,9 @@ def convert_bdv_n5(in_path, out_path, use_nested_store, n_threads): ds_in = scale_group[name] if use_nested_store: - store = zarr.NestedDirectoryStore(os.path.join(out_path, name)) + store = zarr.NestedDirectoryStore(os.path.join(out_path, out_key, name)) else: - store = zarr.DirectoryStore(os.path.join(out_path, name)) + store = zarr.DirectoryStore(os.path.join(out_path, out_key, name)) ds_out = zarr.zeros(store=store, shape=ds_in.shape, chunks=ds_in.chunks, @@ -126,7 +127,7 @@ def convert_bdv_n5(in_path, out_path, use_nested_store, n_threads): # this invalidates the shape and chunk attributes of our dataset, # so we can't use it after that (but we also don't need to) - expand_dims(os.path.join(out_path, name), use_nested_store) + expand_dims(os.path.join(out_path, out_key, name), use_nested_store) f_out.attrs['multiscalles'] = [ { @@ -140,8 +141,9 @@ def convert_bdv_n5(in_path, out_path, use_nested_store, n_threads): parser = argparse.ArgumentParser() parser.add_argument('inp', type=str) parser.add_argument('outp', type=str) + parser.add_argument('outk', type=str) parser.add_argument('--use_nested_store', type=int, default=0) 
parser.add_argument('--n_threads', type=int, default=8) args = parser.parse_args() - convert_bdv_n5(args.inp, args.outp, bool(args.use_nested_store), args.n_threads) + convert_bdv_n5(args.inp, args.outp, args.outk, bool(args.use_nested_store), args.n_threads) diff --git a/upload_to_s3.py b/upload_to_s3.py new file mode 100644 index 0000000..4640904 --- /dev/null +++ b/upload_to_s3.py @@ -0,0 +1 @@ +# TODO From 429c1d2c5771aada97eb23519e3a7d86859e1033 Mon Sep 17 00:00:00 2001 From: Constantin Pape Date: Fri, 20 Nov 2020 13:09:56 +0100 Subject: [PATCH 5/5] Convert initial data to ome.zarr format --- .gitignore | 2 + create_data.py | 110 ++++++++++++++++++++++++++++++--- data/em-cells.xml | 43 +++++++++++++ data/em-raw.xml | 43 +++++++++++++ data/images.json | 35 +++++++++++ data/prospr-myosin.xml | 43 +++++++++++++ data_conversion/to_ome_zarr.py | 57 +++++++++++++---- upload_to_s3.py | 1 - 8 files changed, 313 insertions(+), 21 deletions(-) create mode 100644 data/em-cells.xml create mode 100644 data/em-raw.xml create mode 100644 data/images.json create mode 100644 data/prospr-myosin.xml delete mode 100644 upload_to_s3.py diff --git a/.gitignore b/.gitignore index c18dd8d..7df2e81 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ __pycache__/ +*.n5 +*.ome.zarr diff --git a/create_data.py b/create_data.py index 23b9462..051e126 100644 --- a/create_data.py +++ b/create_data.py @@ -1,28 +1,120 @@ +import json import os + +import z5py from data_conversion import convert_bdv_n5 +from pybdv.metadata import (get_size, get_resolution, + write_size_and_resolution, + write_affine) + +from mobie.xml_utils import copy_xml_as_n5_s3 +from mobie.metadata.image_dict import default_layer_setting + +IMAGE_DICT = './data/images.json' + + +def write_metadata(in_xml, out_xml, out_path): + bucket_name = 'i2k-2020' + path_in_bucket = os.path.split(out_path)[1] + copy_xml_as_n5_s3(in_xml, out_xml, + service_endpoint='https://s3.embl.de', + bucket_name=bucket_name, + path_in_bucket=path_in_bucket, + authentication='Anonymous', + bdv_type='bdv.zarr.s3') + + with z5py.File(out_path, 'r') as f: + shape = f['setup0/timepoint0/s0'].shape[2:] + + # check if we need to update the shape and resolution + exp_shape = get_size(out_xml, setup_id=0) + if shape != exp_shape: + resolution = get_resolution(out_xml, setup_id=0) + scale_factor = [float(esh) / sh for sh, esh in zip(shape, exp_shape)] + resolution = [round(res * sf, 2) for res, sf in zip(resolution, scale_factor)] + print("Updating shape and resolution to:") + print(shape) + print(resolution) + + write_size_and_resolution(out_xml, setup_id=0, + size=shape, resolution=resolution) + + # make transformation the hacky way ... + dz, dy, dx = resolution + oz, oy, ox = 0., 0., 0. 
+ trafo = '{} 0.0 0.0 {} 0.0 {} 0.0 {} 0.0 0.0 {} {}'.format(dx, ox, + dy, oy, + dz, oz) + trafo = list(map(float, trafo.split(' '))) + write_affine(out_xml, setup_id=0, affine=trafo, overwrite=True) + + +def add_to_image_dict(name, layer_type, xml_path): + settings = default_layer_setting(layer_type) + storage = {"remote": os.path.split(xml_path)[1]} + settings.update({"storage": storage}) + + if os.path.exists(IMAGE_DICT): + with open(IMAGE_DICT) as f: + image_dict = json.load(f) + else: + image_dict = {} + + image_dict[name] = settings + with open(IMAGE_DICT, 'w') as f: + json.dump(image_dict, f, indent=2, sort_keys=True) + + +def add_volume(in_path, vol_name, layer_type, start_scale=0): + out_path = os.path.join('data', f'{vol_name}.ome.zarr') + + # convert to ome zarr + convert_bdv_n5(in_path=in_path, + out_path=out_path, + out_key='setup0/timepoint0', + vol_name=vol_name, + use_nested_store=False, + n_threads=8, + start_scale=start_scale) + + # create the bdv.xml + in_xml = in_path.replace('.n5', '.xml') + out_xml = os.path.join('data', f'{vol_name}.xml') + write_metadata(in_xml, out_xml, out_path) + + add_to_image_dict(vol_name, layer_type, out_xml) # def convert_bdv_n5(in_path, out_path, use_nested_store, n_threads): # add the myosin prospr data def add_myosin(): + print("Add myosin") in_path = os.path.join('/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/0.6.3', - 'images/local/prospr-6dpf-1-whole-non-muscle-mhc.n5') - convert_bdv_n5(in_path=in_path, - out_path='platy.ome.zarr', - out_key='prospr-myosin', - use_nested_store=False, - n_threads=4) + 'images/local/prospr-6dpf-1-whole-mhcl4.n5') + add_volume(in_path, vol_name='prospr-myosin', layer_type='image') # add the em raw data def add_raw(): - pass + print("Add raw") + in_path = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/rawdata/sbem-6dpf-1-whole-raw.n5' + add_volume(in_path, vol_name='em-raw', layer_type='image', start_scale=3) # add the em cell segmentation def add_seg(): - pass + print("Add cells") + in_path = os.path.join('/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/1.0.1', + 'images/local/sbem-6dpf-1-whole-segmented-cells.n5') + add_volume(in_path, vol_name='em-cells', layer_type='segmentation', start_scale=2) -if __name__ == '__main__': +def add_all_volumes(): + os.makedirs('./data', exist_ok=True) add_myosin() + add_raw() + add_seg() + + +if __name__ == '__main__': + add_all_volumes() diff --git a/data/em-cells.xml b/data/em-cells.xml new file mode 100644 index 0000000..c6cdff1 --- /dev/null +++ b/data/em-cells.xml @@ -0,0 +1,43 @@ + + . + + + + + 0 + 0 + + + + 0 + Setup0 + 3438 3240 2854 + + micrometer + 0.08 0.08 0.1 + + + 0 + + + + + 0 + 0 + + + em-cells.ome.zarr + us-west-2 + https://s3.embl.de + i2k-2020 + Anonymous + + + + + + 0.08 0.0 0.0 0.0 0.0 0.08 0.0 0.0 0.0 0.0 0.1 0.0 + + + + diff --git a/data/em-raw.xml b/data/em-raw.xml new file mode 100644 index 0000000..572e728 --- /dev/null +++ b/data/em-raw.xml @@ -0,0 +1,43 @@ + + . 
+ + + + + 1 + 1 + + + + 0 + channel 1 + 3438 3240 2854 + + micrometer + 0.08 0.08 0.1 + + + 1 + + + + + 0 + 0 + + + em-raw.ome.zarr + us-west-2 + https://s3.embl.de + i2k-2020 + Anonymous + + + + + + 0.08 0.0 0.0 0.0 0.0 0.08 0.0 0.0 0.0 0.0 0.1 0.0 + + + + diff --git a/data/images.json b/data/images.json new file mode 100644 index 0000000..6254a0b --- /dev/null +++ b/data/images.json @@ -0,0 +1,35 @@ +{ + "em-cells": { + "color": "randomFromGlasbey", + "contrastLimits": [ + 0.0, + 1000.0 + ], + "storage": { + "remote": "em-cells.xml" + }, + "type": "segmentation" + }, + "em-raw": { + "color": "white", + "contrastLimits": [ + 0.0, + 255.0 + ], + "storage": { + "remote": "em-raw.xml" + }, + "type": "image" + }, + "prospr-myosin": { + "color": "white", + "contrastLimits": [ + 0.0, + 255.0 + ], + "storage": { + "remote": "prospr-myosin.xml" + }, + "type": "image" + } +} \ No newline at end of file diff --git a/data/prospr-myosin.xml b/data/prospr-myosin.xml new file mode 100644 index 0000000..94a5cd0 --- /dev/null +++ b/data/prospr-myosin.xml @@ -0,0 +1,43 @@ + + . + + + + + 0 + 0 + + + + 0 + Setup0 + 500 471 519 + + micrometer + 0.55 0.55 0.55 + + + 0 + + + + + 0 + 0 + + + prospr-myosin.ome.zarr + us-west-2 + https://s3.embl.de + i2k-2020 + Anonymous + + + + + + 0.55 0.0 0.0 0.0 0.0 0.55 0.0 0.0 0.0 0.0 0.55 0.0 + + + + diff --git a/data_conversion/to_ome_zarr.py b/data_conversion/to_ome_zarr.py index 3466178..9e07117 100644 --- a/data_conversion/to_ome_zarr.py +++ b/data_conversion/to_ome_zarr.py @@ -8,9 +8,10 @@ import z5py # NOTE: once the issue with zarr opening n5 groups is resolved, we can also use zarr for reading the n5s from tqdm import tqdm from z5py.util import blocking +from pybdv.util import get_scale_factors, relative_to_absolute_scale_factors -def copy_dataset(ds_in, ds_out, n_threads): +def copy_dataset(ds_in, ds_out, n_threads, desc): """ Copy input to output dataset in parallel. 
Arguments: @@ -38,7 +39,7 @@ def _copy_chunk(block): ds_out[block] = data_in with futures.ThreadPoolExecutor(n_threads) as tp: - list(tqdm(tp.map(_copy_chunk, blocks), total=n_blocks)) + list(tqdm(tp.map(_copy_chunk, blocks), total=n_blocks, desc=desc)) def is_int(some_string): @@ -103,36 +104,70 @@ def expand_dims(ds_path, use_nested_store): json.dump(attrs, f, indent=2, sort_keys=True) -def convert_bdv_n5(in_path, out_path, out_key, - use_nested_store, n_threads): +def normalize_scales(scales, start_scale): + scales = scales[start_scale:] + normalized_scales = [] + + # to relative scale factors + s_prev = scales[0] + for scale in scales: + norm_scale = [s / sp for s, sp in zip(scale, s_prev)] + normalized_scales.append(norm_scale) + s_prev = scale + + # to absolute scale factors + normalized_scales = relative_to_absolute_scale_factors(normalized_scales) + return normalized_scales + + +def convert_bdv_n5(in_path, out_path, out_key, vol_name, + use_nested_store, n_threads, start_scale=0): + + scales = get_scale_factors(in_path, setup_id=0) + if start_scale > 0: + scales = normalize_scales(scales, start_scale) + with z5py.File(in_path, mode='r') as f_in, zarr.open(out_path, mode='w') as f_out: # we assume bdv.n5 file format and only a single channel scale_group = f_in['setup0/timepoint0'] scale_names = [elem for elem in scale_group] scale_names.sort() - for name in scale_names: + if start_scale > 0: + scale_names = scale_names[start_scale:] + + g_out = f_out.create_group(out_key) + out_names = [] + + for sid, name in enumerate(scale_names): ds_in = scale_group[name] + out_name = f"s{sid}" if use_nested_store: - store = zarr.NestedDirectoryStore(os.path.join(out_path, out_key, name)) + store = zarr.NestedDirectoryStore(os.path.join(out_path, out_key, out_name)) else: - store = zarr.DirectoryStore(os.path.join(out_path, out_key, name)) + store = zarr.DirectoryStore(os.path.join(out_path, out_key, out_name)) ds_out = zarr.zeros(store=store, shape=ds_in.shape, chunks=ds_in.chunks, dtype=ds_in.dtype) - copy_dataset(ds_in, ds_out, n_threads) + desc = f"Copy {name} to {out_name}" + copy_dataset(ds_in, ds_out, n_threads, desc) # this invalidates the shape and chunk attributes of our dataset, # so we can't use it after that (but we also don't need to) - expand_dims(os.path.join(out_path, out_key, name), use_nested_store) + expand_dims(os.path.join(out_path, out_key, out_name), use_nested_store) + out_names.append(out_name) + + assert len(out_names) == len(scales) - f_out.attrs['multiscalles'] = [ + g_out.attrs['multiscales'] = [ { + "name": vol_name, "version": "0.1", - "datasets": [{"path": name} for name in scale_names] + "datasets": [{"path": name} for name in out_names], + "scales": [scale[::-1] for scale in scales] } ] diff --git a/upload_to_s3.py b/upload_to_s3.py deleted file mode 100644 index 4640904..0000000 --- a/upload_to_s3.py +++ /dev/null @@ -1 +0,0 @@ -# TODO
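
Usage note (not part of the patch series): a minimal sketch of how the converter, as it stands after PATCH 5/5, might be driven and its output verified. The input path and volume name below are hypothetical placeholders; the keyword arguments follow the final signature of convert_bdv_n5 in data_conversion/to_ome_zarr.py, and the 5D shape check mirrors data_conversion/check_result.py.

import zarr
from data_conversion import convert_bdv_n5

# hypothetical bdv.n5 input; any single-setup, single-timepoint bdv.n5 volume should work
convert_bdv_n5(in_path='/path/to/example-volume.n5',
               out_path='example-volume.ome.zarr',
               out_key='setup0/timepoint0',
               vol_name='example-volume',
               use_nested_store=False,
               n_threads=8,
               start_scale=0)

# the converter writes OME "multiscales" metadata on the output group and
# expands every scale dataset to 5 dimensions (t, c, z, y, x)
g = zarr.open('example-volume.ome.zarr', mode='r')['setup0/timepoint0']
print(g.attrs['multiscales'][0]['datasets'])  # e.g. [{'path': 's0'}, {'path': 's1'}, ...]
for name, ds in g.items():
    assert ds.ndim == 5, (name, ds.shape)

With use_nested_store=False the chunks are laid out flat ("0.0.z.y.x" files), matching the default zarr DirectoryStore; passing use_nested_store=True instead produces the nested layout via zarr.NestedDirectoryStore, as handled by expand_chunks_flat/expand_chunks_nested in the patches above.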