Add platy data conversion script
constantinpape committed Nov 18, 2020
1 parent 5cddf27 commit 208300c
Showing 7 changed files with 109 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -0,0 +1 @@
__pycache__/
28 changes: 28 additions & 0 deletions create_data.py
@@ -0,0 +1,28 @@
import os
from data_conversion import convert_bdv_n5


# def convert_bdv_n5(in_path, out_path, out_key, use_nested_store, n_threads):
# add the myosin prospr data
def add_myosin():
in_path = os.path.join('/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/0.6.3',
'images/local/prospr-6dpf-1-whole-non-muscle-mhc.n5')
convert_bdv_n5(in_path=in_path,
out_path='platy.ome.zarr',
out_key='prospr-myosin',
use_nested_store=False,
n_threads=4)


# add the em raw data
def add_raw():
pass


# add the em cell segmentation
def add_seg():
pass


if __name__ == '__main__':
add_myosin()
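
To sanity-check the conversion, the resulting group can be opened with zarr directly. A minimal sketch, assuming the scale datasets keep their bdv.n5 names (s0 being the full-resolution level):

import zarr

root = zarr.open('platy.ome.zarr', mode='r')
ds = root['prospr-myosin/s0']
print(ds.shape, ds.chunks, ds.dtype)  # 5d after expand_dims: (t, c, z, y, x)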
1 change: 1 addition & 0 deletions data_conversion/__init__.py
@@ -0,0 +1 @@
from .to_ome_zarr import convert_bdv_n5
File renamed without changes.
69 changes: 69 additions & 0 deletions data_conversion/joshs_script.py
@@ -0,0 +1,69 @@
#!/usr/bin/env python

# This assumes that n5-copy has already been used

import argparse
import zarr

parser = argparse.ArgumentParser()
parser.add_argument("input")
parser.add_argument("output")
ns = parser.parse_args()

zin = zarr.open(ns.input)

sizes = []  # one (name, shape, chunks, dtype) tuple per resolution level

def groups(z):
    rv = sorted(list(z.groups()))
    assert rv  # this level must contain sub-groups
    assert not list(z.arrays())  # and no arrays
    return rv

def arrays(z):
    rv = sorted(list(z.arrays()))
    assert rv  # this level must contain arrays
    assert not list(z.groups())  # and no sub-groups
    return rv

setups = groups(zin)  # bdv.n5-style hierarchy: setup/ -> timepoint/ -> scale arrays
assert len(setups) == 1  # TODO: multiple channels?
for sname, setup in setups:
timepoints = groups(setup)
for tname, timepoint in timepoints:
resolutions = arrays(timepoint)
for idx, rtuple in enumerate(resolutions):
rname, resolution = rtuple
try:
expected = sizes[idx]
assert expected[0] == rname
assert expected[1] == resolution.shape
assert expected[2] == resolution.chunks
assert expected[3] == resolution.dtype
            except IndexError:  # first traversal of this level: record its metadata
sizes.append((rname,
resolution.shape,
resolution.chunks,
resolution.dtype))


datasets = []  # "datasets" entries for the multiscales metadata written below
out = zarr.open(ns.output, mode="w")

for idx, size in enumerate(sizes):
name, shape, chunks, dtype = size
    shape = tuple([len(timepoints), len(setups)] + list(shape))  # prepend (t, c) axes
chunks = tuple([1, 1] + list(chunks))
a = out.create_dataset(name, shape=shape, chunks=chunks, dtype=dtype)
datasets.append({"path": name})
for sidx, stuple in enumerate(groups(zin)):
for tidx, ttuple in enumerate(groups(stuple[1])):
resolutions = arrays(ttuple[1])
a[tidx, sidx, :, :, :] = resolutions[idx][1]
out.attrs["multiscales"] = [
{
"version": "0.1",
"datasets": datasets,
}
]
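
To check the output, one can walk the multiscales metadata the script writes and confirm every listed path resolves to an array. A minimal sketch, with "output.zarr" standing in for the positional output argument:

import zarr

out = zarr.open("output.zarr", mode="r")
for entry in out.attrs["multiscales"][0]["datasets"]:
    arr = out[entry["path"]]
    # every resolution level is 5d: (timepoints, setups) + the volume shape
    print(entry["path"], arr.shape, arr.chunks, arr.dtype)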

16 changes: 9 additions & 7 deletions data_conversion/to_ome_zarr.py
@@ -5,7 +5,7 @@
from concurrent import futures

import zarr
-import z5py
+import z5py  # NOTE: once the issue with zarr opening n5 groups is resolved, we can also use zarr for reading the n5s
from tqdm import tqdm
from z5py.util import blocking

@@ -81,7 +81,7 @@ def is_chunk(some_name):
# expand the 2 leading dimensions of the zarr dataset
def expand_dims(ds_path, use_nested_store):
attrs_file = os.path.join(ds_path, '.zarray')
-    assert os.path.exists(attrs_file)
+    assert os.path.exists(attrs_file), attrs_file

if use_nested_store:
expand_chunks_nested(ds_path)
@@ -103,7 +103,8 @@ def expand_dims(ds_path, use_nested_store):
json.dump(attrs, f, indent=2, sort_keys=True)


-def convert_bdv_n5(in_path, out_path, use_nested_store, n_threads):
+def convert_bdv_n5(in_path, out_path, out_key,
+                   use_nested_store, n_threads):
with z5py.File(in_path, mode='r') as f_in, zarr.open(out_path, mode='w') as f_out:
# we assume bdv.n5 file format and only a single channel
scale_group = f_in['setup0/timepoint0']
@@ -114,9 +115,9 @@ def convert_bdv_n5(in_path, out_path, use_nested_store, n_threads):
ds_in = scale_group[name]

if use_nested_store:
-                store = zarr.NestedDirectoryStore(os.path.join(out_path, name))
+                store = zarr.NestedDirectoryStore(os.path.join(out_path, out_key, name))
else:
-                store = zarr.DirectoryStore(os.path.join(out_path, name))
+                store = zarr.DirectoryStore(os.path.join(out_path, out_key, name))
ds_out = zarr.zeros(store=store,
shape=ds_in.shape,
chunks=ds_in.chunks,
@@ -126,7 +127,7 @@ def convert_bdv_n5(in_path, out_path, use_nested_store, n_threads):

# this invalidates the shape and chunk attributes of our dataset,
# so we can't use it after that (but we also don't need to)
-            expand_dims(os.path.join(out_path, name), use_nested_store)
+            expand_dims(os.path.join(out_path, out_key, name), use_nested_store)

        f_out.attrs['multiscales'] = [
{
@@ -140,8 +141,9 @@ def convert_bdv_n5(in_path, out_path, use_nested_store, n_threads):
parser = argparse.ArgumentParser()
parser.add_argument('inp', type=str)
parser.add_argument('outp', type=str)
+    parser.add_argument('outk', type=str)
parser.add_argument('--use_nested_store', type=int, default=0)
parser.add_argument('--n_threads', type=int, default=8)

args = parser.parse_args()
-    convert_bdv_n5(args.inp, args.outp, bool(args.use_nested_store), args.n_threads)
+    convert_bdv_n5(args.inp, args.outp, args.outk, bool(args.use_nested_store), args.n_threads)
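
With the added out_key argument the script now takes three positional arguments. A hypothetical invocation (the file name to_ome_zarr.py is inferred from the package import above; paths and key are placeholders):

python data_conversion/to_ome_zarr.py input.n5 out.ome.zarr raw --use_nested_store 0 --n_threads 8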
1 change: 1 addition & 0 deletions upload_to_s3.py
@@ -0,0 +1 @@
# TODO
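
The upload itself is still a stub. One possible sketch using boto3, assuming AWS credentials are configured and with the bucket name and prefix purely hypothetical:

import os
import boto3

def upload_dir(local_dir, bucket, prefix):
    # walk the zarr directory and upload each file, keeping relative paths as keys
    s3 = boto3.client('s3')
    for root, _, files in os.walk(local_dir):
        for fname in files:
            path = os.path.join(root, fname)
            key = os.path.join(prefix, os.path.relpath(path, local_dir))
            s3.upload_file(path, bucket, key)

if __name__ == '__main__':
    upload_dir('platy.ome.zarr', 'my-bucket', 'platy.ome.zarr')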
