Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

jzarr read/write (close #36) #37

Merged
merged 16 commits into from
Jun 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ jobs:
env:
ACTIONS_ALLOW_UNSECURE_COMMANDS: true

# Cache the local Maven repository (~/.m2/repository) between workflow runs.
# The key combines the runner OS with a hash of every pom.xml in the repository;
# restore-keys lists the "<os>-maven-" prefix as a fallback key.
- name: Cache local Maven repository
uses: actions/cache@v2
with:
path: ~/.m2/repository
key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
${{ runner.os }}-maven-

- name: Run tests
shell: bash -l {0}
run: make
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ test: data
# Create the reference image by running generate_reference_image.py.
data/reference_image.png:
python generate_reference_image.py

# Generate the jzarr datasets; requires the reference image to exist first.
.PHONY: jzarr
jzarr: data/reference_image.png
bash generate_data/jzarr/generate_data.sh

# Generate the n5-java datasets; requires the reference image to exist first.
.PHONY: n5java
n5java: data/reference_image.png
bash generate_data/n5-java/generate_data.sh
Expand Down Expand Up @@ -33,7 +37,7 @@ xtensor_zarr: data/reference_image.png
bash generate_data/xtensor_zarr/generate_data.sh

.PHONY: data
data: n5java pyn5 z5py zarr js xtensor_zarr zarrita
data: jzarr n5java pyn5 z5py zarr js xtensor_zarr zarrita

.PHONY: test

Expand Down
19 changes: 19 additions & 0 deletions generate_data/jzarr/generate_data.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Build the jzarr example with Maven and run it.
#
# Usage:
#   generate_data.sh                        write the jzarr test data
#   generate_data.sh -verify FPATH DSNAME   verify an existing array
#
# All arguments are passed through unchanged to zarr_implementations.jzarr.App.
#
# Environment:
#   MVN_FLAGS   flags for mvn, split on whitespace
#               (default: --no-transfer-progress)
#
# Exit status: 2 if the Java program fails; otherwise non-zero if any other
# step (cd, mvn, find/sed) fails.

# Enable strict mode before the first command so a failing cd aborts the script
# instead of building in the wrong directory.
set -e
set -u
set -x

# cd to this directory
# https://stackoverflow.com/a/6393573/2700168
cd "${0%/*}"

MVN_FLAGS=${MVN_FLAGS:-"--no-transfer-progress"}
# Deliberately unquoted: several flags in MVN_FLAGS must reach mvn as
# separate arguments rather than as one word.
# shellcheck disable=SC2086
mvn ${MVN_FLAGS} clean package

# if/else instead of `cmd && { ... } || { ... }`: with the latter, a failure of
# the workaround step would be misreported as "jzarr failed".
if java -cp target/jzarr-1.0.0.jar zarr_implementations.jzarr.App "$@"; then
    # Workaround for: https://github.com/bcdev/jzarr/issues/25
    # (-ibak with no space is accepted by both GNU and BSD sed; it leaves a
    # ".zarraybak" backup next to each rewritten file.)
    find ../../data/jzarr* -name .zarray -exec sed -ibak 's/>u1/|u1/' {} \;
else
    echo jzarr failed
    exit 2
fi

71 changes: 71 additions & 0 deletions generate_data/jzarr/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
<!--
Maven build for the jzarr data generator. The artifactId and version below
give the jar the name jzarr-1.0.0.jar, which generate_data.sh runs with
"java -cp target/jzarr-1.0.0.jar zarr_implementations.jzarr.App".
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>zarr_implementations</groupId>
<artifactId>jzarr</artifactId>
<packaging>jar</packaging>
<version>1.0.0</version>
<name>jzarr</name>
<url>http://maven.apache.org</url>
<dependencies>
<!-- JUnit, test scope only. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.1</version>
<scope>test</scope>
</dependency>
<!-- The jzarr library; App.java imports its com.bc.zarr classes. -->
<dependency>
<groupId>com.bc.zarr</groupId>
<artifactId>jzarr</artifactId>
<version>0.3.3</version>
</dependency>
</dependencies>
<!-- Extra repository for dependency resolution; presumably this is where the
com.bc.zarr:jzarr artifact is published (TODO confirm). -->
<repositories>
<repository>
<id>bc-nexus-repo</id>
<name>Brockmann-Consult Public Maven Repository</name>
<url>https://nexus.senbox.net/nexus/content/groups/public/</url>
</repository>
</repositories>
<build>
<plugins>
<!-- Binds the "enforce-rules" execution to phase "none"; presumably this
switches that enforcer execution off. NOTE(review): this POM declares no
parent, so confirm the entry is still needed. -->
<plugin>
<artifactId>maven-enforcer-plugin</artifactId>
<executions>
<execution>
<id>enforce-rules</id>
<phase>none</phase>
</execution>
</executions>
</plugin>
<!-- Compile with Java 8 source and target levels. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.3</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!-- Runs the shade goal in the package phase. Presumably this bundles the
dependencies into the jar so that the single-jar classpath used by
generate_data.sh is sufficient (TODO confirm). The dependency-reduced POM is
written to ${java.io.tmpdir}/dependency-reduced-pom.xml. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.3</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<createDependencyReducedPom>true</createDependencyReducedPom>
<dependencyReducedPomLocation>
${java.io.tmpdir}/dependency-reduced-pom.xml
</dependencyReducedPomLocation>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
129 changes: 129 additions & 0 deletions generate_data/jzarr/src/main/java/zarr_implementations/jzarr/App.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
package zarr_implementations.jzarr;

import com.bc.zarr.ArrayParams;
import com.bc.zarr.CompressorFactory;
import com.bc.zarr.DataType;
import com.bc.zarr.ZarrArray;
import com.bc.zarr.ZarrGroup;

import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.awt.image.DataBuffer;
import java.awt.image.DataBufferByte;
import java.awt.image.DataBufferInt;
import java.awt.image.WritableRaster;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.stream.IntStream;


public class App {

enum Compression {
raw("null"),
zlib("zlib"),
blosc("blosc");

private final String value;

private Compression(final String value) {
this.value = value;
}

@Override
public String toString() {
return value;
}
}

// NOTE for now we use 100, 100, 1 as block-size in all examples
// maybe it's a better idea to make this more irregular though
private static final int WIDTH = 512;
private static final int HEIGHT = 512;
private static final int CHANNELS = 3;
private static final int[] CHUNKS = new int[]{100, 100, 1};
private static final int[] SHAPE = new int[] {WIDTH, HEIGHT, CHANNELS};
private static final Path IN_PATH = Paths.get("..", "..", "data", "reference_image.png");
private static final Path OUT_PATH = Paths.get("..", "..", "data", "jzarr_flat.zr");

private static int[] getTestData() throws IOException {
final BufferedImage image = ImageIO.read(new File(IN_PATH.toString()));
int[] result = new int[WIDTH * HEIGHT * CHANNELS];
for (int i = 0; i < WIDTH; i++) {
for (int j = 0; j < HEIGHT; j++) {
Color color = new Color(image.getRGB(i, j));
int index = (WIDTH*3*j) + (3*i);
result[index + 0] = color.getRed();
result[index + 1] = color.getGreen();
result[index + 2] = color.getBlue();
}
}
return result;
}


private static int[] getArrayData(ZarrArray zarr) throws Exception {
int[] data = new int[WIDTH * HEIGHT * CHANNELS];
zarr.read(data, SHAPE, new int[]{0, 0, 0});
int[] unsigned = new int[data.length];
for (int i = 0; i < data.length; i++) {
unsigned[i] = data[i] & 0xff;
}
return unsigned;
}

public static void main(String args[]) throws Exception {

if (args.length != 0 && args.length != 3) {
System.out.println("usage: App");
System.out.println("usage: App -verify fpath dsname");
System.exit(2); // EARLY EXIT
} else if (args.length == 3) {
String fpath = args[1];
String dsname = args[2];
ZarrArray verification = ZarrGroup.open(fpath).openArray(dsname);
int[] shape = verification.getShape();
if (!Arrays.equals(SHAPE, shape)) {
throw new RuntimeException(String.format(
"shape-mismatch expected:%s found:%s",
Arrays.toString(SHAPE), Arrays.toString(shape)
));
}
joshmoore marked this conversation as resolved.
Show resolved Hide resolved

int[] test = getTestData();
int[] verify = getArrayData(verification);
if (!Arrays.equals(test, verify)) {
throw new RuntimeException(String.format(
"values don't match"));
}
return; // EARLY EXIT
}

int[] data = getTestData();

final ZarrGroup container = ZarrGroup.create(OUT_PATH);
for (final Compression compressionType : Compression.values()) {
ArrayParams arrayParams = new ArrayParams()
.shape(SHAPE)
.chunks(CHUNKS)
.dataType(DataType.u1)
// .nested(nested) FIXME: requires a different branch
.compressor(CompressorFactory.create(compressionType.toString())); // jzarr name, "null"

String dsname = compressionType.name(); // zarr_implementation name, "raw"
if ("blosc".equals(dsname)) {
dsname = "blosc/lz4"; // FIXME: better workaround?
}
Path subdir = OUT_PATH.resolve(dsname);
ZarrArray zArray = ZarrArray.create(subdir, arrayParams);
// final ZarrArray zarr = ZarrArray.open(getRootPath().resolve(pathName));
zArray.write(data, SHAPE, new int[]{0, 0, 0});
}
}
}
4 changes: 3 additions & 1 deletion generate_data/n5-java/generate_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,7 @@
# https://stackoverflow.com/a/6393573/2700168
cd "${0%/*}"

mvn clean package
MVN_FLAGS=${MVN_FLAGS:-"--no-transfer-progress"}
mvn "${MVN_FLAGS}" clean package

java -cp target/n5_java-1.0.0.jar zarr_implementations.n5_java.App
45 changes: 31 additions & 14 deletions test/test_read_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
* READABLE_CODECS.{library_name}.{format_name}[{codec1}, {codec2}, ...]
* Write a function which takes a container path and dataset name,
and returns a numpy-esque array
* Add it to READ_FNS under the {library_name} key
* Add it to _get_read_fn under the {library_name} key

The matrix of tests is automatically generated,
and individual tests correctly fail on unavailable imports.
Expand Down Expand Up @@ -43,6 +43,11 @@


READABLE_CODECS: Dict[str, Dict[str, List[str]]] = {
"jzarr": {
"zarr": ["blosc", "raw", "zlib"],
"zarr-v3": [],
"N5": [],
},
"z5py": {
"zarr": ["blosc", "gzip", "raw", "zlib"],
"zarr-v3": [],
Expand Down Expand Up @@ -71,6 +76,20 @@
}


def read_with_jzarr(fpath, ds_name, nested=None):
    """Verify a dataset with the jzarr (Java) implementation.

    No array is handed back to Python: the comparison with the reference
    image happens inside the Java program started by
    ``generate_data/jzarr/generate_data.sh -verify``.

    Parameters
    ----------
    fpath : str or Path
        Path of the container to read. It is made absolute before being
        passed on, because the script changes into its own directory.
    ds_name : str
        Codec / dataset name; ``"blosc"`` is mapped to ``"blosc/lz4"``.
    nested : optional
        Unused; accepted so the signature matches the other read functions.

    Returns
    -------
    None
        Always. Callers treat ``None`` as "the read function has verified".

    Raises
    ------
    subprocess.CalledProcessError
        If the script exits with a non-zero return code.
    """
    if ds_name == "blosc":
        ds_name = "blosc/lz4"

    # Locate the script relative to this file (test/ -> repository root) so the
    # call no longer depends on the current working directory.
    script = (
        Path(__file__).resolve().parent.parent
        / "generate_data" / "jzarr" / "generate_data.sh"
    )

    # Argument list instead of a shell string: paths containing spaces or shell
    # metacharacters are passed through verbatim. The script is started via
    # bash explicitly, as the Makefile does.
    cmd = ["bash", str(script), "-verify", str(Path(fpath).resolve()), ds_name]

    # will raise subprocess.CalledProcessError if return code is not 0
    subprocess.check_output(cmd)
    return None


def read_with_zarr(fpath, ds_name, nested):
import zarr
if ds_name == "blosc":
Expand Down Expand Up @@ -122,15 +141,6 @@ def read_with_xtensor_zarr(fpath, ds_name, nested):
return np.load(fname)["a"]


READ_FNS = {
"zarr": read_with_zarr,
"zarrita": read_with_zarrita,
"pyn5": read_with_pyn5,
"z5py": read_with_z5py,
"xtensor_zarr": read_with_xtensor_zarr,
}


EXTENSIONS = {"zarr": ".zr", "N5": ".n5", "zarr-v3": ".zr3"}
HERE = Path(__file__).resolve().parent
DATA_DIR = HERE.parent / "data"
Expand Down Expand Up @@ -221,6 +231,7 @@ def create_params():

def _get_read_fn(reading_library):
read_fn = {
"jzarr": read_with_jzarr,
"zarr": read_with_zarr,
"pyn5": read_with_pyn5,
"z5py": read_with_z5py,
Expand All @@ -244,8 +255,10 @@ def test_correct_read(fmt, writing_library, reading_library, codec, nested,
"using 'make data'"
)
test = read_fn(fpath, codec, nested)
assert test.shape == reference.shape
assert np.allclose(test, reference)
# Assume if None is returned, the read function has verified.
if test is not None:
assert test.shape == reference.shape
assert np.allclose(test, reference)


def tabulate_test_results(params, per_codec_tables=False):
Expand All @@ -262,8 +275,12 @@ def tabulate_test_results(params, per_codec_tables=False):
fail_type = f"{type(e).__name__}: {e}"

if fail_type is None:
result = test.shape == reference.shape
result = result and np.allclose(test, reference)
if test is None:
# Assume implementation handled the verification
result = True
else:
result = test.shape == reference.shape
result = result and np.allclose(test, reference)
else:
result = fail_type

Expand Down