Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test zarrita compatibility #3

Merged
merged 52 commits into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
f6c5b8d
add zarrita python script
brokkoli71 May 14, 2024
a678fd4
add testReadFromZarrita
brokkoli71 May 14, 2024
8a8311e
renamed zarrita_write
brokkoli71 May 14, 2024
270e690
add testWriteToZarrita
brokkoli71 May 15, 2024
b23e392
Merge branch 'main' into test-zarrita-compatibility
brokkoli71 May 15, 2024
21ba578
parameterized codecs in testReadFromZarrita
brokkoli71 May 16, 2024
8b9828a
fixed camelcase in zarr.json
brokkoli71 May 16, 2024
9973117
add venv for executing zarrita in linux
brokkoli71 May 17, 2024
351b7f8
adapt test-setup for linux
brokkoli71 May 17, 2024
dd3e939
adapt test-setup for linux
brokkoli71 May 17, 2024
4125940
fix testReadFromZarrita for gzip
brokkoli71 May 21, 2024
c3f1510
parameterize codecs in testReadFromZarrita
brokkoli71 May 21, 2024
18693d7
remove unused imports
brokkoli71 May 21, 2024
ecd1dea
add testCodecsWriteRead
brokkoli71 May 21, 2024
adbcd43
reformat
brokkoli71 May 21, 2024
0f50859
remove unnecessary argument
brokkoli71 May 22, 2024
db122db
add testCodecTranspose
brokkoli71 May 22, 2024
842ad54
start remove constants "C" adn "F" from Transpose Codec's order
brokkoli71 May 22, 2024
cf0207d
add CoreArrayMetadata to codec object instead of passing as argument …
brokkoli71 May 24, 2024
363c37b
Merge remote-tracking branch 'origin/main' into fix-transpose-codec
brokkoli71 May 24, 2024
e502479
remove wrong dependency
brokkoli71 May 24, 2024
292ecc6
set fail-fast: false
brokkoli71 May 24, 2024
30bb41d
specify testSourceDirectory
brokkoli71 May 24, 2024
cf105f0
added property index_location to ShardingIndexedCodec
brokkoli71 May 25, 2024
e49f016
add indexLocation in ShardingCodec.
brokkoli71 May 27, 2024
770e1dd
change junit version for TestUtils
brokkoli71 May 27, 2024
fbd190a
remove creation of dir testdata
brokkoli71 May 27, 2024
9e3a240
update dependencies for JUnit 5
brokkoli71 May 27, 2024
4ca12b7
install zarrita in CI
brokkoli71 May 27, 2024
5b8ca3e
correct python version, maven no-transfer-progress
brokkoli71 May 27, 2024
db4f765
add venv for executing zarrita to CI
brokkoli71 May 27, 2024
6b793cd
add /venv_zarrita to .gitignore
brokkoli71 May 27, 2024
d553f17
remove deprecated zarrita venv
brokkoli71 May 27, 2024
cd84d08
test CI
brokkoli71 May 27, 2024
215f52d
fix tests for windows
brokkoli71 May 27, 2024
71c7548
update python path for windows in ci.yml
brokkoli71 May 27, 2024
bb4f50b
add Development Start-Guide to Run Tests Locally
brokkoli71 May 29, 2024
d62070c
correct Development Start-Guide to python3.11
brokkoli71 May 29, 2024
d2d3e04
Merge pull request #1 from scalableminds/fix-transpose-codec
brokkoli71 May 29, 2024
67f5cd9
add support of shardingCodec indexLocation=start
brokkoli71 May 29, 2024
4f9dc72
code cleanup
brokkoli71 May 29, 2024
e51ac9d
Merge pull request #2 from scalableminds/fix-sharding-codec
brokkoli71 May 29, 2024
dbcee48
add testZstdLibrary
brokkoli71 May 29, 2024
26964dc
fix Zstd compression and decompression
brokkoli71 May 30, 2024
2d8b7b2
cleanup code
brokkoli71 May 30, 2024
554715f
cleanup code
brokkoli71 May 30, 2024
29541d5
remove unused method
brokkoli71 May 30, 2024
9733e07
Merge pull request #4 from scalableminds/fix-zstd-codec
brokkoli71 May 30, 2024
8d4eae5
include crc32c codec into tests
brokkoli71 May 30, 2024
fbb2b67
incorporate feedback for java version and merge tests
brokkoli71 Jun 3, 2024
db47786
add more variation to codec-configuration in tests with zarrita
brokkoli71 Jun 3, 2024
39ffce5
fix BloscCodec with shuffle = "shuffle"
brokkoli71 Jun 3, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,41 @@ jobs:
build:
strategy:
matrix:
os: [ ubuntu, windows, macos ]
runs-on: ${{ matrix.os }}-latest
os: [ ubuntu-latest, windows-latest, macos-latest ]
fail-fast: false
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash

steps:
- uses: actions/checkout@v3

- name: Set up JDK
uses: actions/setup-java@v3
with:
java-version: '22'
distribution: 'temurin'
cache: maven

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'

- name: Install zarrita
run: |
python -m venv venv_zarrita
if [ "${{ runner.os }}" = "Windows" ]; then venv_zarrita/Scripts/pip install zarrita; else venv_zarrita/bin/pip install zarrita; fi

- name: Download blosc jar
run: |
mkdir -p ../blosc-java/target
curl https://static.webknossos.org/misc/blosc-java-0.1-1.21.4-SNAPSHOT.jar -o ../blosc-java/target/blosc-java-0.1-1.21.4-SNAPSHOT.jar

- name: Download testdata
run: |
mkdir testdata testoutput
mkdir testoutput
curl https://static.webknossos.org/data/zarr_v3/l4_sample.zip -o testdata/l4_sample.zip
cd testdata
unzip l4_sample.zip
Expand All @@ -44,7 +56,7 @@ jobs:
- name: Test
env:
MAVEN_OPTS: "-Xmx6g"
run: mvn test -DargLine="-Xmx6g"
run: mvn --no-transfer-progress test -DargLine="-Xmx6g"

- name: Assemble JAR
run: mvn package -DskipTests
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ build/


### Custom ###
/testdata
/testdata/l4_sample
/testoutput
/venv_zarrita
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,19 @@ array.write(
ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1, 1024, 1024, 1024})
);
```
## Development Start-Guide

### Run Tests Locally
To run the tests locally, make sure `python3.11` is installed.
You also need to set up a virtual environment (venv) for zarrita at the root of the project:
`python3.11 -m venv venv_zarrita`.

Then install zarrita into it with `venv_zarrita/Scripts/pip install zarrita`
on Windows, or `venv_zarrita/bin/pip install zarrita` on Linux and macOS.

Furthermore, you will need the `l4_sample` test data:

```shell
curl https://static.webknossos.org/data/zarr_v3/l4_sample.zip -o testdata/l4_sample.zip \
  && cd testdata \
  && unzip l4_sample.zip
```
37 changes: 36 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,31 @@
<aws.version>1.12.477</aws.version>
<netcdfJavaVersion>5.5.3</netcdfJavaVersion>
<zstdVersion>1.5.5-5</zstdVersion>
<junit-jupiter-version>5.10.2</junit-jupiter-version>
</properties>

<dependencies>
<!-- JUnit 5 dependencies -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>${junit-jupiter-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<version>${junit-jupiter-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<version>${junit-jupiter-version}</version>
<scope>test</scope>
</dependency>

<!-- Other dependencies -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
Expand Down Expand Up @@ -54,6 +76,7 @@
<artifactId>okhttp</artifactId>
<version>2.7.5</version>
</dependency>
<!-- JUnit 4 dependency for backward compatibility if needed -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand All @@ -70,4 +93,16 @@
</repository>
</repositories>

</project>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.2.5</version>
<configuration>
<useSystemClassLoader>false</useSystemClassLoader>
</configuration>
</plugin>
</plugins>
</build>
</project>
20 changes: 20 additions & 0 deletions src/main/java/dev/zarr/zarrjava/utils/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,4 +77,24 @@ public static <T> T[] concatArrays(T[] array1, T[]... arrays) {
}
return result;
}

/**
 * Checks whether {@code array} contains every integer from {@code 0} to
 * {@code array.length - 1} exactly once.
 *
 * <p>An empty array is not considered a permutation and yields {@code false}.
 *
 * @param array the candidate permutation; it is not modified
 * @return {@code true} if {@code array} is a non-empty permutation of
 *     {@code 0..array.length-1}, {@code false} otherwise
 */
public static boolean isPermutation(int[] array) {
  final int n = array.length;
  if (n == 0) {
    return false;
  }
  // n values that are all in range and pairwise distinct must cover 0..n-1.
  final boolean[] seen = new boolean[n];
  for (int value : array) {
    if (value < 0 || value >= n || seen[value]) {
      return false;
    }
    seen[value] = true;
  }
  return true;
}

/**
 * Computes the inverse of a permutation, i.e. the array {@code inverse} with
 * {@code inverse[origin[i]] == i} for every index {@code i}.
 *
 * <p>The argument is validated explicitly rather than with {@code assert}, because
 * assertions are disabled by default at runtime. Without the check, an invalid input
 * would either fail with an {@link ArrayIndexOutOfBoundsException} or, for duplicated
 * values, silently produce a wrong result.
 *
 * @param origin a non-empty permutation of {@code 0..origin.length-1}; it is not modified
 * @return a new array holding the inverse permutation
 * @throws IllegalArgumentException if {@code origin} is empty, contains a value outside
 *     {@code 0..origin.length-1}, or contains a value more than once
 */
public static int[] inversePermutation(int[] origin) {
  final int n = origin.length;
  if (n == 0) {
    throw new IllegalArgumentException("origin must be a non-empty permutation");
  }
  final int[] inverse = new int[n];
  // Tracks which target slots are already filled so duplicates are detected.
  final boolean[] assigned = new boolean[n];
  for (int i = 0; i < n; i++) {
    final int target = origin[i];
    if (target < 0 || target >= n || assigned[target]) {
      throw new IllegalArgumentException(
          "origin is not a permutation of 0.." + (n - 1)
              + ": value " + target + " at index " + i + " is out of range or duplicated");
    }
    assigned[target] = true;
    inverse[target] = i;
  }
  return inverse;
}
}
9 changes: 4 additions & 5 deletions src/main/java/dev/zarr/zarrjava/v3/Array.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ protected Array(StoreHandle storeHandle, ArrayMetadata arrayMetadata)
throws IOException, ZarrException {
super(storeHandle);
this.metadata = arrayMetadata;
this.codecPipeline = new CodecPipeline(arrayMetadata.codecs);
this.codecPipeline = new CodecPipeline(arrayMetadata.codecs, arrayMetadata.coreArrayMetadata);
}

/**
Expand Down Expand Up @@ -171,8 +171,7 @@ public ucar.ma2.Array read(final long[] offset, final int[] shape) throws ZarrEx

if (codecPipeline.supportsPartialDecode()) {
final ucar.ma2.Array chunkArray = codecPipeline.decodePartial(chunkHandle,
Utils.toLongArray(chunkProjection.chunkOffset), chunkProjection.shape,
metadata.coreArrayMetadata);
Utils.toLongArray(chunkProjection.chunkOffset), chunkProjection.shape);
MultiArrayUtils.copyRegion(chunkArray, new int[metadata.ndim()], outputArray,
chunkProjection.outOffset, chunkProjection.shape
);
Expand Down Expand Up @@ -223,7 +222,7 @@ public ucar.ma2.Array readChunk(long[] chunkCoords)
return metadata.allocateFillValueChunk();
}

return codecPipeline.decode(chunkBytes, metadata.coreArrayMetadata);
return codecPipeline.decode(chunkBytes);
}

/**
Expand Down Expand Up @@ -299,7 +298,7 @@ public void writeChunk(long[] chunkCoords, ucar.ma2.Array chunkArray) throws Zar
if (MultiArrayUtils.allValuesEqual(chunkArray, metadata.parsedFillValue)) {
chunkHandle.delete();
} else {
ByteBuffer chunkBytes = codecPipeline.encode(chunkArray, metadata.coreArrayMetadata);
ByteBuffer chunkBytes = codecPipeline.encode(chunkArray);
chunkHandle.set(chunkBytes);
}
}
Expand Down
7 changes: 3 additions & 4 deletions src/main/java/dev/zarr/zarrjava/v3/codec/ArrayArrayCodec.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
package dev.zarr.zarrjava.v3.codec;

import dev.zarr.zarrjava.ZarrException;
import dev.zarr.zarrjava.v3.ArrayMetadata.CoreArrayMetadata;
import ucar.ma2.Array;

public interface ArrayArrayCodec extends Codec {
public abstract class ArrayArrayCodec extends Codec {

Array encode(Array chunkArray, CoreArrayMetadata arrayMetadata)
protected abstract Array encode(Array chunkArray)
throws ZarrException;

Array decode(Array chunkArray, CoreArrayMetadata arrayMetadata)
protected abstract Array decode(Array chunkArray)
throws ZarrException;

}
17 changes: 9 additions & 8 deletions src/main/java/dev/zarr/zarrjava/v3/codec/ArrayBytesCodec.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,24 @@

import dev.zarr.zarrjava.ZarrException;
import dev.zarr.zarrjava.store.StoreHandle;
import dev.zarr.zarrjava.v3.ArrayMetadata.CoreArrayMetadata;
import java.nio.ByteBuffer;
import ucar.ma2.Array;

public interface ArrayBytesCodec extends Codec {
public abstract class ArrayBytesCodec extends Codec {

ByteBuffer encode(Array chunkArray, CoreArrayMetadata arrayMetadata)
protected abstract ByteBuffer encode(Array chunkArray)
throws ZarrException;

Array decode(ByteBuffer chunkBytes, CoreArrayMetadata arrayMetadata)
protected abstract Array decode(ByteBuffer chunkBytes)
throws ZarrException;

interface WithPartialDecode extends ArrayBytesCodec {
public abstract static class WithPartialDecode extends ArrayBytesCodec {

Array decodePartial(
StoreHandle handle, long[] offset, int[] shape,
CoreArrayMetadata arrayMetadata
public abstract Array decode(ByteBuffer shardBytes) throws ZarrException;
public abstract ByteBuffer encode(Array shardArray) throws ZarrException;

protected abstract Array decodePartial(
StoreHandle handle, long[] offset, int[] shape
) throws ZarrException;
}
}
Expand Down
10 changes: 4 additions & 6 deletions src/main/java/dev/zarr/zarrjava/v3/codec/BytesBytesCodec.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,13 @@
package dev.zarr.zarrjava.v3.codec;

import dev.zarr.zarrjava.ZarrException;
import dev.zarr.zarrjava.v3.ArrayMetadata.CoreArrayMetadata;

import java.nio.ByteBuffer;

public interface BytesBytesCodec extends Codec {
public abstract class BytesBytesCodec extends Codec {

ByteBuffer encode(ByteBuffer chunkBytes, CoreArrayMetadata arrayMetadata)
throws ZarrException;
protected abstract ByteBuffer encode(ByteBuffer chunkBytes) throws ZarrException;

ByteBuffer decode(ByteBuffer chunkBytes, CoreArrayMetadata arrayMetadata)
throws ZarrException;
public abstract ByteBuffer decode(ByteBuffer chunkBytes) throws ZarrException;

}
19 changes: 16 additions & 3 deletions src/main/java/dev/zarr/zarrjava/v3/codec/Codec.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,22 @@
import dev.zarr.zarrjava.v3.ArrayMetadata;

@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "name")
public interface Codec {
public abstract class Codec {

long computeEncodedSize(long inputByteLength, ArrayMetadata.CoreArrayMetadata arrayMetadata)
throws ZarrException;
protected ArrayMetadata.CoreArrayMetadata arrayMetadata;

/**
 * Returns the core array metadata held in the {@code arrayMetadata} field.
 *
 * @return the stored metadata, never {@code null}
 * @throws ZarrException if the {@code arrayMetadata} field is still {@code null}
 */
protected ArrayMetadata.CoreArrayMetadata resolveArrayMetadata() throws ZarrException {
  final ArrayMetadata.CoreArrayMetadata resolved = this.arrayMetadata;
  if (resolved != null) {
    return resolved;
  }
  throw new ZarrException("arrayMetadata needs to get set in for every codec");
}

protected abstract long computeEncodedSize(long inputByteLength, ArrayMetadata.CoreArrayMetadata arrayMetadata)
throws ZarrException;

/**
 * Stores the given core array metadata in the {@code arrayMetadata} field.
 *
 * @param arrayMetadata the metadata to store on this codec
 * @throws ZarrException not thrown by this implementation; it is part of the declared
 *     signature (presumably so that overriding codecs can reject metadata; TODO confirm
 *     against the subclasses)
 */
public void setCoreArrayMetadata(ArrayMetadata.CoreArrayMetadata arrayMetadata)
    throws ZarrException {
  this.arrayMetadata = arrayMetadata;
}
}

36 changes: 23 additions & 13 deletions src/main/java/dev/zarr/zarrjava/v3/codec/CodecBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ public CodecBuilder withBlosc(
}

public CodecBuilder withBlosc(String cname, String shuffle, int clevel, int blockSize) {
if (shuffle.equals("shuffle")){
shuffle = "byteshuffle";
}
return withBlosc(Blosc.Compressor.fromString(cname), Blosc.Shuffle.fromString(shuffle), clevel,
dataType.getByteCount(), blockSize
);
Expand All @@ -62,13 +65,9 @@ public CodecBuilder withBlosc() {
return withBlosc("zstd");
}

public CodecBuilder withTranspose(String order) {
try {
public CodecBuilder withTranspose(int[] order) {
codecs.add(new TransposeCodec(new TransposeCodec.Configuration(order)));
} catch (ZarrException e) {
throw new RuntimeException(e);
}
return this;
return this;
}

public CodecBuilder withBytes(Endian endian) {
Expand Down Expand Up @@ -113,29 +112,40 @@ public CodecBuilder withZstd(int clevel) {
public CodecBuilder withSharding(int[] chunkShape) {
try {
codecs.add(
new ShardingIndexedCodec(new ShardingIndexedCodec.Configuration(chunkShape,
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE))},
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE)), new Crc32cCodec()})));
new ShardingIndexedCodec(new ShardingIndexedCodec.Configuration(chunkShape,
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE))},
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE)), new Crc32cCodec()},
"end")));
} catch (ZarrException e) {
throw new RuntimeException(e);
}
return this;
}

public CodecBuilder withSharding(int[] chunkShape,
Function<CodecBuilder, CodecBuilder> codecBuilder) {
Function<CodecBuilder, CodecBuilder> codecBuilder) {
return withSharding(chunkShape, codecBuilder, "end");
}

public CodecBuilder withSharding(int[] chunkShape,
Function<CodecBuilder, CodecBuilder> codecBuilder, String indexLocation) {
CodecBuilder nestedBuilder = new CodecBuilder(dataType);
try {
codecs.add(new ShardingIndexedCodec(
new ShardingIndexedCodec.Configuration(chunkShape,
codecBuilder.apply(nestedBuilder).build(),
new Codec[]{new BytesCodec(Endian.LITTLE), new Crc32cCodec()})));
new ShardingIndexedCodec.Configuration(chunkShape,
codecBuilder.apply(nestedBuilder).build(),
new Codec[]{new BytesCodec(Endian.LITTLE), new Crc32cCodec()},
indexLocation)));
} catch (ZarrException e) {
throw new RuntimeException(e);
}
return this;
}

/**
 * Appends a new {@code Crc32cCodec} to this builder's codec list.
 *
 * @return this builder, to allow call chaining
 */
public CodecBuilder withCrc32c() {
  final Crc32cCodec crc32c = new Crc32cCodec();
  codecs.add(crc32c);
  return this;
}
private void autoInsertBytesCodec() {
if (codecs.stream().noneMatch(c -> c instanceof ArrayBytesCodec)) {
Codec[] arrayArrayCodecs = codecs.stream().filter(c -> c instanceof ArrayArrayCodec)
Expand Down
Loading
Loading