Skip to content

Commit

Permalink
Merge pull request #2 from scalableminds/fix-sharding-codec
Browse files Browse the repository at this point in the history
Fix sharding codec
  • Loading branch information
brokkoli71 authored May 29, 2024
2 parents d2d3e04 + 4f9dc72 commit e51ac9d
Show file tree
Hide file tree
Showing 1,631 changed files with 316 additions and 564,032 deletions.
19 changes: 15 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,41 @@ jobs:
build:
strategy:
matrix:
os: [ ubuntu, windows, macos ]
os: [ ubuntu-latest, windows-latest, macos-latest ]
fail-fast: false
runs-on: ${{ matrix.os }}-latest
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash

steps:
- uses: actions/checkout@v3

- name: Set up JDK
uses: actions/setup-java@v3
with:
java-version: '22'
distribution: 'temurin'
cache: maven

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.11'

- name: Install zarrita
run: |
python -m venv venv_zarrita
if [ "${{ runner.os }}" = "Windows" ]; then venv_zarrita/Scripts/pip install zarrita; else venv_zarrita/bin/pip install zarrita; fi
- name: Download blosc jar
run: |
mkdir -p ../blosc-java/target
curl https://static.webknossos.org/misc/blosc-java-0.1-1.21.4-SNAPSHOT.jar -o ../blosc-java/target/blosc-java-0.1-1.21.4-SNAPSHOT.jar
- name: Download testdata
run: |
mkdir testdata testoutput
mkdir testoutput
curl https://static.webknossos.org/data/zarr_v3/l4_sample.zip -o testdata/l4_sample.zip
cd testdata
unzip l4_sample.zip
Expand All @@ -45,7 +56,7 @@ jobs:
- name: Test
env:
MAVEN_OPTS: "-Xmx6g"
run: mvn test -DargLine="-Xmx6g"
run: mvn --no-transfer-progress test -DargLine="-Xmx6g"

- name: Assemble JAR
run: mvn package -DskipTests
Expand Down
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,6 @@ build/


### Custom ###
/testdata
/testdata/l4_sample
/testoutput
/venv_zarrita
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,19 @@ array.write(
ucar.ma2.Array.factory(ucar.ma2.DataType.UINT, new int[]{1, 1024, 1024, 1024})
);
```
## Development Start-Guide

### Run Tests Locally
To be able to run the tests locally, make sure to have `python3.11` installed.
Also, you need to set up a venv for zarrita at the root of the project:
`python3.11 -m venv venv_zarrita`.

Then install zarrita there with `venv_zarrita/Scripts/pip install zarrita`
for Windows and `venv_zarrita/bin/pip install zarrita` for Linux.

Furthermore, you will need the `l4_sample` test data:

`curl https://static.webknossos.org/data/zarr_v3/l4_sample.zip -o testdata/l4_sample.zip
&& cd testdata
&& unzip l4_sample.zip
`
34 changes: 29 additions & 5 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,31 @@
<aws.version>1.12.477</aws.version>
<netcdfJavaVersion>5.5.3</netcdfJavaVersion>
<zstdVersion>1.5.5-5</zstdVersion>
<junit-jupiter-params.version>5.10.2</junit-jupiter-params.version>
<junit-jupiter-version>5.10.2</junit-jupiter-version>
</properties>

<dependencies>
<!-- https://mvnrepository.com/artifact/org.junit.jupiter/junit-jupiter-params -->
<!-- JUnit 5 dependencies -->
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<version>${junit-jupiter-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-engine</artifactId>
<version>${junit-jupiter-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-params</artifactId>
<version>${junit-jupiter-params.version}</version>
<version>${junit-jupiter-version}</version>
<scope>test</scope>
</dependency>

<!-- Other dependencies -->
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
Expand Down Expand Up @@ -62,6 +76,7 @@
<artifactId>okhttp</artifactId>
<version>2.7.5</version>
</dependency>
<!-- JUnit 4 dependency for backward compatibility if needed -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand All @@ -79,6 +94,15 @@
</repositories>

<build>
<testSourceDirectory>src/test/java/dev/zarr/zarrjava</testSourceDirectory>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>3.2.5</version>
<configuration>
<useSystemClassLoader>false</useSystemClassLoader>
</configuration>
</plugin>
</plugins>
</build>
</project>
</project>
21 changes: 14 additions & 7 deletions src/main/java/dev/zarr/zarrjava/v3/codec/CodecBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -109,23 +109,30 @@ public CodecBuilder withZstd(int clevel) {
public CodecBuilder withSharding(int[] chunkShape) {
try {
codecs.add(
new ShardingIndexedCodec(new ShardingIndexedCodec.Configuration(chunkShape,
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE))},
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE)), new Crc32cCodec()})));
new ShardingIndexedCodec(new ShardingIndexedCodec.Configuration(chunkShape,
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE))},
new Codec[]{new BytesCodec(new Configuration(Endian.LITTLE)), new Crc32cCodec()},
"end")));
} catch (ZarrException e) {
throw new RuntimeException(e);
}
return this;
}

public CodecBuilder withSharding(int[] chunkShape,
Function<CodecBuilder, CodecBuilder> codecBuilder) {
Function<CodecBuilder, CodecBuilder> codecBuilder) {
return withSharding(chunkShape, codecBuilder, "end");
}

public CodecBuilder withSharding(int[] chunkShape,
Function<CodecBuilder, CodecBuilder> codecBuilder, String indexLocation) {
CodecBuilder nestedBuilder = new CodecBuilder(dataType);
try {
codecs.add(new ShardingIndexedCodec(
new ShardingIndexedCodec.Configuration(chunkShape,
codecBuilder.apply(nestedBuilder).build(),
new Codec[]{new BytesCodec(Endian.LITTLE), new Crc32cCodec()})));
new ShardingIndexedCodec.Configuration(chunkShape,
codecBuilder.apply(nestedBuilder).build(),
new Codec[]{new BytesCodec(Endian.LITTLE), new Crc32cCodec()},
indexLocation)));
} catch (ZarrException e) {
throw new RuntimeException(e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,11 @@ public ByteBuffer encode(final Array shardArray) throws ZarrException {
final ByteBuffer chunkBytes = codecPipeline.encode(chunkArray);
synchronized (chunkBytesList) {
int chunkByteOffset = chunkBytesList.stream()
.mapToInt(ByteBuffer::capacity)
.sum();
.mapToInt(ByteBuffer::capacity)
.sum();
if (configuration.indexLocation.equals("start")) {
chunkByteOffset += (int) getShardIndexSize(arrayMetadata);
}
setValueFromShardIndexArray(shardIndexArray, chunkCoords, 0, chunkByteOffset);
setValueFromShardIndexArray(shardIndexArray, chunkCoords, 1,
chunkBytes.capacity());
Expand All @@ -149,11 +152,15 @@ public ByteBuffer encode(final Array shardArray) throws ZarrException {
.mapToInt(ByteBuffer::capacity)
.sum() + (int) getShardIndexSize(arrayMetadata);
final ByteBuffer shardBytes = ByteBuffer.allocate(shardBytesLength);
if(configuration.indexLocation.equals("start")){
shardBytes.put(indexCodecPipeline.encode(shardIndexArray));
}
for (final ByteBuffer chunkBytes : chunkBytesList) {
shardBytes.put(chunkBytes);
}
shardBytes.put(
indexCodecPipeline.encode(shardIndexArray));
if(configuration.indexLocation.equals("end")){
shardBytes.put(indexCodecPipeline.encode(shardIndexArray));
}
shardBytes.rewind();
return shardBytes;
}
Expand All @@ -179,8 +186,14 @@ private Array decodeInternal(

final Array outputArray = Array.factory(arrayMetadata.dataType.getMA2DataType(), shape);
final int shardIndexByteLength = (int) getShardIndexSize(arrayMetadata);
ByteBuffer shardIndexBytes = dataProvider.readSuffix(shardIndexByteLength);

ByteBuffer shardIndexBytes;
if (this.configuration.indexLocation.equals("start")) {
shardIndexBytes = dataProvider.readPrefix(shardIndexByteLength);
}else if(this.configuration.indexLocation.equals("end")){
shardIndexBytes = dataProvider.readSuffix(shardIndexByteLength);
}else{
throw new ZarrException("Only index_location \"start\" or \"end\" are supported.");
}
if (shardIndexBytes == null) {
throw new ZarrException("Could not read shard index.");
}
Expand Down Expand Up @@ -243,6 +256,8 @@ interface DataProvider {
ByteBuffer read(long start, long length);

ByteBuffer readSuffix(long suffixLength);

ByteBuffer readPrefix(long prefixLength);
}

public static final class Configuration {
Expand All @@ -255,16 +270,27 @@ public static final class Configuration {
@Nonnull
@JsonProperty("index_codecs")
public final Codec[] indexCodecs;
@Nonnull
@JsonProperty("index_location")
public String indexLocation;

@JsonCreator(mode = JsonCreator.Mode.PROPERTIES)
public Configuration(
@JsonProperty(value = "chunk_shape", required = true) int[] chunkShape,
@Nonnull @JsonProperty("codecs") Codec[] codecs,
@Nonnull @JsonProperty("index_codecs") Codec[] indexCodecs
) {
@JsonProperty(value = "chunk_shape", required = true) int[] chunkShape,
@Nonnull @JsonProperty("codecs") Codec[] codecs,
@Nonnull @JsonProperty("index_codecs") Codec[] indexCodecs,
@JsonProperty(value = "index_location", defaultValue = "end") String indexLocation
) throws ZarrException {
if (indexLocation == null) {
indexLocation = "end";
}
if (!indexLocation.equals("start") && !indexLocation.equals("end")) {
throw new ZarrException("Only index_location \"start\" or \"end\" are supported.");
}
this.chunkShape = chunkShape;
this.codecs = codecs;
this.indexCodecs = indexCodecs;
this.indexLocation = indexLocation;
}
}

Expand All @@ -285,6 +311,12 @@ public ByteBuffer readSuffix(long suffixLength) {
return bufferSlice.slice();
}

public ByteBuffer readPrefix(long prefixLength) {
ByteBuffer bufferSlice = buffer.slice();
bufferSlice.limit((int) (prefixLength));
return bufferSlice.slice();
}

@Override
public ByteBuffer read(long start, long length) {
ByteBuffer bufferSlice = buffer.slice();
Expand All @@ -309,6 +341,11 @@ public ByteBuffer readSuffix(long suffixLength) {
return storeHandle.read(-suffixLength);
}

@Override
public ByteBuffer readPrefix(long prefixLength) {
return storeHandle.read(0, prefixLength);
}

@Override
public ByteBuffer read(long start, long length) {
return storeHandle.read(start, start + length);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public ByteBuffer encode(ByteBuffer chunkBytes)
zstdStream.close();
return ByteBuffer.wrap(outputStream.toByteArray());
} catch (IOException ex) {
throw new ZarrException("Error in decoding zstd.", ex);
throw new ZarrException("Error in encoding zstd.", ex);
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/test/java/dev/zarr/zarrjava/TestUtils.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
package dev.zarr.zarrjava;


import org.junit.Test;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.util.Arrays;

Expand Down
Loading

0 comments on commit e51ac9d

Please sign in to comment.