Skip to content

Commit

Permalink
Reduce byte copies.
Browse files Browse the repository at this point in the history
  • Loading branch information
Mark Hale committed Aug 14, 2024
1 parent 36ebaf3 commit c50ec1a
Show file tree
Hide file tree
Showing 11 changed files with 52 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ public ByteBuffer writeTo(ByteBuffer bb) {
return bb.put(arr);
}

/**
 * Exposes the backing array {@code arr} as a read-only {@link ByteBuffer}.
 * The array is wrapped rather than copied.
 *
 * @return a read-only buffer over {@code arr}
 */
@Override
public ByteBuffer asReadOnlyBuffer() {
    ByteBuffer wrapped = ByteBuffer.wrap(arr);
    return wrapped.asReadOnlyBuffer();
}

@Override
public byte[] copyBytes() {
byte[] copy = new byte[arr.length];
System.arraycopy(arr, 0, copy, 0, arr.length);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ public abstract class ByteSequence {
public abstract ByteBuffer writeTo(ByteBuffer bb);
public abstract int size();

/**
 * Returns the contents of this sequence as a read-only {@link ByteBuffer}
 * that is ready to be read from the start.
 *
 * <p>This default implementation allocates a buffer of {@link #size()} bytes
 * and fills it via {@link #writeTo(ByteBuffer)}.
 *
 * @return a read-only buffer whose remaining bytes are the bytes written by
 *         {@code writeTo}
 */
public ByteBuffer asReadOnlyBuffer() {
    ByteBuffer bb = ByteBuffer.allocate(size());
    writeTo(bb);
    // Relative puts in writeTo() leave the position at the end of the data, and
    // asReadOnlyBuffer() copies position and limit. Without the flip the
    // returned buffer would have nothing remaining to read.
    bb.flip();
    return bb.asReadOnlyBuffer();
}

public byte[] copyBytes() {
byte[] copy = new byte[size()];
writeTo(ByteBuffer.wrap(copy));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,18 @@
import java.nio.ByteBuffer;

final class EmptyBytes extends ByteSequence {
private static final byte[] EMPTY_BYTES = new byte[0];

/**
 * Writes nothing, because this sequence has no bytes.
 *
 * @param bb the destination buffer, which is not touched
 * @return the same {@code bb} instance that was passed in
 */
@Override
public ByteBuffer writeTo(ByteBuffer bb) {
return bb;
}

/**
 * Returns the shared zero-length {@code EMPTY_BYTES} array instead of
 * allocating a new one on every call. A zero-length array has no elements
 * that could be modified, so sharing the instance is safe.
 *
 * @return the shared empty byte array
 */
@Override
public byte[] copyBytes() {
return EMPTY_BYTES;
}

@Override
public int size() {
return 0;
Expand Down
11 changes: 6 additions & 5 deletions common/src/main/java/com/msd/gin/halyard/common/Hashes.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.google.common.hash.Hashing;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
Expand Down Expand Up @@ -75,7 +76,7 @@ public static byte[] toBytes(String s) {
}


public static abstract class HashFunction implements Function<byte[],byte[]> {
public static abstract class HashFunction implements Function<ByteBuffer,byte[]> {
private final String name;
private final int size;

Expand All @@ -97,12 +98,12 @@ public final int size() {
}

@Override
public final byte[] apply(byte[] bb) {
public final byte[] apply(ByteBuffer bb) {
byte[] hash = calculateHash(bb);
return (size != hash.length) ? Arrays.copyOf(hash, size) : hash;
}

protected abstract byte[] calculateHash(byte[] bb);
protected abstract byte[] calculateHash(ByteBuffer bb);
}

static final class MessageDigestHashFunction extends HashFunction {
Expand All @@ -114,7 +115,7 @@ static final class MessageDigestHashFunction extends HashFunction {
}

@Override
protected byte[] calculateHash(byte[] bb) {
protected byte[] calculateHash(ByteBuffer bb) {
try {
md.update(bb);
return md.digest();
Expand All @@ -133,7 +134,7 @@ static final class GuavaHashFunction extends HashFunction {
}

@Override
protected byte[] calculateHash(byte[] bb) {
protected byte[] calculateHash(ByteBuffer bb) {
return hf.hashBytes(bb).asBytes();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -137,18 +137,11 @@ protected final int getEncodingType() {
}

private IdSer createIdSer(ValueIdentifier id, boolean makeId, ByteArray ser, @Nonnull RDFFactory rdfFactory) {
byte[] serBytes;
if (ser == null) {
serBytes = rdfFactory.valueWriter.toBytes(this);
ser = new ByteArray(serBytes);
} else {
serBytes = null;
ser = new ByteArray(rdfFactory.valueWriter.toBytes(this));
}
if (id == null && makeId) {
if (serBytes == null) {
serBytes = ser.copyBytes();
}
id = rdfFactory.getId(serBytes);
id = rdfFactory.getId(ser);
}
return new IdSer(id, ser, rdfFactory);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -357,14 +357,14 @@ public ValueIdentifier id(Value v) {
IdentifiableValue idv = (IdentifiableValue) v;
id = idv.getId(this);
} else {
byte[] ser = valueWriter.toBytes(v);
ByteArray ser = new ByteArray(valueWriter.toBytes(v));
id = getId(ser);
}
return id;
}
}

ValueIdentifier getId(byte[] ser) {
ValueIdentifier getId(ByteSequence ser) {
return idFormat.id(ser, valueReader);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ protected final ValueIdentifier calculateId() {
if (val instanceof IdentifiableValue) {
return ((IdentifiableValue) val).getId(rdfFactory);
} else {
return rdfFactory.getId(getSerializedForm().copyBytes());
return rdfFactory.getId(getSerializedForm());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,20 +146,21 @@ int getSaltSize() {
/**
* Thread-safe.
*/
ValueIdentifier id(byte[] ser, ValueIO.Reader reader) {
ValueIdentifier id(ByteSequence ser, ValueIO.Reader reader) {
byte[] hash = new byte[size];
ByteBuffer serbb = ser.asReadOnlyBuffer();
if (hashFuncProvider != null) {
byte[] algoHash = hashFuncProvider.get().apply(ser);
byte[] algoHash = hashFuncProvider.get().apply(serbb);
System.arraycopy(algoHash, 0, hash, 0, algoHash.length);
}
ByteBuffer bb = ByteBuffer.wrap(ser);
ValueType type = reader.getValueType(bb);
serbb.rewind();
ValueType type = reader.getValueType(serbb);
CoreDatatype datatype;
Value val = null;
if (type == ValueType.LITERAL) {
datatype = reader.getCoreDatatype(bb);
datatype = reader.getCoreDatatype(serbb);
if (datatype == null) {
val = reader.readValue(bb, valueFactory);
val = reader.readValue(serbb, valueFactory);
datatype = ((Literal)val).getCoreDatatype();
}
} else {
Expand All @@ -168,7 +169,7 @@ ValueIdentifier id(byte[] ser, ValueIO.Reader reader) {

if (hasJavaHash) {
if (val == null) {
val = reader.readValue(bb, valueFactory);
val = reader.readValue(serbb, valueFactory);
}
int jhash = val.hashCode();
int i = size - 1;
Expand Down Expand Up @@ -328,6 +329,11 @@ public ByteBuffer writeTo(ByteBuffer bb) {
return bb.put(idBytes);
}

/**
 * Exposes the identifier bytes as a read-only {@link ByteBuffer}.
 * {@code idBytes} is wrapped rather than copied.
 *
 * @return a read-only buffer over {@code idBytes}
 */
@Override
public ByteBuffer asReadOnlyBuffer() {
    ByteBuffer wrapped = ByteBuffer.wrap(idBytes);
    return wrapped.asReadOnlyBuffer();
}

/**
 * Copies a slice of the identifier bytes into the given buffer.
 *
 * @param offset index in {@code idBytes} of the first byte to copy
 * @param len number of bytes to copy
 * @param bb destination buffer, written at its current position
 * @return the same {@code bb} instance, for call chaining
 */
ByteBuffer writeSliceTo(int offset, int len, ByteBuffer bb) {
    // ByteBuffer.put(byte[], int, int) returns the buffer it was called on.
    bb.put(idBytes, offset, len);
    return bb;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import com.msd.gin.halyard.common.Hashes.HashFunction;

import java.nio.ByteBuffer;

import org.apache.hadoop.hbase.util.Bytes;
import org.junit.jupiter.api.Test;

Expand All @@ -19,11 +21,11 @@ public void testHash16() {
@Test
public void testMurmur3() {
HashFunction hf = Hashes.getHash("Murmur3-128", 0);
byte[] h128 = hf.apply(Bytes.toBytes("foobar"));
byte[] h128 = hf.apply(ByteBuffer.wrap(Bytes.toBytes("foobar")));
assertEquals(128/Byte.SIZE, h128.length);

hf = Hashes.getHash("Murmur3-128", 8);
byte[] h64 = hf.apply(Bytes.toBytes("foobar"));
byte[] h64 = hf.apply(ByteBuffer.wrap(Bytes.toBytes("foobar")));
assertEquals(8, h64.length);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public void testHashCode_shortID() {
ValueFactory vf = SimpleValueFactory.getInstance();
ValueIdentifier.Format f = new ValueIdentifier.Format("Murmur3-128", 3, 1, TypeNibble.BIG_NIBBLE, false);
Literal l = vf.createLiteral("foobar");
ValueIdentifier id = f.id(ValueIO.getDefaultWriter().toBytes(l), ValueIO.getDefaultReader());
ValueIdentifier id = f.id(new ByteArray(ValueIO.getDefaultWriter().toBytes(l)), ValueIO.getDefaultReader());
assertThrows(IllegalArgumentException.class, () ->
id.valueHashCode(f)
);
Expand All @@ -34,7 +34,7 @@ public void testHashCode_longID() {
ValueFactory vf = SimpleValueFactory.getInstance();
ValueIdentifier.Format f = new ValueIdentifier.Format("Murmur3-128", 6, 1, TypeNibble.BIG_NIBBLE, true);
Literal l = vf.createLiteral("foobar");
ValueIdentifier id = f.id(ValueIO.getDefaultWriter().toBytes(l), ValueIO.getDefaultReader());
ValueIdentifier id = f.id(new ByteArray(ValueIO.getDefaultWriter().toBytes(l)), ValueIO.getDefaultReader());
assertEquals(l.hashCode(), id.valueHashCode(f));
}

Expand All @@ -44,7 +44,7 @@ public void testHashCode_nonCanonical() {
Literal expected = vf.createLiteral("2018-04-06Z", XSD.DATE);
ValueIdentifier.Format f = new ValueIdentifier.Format("Murmur3-128", 6, 1, TypeNibble.BIG_NIBBLE, true);
Literal l = vf.createLiteral("2018-04-06+00:00", XSD.DATE);
ValueIdentifier id = f.id(ValueIO.getDefaultWriter().toBytes(l), ValueIO.getDefaultReader());
ValueIdentifier id = f.id(new ByteArray(ValueIO.getDefaultWriter().toBytes(l)), ValueIO.getDefaultReader());
assertEquals(expected.hashCode(), id.valueHashCode(f));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import java.io.OutputStream;
import java.math.BigInteger;
import java.net.URI;
import java.nio.ByteBuffer;
import java.text.MessageFormat;
import java.util.BitSet;
import java.util.Collections;
Expand Down Expand Up @@ -453,7 +454,7 @@ public void reduce(ImmutableBytesWritable key, Iterable<LongWritable> values, Co
write(sliceRPred, RDFS.LABEL, SVF.createLiteral("slice rdfs:range with cardinality " + cardinality));
write(sliceRPred, CARDINALITY, SVF.createLiteral(BigInteger.valueOf(cardinality)));
}
IRI generatedRoot = SVF.createIRI(NAMESPACE, ByteUtils.encode(keyHash.apply(key.copyBytes())));
IRI generatedRoot = SVF.createIRI(NAMESPACE, ByteUtils.encode(keyHash.apply(ByteBuffer.wrap(key.copyBytes()))));
write(generatedRoot, slicePPred, firstKey);
write(generatedRoot, sliceDPred, SVF.createIRI(dis.readUTF()));
write(generatedRoot, sliceRPred, SVF.createIRI(dis.readUTF()));
Expand Down

0 comments on commit c50ec1a

Please sign in to comment.