diff --git a/common/src/main/java/com/msd/gin/halyard/common/ByteArray.java b/common/src/main/java/com/msd/gin/halyard/common/ByteArray.java index eceff9ade..c717b14ff 100644 --- a/common/src/main/java/com/msd/gin/halyard/common/ByteArray.java +++ b/common/src/main/java/com/msd/gin/halyard/common/ByteArray.java @@ -27,6 +27,12 @@ public ByteBuffer writeTo(ByteBuffer bb) { return bb.put(arr); } + @Override + public ByteBuffer asReadOnlyBuffer() { + return ByteBuffer.wrap(arr).asReadOnlyBuffer(); + } + + @Override public byte[] copyBytes() { byte[] copy = new byte[arr.length]; System.arraycopy(arr, 0, copy, 0, arr.length); diff --git a/common/src/main/java/com/msd/gin/halyard/common/ByteSequence.java b/common/src/main/java/com/msd/gin/halyard/common/ByteSequence.java index 347191394..876dda8d1 100644 --- a/common/src/main/java/com/msd/gin/halyard/common/ByteSequence.java +++ b/common/src/main/java/com/msd/gin/halyard/common/ByteSequence.java @@ -12,6 +12,15 @@ public abstract class ByteSequence { public abstract ByteBuffer writeTo(ByteBuffer bb); public abstract int size(); + /** Returns a read-only buffer over a copy of this sequence's bytes, positioned at the first byte. */ + public ByteBuffer asReadOnlyBuffer() { + ByteBuffer bb = ByteBuffer.allocate(size()); + writeTo(bb); + /* writeTo() advances the position with relative puts; flip so the read-only view exposes the written bytes instead of zero remaining bytes */ + bb.flip(); + return bb.asReadOnlyBuffer(); + } + public byte[] copyBytes() { byte[] copy = new byte[size()]; writeTo(ByteBuffer.wrap(copy)); diff --git a/common/src/main/java/com/msd/gin/halyard/common/EmptyBytes.java b/common/src/main/java/com/msd/gin/halyard/common/EmptyBytes.java index d14694161..8ce238368 100644 --- a/common/src/main/java/com/msd/gin/halyard/common/EmptyBytes.java +++ b/common/src/main/java/com/msd/gin/halyard/common/EmptyBytes.java @@ -3,11 +3,18 @@ import java.nio.ByteBuffer; final class EmptyBytes extends ByteSequence { + private static final byte[] EMPTY_BYTES = new byte[0]; + @Override public ByteBuffer writeTo(ByteBuffer bb) { return bb; } + @Override + public byte[] copyBytes() { + return EMPTY_BYTES; + } + @Override public int size() { return 0; diff --git 
a/common/src/main/java/com/msd/gin/halyard/common/Hashes.java b/common/src/main/java/com/msd/gin/halyard/common/Hashes.java index 483586fb9..5b2b6f022 100644 --- a/common/src/main/java/com/msd/gin/halyard/common/Hashes.java +++ b/common/src/main/java/com/msd/gin/halyard/common/Hashes.java @@ -2,6 +2,7 @@ import com.google.common.hash.Hashing; +import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -75,7 +76,7 @@ public static byte[] toBytes(String s) { } - public static abstract class HashFunction implements Function<byte[],byte[]> { + public static abstract class HashFunction implements Function<ByteBuffer,byte[]> { private final String name; private final int size; @@ -97,12 +98,12 @@ public final int size() { } @Override - public final byte[] apply(byte[] bb) { + public final byte[] apply(ByteBuffer bb) { byte[] hash = calculateHash(bb); return (size != hash.length) ? Arrays.copyOf(hash, size) : hash; } - protected abstract byte[] calculateHash(byte[] bb); + protected abstract byte[] calculateHash(ByteBuffer bb); } static final class MessageDigestHashFunction extends HashFunction { @@ -114,7 +115,7 @@ static final class MessageDigestHashFunction extends HashFunction { } @Override - protected byte[] calculateHash(byte[] bb) { + protected byte[] calculateHash(ByteBuffer bb) { try { md.update(bb); return md.digest(); @@ -133,7 +134,7 @@ static final class GuavaHashFunction extends HashFunction { } @Override - protected byte[] calculateHash(byte[] bb) { + protected byte[] calculateHash(ByteBuffer bb) { return hf.hashBytes(bb).asBytes(); } } diff --git a/common/src/main/java/com/msd/gin/halyard/common/IdentifiableValue.java b/common/src/main/java/com/msd/gin/halyard/common/IdentifiableValue.java index b434384aa..6ac285efb 100644 --- a/common/src/main/java/com/msd/gin/halyard/common/IdentifiableValue.java +++ b/common/src/main/java/com/msd/gin/halyard/common/IdentifiableValue.java @@ -137,18 +137,11 @@ 
protected final int getEncodingType() { } private IdSer createIdSer(ValueIdentifier id, boolean makeId, ByteArray ser, @Nonnull RDFFactory rdfFactory) { - byte[] serBytes; if (ser == null) { - serBytes = rdfFactory.valueWriter.toBytes(this); - ser = new ByteArray(serBytes); - } else { - serBytes = null; + ser = new ByteArray(rdfFactory.valueWriter.toBytes(this)); } if (id == null && makeId) { - if (serBytes == null) { - serBytes = ser.copyBytes(); - } - id = rdfFactory.getId(serBytes); + id = rdfFactory.getId(ser); } return new IdSer(id, ser, rdfFactory); } diff --git a/common/src/main/java/com/msd/gin/halyard/common/RDFFactory.java b/common/src/main/java/com/msd/gin/halyard/common/RDFFactory.java index a09e89d81..c1199642f 100644 --- a/common/src/main/java/com/msd/gin/halyard/common/RDFFactory.java +++ b/common/src/main/java/com/msd/gin/halyard/common/RDFFactory.java @@ -357,14 +357,14 @@ public ValueIdentifier id(Value v) { IdentifiableValue idv = (IdentifiableValue) v; id = idv.getId(this); } else { - byte[] ser = valueWriter.toBytes(v); + ByteArray ser = new ByteArray(valueWriter.toBytes(v)); id = getId(ser); } return id; } } - ValueIdentifier getId(byte[] ser) { + ValueIdentifier getId(ByteSequence ser) { return idFormat.id(ser, valueReader); } diff --git a/common/src/main/java/com/msd/gin/halyard/common/RDFValue.java b/common/src/main/java/com/msd/gin/halyard/common/RDFValue.java index 2af3f0e94..337855bd7 100644 --- a/common/src/main/java/com/msd/gin/halyard/common/RDFValue.java +++ b/common/src/main/java/com/msd/gin/halyard/common/RDFValue.java @@ -52,7 +52,7 @@ protected final ValueIdentifier calculateId() { if (val instanceof IdentifiableValue) { return ((IdentifiableValue) val).getId(rdfFactory); } else { - return rdfFactory.getId(getSerializedForm().copyBytes()); + return rdfFactory.getId(getSerializedForm()); } } diff --git a/common/src/main/java/com/msd/gin/halyard/common/ValueIdentifier.java 
b/common/src/main/java/com/msd/gin/halyard/common/ValueIdentifier.java index c36703c37..ddf81cd78 100644 --- a/common/src/main/java/com/msd/gin/halyard/common/ValueIdentifier.java +++ b/common/src/main/java/com/msd/gin/halyard/common/ValueIdentifier.java @@ -146,20 +146,21 @@ int getSaltSize() { /** * Thread-safe. */ - ValueIdentifier id(byte[] ser, ValueIO.Reader reader) { + ValueIdentifier id(ByteSequence ser, ValueIO.Reader reader) { byte[] hash = new byte[size]; + ByteBuffer serbb = ser.asReadOnlyBuffer(); if (hashFuncProvider != null) { - byte[] algoHash = hashFuncProvider.get().apply(ser); + byte[] algoHash = hashFuncProvider.get().apply(serbb); System.arraycopy(algoHash, 0, hash, 0, algoHash.length); } - ByteBuffer bb = ByteBuffer.wrap(ser); - ValueType type = reader.getValueType(bb); + serbb.rewind(); + ValueType type = reader.getValueType(serbb); CoreDatatype datatype; Value val = null; if (type == ValueType.LITERAL) { - datatype = reader.getCoreDatatype(bb); + datatype = reader.getCoreDatatype(serbb); if (datatype == null) { - val = reader.readValue(bb, valueFactory); + val = reader.readValue(serbb, valueFactory); datatype = ((Literal)val).getCoreDatatype(); } } else { @@ -168,7 +169,7 @@ ValueIdentifier id(byte[] ser, ValueIO.Reader reader) { if (hasJavaHash) { if (val == null) { - val = reader.readValue(bb, valueFactory); + val = reader.readValue(serbb, valueFactory); } int jhash = val.hashCode(); int i = size - 1; @@ -328,6 +329,11 @@ public ByteBuffer writeTo(ByteBuffer bb) { return bb.put(idBytes); } + @Override + public ByteBuffer asReadOnlyBuffer() { + return ByteBuffer.wrap(idBytes).asReadOnlyBuffer(); + } + ByteBuffer writeSliceTo(int offset, int len, ByteBuffer bb) { return bb.put(idBytes, offset, len); } diff --git a/common/src/test/java/com/msd/gin/halyard/common/HashesTest.java b/common/src/test/java/com/msd/gin/halyard/common/HashesTest.java index 705d60bcc..2f359a273 100644 --- 
a/common/src/test/java/com/msd/gin/halyard/common/HashesTest.java +++ b/common/src/test/java/com/msd/gin/halyard/common/HashesTest.java @@ -2,6 +2,8 @@ import com.msd.gin.halyard.common.Hashes.HashFunction; +import java.nio.ByteBuffer; + import org.apache.hadoop.hbase.util.Bytes; import org.junit.jupiter.api.Test; @@ -19,11 +21,11 @@ public void testHash16() { @Test public void testMurmur3() { HashFunction hf = Hashes.getHash("Murmur3-128", 0); - byte[] h128 = hf.apply(Bytes.toBytes("foobar")); + byte[] h128 = hf.apply(ByteBuffer.wrap(Bytes.toBytes("foobar"))); assertEquals(128/Byte.SIZE, h128.length); hf = Hashes.getHash("Murmur3-128", 8); - byte[] h64 = hf.apply(Bytes.toBytes("foobar")); + byte[] h64 = hf.apply(ByteBuffer.wrap(Bytes.toBytes("foobar"))); assertEquals(8, h64.length); } diff --git a/common/src/test/java/com/msd/gin/halyard/common/ValueIdentifierTest.java b/common/src/test/java/com/msd/gin/halyard/common/ValueIdentifierTest.java index 58c100e16..e883f7be3 100644 --- a/common/src/test/java/com/msd/gin/halyard/common/ValueIdentifierTest.java +++ b/common/src/test/java/com/msd/gin/halyard/common/ValueIdentifierTest.java @@ -23,7 +23,7 @@ public void testHashCode_shortID() { ValueFactory vf = SimpleValueFactory.getInstance(); ValueIdentifier.Format f = new ValueIdentifier.Format("Murmur3-128", 3, 1, TypeNibble.BIG_NIBBLE, false); Literal l = vf.createLiteral("foobar"); - ValueIdentifier id = f.id(ValueIO.getDefaultWriter().toBytes(l), ValueIO.getDefaultReader()); + ValueIdentifier id = f.id(new ByteArray(ValueIO.getDefaultWriter().toBytes(l)), ValueIO.getDefaultReader()); assertThrows(IllegalArgumentException.class, () -> id.valueHashCode(f) ); @@ -34,7 +34,7 @@ public void testHashCode_longID() { ValueFactory vf = SimpleValueFactory.getInstance(); ValueIdentifier.Format f = new ValueIdentifier.Format("Murmur3-128", 6, 1, TypeNibble.BIG_NIBBLE, true); Literal l = vf.createLiteral("foobar"); - ValueIdentifier id = 
f.id(ValueIO.getDefaultWriter().toBytes(l), ValueIO.getDefaultReader()); + ValueIdentifier id = f.id(new ByteArray(ValueIO.getDefaultWriter().toBytes(l)), ValueIO.getDefaultReader()); assertEquals(l.hashCode(), id.valueHashCode(f)); } @@ -44,7 +44,7 @@ public void testHashCode_nonCanonical() { Literal expected = vf.createLiteral("2018-04-06Z", XSD.DATE); ValueIdentifier.Format f = new ValueIdentifier.Format("Murmur3-128", 6, 1, TypeNibble.BIG_NIBBLE, true); Literal l = vf.createLiteral("2018-04-06+00:00", XSD.DATE); - ValueIdentifier id = f.id(ValueIO.getDefaultWriter().toBytes(l), ValueIO.getDefaultReader()); + ValueIdentifier id = f.id(new ByteArray(ValueIO.getDefaultWriter().toBytes(l)), ValueIO.getDefaultReader()); assertEquals(expected.hashCode(), id.valueHashCode(f)); } } diff --git a/tools/src/main/java/com/msd/gin/halyard/tools/HalyardSummary.java b/tools/src/main/java/com/msd/gin/halyard/tools/HalyardSummary.java index 6cc202943..4606501db 100644 --- a/tools/src/main/java/com/msd/gin/halyard/tools/HalyardSummary.java +++ b/tools/src/main/java/com/msd/gin/halyard/tools/HalyardSummary.java @@ -36,6 +36,7 @@ import java.io.OutputStream; import java.math.BigInteger; import java.net.URI; +import java.nio.ByteBuffer; import java.text.MessageFormat; import java.util.BitSet; import java.util.Collections; @@ -453,7 +454,7 @@ public void reduce(ImmutableBytesWritable key, Iterable values, Co write(sliceRPred, RDFS.LABEL, SVF.createLiteral("slice rdfs:range with cardinality " + cardinality)); write(sliceRPred, CARDINALITY, SVF.createLiteral(BigInteger.valueOf(cardinality))); } - IRI generatedRoot = SVF.createIRI(NAMESPACE, ByteUtils.encode(keyHash.apply(key.copyBytes()))); + IRI generatedRoot = SVF.createIRI(NAMESPACE, ByteUtils.encode(keyHash.apply(ByteBuffer.wrap(key.copyBytes())))); write(generatedRoot, slicePPred, firstKey); write(generatedRoot, sliceDPred, SVF.createIRI(dis.readUTF())); write(generatedRoot, sliceRPred, SVF.createIRI(dis.readUTF()));