Skip to content

Commit

Permalink
Correct hashCode calculation.
Browse the repository at this point in the history
  • Loading branch information
Mark Hale committed Aug 7, 2024
1 parent f044ef3 commit 4c5a6e3
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ private IdSer createIdSer(ValueIdentifier id, boolean makeId, ByteArray ser, @No
if (serBytes == null) {
serBytes = ser.copyBytes();
}
id = rdfFactory.id(this, serBytes);
id = rdfFactory.getId(serBytes);
}
return new IdSer(id, ser, rdfFactory);
}
Expand Down
10 changes: 4 additions & 6 deletions common/src/main/java/com/msd/gin/halyard/common/RDFFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,7 @@ private RDFFactory(HalyardTableConfiguration halyardConfig) {
idSize = confIdSize;
}
int typeIndex = lessThan(lessThanOrEqual(greaterThanOrEqual(halyardConfig.getInt(TableConfig.ID_TYPE_INDEX), 0), Short.BYTES), idSize);
if (confIdJavaHash) {
greaterThanOrEqual(idSize, typeIndex + 1 + Integer.BYTES); // salt & typing bytes + 4 bytes for the Java int hash code
}
greaterThanOrEqual(idSize, ValueIdentifier.Format.minSize(typeIndex, confIdJavaHash));
ValueIdentifier.TypeNibble typeNibble = halyardConfig.getBoolean(TableConfig.ID_TYPE_NIBBLE) ? ValueIdentifier.TypeNibble.LITTLE_NIBBLE : ValueIdentifier.TypeNibble.BIG_NIBBLE;
idFormat = new ValueIdentifier.Format(confIdAlgo, idSize, typeIndex, typeNibble, confIdJavaHash);
LOGGER.info("Identifier format: {}", idFormat);
Expand Down Expand Up @@ -363,7 +361,7 @@ public ValueIdentifier id(Value v) {
id = idv.getId(this);
} else {
byte[] ser = valueWriter.toBytes(v);
id = id(v, ser);
id = getId(ser);
}
}
return id;
Expand All @@ -376,8 +374,8 @@ public ValueIdentifier id(byte[] idBytes) {
return new ValueIdentifier(idBytes);
}

ValueIdentifier id(Value v, byte[] ser) {
return idFormat.id(v, ser);
ValueIdentifier getId(byte[] ser) {
return idFormat.id(ser, valueReader);
}

public ValueIdentifier idFromString(String s) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ protected final ValueIdentifier calculateId() {
if (val instanceof IdentifiableValue) {
return ((IdentifiableValue) val).getId(rdfFactory);
} else {
return rdfFactory.id(val, getSerializedForm().copyBytes());
return rdfFactory.getId(getSerializedForm().copyBytes());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;

/**
* Immutable wrapper around a byte array identifier.
Expand Down Expand Up @@ -78,6 +79,11 @@ private TypeNibble(byte literalTypeBits, byte tripleTypeBits, byte iriTypeBits,
static final class Format implements Serializable {
private static final long serialVersionUID = -7777885367792871664L;

/**
 * Computes the minimum number of bytes an identifier needs for the given layout.
 *
 * @param typeIndex index of the typing byte; the bytes up to and including it
 *                  (salt &amp; typing bytes) are always required
 * @param hasJavaHash whether room for the Java int hash code must be reserved as well
 * @return {@code typeIndex + 1}, plus {@code Integer.BYTES} when {@code hasJavaHash} is set
 */
static int minSize(int typeIndex, boolean hasJavaHash) {
    // salt & typing bytes
    int required = typeIndex + 1;
    if (hasJavaHash) {
        // 4 bytes for the Java int hash code
        required += Integer.BYTES;
    }
    return required;
}

final String algorithm;
final int size;
final int typeIndex;
Expand All @@ -94,6 +100,9 @@ static final class Format implements Serializable {
* @param hasJavaHash indicates whether to include the Java hash as part of the ID.
*/
Format(String algorithm, int size, int typeIndex, TypeNibble typeNibble, boolean hasJavaHash) {
if (hasJavaHash && size < minSize(typeIndex, hasJavaHash)) {
throw new IllegalArgumentException("Size is too small");
}
this.size = size;
this.algorithm = algorithm;
this.typeIndex = typeIndex;
Expand Down Expand Up @@ -135,12 +144,13 @@ int getSaltSize() {
/**
* Thread-safe.
*/
ValueIdentifier id(Value v, byte[] ser) {
ValueIdentifier id(byte[] ser, ValueIO.Reader reader) {
byte[] hash = new byte[size];
if (hashFuncProvider != null) {
byte[] algoHash = hashFuncProvider.get().apply(ser);
System.arraycopy(algoHash, 0, hash, 0, algoHash.length);
}
Value v = reader.readValue(ByteBuffer.wrap(ser), SimpleValueFactory.getInstance());
if (hasJavaHash) {
int jhash = v.hashCode();
int i = size - 1;
Expand Down Expand Up @@ -336,11 +346,16 @@ public boolean equals(Object o) {
*/
@Override
public int hashCode() {
int h = 0;
for (int i = Math.min(idBytes.length - Integer.BYTES, 0); i < idBytes.length; i++) {
h = (h << 8) | (idBytes[i] & 0xFF);
if (idBytes.length >= Integer.BYTES) {
int i = idBytes.length - Integer.BYTES;
return (idBytes[i++] & 0xFF) << 24 | (idBytes[i++] & 0xFF) << 16 | (idBytes[i++] & 0xFF) << 8 | (idBytes[i++] & 0xFF);
} else {
int h = 0;
for (int i = Math.max(idBytes.length - Integer.BYTES, 0); i < idBytes.length; i++) {
h = (h << 8) | (idBytes[i] & 0xFF);
}
return h;
}
return h;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package com.msd.gin.halyard.common;

import com.msd.gin.halyard.common.ValueIdentifier.TypeNibble;

import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Tests for {@link ValueIdentifier} hash codes and for the size validation
 * performed when constructing a {@link ValueIdentifier.Format}.
 */
public class ValueIdentifierTest {
    /** Hash algorithm name passed to every format built by these tests. */
    private static final String ALGORITHM = "Murmur3-128";

    /** Creates the literal {@code "foobar"} that the hash code tests identify. */
    private static Literal sampleLiteral() {
        ValueFactory factory = SimpleValueFactory.getInstance();
        return factory.createLiteral("foobar");
    }

    /** Serialises the literal with the default writer and derives its identifier under the given format. */
    private static ValueIdentifier identify(ValueIdentifier.Format format, Literal literal) {
        return format.id(ValueIO.getDefaultWriter().toBytes(literal), ValueIO.getDefaultReader());
    }

    /**
     * A 5-byte format with type index 1 and the Java hash enabled must be rejected.
     */
    @Test
    public void testIDTooShort() {
        assertThrows(
            IllegalArgumentException.class,
            () -> new ValueIdentifier.Format(ALGORITHM, 5, 1, TypeNibble.BIG_NIBBLE, true));
    }

    /**
     * For a 3-byte identifier without the Java hash, the hash code is pinned to a known value.
     */
    @Test
    public void testHashCode_shortID() {
        ValueIdentifier.Format shortFormat =
            new ValueIdentifier.Format(ALGORITHM, 3, 1, TypeNibble.BIG_NIBBLE, false);
        ValueIdentifier identifier = identify(shortFormat, sampleLiteral());
        assertEquals(9124272, identifier.hashCode());
    }

    /**
     * For a 6-byte identifier with the Java hash enabled, the identifier's hash code
     * must equal the hash code of the literal it was derived from.
     */
    @Test
    public void testHashCode_longID() {
        ValueIdentifier.Format longFormat =
            new ValueIdentifier.Format(ALGORITHM, 6, 1, TypeNibble.BIG_NIBBLE, true);
        Literal literal = sampleLiteral();
        ValueIdentifier identifier = identify(longFormat, literal);
        assertEquals(literal.hashCode(), identifier.hashCode());
    }
}

0 comments on commit 4c5a6e3

Please sign in to comment.