Skip to content

Commit

Permalink
[h7Q1epCi] Support arbitrary value types in hash function
Browse files Browse the repository at this point in the history
Make hashing functions stable on all neo4j types.
These functions are only intended for strings,
but unfortunately their signature allows all types.
This is a safety net for users who pass values of other types.
  • Loading branch information
loveleif committed Jan 23, 2024
1 parent 6b6e24c commit 655f2cf
Show file tree
Hide file tree
Showing 2 changed files with 169 additions and 11 deletions.
58 changes: 47 additions & 11 deletions core/src/main/java/apoc/util/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,14 @@
*/
package apoc.util;

import static java.util.stream.Collectors.joining;
import static org.apache.commons.codec.binary.StringUtils.getBytesUtf8;

import java.security.MessageDigest;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.codec.digest.DigestUtils;
import org.neo4j.graphdb.TransactionTerminatedException;
import org.neo4j.procedure.*;
Expand All @@ -38,37 +43,32 @@ public class Utils {
@Description("Returns the SHA1 of the concatenation of all `STRING` values in the given `LIST<ANY>`.\n"
        + "SHA1 is a weak hashing algorithm which is unsuitable for cryptographic use-cases.")
public String sha1(@Name("values") List<Object> values) {
    // Delegate to hexHash so that every element is rendered through toHashString
    // instead of Object.toString(); the result is the hex-encoded SHA-1 digest.
    // The former "join toString() results, then sha1Hex" statements were removed:
    // they made this return unreachable.
    return hexHash(DigestUtils.getSha1Digest(), values);
}

/**
 * Hashes the given values with SHA-256.
 *
 * <p>The values are fed into the digest one after another by {@code hexHash}, which renders each
 * element through {@code toHashString} rather than {@code Object.toString()}.
 *
 * @param values the values to hash
 * @return the hex-encoded SHA-256 digest
 */
@UserFunction("apoc.util.sha256")
@Description("Returns the SHA256 of the concatenation of all `STRING` values in the given `LIST<ANY>`.")
public String sha256(@Name("values") List<Object> values) {
    return hexHash(DigestUtils.getSha256Digest(), values);
}

/**
 * Hashes the given values with SHA-384.
 *
 * <p>The values are fed into the digest one after another by {@code hexHash}, which renders each
 * element through {@code toHashString} rather than {@code Object.toString()}.
 *
 * @param values the values to hash
 * @return the hex-encoded SHA-384 digest
 */
@UserFunction("apoc.util.sha384")
@Description("Returns the SHA384 of the concatenation of all `STRING` values in the given `LIST<ANY>`.")
public String sha384(@Name("values") List<Object> values) {
    return hexHash(DigestUtils.getSha384Digest(), values);
}

/**
 * Hashes the given values with SHA-512.
 *
 * <p>The values are fed into the digest one after another by {@code hexHash}, which renders each
 * element through {@code toHashString} rather than {@code Object.toString()}.
 *
 * @param values the values to hash
 * @return the hex-encoded SHA-512 digest
 */
@UserFunction("apoc.util.sha512")
@Description("Returns the SHA512 of the concatenation of all `STRING` values in the `LIST<ANY>`.")
public String sha512(@Name("values") List<Object> values) {
    return hexHash(DigestUtils.getSha512Digest(), values);
}

/**
 * Hashes the given values with MD5.
 *
 * <p>The values are fed into the digest one after another by {@code hexHash}, which renders each
 * element through {@code toHashString} rather than {@code Object.toString()}.
 *
 * @param values the values to hash
 * @return the hex-encoded MD5 digest
 */
@UserFunction("apoc.util.md5")
@Description("Returns the MD5 checksum of the concatenation of all `STRING` values in the given `LIST<ANY>`.\n"
        + "MD5 is a weak hashing algorithm which is unsuitable for cryptographic use-cases.")
public String md5(@Name("values") List<Object> values) {
    return hexHash(DigestUtils.getMd5Digest(), values);
}

@Procedure("apoc.util.sleep")
Expand Down Expand Up @@ -134,4 +134,40 @@ public byte[] compress(
CompressionConfig conf = new CompressionConfig(config, CompressionAlgo.GZIP.name());
return CompressionAlgo.valueOf(conf.getCompressionAlgo()).compress(data, conf.getCharset());
}

/**
 * Feeds every value, in list order, into the supplied digest and returns the result as hex.
 *
 * <p>Each value is first turned into a string by {@code toHashString} and then encoded as UTF-8
 * before it is added to the digest.
 *
 * @param digest the message digest to update and finalise
 * @param values the values to hash
 * @return the hex encoding of the finished digest
 */
private static String hexHash(final MessageDigest digest, final List<Object> values) {
    values.forEach(element -> {
        final byte[] utf8 = getBytesUtf8(toHashString(element));
        digest.update(utf8);
    });
    final byte[] hash = digest.digest();
    return Hex.encodeHexString(hash);
}

/*
 * Renders a value as the string that is fed into the digest.
 *
 * This is not the most efficient way to produce a hash, but it is backwards compatible.
 * The hash functions are only intended to be used on strings (as documented on them),
 * but that is not how everyone is using them, so as a safety net this method produces a
 * stable rendering for other values as well:
 *
 *  - null             -> ""
 *  - String           -> the string itself
 *  - List             -> "[e1, e2, ...]" with every element rendered recursively
 *  - Map              -> "{k1=v1, k2=v2, ...}" sorted by key, values rendered recursively
 *  - Object[]         -> "[e1, e2, ...]" with every element rendered recursively
 *  - primitive arrays -> Arrays.toString
 *  - anything else    -> value.toString()
 *
 * Object[] elements are rendered recursively instead of via Arrays.toString, because
 * Arrays.toString falls back to String.valueOf for each element: a nested array would be
 * rendered as an identity string (for example "[I@1b2c3d") and a nested map in its own
 * iteration order, neither of which is stable. A null array element is still rendered as
 * "null", exactly as Arrays.toString did, so hashes of flat arrays are unchanged.
 */
private static String toHashString(Object value) {
    if (value == null) {
        return "";
    }
    if (value instanceof String string) {
        return string;
    }
    if (value instanceof List<?> list) {
        return list.stream().map(Utils::toHashString).collect(joining(", ", "[", "]"));
    }
    if (value instanceof Map<?, ?> map) {
        return map.entrySet().stream()
                // String.valueOf instead of toString() so a null key cannot throw.
                .map(e -> Map.entry(String.valueOf(e.getKey()), toHashString(e.getValue())))
                // Sort by key so the rendering does not depend on the map's iteration order.
                .sorted(Map.Entry.comparingByKey())
                .map(e -> e.getKey() + "=" + e.getValue())
                .collect(joining(", ", "{", "}"));
    }
    if (value instanceof Object[] objectArray) {
        return Arrays.stream(objectArray)
                .map(element -> element == null ? "null" : toHashString(element))
                .collect(joining(", ", "[", "]"));
    }
    if (value instanceof int[] intArray) {
        return Arrays.toString(intArray);
    }
    if (value instanceof long[] longArray) {
        return Arrays.toString(longArray);
    }
    if (value instanceof double[] doubleArray) {
        return Arrays.toString(doubleArray);
    }
    if (value instanceof short[] shortArray) {
        return Arrays.toString(shortArray);
    }
    if (value instanceof boolean[] boolArray) {
        return Arrays.toString(boolArray);
    }
    if (value instanceof byte[] byteArray) {
        return Arrays.toString(byteArray);
    }
    if (value instanceof float[] floatArray) {
        return Arrays.toString(floatArray);
    }
    if (value instanceof char[] charArray) {
        return Arrays.toString(charArray);
    }
    return value.toString();
}
}
122 changes: 122 additions & 0 deletions core/src/test/java/apoc/util/UtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,26 +20,36 @@

import static apoc.util.MapUtil.map;
import static apoc.util.TestUtil.testCallEmpty;
import static java.util.Arrays.stream;
import static java.util.stream.Collectors.toUnmodifiableMap;
import static java.util.stream.IntStream.range;
import static org.junit.Assert.*;

import java.util.ArrayList;
import java.util.Base64;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.hamcrest.Matchers;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.QueryExecutionException;
import org.neo4j.graphdb.Result;
import org.neo4j.graphdb.Transaction;
import org.neo4j.test.rule.DbmsRule;
import org.neo4j.test.rule.ImpermanentDbmsRule;
import org.neo4j.values.storable.RandomValues;
import org.neo4j.values.storable.ValueType;

/**
* @author mh
Expand Down Expand Up @@ -291,6 +301,118 @@ public void testMd5() {
r -> assertEquals("902fbdd2b1df0c4f70b4a5d23525e932", r.get("value")));
}

/** Pins the MD5 produced for a list that mixes strings, booleans, maps and nested lists. */
@Test
public void md5WithNestedTypes() {
    final String query =
            "RETURN apoc.util.md5(['ABC', true, {b: 'hej', a: [1, 2]}, ['hej', {b:'hå'}]]) AS value";
    final String expectedHash = "f0603bd38267c6e59c0cf6896f4b4bcd";
    TestUtil.testCall(db, query, row -> assertEquals(expectedHash, row.get("value")));
}

// One stability test per hash function; each delegates to hashIsStable with the
// function name that is substituted into `apoc.util.<name>`.

@Test
public void md5IsStableOnAllTypes() {
    hashIsStable("md5");
}

@Test
public void sha1IsStableOnAllTypes() {
    hashIsStable("sha1");
}

@Test
public void sha256IsStableOnAllTypes() {
    hashIsStable("sha256");
}

@Test
public void sha384IsStableOnAllTypes() {
    hashIsStable("sha384");
}

// apoc.util.sha512 goes through the same hashing helper as the functions above,
// so it gets the same stability coverage.
@Test
public void sha512IsStableOnAllTypes() {
    hashIsStable("sha512");
}

/**
 * Checks that {@code apoc.util.<hashFunc>} yields the same hash for the same value regardless of
 * how the value reaches the function: read from a node property, passed as a query parameter,
 * or passed as a whole parameter list.
 *
 * <p>One random value is generated per {@link ValueType}, using a random seed that is handed to
 * {@code assertStableHash} together with each query. The values are stored as properties
 * {@code p0..pN} on a single node, and all nodes are deleted again when the method finishes.
 *
 * @param hashFunc the function name substituted into {@code apoc.util.%s}, for example {@code "md5"}
 */
public void hashIsStable(String hashFunc) {
final var seed = new Random().nextLong();
final var rand = RandomValues.create(new Random(seed));
// One random value for every value type, in ValueType.values() order.
final var randStorables = stream(ValueType.values())
.map(t -> rand.nextValueOfType(t).asObject())
.toList();
// The same values keyed by the property names used on the node below ("p0", "p1", ...).
final var randMap =
range(0, randStorables.size()).boxed().collect(toUnmodifiableMap(i -> "p" + i, randStorables::get));

// Create node with random property values
try (final var tx = db.beginTx()) {
final var node = tx.createNode(Label.label("HashFunctionsAreStable"));
for (int i = randStorables.size() - 1; i >= 0; --i) {
node.setProperty("p" + i, randStorables.get(i));
}
tx.commit();
}

try (final var tx = db.beginTx()) {
// Single values: node property vs. parameter inside a list literal vs. parameter list.
for (int i = 0; i < randStorables.size(); ++i) {
final var value = randStorables.get(i);
final var query =
"""
match (n:HashFunctionsAreStable)
return
apoc.util.%s([n.%s]) as hash1,
apoc.util.%s([$value]) as hash2,
apoc.util.%s($list) as hash3
"""
.formatted(hashFunc, "p" + i, hashFunc, hashFunc);
final var params = Map.of("value", value, "list", List.of(value));
assertStableHash(seed, value, tx, query, params);
}

// Map: the node's property map vs. the equivalent map passed as a parameter.
final var mapQuery =
"""
match (n:HashFunctionsAreStable)
return
apoc.util.%s([properties(n)]) as hash1,
apoc.util.%s($value) as hash2
"""
.formatted(hashFunc, hashFunc);
assertStableHash(seed, randMap, tx, mapQuery, Map.of("value", List.of(randMap)));

// List: all properties as a list literal ("n.p0,n.p1,...") vs. the values as a parameter list.
final var listCypher = IntStream.range(0, randStorables.size())
.mapToObj(i -> "n.p" + i)
.collect(Collectors.joining(","));
final var listQuery =
"""
match (n:HashFunctionsAreStable)
return
apoc.util.%s([%s]) as hash1,
apoc.util.%s($value) as hash2
"""
.formatted(hashFunc, listCypher, hashFunc);
assertStableHash(seed, randStorables, tx, listQuery, Map.of("value", randStorables));

// Nested list: the same list wrapped in one more list on both sides.
final var listOfListsQuery =
"""
match (n:HashFunctionsAreStable)
return
apoc.util.%s([[%s]]) as hash1,
apoc.util.%s([$value]) as hash2
"""
.formatted(hashFunc, listCypher, hashFunc);
assertStableHash(seed, randStorables, tx, listOfListsQuery, Map.of("value", randStorables));
} finally {
// Remove the node created above (this deletes every node in the database).
db.executeTransactionally("cypher runtime=slotted match (n) detach delete n");
}
}

/**
 * Runs the query twice and asserts that both runs return the same single row and that every
 * column of that row holds the same value.
 *
 * @param seed the random seed, included in the failure message
 * @param val the value being hashed, included in the failure message
 * @param tx the transaction in which the query is executed
 * @param query the query to execute
 * @param params the parameters for the query
 */
private static void assertStableHash(
        long seed, Object val, Transaction tx, String query, Map<String, Object> params) {
    final List<Map<String, Object>> firstRun = tx.execute(query, params).stream().toList();
    final List<Map<String, Object>> secondRun = tx.execute(query, params).stream().toList();
    final String failureMessage = String.format("%s should have stable hash (seed=%s)", val, seed);

    assertEquals(failureMessage, firstRun, secondRun);
    assertEquals(1, firstRun.size());

    final Map<String, Object> row = firstRun.get(0);
    final Object reference = row.values().iterator().next();
    row.values().forEach(hash -> assertEquals(failureMessage, reference, hash));
}

@Test
public void testValidateFalse() {
TestUtil.testResult(db, "CALL apoc.util.validate(false,'message',null)", r -> assertEquals(false, r.hasNext()));
Expand Down

0 comments on commit 655f2cf

Please sign in to comment.