diff --git a/atlas-core/src/main/scala/com/netflix/atlas/core/model/ItemId.scala b/atlas-core/src/main/scala/com/netflix/atlas/core/model/ItemId.scala index 4fe3aae30..46ee96323 100644 --- a/atlas-core/src/main/scala/com/netflix/atlas/core/model/ItemId.scala +++ b/atlas-core/src/main/scala/com/netflix/atlas/core/model/ItemId.scala @@ -16,28 +16,37 @@ package com.netflix.atlas.core.model import java.math.BigInteger - import com.netflix.atlas.core.util.Strings -import scala.util.hashing.MurmurHash3 +import java.lang.invoke.MethodHandles +import java.nio.ByteOrder /** - * Represents an identifier for a tagged item. + * Represents an identifier for a tagged item. If using for a hash map the + * bytes used for the id should come from a decent hash function as 4 bytes + * from the middle are used for the hash code of the id object. * * @param data * Bytes for the id. This is usually the results of computing a SHA1 hash * over a normalized representation of the tags. - * @param hc - * Precomputed hash code for the bytes. */ -class ItemId private (private val data: Array[Byte], private val hc: Int) - extends Comparable[ItemId] { +class ItemId private (private val data: Array[Byte]) extends Comparable[ItemId] { + + // Typically it should be 20 bytes for SHA1. Require at least 16 to avoid + // checks for other operations. + require(data.length >= 16) - override def hashCode(): Int = hc + override def hashCode(): Int = { + // Choose middle byte. The id should be generated using decent hash + // function so in theory any subset will do. In some cases data is + // routed based on the prefix or a modulo of the intValue. Choosing a + // byte toward the middle helps to mitigate that. + ItemId.intHandle.get(data, 12) + } override def equals(obj: Any): Boolean = { obj match { - case other: ItemId => hc == other.hc && java.util.Arrays.equals(data, other.data) + case other: ItemId => java.util.Arrays.equals(data, other.data) case _ => false } } @@ -60,21 +69,16 @@ class ItemId private (private val data: Array[Byte], private val hc: Int) def toBigInteger: BigInteger = new BigInteger(1, data) def intValue: Int = { - var result = 0 - val end = math.max(0, data.length - 4) - var i = data.length - 1 - var shift = 0 - while (i >= end) { - result |= (data(i) & 0xFF) << shift - i -= 1 - shift += 8 - } - result + ItemId.intHandle.get(data, data.length - 4) } } object ItemId { + // Helper to access integer from byte array + private val intHandle = + MethodHandles.byteArrayViewVarHandle(classOf[Array[Int]], ByteOrder.BIG_ENDIAN) + private val hexValueForByte = (0 until 256).toArray.map { i => Strings.zeroPad(i, 2) } @@ -84,7 +88,7 @@ object ItemId { * using MurmurHash3. */ def apply(data: Array[Byte]): ItemId = { - new ItemId(data, MurmurHash3.bytesHash(data)) + new ItemId(data) } /** diff --git a/atlas-core/src/test/scala/com/netflix/atlas/core/model/ItemIdSuite.scala b/atlas-core/src/test/scala/com/netflix/atlas/core/model/ItemIdSuite.scala index 9d18777e6..2e30fd3fc 100644 --- a/atlas-core/src/test/scala/com/netflix/atlas/core/model/ItemIdSuite.scala +++ b/atlas-core/src/test/scala/com/netflix/atlas/core/model/ItemIdSuite.scala @@ -19,44 +19,48 @@ import com.netflix.atlas.core.util.Hash import com.netflix.atlas.core.util.Strings import munit.FunSuite -import scala.util.hashing.MurmurHash3 - class ItemIdSuite extends FunSuite { + def testByteArray: Array[Byte] = { + (1 to 20).map(_.toByte).toArray + } + test("create from byte array") { - val bytes = Array(1.toByte, 2.toByte) + val bytes = testByteArray val id = ItemId(bytes) - assertEquals(id.hashCode(), MurmurHash3.bytesHash(bytes)) + assertEquals(id.hashCode(), 219025168) } test("equals") { - val id1 = ItemId(Array(1.toByte, 2.toByte)) - val id2 = ItemId(Array(1.toByte, 2.toByte)) + val id1 = ItemId(testByteArray) + val id2 = ItemId(testByteArray) assertEquals(id1, id1) assertEquals(id1, id2) } test("not equals") { - val id1 = ItemId(Array(1.toByte, 2.toByte)) - val id2 = ItemId(Array(1.toByte, 3.toByte)) + val id1 = ItemId(testByteArray) + val bytes = testByteArray + bytes(13) = 3.toByte // perturb byte used with hashing + val id2 = ItemId(bytes) assertNotEquals(id1, id2) assertNotEquals(id1.hashCode(), id2.hashCode()) } test("does not equal wrong object type") { - val id1 = ItemId(Array(1.toByte, 2.toByte)) + val id1 = ItemId(testByteArray) assert(!id1.equals("foo")) } test("does not equal null") { - val id1 = ItemId(Array(1.toByte, 2.toByte)) + val id1 = ItemId(testByteArray) assert(id1 != null) } test("toString") { - val bytes = Array(1.toByte, 2.toByte) + val bytes = testByteArray val id = ItemId(bytes) - assertEquals(id.toString, "0102") + assertEquals(id.toString, "0102030405060708090a0b0c0d0e0f1011121314") } test("toString sha1 bytes 0") { @@ -79,15 +83,15 @@ class ItemIdSuite extends FunSuite { } test("from String lower") { - val bytes = Array(10.toByte, 11.toByte) + val bytes = testByteArray val id = ItemId(bytes) - assertEquals(id, ItemId("0a0b")) + assertEquals(id, ItemId("0102030405060708090a0b0c0d0e0f1011121314")) } test("from String upper") { - val bytes = Array(10.toByte, 11.toByte) + val bytes = testByteArray val id = ItemId(bytes) - assertEquals(id, ItemId("0A0B")) + assertEquals(id, ItemId("0102030405060708090A0B0C0D0E0F1011121314")) } test("from String invalid") { @@ -97,15 +101,17 @@ class ItemIdSuite extends FunSuite { } test("compareTo equals") { - val id1 = ItemId(Array(1.toByte, 2.toByte)) - val id2 = ItemId(Array(1.toByte, 2.toByte)) + val id1 = ItemId(testByteArray) + val id2 = ItemId(testByteArray) assertEquals(id1.compareTo(id1), 0) assertEquals(id1.compareTo(id2), 0) } test("compareTo less than/greater than") { - val id1 = ItemId(Array(1.toByte, 2.toByte)) - val id2 = ItemId(Array(1.toByte, 3.toByte)) + val id1 = ItemId(testByteArray) + val bytes = testByteArray + bytes(bytes.length - 1) = 21.toByte + val id2 = ItemId(bytes) assert(id1.compareTo(id2) < 0) assert(id2.compareTo(id1) > 0) } @@ -116,19 +122,4 @@ class ItemIdSuite extends FunSuite { assertEquals(id.intValue, id.toBigInteger.intValue()) } } - - test("int value: one byte") { - val id = ItemId(Array(57.toByte)) - assertEquals(id.intValue, id.toBigInteger.intValue()) - } - - test("int value: two bytes") { - val id = ItemId(Array(1.toByte, 2.toByte)) - assertEquals(id.intValue, id.toBigInteger.intValue()) - } - - test("int value: three bytes") { - val id = ItemId(Array(1.toByte, 2.toByte, 0xFF.toByte)) - assertEquals(id.intValue, id.toBigInteger.intValue()) - } } diff --git a/atlas-jmh/src/main/scala/com/netflix/atlas/core/model/ItemIdHashCode.scala b/atlas-jmh/src/main/scala/com/netflix/atlas/core/model/ItemIdHashCode.scala new file mode 100644 index 000000000..cbf840997 --- /dev/null +++ b/atlas-jmh/src/main/scala/com/netflix/atlas/core/model/ItemIdHashCode.scala @@ -0,0 +1,60 @@ +/* + * Copyright 2014-2024 Netflix, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.netflix.atlas.core.model + +import org.openjdk.jmh.annotations.Benchmark +import org.openjdk.jmh.annotations.Scope +import org.openjdk.jmh.annotations.State +import org.openjdk.jmh.infra.Blackhole + +import scala.util.Random + +/** + * ``` + * > jmh:run -prof gc -prof stack -wi 5 -i 10 -f1 -t1 .*ItemIdHashCode.* + * ``` + * + * ItemId.hashCode: + * + * ``` + * Benchmark Mode Cnt Score Error Units + * before thrpt 10 1547433250.866 ± 52995197.574 ops/s + * after thrpt 10 1162791300.824 ± 36880550.752 ops/s + * ``` + * + * + * ItemId.intValue: + * + * ``` + * before thrpt 10 292282926.019 ± 2643333.767 ops/s + * after thrpt 10 1101391907.134 ± 14583585.092 ops/s + * ``` + */ +@State(Scope.Thread) +class ItemIdHashCode { + + private val id = ItemId(Random.nextBytes(20)) + + @Benchmark + def intValue(bh: Blackhole): Unit = { + bh.consume(id.intValue) + } + + @Benchmark + def hashCode(bh: Blackhole): Unit = { + bh.consume(id.hashCode()) + } +}