Skip to content

Commit

Permalink
Improvements to LongHash
Browse files Browse the repository at this point in the history
Signed-off-by: Ketan Verma <[email protected]>
  • Loading branch information
ketanv3 committed May 22, 2023
1 parent 63834d9 commit a222e8b
Show file tree
Hide file tree
Showing 4 changed files with 372 additions and 3 deletions.
153 changes: 153 additions & 0 deletions benchmarks/src/main/java/org/opensearch/common/LongHashBenchmark.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common;

import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.util.BigArrays;
import org.opensearch.common.util.LongHash;
import org.opensearch.common.util.LongRHHash;
import org.opensearch.common.util.PageCacheRecycler;

import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;

@Fork(3)
// NOTE(review): zero warmup iterations means the measured iterations include JIT
// compilation and allocation warm-up — consider at least 1 warmup iteration so the
// comparison between the two implementations is steady-state. TODO confirm intent.
@Warmup(iterations = 0)
@Measurement(iterations = 3)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public class LongHashBenchmark {

    /** Number of hash-table operations performed per benchmark invocation. */
    private static final int ITERATIONS = (1 << 24);

    /** Shared non-breaking BigArrays instance backing both hash implementations. */
    private static final BigArrays BIG_ARRAYS = new BigArrays(
        new PageCacheRecycler(Settings.EMPTY),
        null,
        "REQUEST"
    );

    /**
     * Benchmark parameters and pre-built fixtures: the key data set, a shuffled
     * copy used as the query stream, and pre-populated hash tables for the
     * 'find' benchmarks.
     */
    @State(Scope.Benchmark)
    public static class BenchmarkState {
        @Param({
            "1", "2", "3", "6", "8", "10", "13", "14", "15", "18", "20", "24", "27", "31", "35", "38", "40", "42", "46",
            "48", "53", "60", "68", "73", "77", "81", "86", "93", "100", "104", "109", "118", "130", "136", "149", "162",
            "168", "174", "186", "201", "210", "229", "248", "261", "274", "284", "296", "313", "336", "352", "377",
            "388", "408", "431", "449", "469", "488", "502", "520", "535", "545", "607", "647", "733", "764", "826",
            "866", "881", "953", "968", "1017", "1032", "1078", "1132", "1196", "1209", "1263", "1358", "1390", "1444",
            "1552", "2304", "2466", "3175", "3675", "4102", "4635", "4751", "5469", "6187", "6935", "7072", "7404",
            "7808", "8336", "9037", "9154", "9844", "9996", "10287"
        })
        public int size;

        /** Initial capacity hint passed to the hash-table constructors. */
        @Param({"1", "128", "1024"})
        public long capacity;

        /** Load factor at which the tables grow. */
        @Param({"0.6"})
        public float loadFactor;

        public long[] data;
        public long[] queries;

        // Pre-populated instances to benchmark the 'find' method.
        public LongHash lh;
        public LongRHHash rh;

        @Setup(Level.Trial)
        public void setUp() {
            // Alternative data distributions kept for local experimentation.
            data = nyc_taxis();
            // data = http_logs();
            // data = random();

            // Fisher-Yates shuffle.
            // This will avoid hitting the cache lines which would otherwise unfairly favour
            // the naive linear-probing algorithm.
            Random random = new Random(0);
            queries = Arrays.copyOf(data, size);
            for (int i = size - 1; i > 0; i--) {
                // random.nextInt(bound) is uniform over [0, i]. The previous form,
                // Math.abs(random.nextInt()) % (i + 1), was biased and could throw
                // ArrayIndexOutOfBoundsException: Math.abs(Integer.MIN_VALUE) is
                // still negative.
                int j = random.nextInt(i + 1);
                long temp = queries[i];
                queries[i] = queries[j];
                queries[j] = temp;
            }

            // Insert every key twice so 'find' benchmarks run against tables that
            // have exercised both the insert and the already-present paths of add().
            lh = new LongHash(capacity, loadFactor, BIG_ARRAYS);
            rh = new LongRHHash(capacity, loadFactor, BIG_ARRAYS);
            for (int i = 0; i < data.length * 2; i++) {
                lh.add(data[i % data.length]);
                rh.add(data[i % data.length]);
            }
        }

        /** Sequential daily timestamps, mimicking the nyc_taxis track's key distribution. */
        private long[] nyc_taxis() {
            long[] data = new long[size];
            for (int i = 0; i < size; i++) {
                data[i] = 1420070400000L + 86400000L * i;
            }
            return data;
        }

        /** Sequential hourly timestamps, mimicking the http_logs track's key distribution. */
        private long[] http_logs() {
            long[] data = new long[size];
            for (int i = 0; i < size; i++) {
                data[i] = 893962800000L + 3600000L * i;
            }
            return data;
        }

        /** Uniformly random keys with a fixed seed for reproducibility. */
        private long[] random() {
            Random random = new Random(0);
            long[] data = new long[size];
            for (int i = 0; i < size; i++) {
                data[i] = random.nextLong();
            }
            return data;
        }
    }

    /* Benchmarks for the 'add' method. */
    @Benchmark
    public void baselineAdd(Blackhole bh, BenchmarkState s) {
        try (LongHash h = new LongHash(s.capacity, s.loadFactor, BIG_ARRAYS)) {
            for (int i = 0; i < ITERATIONS; i++) {
                long key = s.queries[i % s.queries.length];
                bh.consume(h.add(key));
            }
        }
    }

    @Benchmark
    public void contenderAdd(Blackhole bh, BenchmarkState s) {
        try (LongRHHash h = new LongRHHash(s.capacity, s.loadFactor, BIG_ARRAYS)) {
            for (int i = 0; i < ITERATIONS; i++) {
                long key = s.queries[i % s.queries.length];
                bh.consume(h.add(key));
            }
        }
    }

    /* Benchmarks for the 'find' method. */
    @Benchmark
    public void baselineFind(Blackhole bh, BenchmarkState s) {
        for (int i = 0; i < ITERATIONS; i++) {
            long key = s.queries[i % s.queries.length];
            bh.consume(s.lh.find(key));
        }
    }

    @Benchmark
    public void contenderFind(Blackhole bh, BenchmarkState s) {
        for (int i = 0; i < ITERATIONS; i++) {
            long key = s.queries[i % s.queries.length];
            bh.consume(s.rh.find(key));
        }
    }
}
153 changes: 153 additions & 0 deletions server/src/main/java/org/opensearch/common/util/LongRHHash.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.util;

import com.carrotsearch.hppc.BitMixer;
import org.opensearch.common.lease.Releasable;


/**
 * Specialized open-addressed hash table that maps {@code long} keys to dense
 * ordinals (0, 1, 2, ...) in insertion order, using Robin Hood linear probing:
 * on insertion, an entry that has probed further from its home slot displaces
 * an entry that is closer to its own home slot, bounding probe-length variance.
 *
 * <p>Backed by {@link BigArrays}-allocated arrays, so instances must be
 * {@link #close() closed} to release them.
 */
public class LongRHHash implements Releasable {
    private final float loadFactor;
    private final BigArrays bigArrays;

    private long capacity;   // current table size; always a power of two
    private long mask;       // capacity - 1, for cheap modulo via bitwise AND
    private long size;       // number of keys added; also the next ordinal
    private long grow;       // size threshold at which the table doubles

    private LongArray ords;  // slot index -> ordinal, or -1 for an empty slot
    private LongArray keys;  // ordinal -> key

    /**
     * @param capacity   requested initial key capacity; scaled up by the load
     *                   factor and rounded to the next power of two internally
     * @param loadFactor fraction of slots that may be occupied before growing
     * @param bigArrays  allocator for the backing arrays
     */
    public LongRHHash(long capacity, float loadFactor, BigArrays bigArrays) {
        capacity = (long) (capacity / loadFactor);
        capacity = Math.max(1, Long.highestOneBit(capacity - 1) << 1); // next power of two

        this.loadFactor = loadFactor;
        this.bigArrays = bigArrays;
        this.capacity = capacity;
        this.mask = capacity - 1;
        this.size = 0;
        this.grow = (long) (capacity * loadFactor);

        this.ords = bigArrays.newLongArray(capacity, false);
        this.ords.fill(0, capacity, -1); // -1 marks an empty slot
        this.keys = bigArrays.newLongArray(capacity, false);
    }

    /**
     * Adds the key if absent.
     *
     * @return the newly assigned ordinal (&gt;= 0) if the key was inserted, or
     *         {@code -(1 + existingOrdinal)} if the key was already present
     */
    public long add(final long key) {
        final long found = find(key);
        if (found != -1) {
            return -(1 + found);
        }

        if (size >= grow) {
            grow();
        }

        return set(key, size);
    }

    /**
     * Inserts {@code key} with the given ordinal using Robin Hood probing.
     * The (key, ordinal) pair being carried may be swapped with a resident
     * entry whenever the resident's probe-sequence length (PSL) is smaller,
     * after which probing continues with the displaced entry.
     *
     * @return the ordinal assigned to the inserted key (the value of
     *         {@code size} before the insert)
     */
    private long set(final long key, final long ordinal) {
        long idx = slot(key), ord = ordinal, psl = 0;
        long curOrd, curPsl;

        do {
            if ((curOrd = ords.get(idx)) == -1) {
                // Empty slot: place the carried ordinal and record the key.
                ords.set(idx, ord);
                keys = bigArrays.grow(keys, size + 1);
                keys.set(ordinal, key);
                return size++;
            } else if ((curPsl = psl(keys.get(curOrd), idx)) < psl) {
                // Resident entry is "richer" (closer to home): swap ordinals
                // and continue probing with the displaced one.
                ord = ords.set(idx, ord);
                psl = curPsl;
            }
            idx = (idx + 1) & mask;
            psl++;
        } while (true);
    }

    /** Returns the key associated with the given ordinal. No bounds checking. */
    public long get(final long ordinal) {
        return keys.get(ordinal);
    }

    /**
     * Looks up a key by linear probing from its home slot.
     *
     * @return the key's ordinal, or -1 if absent (an empty slot terminates the
     *         probe; growth keeps the table below full so one always exists)
     */
    public long find(final long key) {
        for (long idx = slot(key);; idx = (idx + 1) & mask) {
            final long ord = ords.get(idx);
            if (ord == -1 || keys.get(ord) == key) {
                return ord;
            }
        }
    }

    /** Home slot for a key: mixed hash masked into the table. */
    private long slot(final long key) {
        return BitMixer.mix64(key) & mask;
    }

    /**
     * Probe-sequence length of {@code key} if it were stored at {@code idx}:
     * the wrapped distance from its home slot. {@code capacity} is added before
     * masking to keep the difference non-negative (capacity is a power of two,
     * so the addition is erased by the mask).
     */
    private long psl(final long key, final long idx) {
        return (capacity + idx - slot(key)) & mask;
    }

    /** Number of distinct keys added so far. */
    public long size() {
        return size;
    }

    /** Maximum probe-sequence length over all stored keys (diagnostics). */
    public long maxPsl() {
        long maxPsl = 0;

        for (long idx = 0; idx < capacity; idx++) {
            long ordinal = ords.get(idx);
            if (ordinal == -1) {
                continue;
            }

            long key = keys.get(ordinal);
            maxPsl = Math.max(maxPsl, psl(key, idx));
        }

        return maxPsl;
    }

    /** Mean probe-sequence length over all stored keys (diagnostics). */
    public double avgPsl() {
        long pslSum = 0;

        for (long idx = 0; idx < capacity; idx++) {
            long ordinal = ords.get(idx);
            if (ordinal == -1) {
                continue;
            }

            long key = keys.get(ordinal);
            pslSum += psl(key, idx);
        }

        return (double) pslSum / size;
    }

    /**
     * Doubles the capacity and re-inserts every key. Ordinals are preserved:
     * size is reset and set() re-assigns 0..oldSize-1 in the same order.
     */
    private void grow() {
        final long oldSize = size;

        capacity <<= 1;
        mask = capacity - 1;
        size = 0;
        grow = (long) (capacity * loadFactor);

        ords = bigArrays.resize(ords, capacity);
        ords.fill(0, capacity, -1);

        for (long ordinal = 0; ordinal < oldSize; ordinal++) {
            set(keys.get(ordinal), ordinal);
        }
    }

    /** Releases the backing arrays. */
    @Override
    public void close() {
        ords.close();
        keys.close();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@

import org.opensearch.common.lease.Releasable;
import org.opensearch.common.util.BigArrays;
import org.opensearch.common.util.LongHash;
import org.opensearch.common.util.LongLongHash;
import org.opensearch.common.util.LongRHHash;
import org.opensearch.search.aggregations.CardinalityUpperBound;

/**
Expand Down Expand Up @@ -148,10 +148,10 @@ public long value() {
* @opensearch.internal
*/
public static class FromSingle extends LongKeyedBucketOrds {
private final LongHash ords;
private final LongRHHash ords;

public FromSingle(BigArrays bigArrays) {
ords = new LongHash(1, bigArrays);
ords = new LongRHHash(1, 0.6f, bigArrays);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.util;

import org.opensearch.test.OpenSearchTestCase;

import java.util.Map;
import java.util.TreeMap;

public class LongRHHashTests extends OpenSearchTestCase {

    /**
     * Randomized differential test: mirrors every operation in a TreeMap and
     * checks that add() returns the expected ordinal (or the negative encoding
     * {@code -(1 + ordinal)} for duplicates) and that find() agrees.
     */
    public void testFuzzy() {
        Map<Long, Long> reference = new TreeMap<>();
        // try-with-resources so the BigArrays-backed table is released even when
        // an assertion fails (the previous explicit close() leaked on failure).
        try (LongRHHash h = new LongRHHash(1, 0.6f, BigArrays.NON_RECYCLING_INSTANCE)) {
            for (int i = 0; i < (1 << 20); i++) {
                // Keys restricted to a small range to force frequent duplicates.
                long key = randomLong() % (1 << 12);
                if (reference.containsKey(key)) {
                    long expectedOrdinal = reference.get(key);
                    assertEquals(-1 - expectedOrdinal, h.add(key));
                    assertEquals(expectedOrdinal, h.find(key));
                } else {
                    assertEquals(-1, h.find(key));
                    reference.put(key, (long) reference.size());
                    assertEquals((long) reference.get(key), h.add(key));
                }
            }
        }
    }

    /**
     * Prints max/avg probe-sequence lengths for a range of table sizes so the
     * probing behaviour can be eyeballed, and sanity-checks the statistics.
     */
    public void testReport() {
        int[] sizes = new int[] {
            1, 2, 3, 6, 8, 10, 13, 14, 15, 18, 20, 24, 27, 31, 35, 38, 40, 42, 46,
            48, 53, 60, 68, 73, 77, 81, 86, 93, 100, 104, 109, 118, 130, 136, 149, 162,
            168, 174, 186, 201, 210, 229, 248, 261, 274, 284, 296, 313, 336, 352, 377,
            388, 408, 431, 449, 469, 488, 502, 520, 535, 545, 607, 647, 733, 764, 826,
            866, 881, 953, 968, 1017, 1032, 1078, 1132, 1196, 1209, 1263, 1358, 1390, 1444,
            1552, 2304, 2466, 3175, 3675, 4102, 4635, 4751, 5469, 6187, 6935, 7072, 7404,
            7808, 8336, 9037, 9154, 9844, 9996, 10287
        };

        for (int size : sizes) {
            try (LongRHHash h = new LongRHHash(1, 0.6f, BigArrays.NON_RECYCLING_INSTANCE)) {
                // Add every key twice to exercise both the insert and the
                // already-present paths; duplicates must not change the size.
                for (int i = 0; i < size; i++) {
                    long key = 1420070400000L + 86400000L * i;
                    h.add(key);
                }
                for (int i = 0; i < size; i++) {
                    long key = 1420070400000L + 86400000L * i;
                    h.add(key);
                }
                assertEquals(size, h.size());
                // The mean probe length can never exceed the maximum.
                assertTrue(h.avgPsl() <= h.maxPsl());
                System.out.println("size: " + size + ", max_psl: " + h.maxPsl() + ", avg_psl: " + h.avgPsl());
            }
        }
    }
}

0 comments on commit a222e8b

Please sign in to comment.