-
Notifications
You must be signed in to change notification settings - Fork 326
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add benchmarks related to `add_row_number` performance investigation (#8091)
- Follow-up of #8055
- Adds a benchmark comparing performance of Enso Map and Java HashMap in two scenarios - _only incremental_ updates (like `Vector.distinct`) and _replacing_ updates (like keeping a counter for each key). These benchmarks can be used as a metric for #8090
- Loading branch information
Showing
7 changed files
with
271 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
36 changes: 36 additions & 0 deletions
36
...s/benchmark-java-helpers/src/main/java/org/enso/benchmark_helpers/JavaHashMapWrapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
package org.enso.benchmark_helpers; | ||
|
||
import java.util.HashMap; | ||
|
||
/**
 * Thin facade over a plain {@link HashMap} that keeps the map itself hidden from callers.
 *
 * <p>Hiding the map ensures that benchmarks call the raw {@code HashMap} and do not go through
 * the Enso conversions that may otherwise be applied automatically, so the raw {@code HashMap}
 * performance can be compared with other variants.
 */
public class JavaHashMapWrapper {
  private final HashMap<Object, Object> entries = new HashMap<>();

  /**
   * Stores {@code value} under {@code key}, replacing any value already stored for that key.
   *
   * @param key the key to store under
   * @param value the value to associate with the key
   * @return this wrapper, so that calls can be chained
   */
  public JavaHashMapWrapper insert(Object key, Object value) {
    entries.put(key, value);
    return this;
  }

  /**
   * Looks up the value stored under {@code key}.
   *
   * @param key the key to look up
   * @return the stored value, or {@code null} when the underlying map has no value for the key
   */
  public Object get(Object key) {
    return entries.get(key);
  }

  /**
   * Reports how many entries are currently stored.
   *
   * @return the number of entries, widened to {@code long}
   */
  public long size() {
    return entries.size();
  }

  /**
   * Copies all entries into an array of two-element rows.
   *
   * @return one row per entry, in the iteration order of the underlying map, where index 0 holds
   *     the key and index 1 holds the value
   */
  public Object[][] to_vector() {
    final int count = entries.size();
    final Object[][] rows = new Object[count][];
    final var iterator = entries.entrySet().iterator();
    for (int row = 0; row < count; row++) {
      final var entry = iterator.next();
      rows[row] = new Object[] {entry.getKey(), entry.getValue()};
    }
    return rows;
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
from Standard.Base import all | ||
|
||
from Standard.Table import Column, Value_Type, Auto | ||
import Standard.Table.Data.Type.Value_Type.Bits | ||
|
||
from Standard.Test import Bench | ||
|
||
polyglot java import org.enso.benchmark_helpers.JavaHashMapWrapper | ||
|
||
# Benchmark options used by the groups in this module: the warm-up phase is
# configured with `Bench.phase_conf 2 2` and the measurement phase with
# `Bench.phase_conf 2 3`.
options =
    warmup_phase = Bench.phase_conf 2 2
    measure_phase = Bench.phase_conf 2 3
    Bench.options . set_warmup warmup_phase . set_measure measure_phase
|
||
type Data
    Value ~ints

    # Builds the benchmark input: a vector of `n` random integers, each
    # produced by `rng.integer 0 (n.div 100)`, so keys repeat within the
    # vector.
    create n =
        create_ints =
            # A fixed seed keeps the generated input identical between runs,
            # so timings taken in separate runs measure the same workload.
            rng = Random.new 42
            Vector.new n _->
                rng.integer 0 (n.div 100)
        Data.Value create_ints
|
||
type Scenario
    # `map_constructor` is called with `Nothing` to obtain the empty map that
    # each scenario starts from.
    Instance map_constructor

    # Counts distinct values in a vector: a value is inserted only when the
    # map has no entry for it yet, and the final map size is returned.
    run_distinct self ints =
        empty_map = self.map_constructor Nothing
        seen = ints.fold empty_map so_far-> value->
            already_present = so_far.get value . is_nothing . not
            if already_present then so_far else so_far.insert value True
        seen.size

    # Finds the most frequent value in a vector: first re-inserts an
    # incremented counter for every value, then scans the entries for the
    # largest counter. Returns a `Pair` of the key and its count
    # (`Pair.new Nothing 0` when no entry has a count above 0).
    run_count_keys self ints =
        empty_map = self.map_constructor Nothing
        counts = ints.fold empty_map so_far-> value->
            previous_count = so_far.get value . if_nothing 0
            so_far.insert value (previous_count + 1)
        initial_best = Pair.new Nothing 0
        counts.to_vector.fold initial_best best-> entry->
            count = entry.second
            if count > best.second then Pair.new entry.first count else best
|
||
# Registers the hash-map benchmarks: a single group with four cases that run
# the two `Scenario` workloads against the Enso `Map` and against the
# `JavaHashMapWrapper`, all on the same input vector.
collect_benches = Bench.build builder->
    n = 100000
    data = Data.create n
    group_name = "Enso_Hash_Map_" + n.to_text

    # Each factory ignores its argument and yields a fresh empty map.
    new_enso_map = _-> Map.empty
    new_java_map = _-> JavaHashMapWrapper.new

    builder.group group_name options group_builder->
        # Scenario similar to what is done in distinct
        group_builder.specify "Enso_Incremental" <|
            Scenario.Instance new_enso_map . run_distinct data.ints
        group_builder.specify "Java_Incremental" <|
            Scenario.Instance new_java_map . run_distinct data.ints

        # A scenario similar to what is done in add_row_number with grouping
        group_builder.specify "Enso_Replacement" <|
            Scenario.Instance new_enso_map . run_count_keys data.ints
        group_builder.specify "Java_Replacement" <|
            Scenario.Instance new_java_map . run_count_keys data.ints
|
||
# Entry point: runs the benchmarks registered by `collect_benches`.
main = collect_benches.run_main
119 changes: 119 additions & 0 deletions
119
test/Benchmarks/src/Table/Internal/Multi_Value_Key.enso
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
from Standard.Base import all | ||
|
||
from Standard.Table import Table, Value_Type, Aggregate_Column | ||
import Standard.Table.Internal.Multi_Value_Key.Ordered_Multi_Value_Key | ||
import Standard.Table.Internal.Multi_Value_Key.Unordered_Multi_Value_Key | ||
from Standard.Test import Bench | ||
|
||
polyglot java import org.enso.table.data.index.OrderedMultiValueKey | ||
polyglot java import org.enso.table.data.index.UnorderedMultiValueKey | ||
polyglot java import org.enso.base.text.TextFoldingStrategy | ||
|
||
# Benchmark options used by the groups in this module: the warm-up phase is
# configured with `Bench.phase_conf 2 3` and the measurement phase with
# `Bench.phase_conf 2 2`.
options =
    warmup_phase = Bench.phase_conf 2 3
    measure_phase = Bench.phase_conf 2 2
    Bench.options . set_warmup warmup_phase . set_measure measure_phase
|
||
# A custom two-field value used as the "custom object" key column.
type My_Pair
    Value x1 x2

# Comparator registered for `My_Pair` below.
type My_Pair_Comparator
    # Compares the `x2` fields first and chains the `x1` comparison through
    # `and_then`.
    compare x y =
        by_second = Ordering.compare x.x2 y.x2
        by_second.and_then (Ordering.compare x.x1 y.x1)

    # Hash is the bitwise XOR of the two fields.
    hash x =
        first = x.x1
        first.bit_xor x.x2

Comparable.from (_:My_Pair) = My_Pair_Comparator
|
||
# Builds the benchmark table with `num_rows` rows and three columns filled
# from a generator seeded with 42:
#  - "X": integers from `rng.integer min=0 max=100`,
#  - "Y": the text form of integers from `rng.integer min=0 max=20`,
#  - "Z": `My_Pair` values built from two `rng.integer min=0 max=100` draws.
# Panics with "Assertion failed" unless the columns have the value types
# `Integer`, `Char` and `Mixed` respectively.
create_table : Integer -> Table
create_table num_rows =
    rng = Random.new 42
    xs = Vector.new num_rows _->
        rng.integer min=0 max=100
    ys = Vector.new num_rows _->
        number = rng.integer min=0 max=20
        number.to_text
    zs = Vector.new num_rows _->
        first = rng.integer min=0 max=100
        second = rng.integer min=0 max=100
        My_Pair.Value first second
    table = Table.new [["X", xs], ["Y", ys], ["Z", zs]]

    ensure condition =
        if condition.not then Panic.throw "Assertion failed"

    x_type = table.at "X" . value_type
    ensure (x_type == Value_Type.Integer)
    y_type = table.at "Y" . value_type
    ensure (y_type == Value_Type.Char)
    z_type = table.at "Z" . value_type
    ensure (z_type == Value_Type.Mixed)
    table
|
||
|
||
# Holder for the benchmark input; the `table` field is declared with `~`.
type Data
    Value ~table

    # Wraps the table produced by `create_table num_rows`.
    create num_rows =
        Data.Value (create_table num_rows)
|
||
# Builds one key per index in `0.up_to table.row_count` using `make_key`,
# then for every index in `1.up_to table.row_count` compares the key with its
# predecessor using `compare_keys`. Returns a tally that goes up by one when
# `compare_keys current previous` holds and down by one otherwise, so the
# comparisons feed into the returned value.
compare_ordered_keys make_key table compare_keys =
    row_count = table.row_count
    keys = 0.up_to row_count . map make_key
    1.up_to row_count . fold 0 tally-> ix->
        is_before = compare_keys (keys.at ix) (keys.at (ix - 1))
        if is_before then tally + 1 else tally - 1
|
||
# Builds one key per index in `0.up_to table.row_count` using `make_key` and
# folds the result of `get_hash` for every key into a running sum kept
# modulo 1997, which is returned so the hash computations feed into a result.
compute_hashcodes make_key table get_hash =
    row_count = table.row_count
    keys = 0.up_to row_count . map make_key
    keys.fold 0 checksum-> key->
        key_hash = get_hash key
        (checksum + key_hash) % 1997
|
||
# Registers two benchmark groups, each comparing the Enso multi-value key
# with its Java counterpart on a primitive key (columns "X", "Y") and on a key
# containing a custom object (columns "X", "Z"):
#  - the ordered group builds keys and compares neighbouring ones,
#  - the unordered group builds keys and computes their hash codes.
collect_benches = Bench.build builder->
    num_rows = 100000
    data = Data.create num_rows
    size_suffix = num_rows.to_text

    primitive_key = ["X", "Y"]
    custom_key = ["X", "Z"]

    builder.group ("Ordered_Multi_Value_Key" + size_suffix) options group_builder->
        run_enso table =
            columns = table.columns
            directions = Vector.fill columns.length False
            key_at ix = Ordered_Multi_Value_Key.from_row columns directions ix
            is_before a b = a < b
            compare_ordered_keys key_at table is_before

        run_java table =
            storages = table.columns.map column-> column.java_column.getStorage
            directions = Vector.fill storages.length 1
            key_at ix = OrderedMultiValueKey.new storages ix directions
            is_before a b = (a.compareTo b) < 0
            compare_ordered_keys key_at table is_before

        group_builder.specify "Primitive_Enso" <|
            run_enso (data.table.select_columns primitive_key)
        group_builder.specify "Primitive_Java" <|
            run_java (data.table.select_columns primitive_key)
        group_builder.specify "Custom_Object_Enso" <|
            run_enso (data.table.select_columns custom_key)
        group_builder.specify "Custom_Object_Java" <|
            run_java (data.table.select_columns custom_key)

    builder.group ("Unordered_Multi_Value_Key" + size_suffix) options group_builder->
        run_enso table =
            columns = table.columns
            key_at ix = Unordered_Multi_Value_Key.from_row columns ix
            hash_of key = key.hash_code
            compute_hashcodes key_at table hash_of

        run_java table =
            storages = table.columns.map column-> column.java_column.getStorage
            folding_strategies = Vector.fill storages.length TextFoldingStrategy.unicodeNormalizedFold
            key_at ix = UnorderedMultiValueKey.new storages ix folding_strategies
            hash_of key = key.hashCode
            compute_hashcodes key_at table hash_of

        group_builder.specify "Primitive_Enso" <|
            run_enso (data.table.select_columns primitive_key)
        group_builder.specify "Primitive_Java" <|
            run_java (data.table.select_columns primitive_key)
        group_builder.specify "Custom_Object_Enso" <|
            run_enso (data.table.select_columns custom_key)
        group_builder.specify "Custom_Object_Java" <|
            run_java (data.table.select_columns custom_key)
|
||
# Entry point: runs the benchmarks registered by `collect_benches`.
main = collect_benches.run_main