Skip to content

Commit

Permalink
Add benchmarks related to add_row_number performance investigation (#…
Browse files Browse the repository at this point in the history
…8091)

- Follow-up of #8055
- Adds a benchmark comparing performance of Enso Map and Java HashMap in two scenarios - _only incremental_ updates (like `Vector.distinct`) and _replacing_ updates (like keeping a counter for each key). These benchmarks can be used as a metric for #8090
  • Loading branch information
radeusgd authored Oct 18, 2023
1 parent cec115d commit 93a31fc
Show file tree
Hide file tree
Showing 7 changed files with 271 additions and 13 deletions.
26 changes: 25 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,8 @@ lazy val enso = (project in file("."))
`std-aws`,
`simple-httpbin`,
`enso-test-java-helpers`,
`exploratory-benchmark-java-helpers`
`exploratory-benchmark-java-helpers`,
`benchmark-java-helpers`
)
.settings(Global / concurrentRestrictions += Tags.exclusive(Exclusive))
.settings(
Expand Down Expand Up @@ -1386,6 +1387,7 @@ lazy val runtime = (project in file("engine/runtime"))
(Runtime / compile) := (Runtime / compile)
.dependsOn(`std-base` / Compile / packageBin)
.dependsOn(`enso-test-java-helpers` / Compile / packageBin)
.dependsOn(`benchmark-java-helpers` / Compile / packageBin)
.dependsOn(`exploratory-benchmark-java-helpers` / Compile / packageBin)
.dependsOn(`std-image` / Compile / packageBin)
.dependsOn(`std-database` / Compile / packageBin)
Expand Down Expand Up @@ -2200,6 +2202,26 @@ lazy val `exploratory-benchmark-java-helpers` = project
.dependsOn(`std-base` % "provided")
.dependsOn(`std-table` % "provided")

// Java helper classes used by the Enso benchmarks. The sources live under
// `test/Benchmarks/polyglot-sources` and the packaged jar is written to the
// `polyglot/java` directory of the `Benchmarks` project.
lazy val `benchmark-java-helpers` = project
  .in(file("test/Benchmarks/polyglot-sources/benchmark-java-helpers"))
  .settings(
    frgaalJavaCompilerSetting,
    // Java-only project: do not add the Scala standard library as a dependency.
    autoScalaLibrary := false,
    Compile / packageBin / artifactPath :=
      file("test/Benchmarks/polyglot/java/benchmark-java-helpers.jar"),
    libraryDependencies +=
      "org.graalvm.sdk" % "graal-sdk" % graalMavenPackagesVersion % "provided"
  )
  .dependsOn(
    `std-base`  % "provided",
    `std-table` % "provided"
  )

lazy val `std-table` = project
.in(file("std-bits") / "table")
.enablePlugins(Antlr4Plugin)
Expand Down Expand Up @@ -2531,12 +2553,14 @@ pkgStdLibInternal := Def.inputTask {
case "TestHelpers" =>
(`enso-test-java-helpers` / Compile / packageBin).value
(`exploratory-benchmark-java-helpers` / Compile / packageBin).value
(`benchmark-java-helpers` / Compile / packageBin).value
case "AWS" =>
(`std-aws` / Compile / packageBin).value
case _ if buildAllCmd =>
(`std-base` / Compile / packageBin).value
(`enso-test-java-helpers` / Compile / packageBin).value
(`exploratory-benchmark-java-helpers` / Compile / packageBin).value
(`benchmark-java-helpers` / Compile / packageBin).value
(`std-table` / Compile / packageBin).value
(`std-database` / Compile / packageBin).value
(`std-image` / Compile / packageBin).value
Expand Down
33 changes: 21 additions & 12 deletions distribution/lib/Standard/Test/0.0.0-dev/src/Bench.enso
Original file line number Diff line number Diff line change
Expand Up @@ -159,10 +159,10 @@ type Bench
count = self.total_specs
IO.println <| "Found " + count.to_text + " cases to execute (ETA " + self.estimated_runtime.to_display_text + ")"

case Environment.get "ENSO_BENCHMARK_REPORT_PATH" of
case get_benchmark_report_path of
Nothing -> Nothing
path ->
line = 'Label,Phase,"Invocations count","Average time (ms)"'
line = 'Label,Phase,"Invocations count","Average time (ms)","Time Stdev"'
line.write path on_existing_file=Existing_File_Behavior.Backup

self.fold Nothing _-> g-> s->
Expand Down Expand Up @@ -211,8 +211,10 @@ type Bench
computation.
single_call ~act =
start = System.nano_time
Runtime.no_inline act
r = Runtime.no_inline act
end = System.nano_time
# If the computation returned a dataflow error, we raise it to a panic - we do not want silent failures in benchmarks.
Panic.rethrow r
end - start

## Run a single phase of the benchmark.
Expand All @@ -237,30 +239,33 @@ type Bench
durations_builder.append dur
@Tail_Call go (cur_ns + dur)
go phase_start
durations = durations_builder.to_vector
sum = durations.reduce (_ + _)
nanos_in_ms = 1000000
durations = durations_builder.to_vector.map (x-> x / nanos_in_ms)
stats = durations.compute_bulk [Statistic.Mean, Statistic.Standard_Deviation]
avg = stats.first
stddev = stats.second
run_iters = durations.length
avg = (sum / run_iters) / 1000000
phase_end = System.nano_time
phase_duration = Duration.new nanoseconds=(phase_end - phase_start)
Bench.summarize_phase label phase_name run_iters avg phase_duration
Bench.summarize_phase label phase_name run_iters avg stddev phase_duration

## PRIVATE
This is a very simple implementation of summarizing the benchmark
results.

We may want to improve it later, but it gets the job done to give us
simple summary that can be analysed more easily than logs.
summarize_phase (label:Text) (phase_name:Text) (invocations:Integer) (average_time:Float) (phase_duration:Duration) =
fmt = average_time.format "#.###"
summarize_phase (label:Text) (phase_name:Text) (invocations:Integer) (average_time:Float) (time_stddev:Float) (phase_duration:Duration) =
avg_fmt = average_time.format "#.###"
stddev_fmt = time_stddev.format "#.###"
IO.println <| phase_name + " duration: " + (phase_duration.total_milliseconds.format "#.##") + " ms"
IO.println <| phase_name + " invocations: " + invocations.to_text
IO.println <| phase_name + " avg time: " + fmt + " ms"
# Report the mean together with its standard deviation as "<avg> ms (+-<stddev>)".
IO.println <| phase_name + " avg time: " + avg_fmt + " ms (+-" + stddev_fmt + ")"

case Environment.get "ENSO_BENCHMARK_REPORT_PATH" of
case get_benchmark_report_path of
Nothing -> Nothing
path ->
line = '\n"'+label+'","'+phase_name+'",'+invocations.to_text+','+fmt
line = '\n"'+label+'","'+phase_name+'",'+invocations.to_text+','+avg_fmt+','+stddev_fmt
line.write path on_existing_file=Existing_File_Behavior.Append

## PRIVATE
Expand All @@ -274,3 +279,7 @@ validate_name name =
valid_java_identifier_regex = Regex.compile "[A-Za-z_$][a-zA-Z0-9_$]*"
if valid_java_identifier_regex.matches name then Nothing else
Panic.throw (Illegal_Argument.Error ("Invalid benchmark name: '" + name + "'"))

## PRIVATE
   Looks up the `ENSO_BENCHMARK_REPORT_PATH` environment variable.

   The signature declares the result as `Text | Nothing`; callers in this file
   treat `Nothing` as "do not write a report".
get_benchmark_report_path : Text | Nothing
get_benchmark_report_path = Environment.get "ENSO_BENCHMARK_REPORT_PATH"
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,9 @@ public int hashCode() {
"Currently no hash_code implementation consistent with the ObjectComparator is exposed, so"
+ " OrderedMultiValueKey is not hashable.");
}

/** Short debugging representation that identifies the key by its {@code rowIndex} field. */
@Override
public String toString() {
  return "OrderedMultiValueKey{row=" + rowIndex + '}';
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package org.enso.benchmark_helpers;

import java.util.HashMap;

/**
 * Benchmark helper that exposes a plain {@link HashMap} through a minimal, opaque API.
 *
 * <p>Keeping the map hidden behind this class ensures that callers talk to the raw Java HashMap
 * and not to a value that went through the Enso conversions that may be applied automatically.
 * That makes it possible to compare raw HashMap performance with other variants.
 */
public class JavaHashMapWrapper {
  /** The wrapped map; it is never handed out directly. */
  private final HashMap<Object, Object> delegate = new HashMap<>();

  /**
   * Stores {@code value} under {@code key}, replacing any value already associated with it.
   *
   * @return this wrapper, so that calls can be chained or used as a fold accumulator
   */
  public JavaHashMapWrapper insert(Object key, Object value) {
    delegate.put(key, value);
    return this;
  }

  /**
   * @return the value associated with {@code key}, or {@code null} if the map holds none
   */
  public Object get(Object key) {
    return delegate.get(key);
  }

  /**
   * @return the number of entries currently stored
   */
  public long size() {
    final int entryCount = delegate.size();
    return entryCount;
  }

  /**
   * Copies the contents of the map into an array of two-element {@code [key, value]} arrays,
   * in the iteration order of the map's entry set.
   */
  public Object[][] to_vector() {
    final Object[][] pairs = new Object[delegate.size()][];
    final var entries = delegate.entrySet().iterator();
    for (int row = 0; entries.hasNext(); row++) {
      final var entry = entries.next();
      pairs[row] = new Object[] {entry.getKey(), entry.getValue()};
    }
    return pairs;
  }
}
6 changes: 6 additions & 0 deletions test/Benchmarks/src/Main.enso
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import project.Table.Arithmetic
import project.Table.Column_From_Vector
import project.Table.Cross_Tab
import project.Table.Sorting
import project.Table.Internal.Multi_Value_Key
import project.Text.Build
import project.Text.Compare
import project.Text.Contains
Expand All @@ -22,6 +23,7 @@ import project.Collections
import project.Column_Numeric
import project.Equality
import project.Json_Bench
import project.Map.Hash_Map
import project.Natural_Order_Sort
import project.Number_Parse
import project.Numeric
Expand All @@ -39,6 +41,9 @@ all_benchmarks =
builder.append Operations.collect_benches
builder.append Sort.collect_benches

# Map
builder.append Hash_Map.collect_benches

# Statistics
builder.append Count_Min_Max.collect_benches

Expand All @@ -49,6 +54,7 @@ all_benchmarks =
builder.append Column_From_Vector.collect_benches
builder.append Cross_Tab.collect_benches
builder.append Sorting.collect_benches
builder.append Multi_Value_Key.collect_benches

# Text
builder.append Build.collect_benches
Expand Down
59 changes: 59 additions & 0 deletions test/Benchmarks/src/Map/Hash_Map.enso
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from Standard.Base import all

from Standard.Table import Column, Value_Type, Auto
import Standard.Table.Data.Type.Value_Type.Bits

from Standard.Test import Bench

polyglot java import org.enso.benchmark_helpers.JavaHashMapWrapper

# Benchmark options shared by the groups in this file: the warm-up phase is configured with
# `Bench.phase_conf 2 2` and the measurement phase with `Bench.phase_conf 2 3`.
# NOTE(review): the meaning of the two `phase_conf` arguments is not visible here - confirm against `Bench`.
options = Bench.options . set_warmup (Bench.phase_conf 2 2) . set_measure (Bench.phase_conf 2 3)

type Data
    # `ints` is declared with `~` (a lazy field), so the vector is described here
    # but only computed when the field is first accessed.
    Value ~ints

    # Builds the benchmark input: a vector of `n` random integers drawn with
    # `rng.integer 0 (n.div 100)`, i.e. from a range about a hundred times smaller
    # than `n`, so individual values repeat.
    #
    # The generator is seeded with a fixed value so that every run of the benchmark
    # measures exactly the same data and results stay comparable between runs.
    create n =
        create_ints =
            rng = Random.new 42
            Vector.new n _->
                rng.integer 0 (n.div 100)
        Data.Value create_ints

type Scenario
    # `map_constructor` is a one-argument function (it is called with `Nothing`)
    # returning an empty map-like object that supports `get`, `insert`, `size`
    # and `to_vector`.
    Instance map_constructor

    # Incremental-update scenario: a value is inserted only the first time it is
    # encountered. Returns the number of entries in the resulting map, i.e. the
    # number of distinct values in `ints`.
    run_distinct self ints =
        initial = self.map_constructor Nothing
        seen = ints.fold initial so_far-> value->
            known = so_far.get value . is_nothing . not
            if known then so_far else so_far.insert value True
        seen.size

    # Replacing-update scenario: every element overwrites the entry for its key
    # with an incremented counter. Afterwards the entries are scanned and the one
    # with the highest count is returned as a `Pair` of key and count
    # (`Pair.new Nothing 0` if there were no entries).
    run_count_keys self ints =
        initial = self.map_constructor Nothing
        counts = ints.fold initial so_far-> value->
            updated_count = (so_far.get value . if_nothing 0) + 1
            so_far.insert value updated_count
        no_winner = Pair.new Nothing 0
        counts.to_vector.fold no_winner best-> entry->
            count = entry.second
            if count > best.second then Pair.new entry.first count else best

# Builds the benchmark suite of this file: a single group with four cases that run the two
# `Scenario` workloads once against `Map.empty` and once against `JavaHashMapWrapper.new`.
collect_benches = Bench.build builder->
# Number of input elements; it is also appended to the group name.
n = 100000
data = Data.create n

builder.group ("Enso_Hash_Map_" + n.to_text) options group_builder->
# Incremental updates only - a scenario similar to what is done in distinct.
# The scenario instance is created inside each specified body.
group_builder.specify "Enso_Incremental" <|
Scenario.Instance (_ -> Map.empty) . run_distinct data.ints
group_builder.specify "Java_Incremental" <|
Scenario.Instance (_ -> JavaHashMapWrapper.new) . run_distinct data.ints

# Replacing updates (a counter per key) - a scenario similar to what is done in
# add_row_number with grouping.
group_builder.specify "Enso_Replacement" <|
Scenario.Instance (_ -> Map.empty) . run_count_keys data.ints
group_builder.specify "Java_Replacement" <|
Scenario.Instance (_ -> JavaHashMapWrapper.new) . run_count_keys data.ints

# Entry point: builds the benchmarks defined in this file and passes them to `run_main`.
main = collect_benches . run_main
119 changes: 119 additions & 0 deletions test/Benchmarks/src/Table/Internal/Multi_Value_Key.enso
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
from Standard.Base import all

from Standard.Table import Table, Value_Type, Aggregate_Column
import Standard.Table.Internal.Multi_Value_Key.Ordered_Multi_Value_Key
import Standard.Table.Internal.Multi_Value_Key.Unordered_Multi_Value_Key
from Standard.Test import Bench

polyglot java import org.enso.table.data.index.OrderedMultiValueKey
polyglot java import org.enso.table.data.index.UnorderedMultiValueKey
polyglot java import org.enso.base.text.TextFoldingStrategy

# Benchmark options shared by the groups in this file: the warm-up phase is configured with
# `Bench.phase_conf 2 3` and the measurement phase with `Bench.phase_conf 2 2`.
# NOTE(review): the meaning of the two `phase_conf` arguments is not visible here - confirm against `Bench`.
options = Bench.options . set_warmup (Bench.phase_conf 2 3) . set_measure (Bench.phase_conf 2 2)

# A small user-defined value with two fields, used as the "custom object" column
# in these benchmarks.
type My_Pair
    Value x1 x2

# Comparator registered for `My_Pair` below.
type My_Pair_Comparator
    # Orders by the `x2` field first; `x1` is only consulted to break ties.
    compare x y =
        by_x2 = Ordering.compare x.x2 y.x2
        by_x2.and_then (Ordering.compare x.x1 y.x1)

    # Combines both fields with a bitwise XOR.
    hash x =
        x.x1.bit_xor x.x2

# Registers `My_Pair_Comparator` as the comparator of `My_Pair` values.
Comparable.from (_:My_Pair) = My_Pair_Comparator

# Builds the benchmark input table with `num_rows` rows and three columns:
# "X" (random integers), "Y" (random integers converted to text) and "Z" (`My_Pair` values).
create_table : Integer -> Table
create_table num_rows =
# Fixed seed. All three columns draw from this single generator one after another,
# so the order of the definitions below determines the generated data.
rng = Random.new 42
x = Vector.new num_rows _-> rng.integer min=0 max=100
y = Vector.new num_rows _-> rng.integer min=0 max=20 . to_text
z = Vector.new num_rows _->
a = rng.integer min=0 max=100
b = rng.integer min=0 max=100
My_Pair.Value a b
t = Table.new [["X", x], ["Y", y], ["Z", z]]

# Local helper: panics with "Assertion failed" when the condition is false.
assert condition =
if condition.not then Panic.throw "Assertion failed"

# Sanity checks: make sure the columns got the value types the benchmark cases expect.
assert ((t.at "X" . value_type) == Value_Type.Integer)
assert ((t.at "Y" . value_type) == Value_Type.Char)
assert ((t.at "Z" . value_type) == Value_Type.Mixed)
t


# Holder of the benchmark input.
type Data
# `table` is declared with `~` (a lazy field).
Value ~table

# Wraps the table produced by `create_table num_rows`.
create num_rows = Data.Value (create_table num_rows)

# Workload for the ordered-key benchmarks.
#
# Creates one key per row of `table` by calling `make_key` with the row index,
# then compares every key with its predecessor using `compare_keys`. The result
# is a running balance (+1 when `compare_keys` returns true, -1 otherwise); it is
# returned so that the comparisons have an observable result.
compare_ordered_keys make_key table compare_keys =
    row_count = table.row_count
    all_keys = 0.up_to row_count . map row-> make_key row
    1.up_to row_count . fold 0 balance-> row->
        this_key = all_keys.at row
        prior_key = all_keys.at (row - 1)
        if compare_keys this_key prior_key then balance + 1 else balance - 1

# Workload for the unordered-key benchmarks.
#
# Creates one key per row of `table` by calling `make_key` with the row index,
# then folds the hash of every key (obtained through `get_hash`) into a checksum
# kept modulo 1997. The checksum is returned so that the hash computations have
# an observable result.
compute_hashcodes make_key table get_hash =
    row_count = table.row_count
    all_keys = 0.up_to row_count . map row-> make_key row
    all_keys.fold 0 checksum-> key->
        key_hash = get_hash key
        (checksum + key_hash) % 1997

# Builds the benchmark suite of this file. There are two groups (ordered and unordered
# multi-value keys), each with four cases: the Enso and the Java key implementation, run on
# the columns ["X", "Y"] ("Primitive") and on the columns ["X", "Z"] ("Custom_Object").
collect_benches = Bench.build builder->
# Number of table rows; it is also appended to the group names.
num_rows = 100000
data = Data.create num_rows

# Group 1: build ordered keys for all rows and compare neighbouring keys.
builder.group ("Ordered_Multi_Value_Key" + num_rows.to_text) options group_builder->
# Enso implementation: keys are built from the table's columns and compared with `<`.
# NOTE(review): `False` presumably means "do not reverse the order" for a column - confirm.
run_enso table =
key_columns = table.columns
directions = Vector.fill key_columns.length False
make_key row_ix = Ordered_Multi_Value_Key.from_row key_columns directions row_ix
compare_keys key1 key2 = key1 < key2
compare_ordered_keys make_key table compare_keys

# Java implementation: keys are built from the underlying Java storages and compared
# with `compareTo`.
# NOTE(review): the direction value `1` presumably selects the same order as the Enso variant - confirm.
run_java table =
key_storages = table.columns.map c-> c.java_column.getStorage
directions = Vector.fill key_storages.length 1
make_key row_ix = OrderedMultiValueKey.new key_storages row_ix directions
compare_keys key1 key2 = key1.compareTo key2 < 0
compare_ordered_keys make_key table compare_keys

# The column selection happens inside each specified body.
group_builder.specify "Primitive_Enso" <|
run_enso (data.table.select_columns ["X", "Y"])
group_builder.specify "Primitive_Java" <|
run_java (data.table.select_columns ["X", "Y"])
group_builder.specify "Custom_Object_Enso" <|
run_enso (data.table.select_columns ["X", "Z"])
group_builder.specify "Custom_Object_Java" <|
run_java (data.table.select_columns ["X", "Z"])

# Group 2: build unordered keys for all rows and compute their hash codes.
builder.group ("Unordered_Multi_Value_Key" + num_rows.to_text) options group_builder->
# Enso implementation: the hash is read through `hash_code`.
run_enso table =
key_columns = table.columns
make_key row_ix = Unordered_Multi_Value_Key.from_row key_columns row_ix
get_hash key = key.hash_code
compute_hashcodes make_key table get_hash

# Java implementation: every key column uses `TextFoldingStrategy.unicodeNormalizedFold`
# and the hash is read through `hashCode`.
run_java table =
key_storages = table.columns.map c-> c.java_column.getStorage
text_folding_strategies = Vector.fill key_storages.length TextFoldingStrategy.unicodeNormalizedFold
make_key row_ix = UnorderedMultiValueKey.new key_storages row_ix text_folding_strategies
get_hash key = key.hashCode
compute_hashcodes make_key table get_hash

group_builder.specify "Primitive_Enso" <|
run_enso (data.table.select_columns ["X", "Y"])
group_builder.specify "Primitive_Java" <|
run_java (data.table.select_columns ["X", "Y"])
group_builder.specify "Custom_Object_Enso" <|
run_enso (data.table.select_columns ["X", "Z"])
group_builder.specify "Custom_Object_Java" <|
run_java (data.table.select_columns ["X", "Z"])

# Entry point: builds the benchmarks defined in this file and passes them to `run_main`.
main = collect_benches . run_main

0 comments on commit 93a31fc

Please sign in to comment.