Commit 287b278

Closes #3560 Update argsort_benchmark (#3838)
Co-authored-by: Amanda Potts <[email protected]>
ajpotts and ajpotts authored Oct 14, 2024
1 parent 8d2e09c commit 287b278
Showing 25 changed files with 127 additions and 48 deletions.
5 changes: 4 additions & 1 deletion benchmark.ini
@@ -25,7 +25,7 @@ testpaths =
benchmark_v2/no_op_benchmark.py
benchmark_v2/io_benchmark.py
benchmark_v2/sort_cases_benchmark.py
python_functions = bench_*
python_functions = bench_* check_correctness*
env =
D:ARKOUDA_SERVER_HOST=localhost
D:ARKOUDA_SERVER_PORT=5555
@@ -34,3 +34,6 @@ env =
D:ARKOUDA_VERBOSE=True
D:ARKOUDA_CLIENT_TIMEOUT=0
D:ARKOUDA_LOG_LEVEL=DEBUG
markers =
skip_correctness_only
skip_numpy
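
Taken together, these config changes make pytest collect check_correctness* functions alongside the existing bench_* ones, and register the two markers that the new conftest fixtures use to decide which kind runs. A minimal sketch of how a benchmark module is expected to use them (hypothetical function names; the real usage appears in the file diffs below):

import pytest

# Timed benchmark: skipped when the suite is run with --correctness_only.
@pytest.mark.skip_correctness_only(True)
def bench_example(benchmark):
    ...

# Correctness check: collected via the new "check_correctness*" pattern and
# skipped unless --correctness_only is passed.
@pytest.mark.skip_correctness_only(False)
def check_correctness_example():
    assert True
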
2 changes: 1 addition & 1 deletion benchmark_v2/aggregate_benchmark.py
@@ -24,7 +24,7 @@ def run_agg(g, vals, op):

return vals.size + vals.itemsize


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="GroupBy.aggregate")
@pytest.mark.parametrize("op", ak.GroupBy.Reductions)
def bench_aggs(benchmark, op):
54 changes: 52 additions & 2 deletions benchmark_v2/argsort_benchmark.py
@@ -1,8 +1,12 @@
import arkouda as ak
import numpy as np
import pytest

import arkouda as ak

TYPES = ("int64", "uint64", "float64", "str")


@pytest.mark.skip_correctness_only(True)
@pytest.mark.parametrize("dtype", TYPES)
def bench_argsort(benchmark, dtype):
"""
@@ -33,4 +37,50 @@ def bench_argsort(benchmark, dtype):
benchmark.extra_info["description"] = "Measures the performance of ak.argsort"
benchmark.extra_info["problem_size"] = pytest.prob_size
benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
(nbytes / benchmark.stats["mean"]) / 2 ** 30)
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_numpy(False)
@pytest.mark.skip_correctness_only(True)
@pytest.mark.parametrize("dtype", TYPES)
def bench_np_argsort(benchmark, dtype):
cfg = ak.get_config()
N = pytest.prob_size * cfg["numLocales"]
if dtype in pytest.dtype:
np.random.seed(pytest.seed)
if dtype == "int64":
a = np.random.randint(0, 2**32, N)
elif dtype == "uint64":
a = np.random.randint(0, 2**32, N, dtype=np.uint64)
elif dtype == "float64":
a = np.random.random(N)
elif dtype == "str":
a = np.cast["str"](np.random.randint(0, 2**32, N))

benchmark.pedantic(np.argsort, args=[a], rounds=pytest.trials)

benchmark.extra_info["description"] = "Measures the performance of np.argsort"
benchmark.extra_info["problem_size"] = pytest.prob_size
benchmark.extra_info["average_rate"] = "{:.4f} GiB/sec".format(
((a.size * a.itemsize) / benchmark.stats["mean"]) / 2**30
)
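
# Note (not part of this diff): np.cast was deprecated and is removed in
# NumPy 2.0, so the "str" branch above would fail on newer NumPy releases;
# one portable alternative would be:
#     a = np.random.randint(0, 2**32, N).astype(str)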


@pytest.mark.skip_correctness_only(False)
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("seed", [pytest.seed])
def check_correctness(dtype, seed):
N = 10**4
if dtype == "int64":
a = ak.randint(0, 2**32, N, seed=seed)
elif dtype == "uint64":
a = ak.randint(0, 2**32, N, dtype=ak.uint64, seed=seed)
elif dtype == "float64":
a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)
elif dtype == "str":
a = ak.random_strings_uniform(1, 16, N, seed=seed)

perm = ak.argsort(a)
if dtype in ("int64", "uint64", "float64"):
assert ak.is_sorted(a[perm])
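
The new check_correctness only asserts sortedness for the numeric dtypes, presumably because ak.is_sorted targets numeric pdarrays rather than Strings. A hedged sketch of one way to also cover the "str" case (not part of this commit; assumes Strings.to_ndarray is acceptable at N = 10**4):

import arkouda as ak

def _check_str_sorted(a: ak.Strings, perm: ak.pdarray) -> None:
    # Pull the permuted strings back to the client and verify the order is
    # non-decreasing; only reasonable for small problem sizes.
    s = a[perm].to_ndarray()
    assert all(s[i] <= s[i + 1] for i in range(len(s) - 1))
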
4 changes: 2 additions & 2 deletions benchmark_v2/array_create_benchmark.py
@@ -34,7 +34,7 @@ def _create_np_array(size, op, dtype, seed):

return a


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="AK Array Create")
@pytest.mark.parametrize("op", OPS)
@pytest.mark.parametrize("dtype", TYPES)
@@ -56,7 +56,7 @@ def bench_ak_array_create(benchmark, op, dtype):
benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
(nbytes / benchmark.stats["mean"]) / 2 ** 30)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="NP Array Create")
@pytest.mark.parametrize("op", OPS)
@pytest.mark.parametrize("dtype", TYPES)
3 changes: 2 additions & 1 deletion benchmark_v2/array_transfer_benchmark.py
@@ -3,6 +3,7 @@

TYPES = ("int64", "float64", "bigint")

@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="ArrayTransfer_tondarray")
@pytest.mark.parametrize("dtype", TYPES)
def bench_array_transfer_tondarray(benchmark, dtype):
@@ -27,7 +28,7 @@ def bench_array_transfer_tondarray(benchmark, dtype):
(nb / benchmark.stats["mean"]) / 2 ** 30)
benchmark.extra_info["max_bit"] = pytest.max_bits # useful when looking at bigint


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="ArrayTransfer_ak.array")
@pytest.mark.parametrize("dtype", TYPES)
def bench_array_transfer_akarray(benchmark, dtype):
2 changes: 1 addition & 1 deletion benchmark_v2/bigint_bitwise_binops_benchmark.py
@@ -16,7 +16,7 @@ def _perform_or_binop(a, b):
def _perform_shift_binop(a):
return a >> 10


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Bigint Bitwise Binops")
@pytest.mark.parametrize("op", OPS)
def bench_ak_bitwise_binops(benchmark, op):
2 changes: 2 additions & 0 deletions benchmark_v2/bigint_conversion_benchmark.py
@@ -1,6 +1,7 @@
import arkouda as ak
import pytest

@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="BigInt_Conversion")
def bench_to_bigint(benchmark):
cfg = ak.get_config()
@@ -25,6 +26,7 @@ def bench_to_bigint(benchmark):
(tot_bytes / benchmark.stats["mean"]) / 2 ** 30)
benchmark.extra_info["max_bits"] = pytest.max_bits

@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="BigInt_Conversion")
def bench_from_bigint(benchmark):
cfg = ak.get_config()
4 changes: 2 additions & 2 deletions benchmark_v2/coargsort_benchmark.py
@@ -6,7 +6,7 @@
TYPES = ["int64", "uint64", "float64", "str"]
NUM_ARR = [1, 2, 8, 16]


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_CoArgSort")
@pytest.mark.parametrize("numArrays", NUM_ARR)
@pytest.mark.parametrize("dtype", TYPES)
@@ -38,7 +38,7 @@ def bench_coargsort(benchmark, dtype, numArrays):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="NumPy_CoArgSort")
@pytest.mark.parametrize("numArrays", NUM_ARR)
@pytest.mark.parametrize("dtype", TYPES)
22 changes: 22 additions & 0 deletions benchmark_v2/conftest.py
@@ -127,6 +127,12 @@ def pytest_addoption(parser):
default=os.path.join(os.getcwd(), "ak_io_benchmark"),
help="Benchmark only option. Target path for measuring read/write rates",
)
parser.addoption(
"--correctness_only",
default=False,
action="store_true",
help="Only check correctness, not performance.",
)


def pytest_configure(config):
@@ -157,6 +163,8 @@ def pytest_configure(config):
pytest.io_read = config.getoption("io_only_read")
pytest.io_write = config.getoption("io_only_write")

pytest.correctness_only = config.getoption("correctness_only")


@pytest.fixture(scope="module", autouse=True)
def startup_teardown():
@@ -208,3 +216,17 @@ def manage_connection():
ak.disconnect()
except Exception as e:
raise ConnectionError(e)


@pytest.fixture(autouse=True)
def skip_correctness_only(request):
if request.node.get_closest_marker("skip_correctness_only"):
if request.node.get_closest_marker("skip_correctness_only").args[0] == pytest.correctness_only:
pytest.skip("this test requires --correctness_only != {}".format(pytest.correctness_only))


@pytest.fixture(autouse=True)
def skip_numpy(request):
if request.node.get_closest_marker("skip_numpy"):
if request.node.get_closest_marker("skip_numpy").args[0] == pytest.numpy:
pytest.skip("this test requires --numpy != {}".format(pytest.numpy))
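
With the new --correctness_only option in place, the autouse fixtures skip any test whose skip_correctness_only marker argument matches the current mode, and skip_numpy does the same against pytest.numpy (presumably set from an existing NumPy-benchmarking option elsewhere in conftest), so comparison benchmarks such as bench_np_argsort run only when NumPy benchmarking is requested. A hypothetical programmatic way to run just the correctness checks, equivalent to passing --correctness_only on the command line (paths assumed relative to the repository root):

import pytest

# Runs the check_correctness* tests and skips the timed bench_* functions.
pytest.main(["-c", "benchmark.ini", "--correctness_only",
             "benchmark_v2/argsort_benchmark.py"])
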
2 changes: 1 addition & 1 deletion benchmark_v2/dataframe_indexing_benchmark.py
@@ -32,7 +32,7 @@ def generate_dataframe():
)
return ak.DataFrame(df_dict)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Dataframe_Indexing")
@pytest.mark.parametrize("op", OPS)
def bench_ak_dataframe(benchmark, op):
3 changes: 2 additions & 1 deletion benchmark_v2/encoding_benchmark.py
@@ -4,6 +4,7 @@
ENCODINGS = ("idna", "ascii")


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Strings_EncodeDecode")
@pytest.mark.parametrize("encoding", ENCODINGS)
def bench_encode(benchmark, encoding):
@@ -19,7 +20,7 @@ def bench_encode(benchmark, encoding):
benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
(nbytes / benchmark.stats["mean"]) / 2 ** 30)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Strings_EncodeDecode")
@pytest.mark.parametrize("encoding", ENCODINGS)
def bench_decode(benchmark, encoding):
4 changes: 2 additions & 2 deletions benchmark_v2/gather_benchmark.py
@@ -11,7 +11,7 @@ def _run_gather(a, i):
"""
return a[i]


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="AK_Gather")
@pytest.mark.parametrize("dtype", TYPES)
def bench_ak_gather(benchmark, dtype):
@@ -56,7 +56,7 @@ def bench_ak_gather(benchmark, dtype):
benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
(bytes_per_sec / 2 ** 30))


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="NumPy_Gather")
@pytest.mark.parametrize("dtype", TYPES)
def bench_np_gather(benchmark, dtype):
2 changes: 1 addition & 1 deletion benchmark_v2/groupby_benchmark.py
@@ -37,7 +37,7 @@ def generate_arrays(dtype, numArrays):
arrays = arrays[0]
return arrays, totalbytes


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="GroupBy_Creation")
@pytest.mark.parametrize("numArrays", NUM_ARR)
@pytest.mark.parametrize("dtype", TYPES)
2 changes: 1 addition & 1 deletion benchmark_v2/in1d_benchmark.py
@@ -8,7 +8,7 @@
SIZES = {"MEDIUM": THRESHOLD - 1, "LARGE": THRESHOLD + 1}
MAXSTRLEN = 5


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_in1d")
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("size", SIZES)
16 changes: 8 additions & 8 deletions benchmark_v2/io_benchmark.py
@@ -65,7 +65,7 @@ def _generate_df(N, dtype, returnDict=False):
}
return df_dict if returnDict else ak.DataFrame(df_dict)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Write_HDF5")
@pytest.mark.parametrize("dtype", TYPES)
def bench_ak_write_hdf(benchmark, dtype):
@@ -90,7 +90,7 @@ def bench_ak_write_hdf(benchmark, dtype):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Write_Parquet")
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("comp", COMPRESSIONS)
@@ -117,7 +117,7 @@ def bench_ak_write_parquet(benchmark, dtype, comp):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Write_Parquet")
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("comp", COMPRESSIONS)
@@ -147,7 +147,7 @@ def bench_ak_write_parquet_multi(benchmark, dtype, comp):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Write_Parquet")
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("comp", COMPRESSIONS)
@@ -176,7 +176,7 @@ def bench_ak_write_parquet_append(benchmark, dtype, comp):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Read_HDF5")
@pytest.mark.parametrize("dtype", TYPES)
def bench_ak_read_hdf(benchmark, dtype):
@@ -198,7 +198,7 @@ def bench_ak_read_hdf(benchmark, dtype):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Read_Parquet")
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("comp", COMPRESSIONS)
@@ -221,7 +221,7 @@ def bench_ak_read_parquet(benchmark, dtype, comp):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Read_Parquet")
@pytest.mark.parametrize("dtype", TYPES)
@pytest.mark.parametrize("comp", COMPRESSIONS)
@@ -248,7 +248,7 @@ def bench_ak_read_parquet_multi_column(benchmark, dtype, comp):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_IO_Delete")
def bench_ak_delete(benchmark):
if pytest.io_delete or (not pytest.io_write and not pytest.io_read):
4 changes: 2 additions & 2 deletions benchmark_v2/no_op_benchmark.py
@@ -4,7 +4,7 @@

SECONDS = pytest.trials


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_No_Op",
max_time=SECONDS
)
@@ -17,7 +17,7 @@ def bench_ak_noop(benchmark):
benchmark.extra_info["transfer_rate"] = f"{benchmark.stats['rounds'] / benchmark.stats['total']:.4f} " \
f"operations per second"


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_No_Op",
max_time=SECONDS
)
4 changes: 2 additions & 2 deletions benchmark_v2/reduce_benchmark.py
@@ -5,7 +5,7 @@
OPS = ("sum", "prod", "min", "max")
TYPES = ("int64", "float64")


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Arkouda_Reduce")
@pytest.mark.parametrize("op", OPS)
@pytest.mark.parametrize("dtype", TYPES)
@@ -33,7 +33,7 @@ def bench_ak_reduce(benchmark, op, dtype):
(nbytes / benchmark.stats["mean"]) / 2 ** 30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Numpy_Reduce")
@pytest.mark.parametrize("op", OPS)
@pytest.mark.parametrize("dtype", TYPES)
4 changes: 2 additions & 2 deletions benchmark_v2/scan_benchmark.py
@@ -6,7 +6,7 @@
OPS = ("cumsum", "cumprod")
TYPES = ("int64", "float64")


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="AK_Scan")
@pytest.mark.parametrize("op", OPS)
@pytest.mark.parametrize("dtype", TYPES)
@@ -35,7 +35,7 @@ def bench_ak_scan(benchmark, op, dtype):
(nbytes / benchmark.stats["mean"]) / 2**30
)


@pytest.mark.skip_correctness_only(True)
@pytest.mark.benchmark(group="Numpy_Scan")
@pytest.mark.parametrize("op", OPS)
@pytest.mark.parametrize("dtype", TYPES)
(Additional changed file diffs not shown.)
