Skip to content

Commit

Permalink
Merge branch 'master' into 3714-pdarray.shape-should-be-a-tuple
Browse files Browse the repository at this point in the history
  • Loading branch information
ajpotts authored Oct 1, 2024
2 parents 7e41aac + b90155f commit a191247
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 42 deletions.
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -568,9 +568,10 @@ CLEAN_TARGETS += test-clean
test-clean:
$(RM) $(TEST_TARGETS) $(addsuffix _real,$(TEST_TARGETS))

size = 10**8
.PHONY: benchmark
benchmark:
python3 -m pytest -c benchmark.ini --benchmark-autosave --benchmark-storage=file://benchmark_v2/.benchmarks
python3 -m pytest -c benchmark.ini --benchmark-autosave --benchmark-storage=file://benchmark_v2/.benchmarks --size=$(size)

version:
@echo $(VERSION);
Expand Down
2 changes: 1 addition & 1 deletion benchmark_v2/array_create_benchmark.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import numpy as np
import pytest
from context import arkouda as ak
import arkouda as ak

OPS = ("zeros", "ones", "randint")
TYPES = ("int64", "float64", "uint64")
Expand Down
133 changes: 122 additions & 11 deletions benchmark_v2/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,119 @@
default_compression = [None, "snappy", "gzip", "brotli", "zstd", "lz4"]


def pytest_addoption(parser):
    """
    Register the command line options understood by the benchmark suite.

    Every option is registered through ``parser.addoption``. All values are
    stored as strings (or booleans for the ``store_true`` flags); turning them
    into usable settings is left to whoever reads them back via
    ``config.getoption``.

    Parameters
    ----------
    parser
        The pytest argument parser handed to this hook.
    """
    parser.addoption(
        "--optional-parquet", action="store_true", default=False, help="run optional parquet tests"
    )

    # Problem size and repetition controls.
    parser.addoption(
        "--size",
        action="store",
        default="10**8",
        help="Benchmark only option. Problem size: length of array to use for benchmarks.",
    )
    parser.addoption(
        "--trials",
        action="store",
        default="5",
        # The previous text was copy-pasted from --size and described the problem size.
        help="Benchmark only option. Number of trials to run for each benchmark. For tests that run "
        "as many trials as possible in a given time, will be treated as number of seconds to run for.",
    )
    parser.addoption(
        "--seed",
        action="store",
        default="",
        help="Benchmark only option. Value to initialize random number generator.",
    )

    # Data selection. Adjacent string literals are concatenated verbatim, so each
    # fragment below ends with an explicit trailing space where one is needed.
    parser.addoption(
        "--dtype",
        action="store",
        default="",
        help="Benchmark only option. Dtypes to run benchmarks against. Comma separated list "
        "(NO SPACES) allowing for multiple. Accepted values: int64, uint64, bigint, float64, bool, "
        "str and mixed. Mixed is used to generate sets of multiple types.",
    )
    parser.addoption(
        "--numpy",
        action="store_true",
        default=False,
        help="Benchmark only option. When set, runs numpy comparison benchmarks.",
    )
    parser.addoption(
        "--maxbits",
        action="store",
        default="-1",
        help="Benchmark only option. Only applies to bigint testing. "
        "Maximum number of bits, so values > 2**max_bits will wraparound. -1 is interpreted as no maximum.",
    )
    parser.addoption(
        "--alpha", action="store", default="1.0", help="Benchmark only option. Scalar multiple"
    )
    parser.addoption(
        "--randomize",
        action="store_true",
        default=False,
        help="Benchmark only option. Fill arrays with random values instead of ones",
    )
    parser.addoption(
        "--index_size",
        action="store",
        default="",
        help="Benchmark only option. Length of index array (number of gathers to perform)",
    )
    parser.addoption(
        "--value_size",
        action="store",
        default="",
        help="Benchmark only option. Length of array from which values are gathered",
    )
    parser.addoption(
        "--encoding",
        action="store",
        default="",
        help="Benchmark only option. Only applies to encoding benchmarks. "
        "Comma separated list (NO SPACES) allowing for multiple "
        "encodings to be used. Accepted values: idna, ascii",
    )

    # IO benchmark controls.
    parser.addoption(
        "--io_only_write",
        action="store_true",
        default=False,
        help="Benchmark only option. Only write the files; files will not be removed",
    )
    parser.addoption(
        "--io_only_read",
        action="store_true",
        default=False,
        help="Benchmark only option. Only read the files; files will not be removed",
    )
    parser.addoption(
        "--io_only_delete",
        action="store_true",
        default=False,
        help="Benchmark only option. Only delete files created from writing with this benchmark",
    )
    parser.addoption(
        "--io_files_per_loc",
        action="store",
        default="1",
        help="Benchmark only option. Number of files to create per locale",
    )
    parser.addoption(
        "--io_compression",
        action="store",
        default="",
        help="Benchmark only option. Compression types to run IO benchmarks against. Comma delimited list "
        "(NO SPACES) allowing for multiple. Accepted values: none, snappy, gzip, brotli, zstd, and lz4",
    )
    parser.addoption(
        "--io_path",
        action="store",
        # Default target is a directory under the current working directory.
        default=os.path.join(os.getcwd(), "ak_io_benchmark"),
        help="Benchmark only option. Target path for measuring read/write rates",
    )


def pytest_configure(config):
pytest.prob_size = eval(config.getoption("size"))
pytest.trials = eval(config.getoption("trials"))
Expand All @@ -28,8 +141,12 @@ def pytest_configure(config):
pytest.numpy = config.getoption("numpy")
encode_str = config.getoption("encoding")
pytest.encoding = default_encoding if encode_str == "" else encode_str.split(",")
pytest.idx_size = None if config.getoption("index_size") == "" else eval(config.getoption("index_size"))
pytest.val_size = None if config.getoption("value_size") == "" else eval(config.getoption("value_size"))
pytest.idx_size = (
None if config.getoption("index_size") == "" else eval(config.getoption("index_size"))
)
pytest.val_size = (
None if config.getoption("value_size") == "" else eval(config.getoption("value_size"))
)

# IO settings
comp_str = config.getoption("io_compression")
Expand Down Expand Up @@ -64,11 +181,7 @@ def startup_teardown():
e,
)
else:
print(
"in client stack test mode with host: {} port: {}".format(
server, port
)
)
print("in client stack test mode with host: {} port: {}".format(server, port))

yield

Expand All @@ -85,9 +198,7 @@ def manage_connection():
server = os.getenv("ARKOUDA_SERVER_HOST", "localhost")
timeout = int(os.getenv("ARKOUDA_CLIENT_TIMEOUT", 5))
try:
ak.connect(
server=server, port=port, timeout=timeout
)
ak.connect(server=server, port=port, timeout=timeout)
except Exception as e:
raise ConnectionError(e)

Expand All @@ -96,4 +207,4 @@ def manage_connection():
try:
ak.disconnect()
except Exception as e:
raise ConnectionError(e)
raise ConnectionError(e)
13 changes: 4 additions & 9 deletions benchmark_v2/sort_cases_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from arkouda.sorting import SortingAlgorithm

TYPES = ("int64", "float64")
POWERLAW_DATA = None


def get_nbytes(data):
Expand Down Expand Up @@ -63,15 +62,11 @@ def bench_random_uniform(benchmark, algo, dtype, bits):


def _generate_power_law_data():
global POWERLAW_DATA
y = ak.uniform(pytest.prob_size)
a = -2.5 # power law exponent, between -2 and -3
ub = 2**32 # upper bound

if POWERLAW_DATA is None:
y = ak.uniform(pytest.prob_size)
a = -2.5 # power law exponent, between -2 and -3
ub = 2 ** 32 # upper bound
POWERLAW_DATA = ((ub ** (a + 1) - 1) * y + 1) ** (1 / (a + 1))

return POWERLAW_DATA
return ((ub ** (a + 1) - 1) * y + 1) ** (1 / (a + 1))


@pytest.mark.benchmark(group="AK_Sort_Cases")
Expand Down
34 changes: 14 additions & 20 deletions benchmark_v2/str_locality_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,45 +5,36 @@
"Hashing": lambda x: x.hash(),
"Regex_Search": lambda x: x.contains(r"\d{3,5}\.\d{5,8}", regex=True),
"Casting": lambda x: ak.cast(x, ak.float64),
"Scalar_Compare": lambda x: (x == "5.5")
"Scalar_Compare": lambda x: (x == "5.5"),
}

# Good - generates a random Strings object with "good" locality
# Poor - generates a sorted Strings object with "poor" locality
LOCALITY = {"Good", "Poor"}


RAND_DATA = None
SORT_DATA = None


def _generate_data(loc):
"""
Generate the test data. In an interest to leverage the same data for the benchmark
The data is all created at once.
"""
global RAND_DATA, SORT_DATA

# early out if already set
if loc == "Good" and RAND_DATA is not None:
return RAND_DATA
if loc == "Poor" and SORT_DATA is not None:
return SORT_DATA

# otherwise set both and return the desired one.
N = pytest.prob_size * ak.get_config()["numLocales"]
prefix = ak.random_strings_uniform(minlen=1, maxlen=16, size=N, seed=pytest.seed, characters="numeric")
prefix = ak.random_strings_uniform(
minlen=1, maxlen=16, size=N, seed=pytest.seed, characters="numeric"
)
if pytest.seed is not None:
pytest.seed += 1
suffix = ak.random_strings_uniform(minlen=1, maxlen=16, size=N, seed=pytest.seed, characters="numeric")
suffix = ak.random_strings_uniform(
minlen=1, maxlen=16, size=N, seed=pytest.seed, characters="numeric"
)
random_strings = prefix.stick(suffix, delimiter=".")
RAND_DATA = random_strings

perm = ak.argsort(random_strings.get_lengths())
sorted_strings = random_strings[perm]
SORT_DATA = sorted_strings

return RAND_DATA if loc == "Good" else SORT_DATA
return random_strings if loc == "Good" else sorted_strings


@pytest.mark.benchmark(group="String_Locality")
Expand All @@ -53,8 +44,11 @@ def bench_str_locality(benchmark, op, loc):
data = _generate_data(loc)
benchmark.pedantic(OPS[op], args=[data], rounds=pytest.trials)

benchmark.extra_info["description"] = "Measure the performance of various string operations on " \
"strings with good locality (random) and poor locality (sorted)."
benchmark.extra_info["description"] = (
"Measure the performance of various string operations on "
"strings with good locality (random) and poor locality (sorted)."
)
benchmark.extra_info["problem_size"] = pytest.prob_size
benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
(data.nbytes / benchmark.stats["mean"]) / 2 ** 30)
(data.nbytes / benchmark.stats["mean"]) / 2**30
)

0 comments on commit a191247

Please sign in to comment.