From 287b27852d119f4712798c6dea87999c7f05a239 Mon Sep 17 00:00:00 2001
From: ajpotts
Date: Mon, 14 Oct 2024 16:05:46 -0400
Subject: [PATCH] Closes #3560 Update argsort_benchmark (#3838)

Co-authored-by: Amanda Potts
---
 benchmark.ini                                |  5 +-
 benchmark_v2/aggregate_benchmark.py          |  2 +-
 benchmark_v2/argsort_benchmark.py            | 54 ++++++++++++++++++-
 benchmark_v2/array_create_benchmark.py       |  4 +-
 benchmark_v2/array_transfer_benchmark.py     |  3 +-
 .../bigint_bitwise_binops_benchmark.py       |  2 +-
 benchmark_v2/bigint_conversion_benchmark.py  |  2 +
 benchmark_v2/coargsort_benchmark.py          |  4 +-
 benchmark_v2/conftest.py                     | 22 ++++++++
 benchmark_v2/dataframe_indexing_benchmark.py |  2 +-
 benchmark_v2/encoding_benchmark.py           |  3 +-
 benchmark_v2/gather_benchmark.py             |  4 +-
 benchmark_v2/groupby_benchmark.py            |  2 +-
 benchmark_v2/in1d_benchmark.py               |  2 +-
 benchmark_v2/io_benchmark.py                 | 16 +++---
 benchmark_v2/no_op_benchmark.py              |  4 +-
 benchmark_v2/reduce_benchmark.py             |  4 +-
 benchmark_v2/scan_benchmark.py               |  4 +-
 benchmark_v2/scatter_benchmark.py            |  4 +-
 benchmark_v2/setops_benchmark.py             |  6 +--
 benchmark_v2/sort_cases_benchmark.py         | 14 ++---
 benchmark_v2/split_benchmark.py              |  6 +--
 benchmark_v2/str_locality_benchmark.py       |  2 +-
 benchmark_v2/stream_benchmark.py             |  2 +-
 benchmark_v2/substring_search_benchmark.py   |  2 +-
 25 files changed, 127 insertions(+), 48 deletions(-)

diff --git a/benchmark.ini b/benchmark.ini
index d9170123c6..103a2ad563 100644
--- a/benchmark.ini
+++ b/benchmark.ini
@@ -25,7 +25,7 @@ testpaths =
     benchmark_v2/no_op_benchmark.py
     benchmark_v2/io_benchmark.py
     benchmark_v2/sort_cases_benchmark.py
-python_functions = bench_*
+python_functions = bench_* check_correctness*
 env =
     D:ARKOUDA_SERVER_HOST=localhost
     D:ARKOUDA_SERVER_PORT=5555
@@ -34,3 +34,6 @@ env =
     D:ARKOUDA_VERBOSE=True
     D:ARKOUDA_CLIENT_TIMEOUT=0
     D:ARKOUDA_LOG_LEVEL=DEBUG
+markers =
+    skip_correctness_only
+    skip_numpy
diff --git a/benchmark_v2/aggregate_benchmark.py b/benchmark_v2/aggregate_benchmark.py
index abc7fb8a01..7e94dc0db4 100644
--- a/benchmark_v2/aggregate_benchmark.py
+++ b/benchmark_v2/aggregate_benchmark.py
@@ -24,7 +24,7 @@ def run_agg(g, vals, op):
 
     return vals.size + vals.itemsize
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="GroupBy.aggregate")
 @pytest.mark.parametrize("op", ak.GroupBy.Reductions)
 def bench_aggs(benchmark, op):
diff --git a/benchmark_v2/argsort_benchmark.py b/benchmark_v2/argsort_benchmark.py
index 40e7f8a1c9..9dd53fcb4e 100644
--- a/benchmark_v2/argsort_benchmark.py
+++ b/benchmark_v2/argsort_benchmark.py
@@ -1,8 +1,12 @@
-import arkouda as ak
+import numpy as np
 import pytest
 
+import arkouda as ak
+
 TYPES = ("int64", "uint64", "float64", "str")
 
+
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.parametrize("dtype", TYPES)
 def bench_argsort(benchmark, dtype):
     """
@@ -33,4 +37,50 @@ def bench_argsort(benchmark, dtype):
     benchmark.extra_info["description"] = "Measures the performance of ak.argsort"
     benchmark.extra_info["problem_size"] = pytest.prob_size
     benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
-        (nbytes / benchmark.stats["mean"]) / 2 ** 30)
+        (nbytes / benchmark.stats["mean"]) / 2**30
+    )
+
+
+@pytest.mark.skip_numpy(False)
+@pytest.mark.skip_correctness_only(True)
+@pytest.mark.parametrize("dtype", TYPES)
+def bench_np_argsort(benchmark, dtype):
+    cfg = ak.get_config()
+    N = pytest.prob_size * cfg["numLocales"]
+    if dtype in pytest.dtype:
+        np.random.seed(pytest.seed)
+        if dtype == "int64":
+            a = np.random.randint(0, 2**32, N)
+        elif dtype == "uint64":
+            a = np.random.randint(0, 2**32, N, dtype=np.uint64)
+        elif dtype == "float64":
+            a = np.random.random(N)
+        elif dtype == "str":
+            a = np.cast["str"](np.random.randint(0, 2**32, N))
+
+        benchmark.pedantic(np.argsort, args=[a], rounds=pytest.trials)
+
+        benchmark.extra_info["description"] = "Measures the performance of np.argsort"
+        benchmark.extra_info["problem_size"] = pytest.prob_size
+        benchmark.extra_info["average_rate"] = "{:.4f} GiB/sec".format(
+            ((a.size * a.itemsize) / benchmark.stats["mean"]) / 2**30
+        )
+
+
+@pytest.mark.skip_correctness_only(False)
+@pytest.mark.parametrize("dtype", TYPES)
+@pytest.mark.parametrize("seed", [pytest.seed])
+def check_correctness(dtype, seed):
+    N = 10**4
+    if dtype == "int64":
+        a = ak.randint(0, 2**32, N, seed=seed)
+    elif dtype == "uint64":
+        a = ak.randint(0, 2**32, N, dtype=ak.uint64, seed=seed)
+    elif dtype == "float64":
+        a = ak.randint(0, 1, N, dtype=ak.float64, seed=seed)
+    elif dtype == "str":
+        a = ak.random_strings_uniform(1, 16, N, seed=seed)
+
+    perm = ak.argsort(a)
+    if dtype in ("int64", "uint64", "float64"):
+        assert ak.is_sorted(a[perm])
diff --git a/benchmark_v2/array_create_benchmark.py b/benchmark_v2/array_create_benchmark.py
index 15b9d3df82..ea6cdcaca9 100644
--- a/benchmark_v2/array_create_benchmark.py
+++ b/benchmark_v2/array_create_benchmark.py
@@ -34,7 +34,7 @@ def _create_np_array(size, op, dtype, seed):
 
     return a
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK Array Create")
 @pytest.mark.parametrize("op", OPS)
 @pytest.mark.parametrize("dtype", TYPES)
@@ -56,7 +56,7 @@ def bench_ak_array_create(benchmark, op, dtype):
     benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
         (nbytes / benchmark.stats["mean"]) / 2 ** 30)
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="NP Array Create")
 @pytest.mark.parametrize("op", OPS)
 @pytest.mark.parametrize("dtype", TYPES)
diff --git a/benchmark_v2/array_transfer_benchmark.py b/benchmark_v2/array_transfer_benchmark.py
index 7c81fb035d..28922c4d47 100644
--- a/benchmark_v2/array_transfer_benchmark.py
+++ b/benchmark_v2/array_transfer_benchmark.py
@@ -3,6 +3,7 @@
 TYPES = ("int64", "float64", "bigint")
 
 
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="ArrayTransfer_tondarray")
 @pytest.mark.parametrize("dtype", TYPES)
 def bench_array_transfer_tondarray(benchmark, dtype):
@@ -27,7 +28,7 @@ def bench_array_transfer_tondarray(benchmark, dtype):
         (nb / benchmark.stats["mean"]) / 2 ** 30)
     benchmark.extra_info["max_bit"] = pytest.max_bits  # useful when looking at bigint
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="ArrayTransfer_ak.array")
 @pytest.mark.parametrize("dtype", TYPES)
 def bench_array_transfer_akarray(benchmark, dtype):
diff --git a/benchmark_v2/bigint_bitwise_binops_benchmark.py b/benchmark_v2/bigint_bitwise_binops_benchmark.py
index d44bc15c2e..f75a89957c 100644
--- a/benchmark_v2/bigint_bitwise_binops_benchmark.py
+++ b/benchmark_v2/bigint_bitwise_binops_benchmark.py
@@ -16,7 +16,7 @@ def _perform_or_binop(a, b):
 def _perform_shift_binop(a):
     return a >> 10
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Bigint Bitwise Binops")
 @pytest.mark.parametrize("op", OPS)
 def bench_ak_bitwise_binops(benchmark, op):
diff --git a/benchmark_v2/bigint_conversion_benchmark.py b/benchmark_v2/bigint_conversion_benchmark.py
index 28e0d78bf4..86e15f0604 100644
--- a/benchmark_v2/bigint_conversion_benchmark.py
+++ b/benchmark_v2/bigint_conversion_benchmark.py
@@ -1,6 +1,7 @@
 import arkouda as ak
 import pytest
 
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="BigInt_Conversion")
 def bench_to_bigint(benchmark):
     cfg = ak.get_config()
@@ -25,6 +26,7 @@ def bench_to_bigint(benchmark):
         (tot_bytes / benchmark.stats["mean"]) / 2 ** 30)
     benchmark.extra_info["max_bits"] = pytest.max_bits
 
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="BigInt_Conversion")
 def bench_from_bigint(benchmark):
     cfg = ak.get_config()
diff --git a/benchmark_v2/coargsort_benchmark.py b/benchmark_v2/coargsort_benchmark.py
index d5e4d74b2b..3551ee252c 100644
--- a/benchmark_v2/coargsort_benchmark.py
+++ b/benchmark_v2/coargsort_benchmark.py
@@ -6,7 +6,7 @@
 TYPES = ["int64", "uint64", "float64", "str"]
 NUM_ARR = [1, 2, 8, 16]
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Arkouda_CoArgSort")
 @pytest.mark.parametrize("numArrays", NUM_ARR)
 @pytest.mark.parametrize("dtype", TYPES)
@@ -38,7 +38,7 @@ def bench_coargsort(benchmark, dtype, numArrays):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="NumPy_CoArgSort")
 @pytest.mark.parametrize("numArrays", NUM_ARR)
 @pytest.mark.parametrize("dtype", TYPES)
diff --git a/benchmark_v2/conftest.py b/benchmark_v2/conftest.py
index a6a2c9f6d0..cd4bc012ef 100644
--- a/benchmark_v2/conftest.py
+++ b/benchmark_v2/conftest.py
@@ -127,6 +127,12 @@ def pytest_addoption(parser):
         default=os.path.join(os.getcwd(), "ak_io_benchmark"),
         help="Benchmark only option. Target path for measuring read/write rates",
     )
+    parser.addoption(
+        "--correctness_only",
+        default=False,
+        action="store_true",
+        help="Only check correctness, not performance.",
+    )
 
 
 def pytest_configure(config):
@@ -157,6 +163,8 @@ def pytest_configure(config):
     pytest.io_read = config.getoption("io_only_read")
     pytest.io_write = config.getoption("io_only_write")
 
+    pytest.correctness_only = config.getoption("correctness_only")
+
 
 @pytest.fixture(scope="module", autouse=True)
 def startup_teardown():
@@ -208,3 +216,17 @@ def manage_connection():
             ak.disconnect()
         except Exception as e:
             raise ConnectionError(e)
+
+
+@pytest.fixture(autouse=True)
+def skip_correctness_only(request):
+    if request.node.get_closest_marker("skip_correctness_only"):
+        if request.node.get_closest_marker("skip_correctness_only").args[0] == pytest.correctness_only:
+            pytest.skip("this test requires --correctness_only != {}".format(pytest.correctness_only))
+
+
+@pytest.fixture(autouse=True)
+def skip_numpy(request):
+    if request.node.get_closest_marker("skip_numpy"):
+        if request.node.get_closest_marker("skip_numpy").args[0] == pytest.numpy:
+            pytest.skip("this test requires --numpy != {}".format(pytest.numpy))
diff --git a/benchmark_v2/dataframe_indexing_benchmark.py b/benchmark_v2/dataframe_indexing_benchmark.py
index 39745462e5..97b44145a9 100644
--- a/benchmark_v2/dataframe_indexing_benchmark.py
+++ b/benchmark_v2/dataframe_indexing_benchmark.py
@@ -32,7 +32,7 @@ def generate_dataframe():
     )
     return ak.DataFrame(df_dict)
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Dataframe_Indexing")
 @pytest.mark.parametrize("op", OPS)
 def bench_ak_dataframe(benchmark, op):
diff --git a/benchmark_v2/encoding_benchmark.py b/benchmark_v2/encoding_benchmark.py
index b110d29529..6b83337cd0 100644
--- a/benchmark_v2/encoding_benchmark.py
+++ b/benchmark_v2/encoding_benchmark.py
@@ -4,6 +4,7 @@
 ENCODINGS = ("idna", "ascii")
 
 
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Strings_EncodeDecode")
 @pytest.mark.parametrize("encoding", ENCODINGS)
 def bench_encode(benchmark, encoding):
@@ -19,7 +20,7 @@ def bench_encode(benchmark, encoding):
     benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
         (nbytes / benchmark.stats["mean"]) / 2 ** 30)
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Strings_EncodeDecode")
 @pytest.mark.parametrize("encoding", ENCODINGS)
 def bench_decode(benchmark, encoding):
diff --git a/benchmark_v2/gather_benchmark.py b/benchmark_v2/gather_benchmark.py
index 3e9ede3d96..6e920acf51 100644
--- a/benchmark_v2/gather_benchmark.py
+++ b/benchmark_v2/gather_benchmark.py
@@ -11,7 +11,7 @@ def _run_gather(a, i):
     """
     return a[i]
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Gather")
 @pytest.mark.parametrize("dtype", TYPES)
 def bench_ak_gather(benchmark, dtype):
@@ -56,7 +56,7 @@ def bench_ak_gather(benchmark, dtype):
     benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
         (bytes_per_sec / 2 ** 30))
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="NumPy_Gather")
 @pytest.mark.parametrize("dtype", TYPES)
 def bench_np_gather(benchmark, dtype):
diff --git a/benchmark_v2/groupby_benchmark.py b/benchmark_v2/groupby_benchmark.py
index b1262ecf36..85f5e7e288 100644
--- a/benchmark_v2/groupby_benchmark.py
+++ b/benchmark_v2/groupby_benchmark.py
@@ -37,7 +37,7 @@ def generate_arrays(dtype, numArrays):
         arrays = arrays[0]
     return arrays, totalbytes
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="GroupBy_Creation")
 @pytest.mark.parametrize("numArrays", NUM_ARR)
 @pytest.mark.parametrize("dtype", TYPES)
diff --git a/benchmark_v2/in1d_benchmark.py b/benchmark_v2/in1d_benchmark.py
index 01d83fc9e6..bd06fc119b 100644
--- a/benchmark_v2/in1d_benchmark.py
+++ b/benchmark_v2/in1d_benchmark.py
@@ -8,7 +8,7 @@
 SIZES = {"MEDIUM": THRESHOLD - 1, "LARGE": THRESHOLD + 1}
 MAXSTRLEN = 5
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Arkouda_in1d")
 @pytest.mark.parametrize("dtype", TYPES)
 @pytest.mark.parametrize("size", SIZES)
diff --git a/benchmark_v2/io_benchmark.py b/benchmark_v2/io_benchmark.py
index a697e0a846..6fe4285ea5 100644
--- a/benchmark_v2/io_benchmark.py
+++ b/benchmark_v2/io_benchmark.py
@@ -65,7 +65,7 @@ def _generate_df(N, dtype, returnDict=False):
     }
     return df_dict if returnDict else ak.DataFrame(df_dict)
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Arkouda_IO_Write_HDF5")
 @pytest.mark.parametrize("dtype", TYPES)
 def bench_ak_write_hdf(benchmark, dtype):
@@ -90,7 +90,7 @@ def bench_ak_write_hdf(benchmark, dtype):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Arkouda_IO_Write_Parquet")
 @pytest.mark.parametrize("dtype", TYPES)
 @pytest.mark.parametrize("comp", COMPRESSIONS)
@@ -117,7 +117,7 @@ def bench_ak_write_parquet(benchmark, dtype, comp):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Arkouda_IO_Write_Parquet")
 @pytest.mark.parametrize("dtype", TYPES)
 @pytest.mark.parametrize("comp", COMPRESSIONS)
@@ -147,7 +147,7 @@ def bench_ak_write_parquet_multi(benchmark, dtype, comp):
         (nbytes / benchmark.stats["mean"]) / 2**30
    )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Arkouda_IO_Write_Parquet")
 @pytest.mark.parametrize("dtype", TYPES)
 @pytest.mark.parametrize("comp", COMPRESSIONS)
@@ -176,7 +176,7 @@ def bench_ak_write_parquet_append(benchmark, dtype, comp):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Arkouda_IO_Read_HDF5")
 @pytest.mark.parametrize("dtype", TYPES)
 def bench_ak_read_hdf(benchmark, dtype):
@@ -198,7 +198,7 @@ def bench_ak_read_hdf(benchmark, dtype):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Arkouda_IO_Read_Parquet")
 @pytest.mark.parametrize("dtype", TYPES)
 @pytest.mark.parametrize("comp", COMPRESSIONS)
@@ -221,7 +221,7 @@ def bench_ak_read_parquet(benchmark, dtype, comp):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Arkouda_IO_Read_Parquet")
 @pytest.mark.parametrize("dtype", TYPES)
 @pytest.mark.parametrize("comp", COMPRESSIONS)
@@ -248,7 +248,7 @@ def bench_ak_read_parquet_multi_column(benchmark, dtype, comp):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Arkouda_IO_Delete")
 def bench_ak_delete(benchmark):
     if pytest.io_delete or (not pytest.io_write and not pytest.io_read):
diff --git a/benchmark_v2/no_op_benchmark.py b/benchmark_v2/no_op_benchmark.py
index 4768002bd4..52058bf263 100644
--- a/benchmark_v2/no_op_benchmark.py
+++ b/benchmark_v2/no_op_benchmark.py
@@ -4,7 +4,7 @@
 
 SECONDS = pytest.trials
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Arkouda_No_Op",
                        max_time=SECONDS
                        )
@@ -17,7 +17,7 @@ def bench_ak_noop(benchmark):
     benchmark.extra_info["transfer_rate"] = f"{benchmark.stats['rounds'] / benchmark.stats['total']:.4f} " \
                                             f"operations per second"
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Arkouda_No_Op",
                        max_time=SECONDS
                        )
diff --git a/benchmark_v2/reduce_benchmark.py b/benchmark_v2/reduce_benchmark.py
index fa5e49691a..8e94cd184a 100644
--- a/benchmark_v2/reduce_benchmark.py
+++ b/benchmark_v2/reduce_benchmark.py
@@ -5,7 +5,7 @@
 OPS = ("sum", "prod", "min", "max")
 TYPES = ("int64", "float64")
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Arkouda_Reduce")
 @pytest.mark.parametrize("op", OPS)
 @pytest.mark.parametrize("dtype", TYPES)
@@ -33,7 +33,7 @@ def bench_ak_reduce(benchmark, op, dtype):
         (nbytes / benchmark.stats["mean"]) / 2 ** 30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Numpy_Reduce")
 @pytest.mark.parametrize("op", OPS)
 @pytest.mark.parametrize("dtype", TYPES)
diff --git a/benchmark_v2/scan_benchmark.py b/benchmark_v2/scan_benchmark.py
index 41e81a9fdb..4aca1805e5 100644
--- a/benchmark_v2/scan_benchmark.py
+++ b/benchmark_v2/scan_benchmark.py
@@ -6,7 +6,7 @@
 OPS = ("cumsum", "cumprod")
 TYPES = ("int64", "float64")
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Scan")
 @pytest.mark.parametrize("op", OPS)
 @pytest.mark.parametrize("dtype", TYPES)
@@ -35,7 +35,7 @@ def bench_ak_scan(benchmark, op, dtype):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Numpy_Scan")
 @pytest.mark.parametrize("op", OPS)
 @pytest.mark.parametrize("dtype", TYPES)
diff --git a/benchmark_v2/scatter_benchmark.py b/benchmark_v2/scatter_benchmark.py
index 7bfc36ac71..55649a8d79 100644
--- a/benchmark_v2/scatter_benchmark.py
+++ b/benchmark_v2/scatter_benchmark.py
@@ -8,7 +8,7 @@
 def _run_scatter(a, i, v):
     a[i] = v
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Scatter")
 @pytest.mark.parametrize("dtype", TYPES)
 def bench_ak_scatter(benchmark, dtype):
@@ -41,7 +41,7 @@ def bench_ak_scatter(benchmark, dtype):
     benchmark.extra_info["transfer_rate"] = "{:.4f} GiB/sec".format(
         (bytes_per_sec / 2 ** 30))
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="NumPy_Scatter")
 @pytest.mark.parametrize("dtype", TYPES)
 def bench_np_scatter(benchmark, dtype):
diff --git a/benchmark_v2/setops_benchmark.py b/benchmark_v2/setops_benchmark.py
index 276d37ec38..8a0ccd30ad 100644
--- a/benchmark_v2/setops_benchmark.py
+++ b/benchmark_v2/setops_benchmark.py
@@ -7,7 +7,7 @@
 OPS1D = ("intersect1d", "union1d", "setxor1d", "setdiff1d")
 TYPES = ("int64", "uint64")
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Segarray_Setops")
 @pytest.mark.parametrize("op", OPS)
 @pytest.mark.parametrize("dtype", TYPES)
@@ -48,7 +48,7 @@ def bench_segarr_setops(benchmark, op, dtype):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Setops")
 @pytest.mark.parametrize("op", OPS1D)
 @pytest.mark.parametrize("dtype", TYPES)
@@ -78,7 +78,7 @@ def bench_ak_setops(benchmark, op, dtype):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="Numpy_Setops")
 @pytest.mark.parametrize("op", OPS1D)
 @pytest.mark.parametrize("dtype", TYPES)
diff --git a/benchmark_v2/sort_cases_benchmark.py b/benchmark_v2/sort_cases_benchmark.py
index 9ffe5402dc..0bf4f7d31f 100644
--- a/benchmark_v2/sort_cases_benchmark.py
+++ b/benchmark_v2/sort_cases_benchmark.py
@@ -21,7 +21,7 @@ def do_argsort(data, algo):
     else:
         return ak.coargsort(data, algo)
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Sort_Cases")
 @pytest.mark.parametrize("algo", SortingAlgorithm)
 @pytest.mark.parametrize("dtype", TYPES)
@@ -68,7 +68,7 @@ def _generate_power_law_data():
 
     return ((ub ** (a + 1) - 1) * y + 1) ** (1 / (a + 1))
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Sort_Cases")
 @pytest.mark.parametrize("algo", SortingAlgorithm)
 @pytest.mark.parametrize("dtype", TYPES)
@@ -90,7 +90,7 @@ def bench_power_law(benchmark, algo, dtype):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Sort_Cases")
 @pytest.mark.parametrize("algo", SortingAlgorithm)
 def bench_rmat(benchmark, algo):
@@ -130,7 +130,7 @@ def bench_rmat(benchmark, algo):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Sort_Cases")
 @pytest.mark.parametrize("algo", SortingAlgorithm)
 @pytest.mark.parametrize("mode", ("concat", "interleaved"))
@@ -162,7 +162,7 @@ def bench_block_sorted(benchmark, algo, mode):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Sort_Cases")
 @pytest.mark.parametrize("algo", SortingAlgorithm)
 def bench_refinement(benchmark, algo):
@@ -186,7 +186,7 @@ def bench_refinement(benchmark, algo):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Sort_Cases")
 @pytest.mark.parametrize("algo", SortingAlgorithm)
 def bench_time_like(benchmark, algo):
@@ -214,7 +214,7 @@ def bench_time_like(benchmark, algo):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Sort_Cases")
 @pytest.mark.parametrize("algo", SortingAlgorithm)
 def bench_ip_like(benchmark, algo):
diff --git a/benchmark_v2/split_benchmark.py b/benchmark_v2/split_benchmark.py
index 0ae44092d9..0858e79f37 100644
--- a/benchmark_v2/split_benchmark.py
+++ b/benchmark_v2/split_benchmark.py
@@ -12,7 +12,7 @@ def _generate_test_data():
 
     return thickrange, nbytes
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Flatten")
 def bench_split_nonregex(benchmark):
     thickrange, nbytes = _generate_test_data()
@@ -24,7 +24,7 @@ def bench_split_nonregex(benchmark):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Flatten")
 def bench_split_regexliteral(benchmark):
     thickrange, nbytes = _generate_test_data()
@@ -36,7 +36,7 @@ def bench_split_regexliteral(benchmark):
         (nbytes / benchmark.stats["mean"]) / 2**30
     )
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="AK_Flatten")
 def bench_split_regexpattern(benchmark):
     thickrange, nbytes = _generate_test_data()
diff --git a/benchmark_v2/str_locality_benchmark.py b/benchmark_v2/str_locality_benchmark.py
index a0b2cdb824..f15899241d 100644
--- a/benchmark_v2/str_locality_benchmark.py
+++ b/benchmark_v2/str_locality_benchmark.py
@@ -36,7 +36,7 @@ def _generate_data(loc):
 
     return random_strings if loc == "Good" else sorted_strings
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="String_Locality")
 @pytest.mark.parametrize("op", OPS)
 @pytest.mark.parametrize("loc", LOCALITY)
diff --git a/benchmark_v2/stream_benchmark.py b/benchmark_v2/stream_benchmark.py
index 43f40f182c..e01abcd11a 100644
--- a/benchmark_v2/stream_benchmark.py
+++ b/benchmark_v2/stream_benchmark.py
@@ -7,7 +7,7 @@ def run_test(a, b, alpha):
 
     return a + b * alpha
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.benchmark(group="ak_stream")
 @pytest.mark.parametrize("dtype", DTYPES)
 def bench_ak_stream(benchmark, dtype):
diff --git a/benchmark_v2/substring_search_benchmark.py b/benchmark_v2/substring_search_benchmark.py
index aa86a5663c..d88a6108c4 100644
--- a/benchmark_v2/substring_search_benchmark.py
+++ b/benchmark_v2/substring_search_benchmark.py
@@ -8,7 +8,7 @@
     "Regex_Pattern": ["\\d string \\d", True]
 }
 
-
+@pytest.mark.skip_correctness_only(True)
 @pytest.mark.parametrize("s", SEARCHES)
 def bench_substring_search(benchmark, s):
     cfg = ak.get_config()
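-- 
Usage sketch for the new option (assumptions: pytest is invoked from the
repository root and picks up this benchmark.ini via -c; any additional flags
such as problem-size or trial settings depend on the local setup):

    # Timing runs only: the autouse skip_correctness_only fixture skips
    # check_correctness* functions, which are marked
    # @pytest.mark.skip_correctness_only(False).
    python3 -m pytest -c benchmark.ini benchmark_v2/argsort_benchmark.py

    # Correctness checks only: bench_* functions marked
    # @pytest.mark.skip_correctness_only(True) are skipped instead.
    python3 -m pytest -c benchmark.ini benchmark_v2/argsort_benchmark.py --correctness_only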