Feature/python benchmarking (#11125)

This PR ports the benchmarks in https://github.com/vyasr/cudf_benchmarks, adding official benchmarks to the repository. The new benchmarks are designed from the ground up to make the best use of pytest, pytest-benchmark, and pytest-cases to simplify writing and maintaining benchmarks. Extended discussions of various previous design questions may be found on [the original repo](https://github.com/vyasr/cudf_benchmarks). Reviewers may also benefit from reviewing the companion PR creating documentation for how to write benchmarks, #11122. Tests will not pass here until rapidsai/integration#492 is merged.

Authors:
- Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
- AJ Schmidt (https://github.com/ajschmidt8)
- Bradley Dice (https://github.com/bdice)
- Michael Wang (https://github.com/isVoid)
- GALI PREM SAGAR (https://github.com/galipremsagar)
- Matthew Roeschke (https://github.com/mroeschke)

URL: #11125
rapidsai · Jun 27, 2022 · c75baeb · c75baeb
1 parent c541d35
commit c75baeb
Show file tree

Hide file tree

Showing 18 changed files with 1,292 additions and 0 deletions.
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
@@ -243,6 +243,17 @@ cd "$WORKSPACE/python/custreamz"
 gpuci_logger "Python py.test for cuStreamz"
 py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term custreamz
 
+# Run benchmarks with both cudf and pandas to ensure compatibility is maintained.
+# Benchmarks are run in DEBUG_ONLY mode, meaning that only small data sizes are used.
+# Therefore, these runs only verify that benchmarks are valid.
+# They do not generate meaningful performance measurements.
+cd "$WORKSPACE/python/cudf"
+gpuci_logger "Python pytest for cuDF benchmarks"
+CUDF_BENCHMARKS_DEBUG_ONLY=ON pytest -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" -v --dist=loadscope benchmarks
+
+gpuci_logger "Python pytest for cuDF benchmarks using pandas"
+CUDF_BENCHMARKS_USE_PANDAS=ON CUDF_BENCHMARKS_DEBUG_ONLY=ON pytest -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" -v --dist=loadscope benchmarks
+
 gpuci_logger "Test notebooks"
 "$WORKSPACE/ci/gpu/test-notebooks.sh" 2>&1 | tee nbtest.log
 python "$WORKSPACE/ci/utils/nbtestlog2junitxml.py" nbtest.log

diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
@@ -29,6 +29,7 @@ dependencies:
   - fsspec>=0.6.0
   - pytest
   - pytest-benchmark
+  - pytest-cases
   - pytest-xdist
   - sphinx
   - sphinxcontrib-websupport

diff --git a/python/cudf/benchmarks/API/bench_dataframe.py b/python/cudf/benchmarks/API/bench_dataframe.py
@@ -0,0 +1,117 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+"""Benchmarks of DataFrame methods."""
+
+import string
+
+import numpy
+import pytest
+from config import cudf, cupy
+from utils import benchmark_with_object
+
+
+@pytest.mark.parametrize("N", [100, 1_000_000])
+def bench_construction(benchmark, N):
+    benchmark(cudf.DataFrame, {None: cupy.random.rand(N)})
+
+
+@benchmark_with_object(cls="dataframe", dtype="float", cols=6)
+@pytest.mark.parametrize(
+    "expr", ["a+b", "a+b+c+d+e", "a / (sin(a) + cos(b)) * tanh(d*e*f)"]
+)
+def bench_eval_func(benchmark, expr, dataframe):
+    benchmark(dataframe.eval, expr)
+
+
+@benchmark_with_object(cls="dataframe", dtype="int", nulls=False, cols=6)
+@pytest.mark.parametrize(
+    "num_key_cols",
+    [2, 3, 4],
+)
+def bench_merge(benchmark, dataframe, num_key_cols):
+    benchmark(
+        dataframe.merge, dataframe, on=list(dataframe.columns[:num_key_cols])
+    )
+
+
+# TODO: Some of these cases could be generalized to an IndexedFrame benchmark
+# instead of a DataFrame benchmark.
+@benchmark_with_object(cls="dataframe", dtype="int")
+@pytest.mark.parametrize(
+    "values",
+    [
+        range(1000),
+        {f"key{i}": range(1000) for i in range(10)},
+        cudf.DataFrame({f"key{i}": range(1000) for i in range(10)}),
+        cudf.Series(range(1000)),
+    ],
+)
+def bench_isin(benchmark, dataframe, values):
+    benchmark(dataframe.isin, values)
+
+
+@pytest.fixture(
+    params=[0, numpy.random.RandomState, cupy.random.RandomState],
+    ids=["Seed", "NumpyRandomState", "CupyRandomState"],
+)
+def random_state(request):
+    rs = request.param
+    return rs if isinstance(rs, int) else rs(seed=42)
+
+
+@benchmark_with_object(cls="dataframe", dtype="int")
+@pytest.mark.parametrize("frac", [0.5])
+def bench_sample(benchmark, dataframe, axis, frac, random_state):
+    if axis == 1 and isinstance(random_state, cupy.random.RandomState):
+        pytest.skip("Unsupported params.")
+    benchmark(
+        dataframe.sample, frac=frac, axis=axis, random_state=random_state
+    )
+
+
+@benchmark_with_object(cls="dataframe", dtype="int", nulls=False, cols=6)
+@pytest.mark.parametrize(
+    "num_key_cols",
+    [2, 3, 4],
+)
+def bench_groupby(benchmark, dataframe, num_key_cols):
+    benchmark(dataframe.groupby, by=list(dataframe.columns[:num_key_cols]))
+
+
+@benchmark_with_object(cls="dataframe", dtype="int", nulls=False, cols=6)
+@pytest.mark.parametrize(
+    "agg",
+    [
+        "sum",
+        ["sum", "mean"],
+        {
+            f"{string.ascii_lowercase[i]}": ["sum", "mean", "count"]
+            for i in range(6)
+        },
+    ],
+)
+@pytest.mark.parametrize(
+    "num_key_cols",
+    [2, 3, 4],
+)
+@pytest.mark.parametrize("as_index", [True, False])
+@pytest.mark.parametrize("sort", [True, False])
+def bench_groupby_agg(benchmark, dataframe, agg, num_key_cols, as_index, sort):
+    by = list(dataframe.columns[:num_key_cols])
+    benchmark(dataframe.groupby(by=by, as_index=as_index, sort=sort).agg, agg)
+
+
+@benchmark_with_object(cls="dataframe", dtype="int")
+@pytest.mark.parametrize("num_cols_to_sort", [1])
+def bench_sort_values(benchmark, dataframe, num_cols_to_sort):
+    benchmark(
+        dataframe.sort_values, list(dataframe.columns[:num_cols_to_sort])
+    )
+
+
+@benchmark_with_object(cls="dataframe", dtype="int")
+@pytest.mark.parametrize("num_cols_to_sort", [1])
+@pytest.mark.parametrize("n", [10])
+def bench_nsmallest(benchmark, dataframe, num_cols_to_sort, n):
+    by = list(dataframe.columns[:num_cols_to_sort])
+    benchmark(dataframe.nsmallest, n, by)
diff --git a/python/cudf/benchmarks/API/bench_frame_or_index.py b/python/cudf/benchmarks/API/bench_frame_or_index.py
@@ -0,0 +1,88 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+"""Benchmarks of methods that exist for both Frame and BaseIndex."""
+
+import operator
+
+import numpy as np
+import pytest
+from utils import benchmark_with_object, make_gather_map
+
+
+@benchmark_with_object(cls="frame_or_index", dtype="int")
+@pytest.mark.parametrize("gather_how", ["sequence", "reverse", "random"])
+@pytest.mark.parametrize("fraction", [0.4])
+def bench_take(benchmark, gather_how, fraction, frame_or_index):
+    nr = len(frame_or_index)
+    gather_map = make_gather_map(nr * fraction, nr, gather_how)
+    benchmark(frame_or_index.take, gather_map)
+
+
+@pytest.mark.pandas_incompatible  # Series/Index work, but not DataFrame
+@benchmark_with_object(cls="frame_or_index", dtype="int")
+def bench_argsort(benchmark, frame_or_index):
+    benchmark(frame_or_index.argsort)
+
+
+@benchmark_with_object(cls="frame_or_index", dtype="int")
+def bench_min(benchmark, frame_or_index):
+    benchmark(frame_or_index.min)
+
+
+@benchmark_with_object(cls="frame_or_index", dtype="int")
+def bench_where(benchmark, frame_or_index):
+    cond = frame_or_index % 2 == 0
+    benchmark(frame_or_index.where, cond, 0)
+
+
+@benchmark_with_object(cls="frame_or_index", dtype="int", nulls=False)
+@pytest.mark.pandas_incompatible
+def bench_values_host(benchmark, frame_or_index):
+    benchmark(lambda: frame_or_index.values_host)
+
+
+@benchmark_with_object(cls="frame_or_index", dtype="int", nulls=False)
+def bench_values(benchmark, frame_or_index):
+    benchmark(lambda: frame_or_index.values)
+
+
+@benchmark_with_object(cls="frame_or_index", dtype="int")
+def bench_nunique(benchmark, frame_or_index):
+    benchmark(frame_or_index.nunique)
+
+
+@benchmark_with_object(cls="frame_or_index", dtype="int", nulls=False)
+def bench_to_numpy(benchmark, frame_or_index):
+    benchmark(frame_or_index.to_numpy)
+
+
+@benchmark_with_object(cls="frame_or_index", dtype="int", nulls=False)
+@pytest.mark.pandas_incompatible
+def bench_to_cupy(benchmark, frame_or_index):
+    benchmark(frame_or_index.to_cupy)
+
+
+@benchmark_with_object(cls="frame_or_index", dtype="int")
+@pytest.mark.pandas_incompatible
+def bench_to_arrow(benchmark, frame_or_index):
+    benchmark(frame_or_index.to_arrow)
+
+
+@benchmark_with_object(cls="frame_or_index", dtype="int")
+def bench_astype(benchmark, frame_or_index):
+    benchmark(frame_or_index.astype, float)
+
+
+@pytest.mark.parametrize("ufunc", [np.add, np.logical_and])
+@benchmark_with_object(cls="frame_or_index", dtype="int")
+def bench_ufunc_series_binary(benchmark, frame_or_index, ufunc):
+    benchmark(ufunc, frame_or_index, frame_or_index)
+
+
+@pytest.mark.parametrize(
+    "op",
+    [operator.add, operator.mul, operator.eq],
+)
+@benchmark_with_object(cls="frame_or_index", dtype="int")
+def bench_binops(benchmark, op, frame_or_index):
+    benchmark(lambda: op(frame_or_index, frame_or_index))
diff --git a/python/cudf/benchmarks/API/bench_functions.py b/python/cudf/benchmarks/API/bench_functions.py
@@ -0,0 +1,52 @@
+# Copyright (c) 2022, NVIDIA CORPORATION.
+
+"""Benchmarks of free functions that accept cudf objects."""
+
+import pytest
+import pytest_cases
+from config import cudf, cupy
+
+
+@pytest_cases.parametrize_with_cases("objs", prefix="concat")
+@pytest.mark.parametrize(
+    "axis",
+    [
+        1,
+    ],
+)
+@pytest.mark.parametrize("join", ["inner", "outer"])
+@pytest.mark.parametrize("ignore_index", [True, False])
+def bench_concat_axis_1(benchmark, objs, axis, join, ignore_index):
+    benchmark(
+        cudf.concat, objs=objs, axis=axis, join=join, ignore_index=ignore_index
+    )
+
+
+@pytest.mark.parametrize("size", [10_000, 100_000])
+@pytest.mark.parametrize("cardinality", [10, 100, 1000])
+@pytest.mark.parametrize("dtype", [cupy.bool_, cupy.float64])
+def bench_get_dummies_high_cardinality(benchmark, size, cardinality, dtype):
+    """Benchmark when the cardinality of column to encode is high."""
+    df = cudf.DataFrame(
+        {
+            "col": cudf.Series(
+                cupy.random.randint(low=0, high=cardinality, size=size)
+            ).astype("category")
+        }
+    )
+    benchmark(cudf.get_dummies, df, columns=["col"], dtype=dtype)
+
+
+@pytest.mark.parametrize("prefix", [None, "pre"])
+def bench_get_dummies_simple(benchmark, prefix):
+    """Benchmark with small input to test the efficiency of the API itself."""
+    df = cudf.DataFrame(
+        {
+            "col1": list(range(10)),
+            "col2": list("abcdefghij"),
+            "col3": cudf.Series(list(range(100, 110)), dtype="category"),
+        }
+    )
+    benchmark(
+        cudf.get_dummies, df, columns=["col1", "col2", "col3"], prefix=prefix
+    )