From 566b3d105bf58bcd6050a539fabb022782e050ab Mon Sep 17 00:00:00 2001 From: Gregory Kimball Date: Tue, 11 Oct 2022 08:17:08 -0700 Subject: [PATCH] Conform "bench_isin" to match generator column names (#11549) The version of `bench_isin` merged in #11125 used key and column names of the format `f"key{i}"` rather than the format `f"{string.ascii_lowercase[i]}"` as is used in the dataframe generator. As a result, the `isin` benchmark using a dictionary argument short-circuits with no matching keys, and the `isin` benchmark using a dataframe argument finds no matches. This PR also adjusts the `isin` arguments from `range(1000)` to `range(50)` to better match the input dataframe cardinality of 100. With `range(1000)`, every element matches, but with `range(50)` only 50% of the elements match. Authors: - Gregory Kimball (https://github.com/GregoryKimball) Approvers: - Bradley Dice (https://github.com/bdice) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/11549 --- python/cudf/benchmarks/API/bench_dataframe.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/python/cudf/benchmarks/API/bench_dataframe.py b/python/cudf/benchmarks/API/bench_dataframe.py index 9bad637f6ae..42bfa854396 100644 --- a/python/cudf/benchmarks/API/bench_dataframe.py +++ b/python/cudf/benchmarks/API/bench_dataframe.py @@ -41,14 +41,16 @@ def bench_merge(benchmark, dataframe, num_key_cols): @pytest.mark.parametrize( "values", [ - range(1000), - {f"key{i}": range(1000) for i in range(10)}, - cudf.DataFrame({f"key{i}": range(1000) for i in range(10)}), - cudf.Series(range(1000)), + lambda: range(50), + lambda: {f"{string.ascii_lowercase[i]}": range(50) for i in range(10)}, + lambda: cudf.DataFrame( + {f"{string.ascii_lowercase[i]}": range(50) for i in range(10)} + ), + lambda: cudf.Series(range(50)), ], ) def bench_isin(benchmark, dataframe, values): - benchmark(dataframe.isin, values) + benchmark(dataframe.isin, values()) 
@pytest.fixture(