Address most review comments.

vyasr · Jun 23, 2022 · 1025be4 · 1025be4
1 parent 690577f
commit 1025be4
Show file tree

Hide file tree

Showing 6 changed files with 29 additions and 33 deletions.
diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml
@@ -28,8 +28,8 @@ dependencies:
   - cython>=0.29,<0.30
   - fsspec>=0.6.0
   - pytest
-  - pytest-cases
   - pytest-benchmark
+  - pytest-cases
   - pytest-xdist
   - sphinx
   - sphinxcontrib-websupport

diff --git a/python/cudf/benchmarks/API/bench_dataframe.py b/python/cudf/benchmarks/API/bench_dataframe.py
@@ -27,11 +27,11 @@ def bench_eval_func(benchmark, expr, dataframe):
     cls="dataframe", dtype="int", nulls=False, cols=6, name="df"
 )
 @pytest.mark.parametrize(
-    "nkey_cols",
+    "num_key_cols",
     [2, 3, 4],
 )
-def bench_merge(benchmark, df, nkey_cols):
-    benchmark(df.merge, df, on=list(df.columns[:nkey_cols]))
+def bench_merge(benchmark, df, num_key_cols):
+    benchmark(df.merge, df, on=list(df.columns[:num_key_cols]))
 
 
 # TODO: Some of these cases could be generalized to an IndexedFrame benchmark
@@ -71,11 +71,11 @@ def bench_sample(benchmark, dataframe, axis, frac, random_state):
 
 @accepts_cudf_fixture(cls="dataframe", dtype="int", nulls=False, cols=6)
 @pytest.mark.parametrize(
-    "nkey_cols",
+    "num_key_cols",
     [2, 3, 4],
 )
-def bench_groupby(benchmark, dataframe, nkey_cols):
-    benchmark(dataframe.groupby, by=list(dataframe.columns[:nkey_cols]))
+def bench_groupby(benchmark, dataframe, num_key_cols):
+    benchmark(dataframe.groupby, by=list(dataframe.columns[:num_key_cols]))
 
 
 @accepts_cudf_fixture(cls="dataframe", dtype="int", nulls=False, cols=6)
@@ -91,25 +91,27 @@ def bench_groupby(benchmark, dataframe, nkey_cols):
     ],
 )
 @pytest.mark.parametrize(
-    "nkey_cols",
+    "num_key_cols",
     [2, 3, 4],
 )
 @pytest.mark.parametrize("as_index", [True, False])
 @pytest.mark.parametrize("sort", [True, False])
-def bench_groupby_agg(benchmark, dataframe, agg, nkey_cols, as_index, sort):
-    by = list(dataframe.columns[:nkey_cols])
+def bench_groupby_agg(benchmark, dataframe, agg, num_key_cols, as_index, sort):
+    by = list(dataframe.columns[:num_key_cols])
     benchmark(dataframe.groupby(by=by, as_index=as_index, sort=sort).agg, agg)
 
 
 @accepts_cudf_fixture(cls="dataframe", dtype="int")
-@pytest.mark.parametrize("ncol_sort", [1])
-def bench_sort_values(benchmark, dataframe, ncol_sort):
-    benchmark(dataframe.sort_values, list(dataframe.columns[:ncol_sort]))
+@pytest.mark.parametrize("num_cols_to_sort", [1])
+def bench_sort_values(benchmark, dataframe, num_cols_to_sort):
+    benchmark(
+        dataframe.sort_values, list(dataframe.columns[:num_cols_to_sort])
+    )
 
 
 @accepts_cudf_fixture(cls="dataframe", dtype="int")
-@pytest.mark.parametrize("ncol_sort", [1])
+@pytest.mark.parametrize("num_cols_to_sort", [1])
 @pytest.mark.parametrize("n", [10])
-def bench_nsmallest(benchmark, dataframe, ncol_sort, n):
-    by = list(dataframe.columns[:ncol_sort])
+def bench_nsmallest(benchmark, dataframe, num_cols_to_sort, n):
+    by = list(dataframe.columns[:num_cols_to_sort])
     benchmark(dataframe.nsmallest, n, by)
diff --git a/python/cudf/benchmarks/API/bench_functions.py b/python/cudf/benchmarks/API/bench_functions.py
@@ -26,9 +26,7 @@ def bench_concat_axis_1(benchmark, objs, axis, join, ignore_index):
 @pytest.mark.parametrize("cardinality", [10, 100, 1000])
 @pytest.mark.parametrize("dtype", [cupy.bool_, cupy.float64])
 def bench_get_dummies_high_cardinality(benchmark, size, cardinality, dtype):
-    """This test is mean to test the performance of get_dummies given the
-    cardinality of column to encode is high.
-    """
+    """Benchmark when the cardinality of column to encode is high."""
     df = cudf.DataFrame(
         {
             "col": cudf.Series(
@@ -41,9 +39,7 @@ def bench_get_dummies_high_cardinality(benchmark, size, cardinality, dtype):
 
 @pytest.mark.parametrize("prefix", [None, "pre"])
 def bench_get_dummies_simple(benchmark, prefix):
-    """This test provides a small input to get_dummies to test the efficiency
-    of the API itself.
-    """
+    """Benchmark with small input to test the efficiency of the API itself."""
     df = cudf.DataFrame(
         {
             "col1": list(range(10)),

diff --git a/python/cudf/benchmarks/common/utils.py b/python/cudf/benchmarks/common/utils.py
@@ -50,7 +50,7 @@ def accepts_cudf_fixture(
     The fixture generation logic in conftest.py provides a plethora of useful
     fixtures to allow developers to easily select an appropriate cross-section
     of the space of objects to apply a particular benchmark to. However, the
-    usage of this fixtures is cumbersome because creating them in a principled
+    usage of these fixtures is cumbersome because creating them in a principled
     fashion results in long names and very specific naming schemes. This
     decorator abstracts that naming logic away from the developer, allowing
     them to instead focus on defining the fixture semantically by describing
@@ -74,7 +74,7 @@ def accepts_cudf_fixture(
     rows : Optional[int], None
         The number of rows. If None, use all possible numbers of rows.
         Specifying multiple values is unsupported.
-    fixture : str, default None
+    name : str, default None
         The name of the fixture as used in the decorated test. If None,
         defaults to `cls.lower()` if cls is a string, otherwise
         `cls.__name__.lower()`. Use of this name allows the decorated function
@@ -107,17 +107,15 @@ def bench_columns(benchmark, df):
         "frame_or_index",
     )
     assert cls in supported_classes, (
-        f"cls {cls} is invalid, choose from "
-        f"{', '.join(c for c in supported_classes)}"
+        f"cls {cls} is invalid, choose from " f"{', '.join(supported_classes)}"
     )
 
     name = name or cls
 
     if not isinstance(dtype, list):
         dtype = [dtype]
     assert all(dt in column_generators for dt in dtype), (
-        f"The only supported dtypes are "
-        f"{', '.join(dt for dt in column_generators)}"
+        f"The only supported dtypes are " f"{', '.join(column_generators)}"
     )
 
     dtype_str = "_dtype_" + "_or_".join(dtype)
@@ -130,22 +128,22 @@ def bench_columns(benchmark, df):
     if cols is not None:
         assert cols in NUM_COLS, (
             f"You have requested a DataFrame with {cols} columns but fixtures "
-            f"only exist for the values {', '.join(c for c in NUM_COLS)}"
+            f"only exist for the values {', '.join(NUM_COLS)}"
         )
         col_str = f"_cols_{cols}"
 
     row_str = ""
     if rows is not None:
         assert rows in NUM_ROWS, (
             f"You have requested a {cls} with {rows} rows but fixtures "
-            f"only exist for the values {', '.join(c for c in NUM_ROWS)}"
+            f"only exist for the values {', '.join(NUM_ROWS)}"
         )
         row_str = f"_rows_{rows}"
 
     fixture_name = f"{cls}{dtype_str}{null_str}{col_str}{row_str}"
 
     def deco(bm):
-        # pytests's test collection process relies on parsing the globals dict
+        # pytest's test collection process relies on parsing the globals dict
         # to find test functions and identify their parameters for the purpose
         # of fixtures and parameters. Therefore, the primary purpose of this
         # decorator is to define a new benchmark function with a signature

diff --git a/python/cudf/benchmarks/conftest.py b/python/cudf/benchmarks/conftest.py
@@ -31,7 +31,7 @@
 which they are valid, e.g. `def bench_sort_values(frame_or_index)`.
 
 The generated fixtures are named according to the following convention:
-`{classname}_dtype_{dtype}[_nulls_{true|false}][[_cols_{num_cols}]_rows_{num_rows}]`
+`{classname}_dtype_{dtype}[_nulls_{true|false}][_cols_{num_cols}][_rows_{num_rows}]`
 where classname is one of the following: index, series, dataframe,
 indexedframe, frame, frame_or_index. Note that in the case of indexes, to match
 Series/DataFrame we simply set `classname=index` and rely on the

diff --git a/python/cudf/benchmarks/internal/conftest.py b/python/cudf/benchmarks/internal/conftest.py
@@ -2,7 +2,7 @@
 
 """Defines pytest fixtures for internal benchmarks."""
 
-from config import NUM_ROWS, cudf  # noqa: E402
+from config import NUM_ROWS, cudf
 from utils import (
     OrderedSet,
     collapse_fixtures,