Skip to content

Commit

Permalink
Address most review comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
vyasr committed Jun 23, 2022
1 parent 690577f commit 1025be4
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 33 deletions.
2 changes: 1 addition & 1 deletion conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ dependencies:
- cython>=0.29,<0.30
- fsspec>=0.6.0
- pytest
- pytest-cases
- pytest-benchmark
- pytest-cases
- pytest-xdist
- sphinx
- sphinxcontrib-websupport
Expand Down
32 changes: 17 additions & 15 deletions python/cudf/benchmarks/API/bench_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@ def bench_eval_func(benchmark, expr, dataframe):
cls="dataframe", dtype="int", nulls=False, cols=6, name="df"
)
@pytest.mark.parametrize(
"nkey_cols",
"num_key_cols",
[2, 3, 4],
)
def bench_merge(benchmark, df, nkey_cols):
benchmark(df.merge, df, on=list(df.columns[:nkey_cols]))
def bench_merge(benchmark, df, num_key_cols):
benchmark(df.merge, df, on=list(df.columns[:num_key_cols]))


# TODO: Some of these cases could be generalized to an IndexedFrame benchmark
Expand Down Expand Up @@ -71,11 +71,11 @@ def bench_sample(benchmark, dataframe, axis, frac, random_state):

@accepts_cudf_fixture(cls="dataframe", dtype="int", nulls=False, cols=6)
@pytest.mark.parametrize(
"nkey_cols",
"num_key_cols",
[2, 3, 4],
)
def bench_groupby(benchmark, dataframe, nkey_cols):
benchmark(dataframe.groupby, by=list(dataframe.columns[:nkey_cols]))
def bench_groupby(benchmark, dataframe, num_key_cols):
benchmark(dataframe.groupby, by=list(dataframe.columns[:num_key_cols]))


@accepts_cudf_fixture(cls="dataframe", dtype="int", nulls=False, cols=6)
Expand All @@ -91,25 +91,27 @@ def bench_groupby(benchmark, dataframe, nkey_cols):
],
)
@pytest.mark.parametrize(
"nkey_cols",
"num_key_cols",
[2, 3, 4],
)
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize("sort", [True, False])
def bench_groupby_agg(benchmark, dataframe, agg, nkey_cols, as_index, sort):
by = list(dataframe.columns[:nkey_cols])
def bench_groupby_agg(benchmark, dataframe, agg, num_key_cols, as_index, sort):
by = list(dataframe.columns[:num_key_cols])
benchmark(dataframe.groupby(by=by, as_index=as_index, sort=sort).agg, agg)


@accepts_cudf_fixture(cls="dataframe", dtype="int")
@pytest.mark.parametrize("ncol_sort", [1])
def bench_sort_values(benchmark, dataframe, ncol_sort):
benchmark(dataframe.sort_values, list(dataframe.columns[:ncol_sort]))
@pytest.mark.parametrize("num_cols_to_sort", [1])
def bench_sort_values(benchmark, dataframe, num_cols_to_sort):
benchmark(
dataframe.sort_values, list(dataframe.columns[:num_cols_to_sort])
)


@accepts_cudf_fixture(cls="dataframe", dtype="int")
@pytest.mark.parametrize("ncol_sort", [1])
@pytest.mark.parametrize("num_cols_to_sort", [1])
@pytest.mark.parametrize("n", [10])
def bench_nsmallest(benchmark, dataframe, ncol_sort, n):
by = list(dataframe.columns[:ncol_sort])
def bench_nsmallest(benchmark, dataframe, num_cols_to_sort, n):
by = list(dataframe.columns[:num_cols_to_sort])
benchmark(dataframe.nsmallest, n, by)
8 changes: 2 additions & 6 deletions python/cudf/benchmarks/API/bench_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,7 @@ def bench_concat_axis_1(benchmark, objs, axis, join, ignore_index):
@pytest.mark.parametrize("cardinality", [10, 100, 1000])
@pytest.mark.parametrize("dtype", [cupy.bool_, cupy.float64])
def bench_get_dummies_high_cardinality(benchmark, size, cardinality, dtype):
"""This test is mean to test the performance of get_dummies given the
cardinality of column to encode is high.
"""
"""Benchmark when the cardinality of column to encode is high."""
df = cudf.DataFrame(
{
"col": cudf.Series(
Expand All @@ -41,9 +39,7 @@ def bench_get_dummies_high_cardinality(benchmark, size, cardinality, dtype):

@pytest.mark.parametrize("prefix", [None, "pre"])
def bench_get_dummies_simple(benchmark, prefix):
"""This test provides a small input to get_dummies to test the efficiency
of the API itself.
"""
"""Benchmark with small input to test the efficiency of the API itself."""
df = cudf.DataFrame(
{
"col1": list(range(10)),
Expand Down
16 changes: 7 additions & 9 deletions python/cudf/benchmarks/common/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def accepts_cudf_fixture(
The fixture generation logic in conftest.py provides a plethora of useful
fixtures to allow developers to easily select an appropriate cross-section
of the space of objects to apply a particular benchmark to. However, the
usage of this fixtures is cumbersome because creating them in a principled
usage of these fixtures is cumbersome because creating them in a principled
fashion results in long names and very specific naming schemes. This
decorator abstracts that naming logic away from the developer, allowing
them to instead focus on defining the fixture semantically by describing
Expand All @@ -74,7 +74,7 @@ def accepts_cudf_fixture(
rows : Optional[int], None
The number of rows. If None, use all possible numbers of rows.
Specifying multiple values is unsupported.
fixture : str, default None
name : str, default None
The name of the fixture as used in the decorated test. If None,
defaults to `cls.lower()` if cls is a string, otherwise
`cls.__name__.lower()`. Use of this name allows the decorated function
Expand Down Expand Up @@ -107,17 +107,15 @@ def bench_columns(benchmark, df):
"frame_or_index",
)
assert cls in supported_classes, (
f"cls {cls} is invalid, choose from "
f"{', '.join(c for c in supported_classes)}"
f"cls {cls} is invalid, choose from " f"{', '.join(supported_classes)}"
)

name = name or cls

if not isinstance(dtype, list):
dtype = [dtype]
assert all(dt in column_generators for dt in dtype), (
f"The only supported dtypes are "
f"{', '.join(dt for dt in column_generators)}"
f"The only supported dtypes are " f"{', '.join(column_generators)}"
)

dtype_str = "_dtype_" + "_or_".join(dtype)
Expand All @@ -130,22 +128,22 @@ def bench_columns(benchmark, df):
if cols is not None:
assert cols in NUM_COLS, (
f"You have requested a DataFrame with {cols} columns but fixtures "
f"only exist for the values {', '.join(c for c in NUM_COLS)}"
f"only exist for the values {', '.join(NUM_COLS)}"
)
col_str = f"_cols_{cols}"

row_str = ""
if rows is not None:
assert rows in NUM_ROWS, (
f"You have requested a {cls} with {rows} rows but fixtures "
f"only exist for the values {', '.join(c for c in NUM_ROWS)}"
f"only exist for the values {', '.join(NUM_ROWS)}"
)
row_str = f"_rows_{rows}"

fixture_name = f"{cls}{dtype_str}{null_str}{col_str}{row_str}"

def deco(bm):
# pytests's test collection process relies on parsing the globals dict
# pytest's test collection process relies on parsing the globals dict
# to find test functions and identify their parameters for the purpose
# of fixtures and parameters. Therefore, the primary purpose of this
# decorator is to define a new benchmark function with a signature
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/benchmarks/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
which they are valid, e.g. `def bench_sort_values(frame_or_index)`.
The generated fixtures are named according to the following convention:
`{classname}_dtype_{dtype}[_nulls_{true|false}][[_cols_{num_cols}]_rows_{num_rows}]`
`{classname}_dtype_{dtype}[_nulls_{true|false}][_cols_{num_cols}][_rows_{num_rows}]`
where classname is one of the following: index, series, dataframe,
indexedframe, frame, frame_or_index. Note that in the case of indexes, to match
Series/DataFrame we simply set `classname=index` and rely on the
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/benchmarks/internal/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

"""Defines pytest fixtures for internal benchmarks."""

from config import NUM_ROWS, cudf # noqa: E402
from config import NUM_ROWS, cudf
from utils import (
OrderedSet,
collapse_fixtures,
Expand Down

0 comments on commit 1025be4

Please sign in to comment.