From 4d7ae603a3839f9bf8fda59e387a05f8c7475522 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 2 Feb 2022 19:12:51 -0800 Subject: [PATCH 01/20] address first few slowest tests --- python/cudf/cudf/tests/test_csv.py | 9 +++++---- python/cudf/cudf/tests/test_indexing.py | 22 ++++++++++------------ python/cudf/cudf/tests/test_orc.py | 10 +++++----- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index 9208b8c7cd4..f49532e28f7 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -8,6 +8,7 @@ from io import BytesIO, StringIO from pathlib import Path +import cupy as cp import numpy as np import pandas as pd import pytest @@ -1009,17 +1010,17 @@ def test_small_zip(tmpdir): def test_csv_reader_carriage_return(tmpdir): rows = 1000 names = ["int_row", "int_double_row"] - buffer = ",".join(names) + "\r\n" for row in range(rows): buffer += str(row) + ", " + str(2 * row) + "\r\n" df = read_csv(StringIO(buffer)) + expect = cudf.DataFrame( + {"int_row": cp.arange(rows), "int_double_row": cp.arange(rows) * 2} + ) assert len(df) == rows - for row in range(0, rows): - assert df[names[0]][row] == row - assert df[names[1]][row] == 2 * row + assert_eq(expect, df) def test_csv_reader_tabs(): diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 102e5b57e8e..ed837c92549 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -1294,8 +1294,8 @@ def test_loc_datetime_index(sli, is_dataframe): @pytest.mark.parametrize( "gdf", [ - cudf.DataFrame({"a": range(1000000)}), - cudf.DataFrame({"a": range(1000000), "b": range(1000000)}), + cudf.DataFrame({"a": range(100000)}), + cudf.DataFrame({"a": range(100000), "b": range(100000)}), cudf.DataFrame({"a": range(20), "b": range(20)}), cudf.DataFrame( { @@ -1305,25 +1305,23 @@ def test_loc_datetime_index(sli, is_dataframe): } ), cudf.DataFrame(index=[1, 2, 3]), - cudf.DataFrame(index=range(1000000)), + cudf.DataFrame(index=range(100000)), cudf.DataFrame(columns=["a", "b", "c", "d"]), - cudf.DataFrame(columns=["a"], index=range(1000000)), - cudf.DataFrame( - columns=["a", "col2", "...col n"], index=range(1000000) - ), - cudf.DataFrame(index=cudf.Series(range(1000000)).astype("str")), + cudf.DataFrame(columns=["a"], index=range(100000)), + cudf.DataFrame(columns=["a", "col2", "...col n"], index=range(100000)), + cudf.DataFrame(index=cudf.Series(range(100000)).astype("str")), cudf.DataFrame( columns=["a", "b", "c", "d"], - index=cudf.Series(range(1000000)).astype("str"), + index=cudf.Series(range(100000)).astype("str"), ), ], ) @pytest.mark.parametrize( "slice", [ - slice(250000, 500000), - slice(250000, 250001), - slice(500000), + slice(25000, 50000), + slice(25000, 25001), + slice(50000), slice(1, 10), slice(10, 20), slice(15, 24000), diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 8689f773a02..7a7a07ba7fa 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -16,6 +16,7 @@ import cudf from cudf.io.orc import ORCWriter +from cudf.testing import assert_frame_equal from cudf.testing._utils import ( assert_eq, gen_rand_series, @@ -93,7 +94,7 @@ def test_orc_reader_basic(datadir, inputfile, columns, use_index, engine): path, engine=engine, columns=columns, use_index=use_index ) - assert_eq(expect, got, check_categorical=False) + assert_frame_equal(cudf.from_pandas(expect), got, check_categorical=False) def test_orc_reader_filenotfound(tmpdir): @@ -388,7 +389,7 @@ def test_orc_writer(datadir, tmpdir, reference_file, columns, compression): cudf.from_pandas(expect).to_orc(gdf_fname.strpath, compression=compression) got = pa.orc.ORCFile(gdf_fname).read(columns=columns).to_pandas() - assert_eq(expect, got) + assert_frame_equal(cudf.from_pandas(expect), cudf.from_pandas(got)) @pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"]) @@ -409,7 +410,7 @@ def test_orc_writer_statistics_frequency(datadir, tmpdir, stats_freq): cudf.from_pandas(expect).to_orc(gdf_fname.strpath, statistics=stats_freq) got = pa.orc.ORCFile(gdf_fname).read().to_pandas() - assert_eq(expect, got) + assert_frame_equal(cudf.from_pandas(expect), cudf.from_pandas(got)) @pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"]) @@ -492,8 +493,7 @@ def test_chunked_orc_writer( writer.close() got = pa.orc.ORCFile(gdf_fname).read(columns=columns).to_pandas() - - assert_eq(expect, got) + assert_frame_equal(cudf.from_pandas(expect), cudf.from_pandas(got)) @pytest.mark.parametrize( From 1f9e36da60589c0bc1fcf552817175ad4def9396 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 7 Feb 2022 08:07:43 -0800 Subject: [PATCH 02/20] continue reducing runtime --- .../test_avro_reader_fastavro_integration.py | 2 +- python/cudf/cudf/tests/test_binops.py | 5 ++- python/cudf/cudf/tests/test_dataframe.py | 38 ------------------- python/cudf/cudf/tests/test_parquet.py | 8 ++-- python/cudf/cudf/tests/test_repr.py | 7 ++-- python/cudf/cudf/tests/test_reshape.py | 6 +-- python/cudf/cudf/tests/test_string.py | 2 - python/cudf/cudf/tests/test_udf_masked_ops.py | 4 +- 8 files changed, 17 insertions(+), 55 deletions(-) diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py index a064bec9e82..72d976f0958 100644 --- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py +++ b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py @@ -210,7 +210,7 @@ def test_can_parse_no_schema(): assert_eq(expected, actual) -@pytest.mark.parametrize("rows", [0, 1, 10, 100000]) +@pytest.mark.parametrize("rows", [0, 1, 10, 1000]) @pytest.mark.parametrize("codec", ["null", "deflate", "snappy"]) def test_avro_compression(rows, codec): schema = { diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 921f2de38c2..f461bacb909 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -218,9 +218,12 @@ def test_series_compare(cmpop, obj_class, dtype): def _series_compare_nulls_typegen(): tests = [] + + numeric_type_subset = ['uint64', 'int64', 'float64', 'bool'] + tests += list(product(DATETIME_TYPES, DATETIME_TYPES)) tests += list(product(TIMEDELTA_TYPES, TIMEDELTA_TYPES)) - tests += list(product(NUMERIC_TYPES, NUMERIC_TYPES)) + tests += list(product(numeric_type_subset, numeric_type_subset)) tests += list(product(STRING_TYPES, STRING_TYPES)) return tests diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 889662c8a1c..584bcc934a4 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -6675,7 +6675,6 @@ def test_dataframe_info_null_counts(): "data1", [ [1, 2, 3, 4, 5, 6, 7], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], [ 1.9876543, 2.9876654, @@ -6694,31 +6693,12 @@ def test_dataframe_info_null_counts(): -6.88918237, -7.00001, ], - [ - 1.987654321, - 2.987654321, - 3.987654321, - 0.1221, - 2.1221, - 0.112121, - -21.1212, - ], - [ - -1.987654321, - -2.987654321, - -3.987654321, - -0.1221, - -2.1221, - -0.112121, - 21.1212, - ], ], ) @pytest.mark.parametrize( "data2", [ [1, 2, 3, 4, 5, 6, 7], - [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], [ 1.9876543, 2.9876654, @@ -6737,24 +6717,6 @@ def test_dataframe_info_null_counts(): -6.88918237, -7.00001, ], - [ - 1.987654321, - 2.987654321, - 3.987654321, - 0.1221, - 2.1221, - 0.112121, - -21.1212, - ], - [ - -1.987654321, - -2.987654321, - -3.987654321, - -0.1221, - -2.1221, - -0.112121, - 21.1212, - ], ], ) @pytest.mark.parametrize("rtol", [0, 0.01, 1e-05, 1e-08, 5e-1, 50.12]) diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 80ab0671a0d..8f5566a7299 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -1105,9 +1105,9 @@ def test_parquet_reader_list_large_multi_rowgroup_nulls(tmpdir): assert_eq(expect, got) -@pytest.mark.parametrize("skip", range(0, 128)) +@pytest.mark.parametrize("skip", range(0, 10)) def test_parquet_reader_list_skiprows(skip, tmpdir): - num_rows = 128 + num_rows = 10 src = pd.DataFrame( { "a": list_gen(int_gen, 0, num_rows, 80, 50), @@ -1124,9 +1124,9 @@ def test_parquet_reader_list_skiprows(skip, tmpdir): assert_eq(expect, got, check_dtype=False) -@pytest.mark.parametrize("skip", range(0, 120)) +@pytest.mark.parametrize("skip", range(0, 10)) def test_parquet_reader_list_num_rows(skip, tmpdir): - num_rows = 128 + num_rows = 20 src = pd.DataFrame( { "a": list_gen(int_gen, 0, num_rows, 80, 50), diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index ca02ee55df0..fae9a1b4d59 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -13,7 +13,7 @@ from cudf.testing import _utils as utils from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes -repr_categories = utils.NUMERIC_TYPES + ["str", "category", "datetime64[ns]"] +repr_categories = ["int64" , "float64", "str", "category", "datetime64[ns]"] @pytest.mark.parametrize("dtype", repr_categories) @@ -85,15 +85,14 @@ def test_full_series(nrows, dtype): @pytest.mark.parametrize("dtype", repr_categories) -@pytest.mark.parametrize("nrows", [0, 1, 2, 9, 20 / 2, 11, 20 - 1, 20, 20 + 1]) -@pytest.mark.parametrize("ncols", [0, 1, 2, 9, 20 / 2, 11, 20 - 1, 20, 20 + 1]) +@pytest.mark.parametrize("nrows", [0, 1, 10, 20, 21]) +@pytest.mark.parametrize("ncols", [0, 1, 10, 20, 21]) def test_full_dataframe_20(dtype, nrows, ncols): size = 20 pdf = pd.DataFrame( {idx: np.random.randint(0, 100, size) for idx in range(size)} ).astype(dtype) gdf = cudf.from_pandas(pdf) - assert pdf.__repr__() == gdf.__repr__() assert pdf._repr_html_() == gdf._repr_html_() assert pdf._repr_latex_() == gdf._repr_latex_() diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index b8f975f233e..8fc3b8012e4 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -17,9 +17,9 @@ ) -@pytest.mark.parametrize("num_id_vars", [0, 1, 2, 10]) -@pytest.mark.parametrize("num_value_vars", [0, 1, 2, 10]) -@pytest.mark.parametrize("num_rows", [1, 2, 1000]) +@pytest.mark.parametrize("num_id_vars", [0, 1, 2]) +@pytest.mark.parametrize("num_value_vars", [0, 1, 2]) +@pytest.mark.parametrize("num_rows", [1, 2, 100]) @pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES) @pytest.mark.parametrize("nulls", ["none", "some", "all"]) def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype): diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 75cf2e6c892..b959a5c9015 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -535,8 +535,6 @@ def _cat_convert_seq_to_cudf(others): "index", [ ["1", "2", "3", "4", "5"], - pd.Series(["1", "2", "3", "4", "5"]), - pd.Index(["1", "2", "3", "4", "5"]), ], ) def test_string_cat(ps_gs, others, sep, na_rep, index): diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 56090c8eacf..25185a75424 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -238,8 +238,8 @@ def func(row): run_masked_udf_test(func, gdf, check_dtype=False) -@pytest.mark.parametrize("dtype_a", list(NUMERIC_TYPES)) -@pytest.mark.parametrize("dtype_b", list(NUMERIC_TYPES)) +@pytest.mark.parametrize("dtype_a", ['uint64', 'int64', 'float64']) +@pytest.mark.parametrize("dtype_b", ['uint64', 'int64', 'float64']) @pytest.mark.parametrize("op", [operator.add, operator.and_, operator.eq]) def test_apply_mixed_dtypes(dtype_a, dtype_b, op): """ From 70932d5d25886677b22583470dad6ee6221616b0 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 7 Feb 2022 12:33:29 -0800 Subject: [PATCH 03/20] updates --- python/cudf/cudf/testing/_utils.py | 11 +++++++++++ python/cudf/cudf/tests/test_binops.py | 12 +++++------- .../cudf/cudf/tests/test_extension_compilation.py | 14 +++++++++++--- python/cudf/cudf/tests/test_udf_masked_ops.py | 13 +++++++------ 4 files changed, 34 insertions(+), 16 deletions(-) diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index 41dac26edf8..ac472eb4ec2 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -4,6 +4,7 @@ from collections.abc import Mapping, Sequence from contextlib import contextmanager from decimal import Decimal +from tkinter import NUMERIC import cupy import numpy as np @@ -11,6 +12,8 @@ import pytest from pandas import testing as tm +import itertools + import cudf from cudf._lib.null_mask import bitmask_allocation_size_bytes from cudf.core.column.datetime import _numpy_to_pandas_conversion @@ -321,3 +324,11 @@ def does_not_raise(): def xfail_param(param, **kwargs): return pytest.param(param, marks=pytest.mark.xfail(**kwargs)) + +deduped_numeric_dtype_tests = pytest.mark.parametrize( + "left_dtype,right_dtype", list( + itertools.combinations_with_replacement( + NUMERIC_TYPES, 2 + ) + ) + ) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index f461bacb909..2a499494a77 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -5,7 +5,7 @@ import decimal import operator import random -from itertools import product +from itertools import combinations, product, combinations_with_replacement import cupy as cp import numpy as np @@ -219,12 +219,10 @@ def test_series_compare(cmpop, obj_class, dtype): def _series_compare_nulls_typegen(): tests = [] - numeric_type_subset = ['uint64', 'int64', 'float64', 'bool'] - - tests += list(product(DATETIME_TYPES, DATETIME_TYPES)) - tests += list(product(TIMEDELTA_TYPES, TIMEDELTA_TYPES)) - tests += list(product(numeric_type_subset, numeric_type_subset)) - tests += list(product(STRING_TYPES, STRING_TYPES)) + tests += list(combinations_with_replacement(DATETIME_TYPES, 2)) + tests += list(combinations_with_replacement(DATETIME_TYPES, 2)) + tests += list(combinations_with_replacement(NUMERIC_TYPES, 2)) + tests += list(combinations_with_replacement(STRING_TYPES, 2)) return tests diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 47c9448cf63..1af79cadacf 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -5,10 +5,16 @@ from numba import cuda, types from numba.cuda import compile_ptx +from cudf.testing._utils import deduped_numeric_dtype_tests + + +import numpy as np from cudf import NA from cudf.core.udf.api import Masked from cudf.core.udf.typing import MaskedType +from numba.np.numpy_support import from_dtype + arith_ops = ( operator.add, operator.sub, @@ -159,19 +165,21 @@ def func(x): @pytest.mark.parametrize("op", ops) -@pytest.mark.parametrize("ty1", number_types, ids=number_ids) -@pytest.mark.parametrize("ty2", number_types, ids=number_ids) +@deduped_numeric_dtype_tests @pytest.mark.parametrize( "masked", ((False, True), (True, False), (True, True)), ids=("um", "mu", "mm"), ) -def test_compile_arith_masked_ops(op, ty1, ty2, masked): +def test_compile_arith_masked_ops(op, left_dtype, right_dtype, masked): def func(x, y): return op(x, y) cc = (7, 5) + ty1 = from_dtype(np.dtype(left_dtype)) + ty2 = from_dtype(np.dtype(right_dtype)) + if masked[0]: ty1 = MaskedType(ty1) if masked[1]: diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 25185a75424..88da7ce518c 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -5,6 +5,8 @@ import pytest from numba import cuda +from cudf.testing._utils import deduped_numeric_dtype_tests + import cudf from cudf.core.scalar import NA from cudf.core.udf._ops import ( @@ -238,10 +240,9 @@ def func(row): run_masked_udf_test(func, gdf, check_dtype=False) -@pytest.mark.parametrize("dtype_a", ['uint64', 'int64', 'float64']) -@pytest.mark.parametrize("dtype_b", ['uint64', 'int64', 'float64']) +@deduped_numeric_dtype_tests @pytest.mark.parametrize("op", [operator.add, operator.and_, operator.eq]) -def test_apply_mixed_dtypes(dtype_a, dtype_b, op): +def test_apply_mixed_dtypes(left_dtype, right_dtype, op): """ Test that operations can be performed between columns of different dtypes and return a column with the correct @@ -251,7 +252,7 @@ def test_apply_mixed_dtypes(dtype_a, dtype_b, op): # First perform the op on two dummy data on host, if numpy can # safely type cast, we should expect it to work in udf too. try: - op(getattr(np, dtype_a)(0), getattr(np, dtype_b)(42)) + op(getattr(np, left_dtype)(0), getattr(np, right_dtype)(42)) except TypeError: pytest.skip("Operation is unsupported for corresponding dtype.") @@ -261,8 +262,8 @@ def func(row): return op(x, y) gdf = cudf.DataFrame({"a": [1.5, None, 3, None], "b": [4, 5, None, None]}) - gdf["a"] = gdf["a"].astype(dtype_a) - gdf["b"] = gdf["b"].astype(dtype_b) + gdf["a"] = gdf["a"].astype(left_dtype) + gdf["b"] = gdf["b"].astype(right_dtype) run_masked_udf_test(func, gdf, check_dtype=False) From b94cce4ebdcdca0259f5d3626ed9e30f1da983dc Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 7 Feb 2022 12:34:39 -0800 Subject: [PATCH 04/20] ??? IDE added this somehow --- python/cudf/cudf/testing/_utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index ac472eb4ec2..139aff0494a 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -4,7 +4,6 @@ from collections.abc import Mapping, Sequence from contextlib import contextmanager from decimal import Decimal -from tkinter import NUMERIC import cupy import numpy as np From 67e3994c6bf33f8e140322477b4b9eb9851f64e6 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 7 Feb 2022 12:37:48 -0800 Subject: [PATCH 05/20] style --- python/cudf/cudf/testing/_utils.py | 13 +++++-------- python/cudf/cudf/tests/test_binops.py | 2 +- .../cudf/cudf/tests/test_extension_compilation.py | 9 +++------ python/cudf/cudf/tests/test_repr.py | 2 +- python/cudf/cudf/tests/test_string.py | 5 +---- python/cudf/cudf/tests/test_udf_masked_ops.py | 8 +++++--- 6 files changed, 16 insertions(+), 23 deletions(-) diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index 139aff0494a..ac21d30d55e 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -1,5 +1,6 @@ # Copyright (c) 2020-2021, NVIDIA CORPORATION. +import itertools import re from collections.abc import Mapping, Sequence from contextlib import contextmanager @@ -11,8 +12,6 @@ import pytest from pandas import testing as tm -import itertools - import cudf from cudf._lib.null_mask import bitmask_allocation_size_bytes from cudf.core.column.datetime import _numpy_to_pandas_conversion @@ -324,10 +323,8 @@ def does_not_raise(): def xfail_param(param, **kwargs): return pytest.param(param, marks=pytest.mark.xfail(**kwargs)) + deduped_numeric_dtype_tests = pytest.mark.parametrize( - "left_dtype,right_dtype", list( - itertools.combinations_with_replacement( - NUMERIC_TYPES, 2 - ) - ) - ) + "left_dtype,right_dtype", + list(itertools.combinations_with_replacement(NUMERIC_TYPES, 2)), +) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 2a499494a77..1051c2c4191 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -5,7 +5,7 @@ import decimal import operator import random -from itertools import combinations, product, combinations_with_replacement +from itertools import combinations, combinations_with_replacement, product import cupy as cp import numpy as np diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 1af79cadacf..13b6dbbdb3d 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -1,19 +1,16 @@ import operator import cupy as cp +import numpy as np import pytest from numba import cuda, types from numba.cuda import compile_ptx +from numba.np.numpy_support import from_dtype -from cudf.testing._utils import deduped_numeric_dtype_tests - - -import numpy as np from cudf import NA from cudf.core.udf.api import Masked from cudf.core.udf.typing import MaskedType - -from numba.np.numpy_support import from_dtype +from cudf.testing._utils import deduped_numeric_dtype_tests arith_ops = ( operator.add, diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index fae9a1b4d59..b2f2240c41a 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -13,7 +13,7 @@ from cudf.testing import _utils as utils from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes -repr_categories = ["int64" , "float64", "str", "category", "datetime64[ns]"] +repr_categories = ["int64", "float64", "str", "category", "datetime64[ns]"] @pytest.mark.parametrize("dtype", repr_categories) diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index b959a5c9015..ca2948ef9f4 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -532,10 +532,7 @@ def _cat_convert_seq_to_cudf(others): @pytest.mark.parametrize("sep", [None, "", " ", "|", ",", "|||"]) @pytest.mark.parametrize("na_rep", [None, "", "null", "a"]) @pytest.mark.parametrize( - "index", - [ - ["1", "2", "3", "4", "5"], - ], + "index", [["1", "2", "3", "4", "5"],], ) def test_string_cat(ps_gs, others, sep, na_rep, index): ps, gs = ps_gs diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 88da7ce518c..687ab9ab725 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -5,8 +5,6 @@ import pytest from numba import cuda -from cudf.testing._utils import deduped_numeric_dtype_tests - import cudf from cudf.core.scalar import NA from cudf.core.udf._ops import ( @@ -16,7 +14,11 @@ unary_ops, ) from cudf.core.udf.utils import precompiled -from cudf.testing._utils import NUMERIC_TYPES, _decimal_series, assert_eq +from cudf.testing._utils import ( + _decimal_series, + assert_eq, + deduped_numeric_dtype_tests, +) def run_masked_udf_test(func, data, args=(), **kwargs): From e05d8bc8b0e5eaa350f2a0aee7bf1127702d1094 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 7 Feb 2022 12:43:15 -0800 Subject: [PATCH 06/20] style --- python/cudf/cudf/tests/test_binops.py | 2 +- python/cudf/cudf/tests/test_string.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 1051c2c4191..6fc5ad458b9 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -5,7 +5,7 @@ import decimal import operator import random -from itertools import combinations, combinations_with_replacement, product +from itertools import combinations_with_replacement, product import cupy as cp import numpy as np diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index ca2948ef9f4..c3a8a3a604b 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -532,7 +532,7 @@ def _cat_convert_seq_to_cudf(others): @pytest.mark.parametrize("sep", [None, "", " ", "|", ",", "|||"]) @pytest.mark.parametrize("na_rep", [None, "", "null", "a"]) @pytest.mark.parametrize( - "index", [["1", "2", "3", "4", "5"],], + "index", [["1", "2", "3", "4", "5"]], ) def test_string_cat(ps_gs, others, sep, na_rep, index): ps, gs = ps_gs From f367e0194983f756e591a353685f678369f428f3 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 7 Feb 2022 14:31:02 -0800 Subject: [PATCH 07/20] remove unused parameters --- python/cudf/cudf/tests/test_repr.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index b2f2240c41a..34248818e7a 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -85,9 +85,7 @@ def test_full_series(nrows, dtype): @pytest.mark.parametrize("dtype", repr_categories) -@pytest.mark.parametrize("nrows", [0, 1, 10, 20, 21]) -@pytest.mark.parametrize("ncols", [0, 1, 10, 20, 21]) -def test_full_dataframe_20(dtype, nrows, ncols): +def test_full_dataframe_20(dtype): size = 20 pdf = pd.DataFrame( {idx: np.random.randint(0, 100, size) for idx in range(size)} From d298d4a7744193f5787d833fec61575d90f4f4f1 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Tue, 8 Feb 2022 09:11:25 -0600 Subject: [PATCH 08/20] Update python/cudf/cudf/tests/test_binops.py Co-authored-by: Michael Wang --- python/cudf/cudf/tests/test_binops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index f5ba65bd3d5..7ad5fd743e4 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -219,7 +219,7 @@ def _series_compare_nulls_typegen(): tests = [] tests += list(combinations_with_replacement(DATETIME_TYPES, 2)) - tests += list(combinations_with_replacement(DATETIME_TYPES, 2)) + tests += list(combinations_with_replacement(TIMEDELTA_TYPES, 2)) tests += list(combinations_with_replacement(NUMERIC_TYPES, 2)) tests += list(combinations_with_replacement(STRING_TYPES, 2)) From 6e9a0ef0438a3afb6613b4dca47da742736e219b Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 8 Feb 2022 07:12:17 -0800 Subject: [PATCH 09/20] address reviews --- python/cudf/cudf/testing/_utils.py | 2 +- python/cudf/cudf/tests/test_extension_compilation.py | 4 ++-- python/cudf/cudf/tests/test_udf_masked_ops.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index 5e632f59509..63410a814e3 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -333,7 +333,7 @@ def xfail_param(param, **kwargs): return pytest.param(param, marks=pytest.mark.xfail(**kwargs)) -deduped_numeric_dtype_tests = pytest.mark.parametrize( +numeric_dtypes_pairwise = pytest.mark.parametrize( "left_dtype,right_dtype", list(itertools.combinations_with_replacement(NUMERIC_TYPES, 2)), ) diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 13b6dbbdb3d..d77e9936092 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -10,7 +10,7 @@ from cudf import NA from cudf.core.udf.api import Masked from cudf.core.udf.typing import MaskedType -from cudf.testing._utils import deduped_numeric_dtype_tests +from cudf.testing._utils import numeric_dtypes_pairwise arith_ops = ( operator.add, @@ -162,7 +162,7 @@ def func(x): @pytest.mark.parametrize("op", ops) -@deduped_numeric_dtype_tests +@numeric_dtypes_pairwise @pytest.mark.parametrize( "masked", ((False, True), (True, False), (True, True)), diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 687ab9ab725..89344211fc4 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -17,7 +17,7 @@ from cudf.testing._utils import ( _decimal_series, assert_eq, - deduped_numeric_dtype_tests, + numeric_dtypes_pairwise, ) @@ -242,7 +242,7 @@ def func(row): run_masked_udf_test(func, gdf, check_dtype=False) -@deduped_numeric_dtype_tests +@numeric_dtypes_pairwise @pytest.mark.parametrize("op", [operator.add, operator.and_, operator.eq]) def test_apply_mixed_dtypes(left_dtype, right_dtype, op): """ From e4a98d0227b33c13d932a945763b4eb45ff08df0 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 8 Feb 2022 07:21:19 -0800 Subject: [PATCH 10/20] less from_pandas() --- python/cudf/cudf/tests/test_orc.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index 7a7a07ba7fa..623098741a9 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -385,11 +385,13 @@ def test_orc_writer(datadir, tmpdir, reference_file, columns, compression): else: print(type(excpr).__name__) - expect = orcfile.read(columns=columns).to_pandas() - cudf.from_pandas(expect).to_orc(gdf_fname.strpath, compression=compression) - got = pa.orc.ORCFile(gdf_fname).read(columns=columns).to_pandas() + expect = cudf.from_pandas(orcfile.read(columns=columns).to_pandas()) + expect.to_orc(gdf_fname.strpath, compression=compression) + got = cudf.from_pandas( + pa.orc.ORCFile(gdf_fname).read(columns=columns).to_pandas() + ) - assert_frame_equal(cudf.from_pandas(expect), cudf.from_pandas(got)) + assert_frame_equal(expect, got) @pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"]) @@ -406,11 +408,11 @@ def test_orc_writer_statistics_frequency(datadir, tmpdir, stats_freq): else: print(type(excpr).__name__) - expect = orcfile.read().to_pandas() - cudf.from_pandas(expect).to_orc(gdf_fname.strpath, statistics=stats_freq) - got = pa.orc.ORCFile(gdf_fname).read().to_pandas() + expect = cudf.from_pandas(orcfile.read().to_pandas()) + expect.to_orc(gdf_fname.strpath, statistics=stats_freq) + got = cudf.from_pandas(pa.orc.ORCFile(gdf_fname).read().to_pandas()) - assert_frame_equal(cudf.from_pandas(expect), cudf.from_pandas(got)) + assert_frame_equal(expect, got) @pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"]) From 289a13bb2faa8f8ada8e051dbdc5ce633e43c4f3 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 8 Feb 2022 10:21:01 -0800 Subject: [PATCH 11/20] combine repr tests --- python/cudf/cudf/tests/test_repr.py | 34 ++++++++++++++--------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index 34248818e7a..01e33896b84 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -13,7 +13,15 @@ from cudf.testing import _utils as utils from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes -repr_categories = ["int64", "float64", "str", "category", "datetime64[ns]"] +repr_categories = [ + "bool", + "uint16", + "int64", + "float64", + "str", + "category", + "datetime64[ns]", +] @pytest.mark.parametrize("dtype", repr_categories) @@ -84,23 +92,11 @@ def test_full_series(nrows, dtype): pd.reset_option("display.max_rows") +@pytest.mark.parametrize("nrows", [5, 10, 15]) +@pytest.mark.parametrize("ncols", [5, 10, 15]) +@pytest.mark.parametrize("size", [20, 21]) @pytest.mark.parametrize("dtype", repr_categories) -def test_full_dataframe_20(dtype): - size = 20 - pdf = pd.DataFrame( - {idx: np.random.randint(0, 100, size) for idx in range(size)} - ).astype(dtype) - gdf = cudf.from_pandas(pdf) - assert pdf.__repr__() == gdf.__repr__() - assert pdf._repr_html_() == gdf._repr_html_() - assert pdf._repr_latex_() == gdf._repr_latex_() - - -@pytest.mark.parametrize("dtype", repr_categories) -@pytest.mark.parametrize("nrows", [9, 21 / 2, 11, 21 - 1]) -@pytest.mark.parametrize("ncols", [9, 21 / 2, 11, 21 - 1]) -def test_full_dataframe_21(dtype, nrows, ncols): - size = 21 +def test_full_dataframe_20(dtype, size, nrows, ncols): pdf = pd.DataFrame( {idx: np.random.randint(0, 100, size) for idx in range(size)} ).astype(dtype) @@ -108,7 +104,11 @@ def test_full_dataframe_21(dtype, nrows, ncols): pd.options.display.max_rows = int(nrows) pd.options.display.max_columns = int(ncols) + assert pdf.__repr__() == gdf.__repr__() + assert pdf._repr_html_() == gdf._repr_html_() + assert pdf._repr_latex_() == gdf._repr_latex_() + pd.reset_option("display.max_rows") pd.reset_option("display.max_columns") From 2e25ed2aa6482f6bb109f14085592f0eb8d04d3b Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Tue, 8 Feb 2022 12:23:31 -0600 Subject: [PATCH 12/20] Update python/cudf/cudf/tests/test_indexing.py Co-authored-by: GALI PREM SAGAR --- python/cudf/cudf/tests/test_indexing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index ed837c92549..8caabd47572 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -1296,7 +1296,6 @@ def test_loc_datetime_index(sli, is_dataframe): [ cudf.DataFrame({"a": range(100000)}), cudf.DataFrame({"a": range(100000), "b": range(100000)}), - cudf.DataFrame({"a": range(20), "b": range(20)}), cudf.DataFrame( { "a": range(20), From 9393bd023a34ec87d921aacf13fa2fe95037bb0a Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 8 Feb 2022 12:19:14 -0800 Subject: [PATCH 13/20] update copyrights --- python/cudf/cudf/testing/_utils.py | 2 +- python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py | 2 +- python/cudf/cudf/tests/test_csv.py | 2 +- python/cudf/cudf/tests/test_extension_compilation.py | 1 + python/cudf/cudf/tests/test_indexing.py | 2 +- python/cudf/cudf/tests/test_repr.py | 2 +- python/cudf/cudf/tests/test_udf_masked_ops.py | 1 + 7 files changed, 7 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index 63410a814e3..00b22b2a490 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Copyright (c) 2020-2022, NVIDIA CORPORATION. import itertools import re diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py index 72d976f0958..0bf1813a06c 100644 --- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py +++ b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py index f49532e28f7..f3d69e1745e 100644 --- a/python/cudf/cudf/tests/test_csv.py +++ b/python/cudf/cudf/tests/test_csv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2021, NVIDIA CORPORATION. +# Copyright (c) 2018-2022, NVIDIA CORPORATION. import gzip import os diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index d77e9936092..328e7b89840 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -1,3 +1,4 @@ +# Copyright (c) 2021-2022, NVIDIA CORPORATION. import operator import cupy as cp diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 8caabd47572..9265812ac14 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. from itertools import combinations diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index 01e33896b84..21874e013f8 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021, NVIDIA CORPORATION. +# Copyright (c) 2019-2022, NVIDIA CORPORATION. import textwrap diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 89344211fc4..99acb6dc744 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -1,3 +1,4 @@ +# Copyright (c) 2021-2022, NVIDIA CORPORATION. import math import operator From a20102ee2413011a3f8aaa6bbca46ce8a6bcbb04 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Wed, 9 Feb 2022 15:02:31 -0600 Subject: [PATCH 14/20] Apply suggestions from code review Co-authored-by: Bradley Dice --- python/cudf/cudf/testing/_utils.py | 2 +- python/cudf/cudf/tests/test_udf_masked_ops.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py index 00b22b2a490..efaf8a228b6 100644 --- a/python/cudf/cudf/testing/_utils.py +++ b/python/cudf/cudf/testing/_utils.py @@ -333,7 +333,7 @@ def xfail_param(param, **kwargs): return pytest.param(param, marks=pytest.mark.xfail(**kwargs)) -numeric_dtypes_pairwise = pytest.mark.parametrize( +parametrize_numeric_dtypes_pairwise = pytest.mark.parametrize( "left_dtype,right_dtype", list(itertools.combinations_with_replacement(NUMERIC_TYPES, 2)), ) diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 99acb6dc744..57a777446a9 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -255,7 +255,7 @@ def test_apply_mixed_dtypes(left_dtype, right_dtype, op): # First perform the op on two dummy data on host, if numpy can # safely type cast, we should expect it to work in udf too. try: - op(getattr(np, left_dtype)(0), getattr(np, right_dtype)(42)) + op(np.dtype(left_dtype).type(0), np.dtype(right_dtype).type(42)) except TypeError: pytest.skip("Operation is unsupported for corresponding dtype.") From 9c800cd9c0fc455ed114a3f2b943f1e723204975 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 9 Feb 2022 13:06:17 -0800 Subject: [PATCH 15/20] address reviews --- python/cudf/cudf/tests/test_binops.py | 14 ++++---- python/cudf/cudf/tests/test_dataframe.py | 36 +++++++++++++++++++ .../cudf/tests/test_extension_compilation.py | 4 +-- python/cudf/cudf/tests/test_indexing.py | 35 +++++++++--------- python/cudf/cudf/tests/test_parquet.py | 4 +-- python/cudf/cudf/tests/test_repr.py | 16 ++++----- python/cudf/cudf/tests/test_udf_masked_ops.py | 4 +-- 7 files changed, 72 insertions(+), 41 deletions(-) diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py index 7ad5fd743e4..02ca7a0cd58 100644 --- a/python/cudf/cudf/tests/test_binops.py +++ b/python/cudf/cudf/tests/test_binops.py @@ -216,14 +216,12 @@ def test_series_compare(cmpop, obj_class, dtype): def _series_compare_nulls_typegen(): - tests = [] - - tests += list(combinations_with_replacement(DATETIME_TYPES, 2)) - tests += list(combinations_with_replacement(TIMEDELTA_TYPES, 2)) - tests += list(combinations_with_replacement(NUMERIC_TYPES, 2)) - tests += list(combinations_with_replacement(STRING_TYPES, 2)) - - return tests + return [ + *combinations_with_replacement(DATETIME_TYPES, 2), + *combinations_with_replacement(TIMEDELTA_TYPES, 2), + *combinations_with_replacement(NUMERIC_TYPES, 2), + *combinations_with_replacement(STRING_TYPES, 2), + ] @pytest.mark.parametrize("cmpop", _cmpops) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index b3305a50031..3e0990880a4 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -6688,6 +6688,24 @@ def test_dataframe_info_null_counts(): -6.88918237, -7.00001, ], + [ + 1.987654321, + 2.987654321, + 3.987654321, + 0.1221, + 2.1221, + 0.112121, + -21.1212, + ], + [ + -1.987654321, + -2.987654321, + -3.987654321, + -0.1221, + -2.1221, + -0.112121, + 21.1212, + ], ], ) @pytest.mark.parametrize( @@ -6712,6 +6730,24 @@ def test_dataframe_info_null_counts(): -6.88918237, -7.00001, ], + [ + 1.987654321, + 2.987654321, + 3.987654321, + 0.1221, + 2.1221, + 0.112121, + -21.1212, + ], + [ + -1.987654321, + -2.987654321, + -3.987654321, + -0.1221, + -2.1221, + -0.112121, + 21.1212, + ], ], ) @pytest.mark.parametrize("rtol", [0, 0.01, 1e-05, 1e-08, 5e-1, 50.12]) diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 328e7b89840..692f40873d7 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -11,7 +11,7 @@ from cudf import NA from cudf.core.udf.api import Masked from cudf.core.udf.typing import MaskedType -from cudf.testing._utils import numeric_dtypes_pairwise +from cudf.testing._utils import parametrize_numeric_dtypes_pairwise arith_ops = ( operator.add, @@ -163,7 +163,7 @@ def func(x): @pytest.mark.parametrize("op", ops) -@numeric_dtypes_pairwise +@parametrize_numeric_dtypes_pairwise @pytest.mark.parametrize( "masked", ((False, True), (True, False), (True, True)), diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py index 9265812ac14..19d7c8a10ab 100644 --- a/python/cudf/cudf/tests/test_indexing.py +++ b/python/cudf/cudf/tests/test_indexing.py @@ -1292,27 +1292,27 @@ def test_loc_datetime_index(sli, is_dataframe): @pytest.mark.parametrize( - "gdf", + "gdf_kwargs", [ - cudf.DataFrame({"a": range(100000)}), - cudf.DataFrame({"a": range(100000), "b": range(100000)}), - cudf.DataFrame( - { + {"data": {"a": range(100000)}}, + {"data": {"a": range(100000), "b": range(100000)}}, + { + "data": { "a": range(20), "b": range(20), "c": ["abc", "def", "xyz", "def", "pqr"] * 4, } - ), - cudf.DataFrame(index=[1, 2, 3]), - cudf.DataFrame(index=range(100000)), - cudf.DataFrame(columns=["a", "b", "c", "d"]), - cudf.DataFrame(columns=["a"], index=range(100000)), - cudf.DataFrame(columns=["a", "col2", "...col n"], index=range(100000)), - cudf.DataFrame(index=cudf.Series(range(100000)).astype("str")), - cudf.DataFrame( - columns=["a", "b", "c", "d"], - index=cudf.Series(range(100000)).astype("str"), - ), + }, + {"index": [1, 2, 3]}, + {"index": range(100000)}, + {"columns": ["a", "b", "c", "d"]}, + {"columns": ["a"], "index": range(100000)}, + {"columns": ["a", "col2", "...col n"], "index": range(100000)}, + {"index": cudf.Series(range(100000)).astype("str")}, + { + "columns": ["a", "b", "c", "d"], + "index": cudf.Series(range(100000)).astype("str"), + }, ], ) @pytest.mark.parametrize( @@ -1327,7 +1327,8 @@ def test_loc_datetime_index(sli, is_dataframe): slice(6), ], ) -def test_dataframe_sliced(gdf, slice): +def test_dataframe_sliced(gdf_kwargs, slice): + gdf = cudf.DataFrame(**gdf_kwargs) pdf = gdf.to_pandas() actual = gdf[slice] diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 98c2c9b8d32..7feaa400446 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -1105,7 +1105,7 @@ def test_parquet_reader_list_large_multi_rowgroup_nulls(tmpdir): assert_eq(expect, got) -@pytest.mark.parametrize("skip", range(0, 10)) +@pytest.mark.parametrize("skip", [0, 1, 5, 10]) def test_parquet_reader_list_skiprows(skip, tmpdir): num_rows = 10 src = pd.DataFrame( @@ -1124,7 +1124,7 @@ def test_parquet_reader_list_skiprows(skip, tmpdir): assert_eq(expect, got, check_dtype=False) -@pytest.mark.parametrize("skip", range(0, 10)) +@pytest.mark.parametrize("skip", [0, 1, 5, 10]) def test_parquet_reader_list_num_rows(skip, tmpdir): num_rows = 20 src = pd.DataFrame( diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index 21874e013f8..8f2e4811e36 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -14,7 +14,6 @@ from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes repr_categories = [ - "bool", "uint16", "int64", "float64", @@ -102,15 +101,12 @@ def test_full_dataframe_20(dtype, size, nrows, ncols): ).astype(dtype) gdf = cudf.from_pandas(pdf) - pd.options.display.max_rows = int(nrows) - pd.options.display.max_columns = int(ncols) - - assert pdf.__repr__() == gdf.__repr__() - assert pdf._repr_html_() == gdf._repr_html_() - assert pdf._repr_latex_() == gdf._repr_latex_() - - pd.reset_option("display.max_rows") - pd.reset_option("display.max_columns") + with pd.option_context( + "display.max_rows", int(nrows), "display.max_columns", int(ncols) + ): + assert repr(pdf) == repr(gdf) + assert pdf._repr_html_() == gdf._repr_html_() + assert pdf._repr_latex_() == gdf._repr_latex_() @given( diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py index 57a777446a9..faaea6eec08 100644 --- a/python/cudf/cudf/tests/test_udf_masked_ops.py +++ b/python/cudf/cudf/tests/test_udf_masked_ops.py @@ -18,7 +18,7 @@ from cudf.testing._utils import ( _decimal_series, assert_eq, - numeric_dtypes_pairwise, + parametrize_numeric_dtypes_pairwise, ) @@ -243,7 +243,7 @@ def func(row): run_masked_udf_test(func, gdf, check_dtype=False) -@numeric_dtypes_pairwise +@parametrize_numeric_dtypes_pairwise @pytest.mark.parametrize("op", [operator.add, operator.and_, operator.eq]) def test_apply_mixed_dtypes(left_dtype, right_dtype, op): """ From 025c69db773c9f973f3a3df7fd4d949de2acdb70 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 9 Feb 2022 13:30:55 -0800 Subject: [PATCH 16/20] temporarily revert changes --- python/cudf/cudf/tests/test_dataframe.py | 36 ------------------------ 1 file changed, 36 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 3e0990880a4..b3305a50031 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -6688,24 +6688,6 @@ def test_dataframe_info_null_counts(): -6.88918237, -7.00001, ], - [ - 1.987654321, - 2.987654321, - 3.987654321, - 0.1221, - 2.1221, - 0.112121, - -21.1212, - ], - [ - -1.987654321, - -2.987654321, - -3.987654321, - -0.1221, - -2.1221, - -0.112121, - 21.1212, - ], ], ) @pytest.mark.parametrize( @@ -6730,24 +6712,6 @@ def test_dataframe_info_null_counts(): -6.88918237, -7.00001, ], - [ - 1.987654321, - 2.987654321, - 3.987654321, - 0.1221, - 2.1221, - 0.112121, - -21.1212, - ], - [ - -1.987654321, - -2.987654321, - -3.987654321, - -0.1221, - -2.1221, - -0.112121, - 21.1212, - ], ], ) @pytest.mark.parametrize("rtol", [0, 0.01, 1e-05, 1e-08, 5e-1, 50.12]) From 60a0a927d3e197e37c88418eaa413361b9dd4126 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 9 Feb 2022 13:32:15 -0800 Subject: [PATCH 17/20] copyright --- python/cudf/cudf/tests/test_reshape.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 8fc3b8012e4..2efa781c506 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. import re From 227be3c579271ea63996eca5708337404b8eb815 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 14 Feb 2022 07:53:07 -0800 Subject: [PATCH 18/20] redo test_cudf_isclose parameterization --- python/cudf/cudf/tests/test_dataframe.py | 49 ++++++++++++------------ 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index b3305a50031..67230fa1171 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3,6 +3,7 @@ import array as arr import datetime import io +import itertools import operator import random import re @@ -6666,10 +6667,11 @@ def test_dataframe_info_null_counts(): assert str_cmp == actual_string -@pytest.mark.parametrize( - "data1", - [ +def cudf_isclose_data(): + # generate pairs of data for isclsoe + data_list = [ [1, 2, 3, 4, 5, 6, 7], + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], [ 1.9876543, 2.9876654, @@ -6688,32 +6690,29 @@ def test_dataframe_info_null_counts(): -6.88918237, -7.00001, ], - ], -) -@pytest.mark.parametrize( - "data2", - [ - [1, 2, 3, 4, 5, 6, 7], [ - 1.9876543, - 2.9876654, - 3.9876543, - 4.1234587, - 5.23, - 6.88918237, - 7.00001, + 1.987654321, + 2.987654321, + 3.987654321, + 0.1221, + 2.1221, + 0.112121, + -21.1212, ], [ - -1.9876543, - -2.9876654, - -3.9876543, - -4.1234587, - -5.23, - -6.88918237, - -7.00001, + -1.987654321, + -2.987654321, + -3.987654321, + -0.1221, + -2.1221, + -0.112121, + 21.1212, ], - ], -) + ] + return list(itertools.combinations_with_replacement(data_list, 2)) + + +@pytest.mark.parametrize("data1,data2", cudf_isclose_data()) @pytest.mark.parametrize("rtol", [0, 0.01, 1e-05, 1e-08, 5e-1, 50.12]) @pytest.mark.parametrize("atol", [0, 0.01, 1e-05, 1e-08, 50.12]) def test_cudf_isclose(data1, data2, rtol, atol): From ecdb9860d3410250dc4ac7949b5858af7091f92a Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 15 Feb 2022 06:32:50 -0800 Subject: [PATCH 19/20] revert changes to test_cudf_isclose --- python/cudf/cudf/tests/test_dataframe.py | 57 ++++++++++++++++++++---- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 67230fa1171..fb173bc0eab 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3,7 +3,6 @@ import array as arr import datetime import io -import itertools import operator import random import re @@ -6667,9 +6666,9 @@ def test_dataframe_info_null_counts(): assert str_cmp == actual_string -def cudf_isclose_data(): - # generate pairs of data for isclsoe - data_list = [ +@pytest.mark.parametrize( + "data1", + [ [1, 2, 3, 4, 5, 6, 7], [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], [ @@ -6708,11 +6707,51 @@ def cudf_isclose_data(): -0.112121, 21.1212, ], - ] - return list(itertools.combinations_with_replacement(data_list, 2)) - - -@pytest.mark.parametrize("data1,data2", cudf_isclose_data()) + ], +) +@pytest.mark.parametrize( + "data2", + [ + [1, 2, 3, 4, 5, 6, 7], + [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0], + [ + 1.9876543, + 2.9876654, + 3.9876543, + 4.1234587, + 5.23, + 6.88918237, + 7.00001, + ], + [ + -1.9876543, + -2.9876654, + -3.9876543, + -4.1234587, + -5.23, + -6.88918237, + -7.00001, + ], + [ + 1.987654321, + 2.987654321, + 3.987654321, + 0.1221, + 2.1221, + 0.112121, + -21.1212, + ], + [ + -1.987654321, + -2.987654321, + -3.987654321, + -0.1221, + -2.1221, + -0.112121, + 21.1212, + ], + ], +) @pytest.mark.parametrize("rtol", [0, 0.01, 1e-05, 1e-08, 5e-1, 50.12]) @pytest.mark.parametrize("atol", [0, 0.01, 1e-05, 1e-08, 50.12]) def test_cudf_isclose(data1, data2, rtol, atol): From 62d56be1fbe9aab315542c51de58407d0362a699 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 15 Feb 2022 06:43:33 -0800 Subject: [PATCH 20/20] copyright --- python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py index 0bf1813a06c..9eb01ae31b4 100644 --- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py +++ b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2022, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.