From 4d7ae603a3839f9bf8fda59e387a05f8c7475522 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 2 Feb 2022 19:12:51 -0800
Subject: [PATCH 01/20] address first few slowest tests

---
 python/cudf/cudf/tests/test_csv.py      |  9 +++++----
 python/cudf/cudf/tests/test_indexing.py | 22 ++++++++++------------
 python/cudf/cudf/tests/test_orc.py      | 10 +++++-----
 3 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
index 9208b8c7cd4..f49532e28f7 100644
--- a/python/cudf/cudf/tests/test_csv.py
+++ b/python/cudf/cudf/tests/test_csv.py
@@ -8,6 +8,7 @@
 from io import BytesIO, StringIO
 from pathlib import Path
 
+import cupy as cp
 import numpy as np
 import pandas as pd
 import pytest
@@ -1009,17 +1010,17 @@ def test_small_zip(tmpdir):
 def test_csv_reader_carriage_return(tmpdir):
     rows = 1000
     names = ["int_row", "int_double_row"]
-
     buffer = ",".join(names) + "\r\n"
     for row in range(rows):
         buffer += str(row) + ", " + str(2 * row) + "\r\n"
 
     df = read_csv(StringIO(buffer))
+    expect = cudf.DataFrame(
+        {"int_row": cp.arange(rows), "int_double_row": cp.arange(rows) * 2}
+    )
 
     assert len(df) == rows
-    for row in range(0, rows):
-        assert df[names[0]][row] == row
-        assert df[names[1]][row] == 2 * row
+    assert_eq(expect, df)
 
 
 def test_csv_reader_tabs():
diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index 102e5b57e8e..ed837c92549 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -1294,8 +1294,8 @@ def test_loc_datetime_index(sli, is_dataframe):
 @pytest.mark.parametrize(
     "gdf",
     [
-        cudf.DataFrame({"a": range(1000000)}),
-        cudf.DataFrame({"a": range(1000000), "b": range(1000000)}),
+        cudf.DataFrame({"a": range(100000)}),
+        cudf.DataFrame({"a": range(100000), "b": range(100000)}),
         cudf.DataFrame({"a": range(20), "b": range(20)}),
         cudf.DataFrame(
             {
@@ -1305,25 +1305,23 @@ def test_loc_datetime_index(sli, is_dataframe):
             }
         ),
         cudf.DataFrame(index=[1, 2, 3]),
-        cudf.DataFrame(index=range(1000000)),
+        cudf.DataFrame(index=range(100000)),
         cudf.DataFrame(columns=["a", "b", "c", "d"]),
-        cudf.DataFrame(columns=["a"], index=range(1000000)),
-        cudf.DataFrame(
-            columns=["a", "col2", "...col n"], index=range(1000000)
-        ),
-        cudf.DataFrame(index=cudf.Series(range(1000000)).astype("str")),
+        cudf.DataFrame(columns=["a"], index=range(100000)),
+        cudf.DataFrame(columns=["a", "col2", "...col n"], index=range(100000)),
+        cudf.DataFrame(index=cudf.Series(range(100000)).astype("str")),
         cudf.DataFrame(
             columns=["a", "b", "c", "d"],
-            index=cudf.Series(range(1000000)).astype("str"),
+            index=cudf.Series(range(100000)).astype("str"),
         ),
     ],
 )
 @pytest.mark.parametrize(
     "slice",
     [
-        slice(250000, 500000),
-        slice(250000, 250001),
-        slice(500000),
+        slice(25000, 50000),
+        slice(25000, 25001),
+        slice(50000),
         slice(1, 10),
         slice(10, 20),
         slice(15, 24000),
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index 8689f773a02..7a7a07ba7fa 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -16,6 +16,7 @@
 
 import cudf
 from cudf.io.orc import ORCWriter
+from cudf.testing import assert_frame_equal
 from cudf.testing._utils import (
     assert_eq,
     gen_rand_series,
@@ -93,7 +94,7 @@ def test_orc_reader_basic(datadir, inputfile, columns, use_index, engine):
         path, engine=engine, columns=columns, use_index=use_index
     )
 
-    assert_eq(expect, got, check_categorical=False)
+    assert_frame_equal(cudf.from_pandas(expect), got, check_categorical=False)
 
 
 def test_orc_reader_filenotfound(tmpdir):
@@ -388,7 +389,7 @@ def test_orc_writer(datadir, tmpdir, reference_file, columns, compression):
     cudf.from_pandas(expect).to_orc(gdf_fname.strpath, compression=compression)
     got = pa.orc.ORCFile(gdf_fname).read(columns=columns).to_pandas()
 
-    assert_eq(expect, got)
+    assert_frame_equal(cudf.from_pandas(expect), cudf.from_pandas(got))
 
 
 @pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"])
@@ -409,7 +410,7 @@ def test_orc_writer_statistics_frequency(datadir, tmpdir, stats_freq):
     cudf.from_pandas(expect).to_orc(gdf_fname.strpath, statistics=stats_freq)
     got = pa.orc.ORCFile(gdf_fname).read().to_pandas()
 
-    assert_eq(expect, got)
+    assert_frame_equal(cudf.from_pandas(expect), cudf.from_pandas(got))
 
 
 @pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"])
@@ -492,8 +493,7 @@ def test_chunked_orc_writer(
     writer.close()
 
     got = pa.orc.ORCFile(gdf_fname).read(columns=columns).to_pandas()
-
-    assert_eq(expect, got)
+    assert_frame_equal(cudf.from_pandas(expect), cudf.from_pandas(got))
 
 
 @pytest.mark.parametrize(

From 1f9e36da60589c0bc1fcf552817175ad4def9396 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 7 Feb 2022 08:07:43 -0800
Subject: [PATCH 02/20] continue reducing runtime

---
 .../test_avro_reader_fastavro_integration.py  |  2 +-
 python/cudf/cudf/tests/test_binops.py         |  5 ++-
 python/cudf/cudf/tests/test_dataframe.py      | 38 -------------------
 python/cudf/cudf/tests/test_parquet.py        |  8 ++--
 python/cudf/cudf/tests/test_repr.py           |  7 ++--
 python/cudf/cudf/tests/test_reshape.py        |  6 +--
 python/cudf/cudf/tests/test_string.py         |  2 -
 python/cudf/cudf/tests/test_udf_masked_ops.py |  4 +-
 8 files changed, 17 insertions(+), 55 deletions(-)

diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
index a064bec9e82..72d976f0958 100644
--- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
+++ b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
@@ -210,7 +210,7 @@ def test_can_parse_no_schema():
     assert_eq(expected, actual)
 
 
-@pytest.mark.parametrize("rows", [0, 1, 10, 100000])
+@pytest.mark.parametrize("rows", [0, 1, 10, 1000])
 @pytest.mark.parametrize("codec", ["null", "deflate", "snappy"])
 def test_avro_compression(rows, codec):
     schema = {
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index 921f2de38c2..f461bacb909 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -218,9 +218,12 @@ def test_series_compare(cmpop, obj_class, dtype):
 
 def _series_compare_nulls_typegen():
     tests = []
+
+    numeric_type_subset = ['uint64', 'int64', 'float64', 'bool']
+
     tests += list(product(DATETIME_TYPES, DATETIME_TYPES))
     tests += list(product(TIMEDELTA_TYPES, TIMEDELTA_TYPES))
-    tests += list(product(NUMERIC_TYPES, NUMERIC_TYPES))
+    tests += list(product(numeric_type_subset, numeric_type_subset))
     tests += list(product(STRING_TYPES, STRING_TYPES))
 
     return tests
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 889662c8a1c..584bcc934a4 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -6675,7 +6675,6 @@ def test_dataframe_info_null_counts():
     "data1",
     [
         [1, 2, 3, 4, 5, 6, 7],
-        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
         [
             1.9876543,
             2.9876654,
@@ -6694,31 +6693,12 @@ def test_dataframe_info_null_counts():
             -6.88918237,
             -7.00001,
         ],
-        [
-            1.987654321,
-            2.987654321,
-            3.987654321,
-            0.1221,
-            2.1221,
-            0.112121,
-            -21.1212,
-        ],
-        [
-            -1.987654321,
-            -2.987654321,
-            -3.987654321,
-            -0.1221,
-            -2.1221,
-            -0.112121,
-            21.1212,
-        ],
     ],
 )
 @pytest.mark.parametrize(
     "data2",
     [
         [1, 2, 3, 4, 5, 6, 7],
-        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
         [
             1.9876543,
             2.9876654,
@@ -6737,24 +6717,6 @@ def test_dataframe_info_null_counts():
             -6.88918237,
             -7.00001,
         ],
-        [
-            1.987654321,
-            2.987654321,
-            3.987654321,
-            0.1221,
-            2.1221,
-            0.112121,
-            -21.1212,
-        ],
-        [
-            -1.987654321,
-            -2.987654321,
-            -3.987654321,
-            -0.1221,
-            -2.1221,
-            -0.112121,
-            21.1212,
-        ],
     ],
 )
 @pytest.mark.parametrize("rtol", [0, 0.01, 1e-05, 1e-08, 5e-1, 50.12])
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index 80ab0671a0d..8f5566a7299 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -1105,9 +1105,9 @@ def test_parquet_reader_list_large_multi_rowgroup_nulls(tmpdir):
     assert_eq(expect, got)
 
 
-@pytest.mark.parametrize("skip", range(0, 128))
+@pytest.mark.parametrize("skip", range(0, 10))
 def test_parquet_reader_list_skiprows(skip, tmpdir):
-    num_rows = 128
+    num_rows = 10
     src = pd.DataFrame(
         {
             "a": list_gen(int_gen, 0, num_rows, 80, 50),
@@ -1124,9 +1124,9 @@ def test_parquet_reader_list_skiprows(skip, tmpdir):
     assert_eq(expect, got, check_dtype=False)
 
 
-@pytest.mark.parametrize("skip", range(0, 120))
+@pytest.mark.parametrize("skip", range(0, 10))
 def test_parquet_reader_list_num_rows(skip, tmpdir):
-    num_rows = 128
+    num_rows = 20
     src = pd.DataFrame(
         {
             "a": list_gen(int_gen, 0, num_rows, 80, 50),
diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py
index ca02ee55df0..fae9a1b4d59 100644
--- a/python/cudf/cudf/tests/test_repr.py
+++ b/python/cudf/cudf/tests/test_repr.py
@@ -13,7 +13,7 @@
 from cudf.testing import _utils as utils
 from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes
 
-repr_categories = utils.NUMERIC_TYPES + ["str", "category", "datetime64[ns]"]
+repr_categories =  ["int64" , "float64", "str", "category", "datetime64[ns]"]
 
 
 @pytest.mark.parametrize("dtype", repr_categories)
@@ -85,15 +85,14 @@ def test_full_series(nrows, dtype):
 
 
 @pytest.mark.parametrize("dtype", repr_categories)
-@pytest.mark.parametrize("nrows", [0, 1, 2, 9, 20 / 2, 11, 20 - 1, 20, 20 + 1])
-@pytest.mark.parametrize("ncols", [0, 1, 2, 9, 20 / 2, 11, 20 - 1, 20, 20 + 1])
+@pytest.mark.parametrize("nrows", [0, 1, 10, 20, 21])
+@pytest.mark.parametrize("ncols", [0, 1, 10, 20, 21])
 def test_full_dataframe_20(dtype, nrows, ncols):
     size = 20
     pdf = pd.DataFrame(
         {idx: np.random.randint(0, 100, size) for idx in range(size)}
     ).astype(dtype)
     gdf = cudf.from_pandas(pdf)
-
     assert pdf.__repr__() == gdf.__repr__()
     assert pdf._repr_html_() == gdf._repr_html_()
     assert pdf._repr_latex_() == gdf._repr_latex_()
diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py
index b8f975f233e..8fc3b8012e4 100644
--- a/python/cudf/cudf/tests/test_reshape.py
+++ b/python/cudf/cudf/tests/test_reshape.py
@@ -17,9 +17,9 @@
 )
 
 
-@pytest.mark.parametrize("num_id_vars", [0, 1, 2, 10])
-@pytest.mark.parametrize("num_value_vars", [0, 1, 2, 10])
-@pytest.mark.parametrize("num_rows", [1, 2, 1000])
+@pytest.mark.parametrize("num_id_vars", [0, 1, 2])
+@pytest.mark.parametrize("num_value_vars", [0, 1, 2])
+@pytest.mark.parametrize("num_rows", [1, 2, 100])
 @pytest.mark.parametrize("dtype", NUMERIC_TYPES + DATETIME_TYPES)
 @pytest.mark.parametrize("nulls", ["none", "some", "all"])
 def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index 75cf2e6c892..b959a5c9015 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -535,8 +535,6 @@ def _cat_convert_seq_to_cudf(others):
     "index",
     [
         ["1", "2", "3", "4", "5"],
-        pd.Series(["1", "2", "3", "4", "5"]),
-        pd.Index(["1", "2", "3", "4", "5"]),
     ],
 )
 def test_string_cat(ps_gs, others, sep, na_rep, index):
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index 56090c8eacf..25185a75424 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -238,8 +238,8 @@ def func(row):
     run_masked_udf_test(func, gdf, check_dtype=False)
 
 
-@pytest.mark.parametrize("dtype_a", list(NUMERIC_TYPES))
-@pytest.mark.parametrize("dtype_b", list(NUMERIC_TYPES))
+@pytest.mark.parametrize("dtype_a", ['uint64', 'int64', 'float64'])
+@pytest.mark.parametrize("dtype_b", ['uint64', 'int64', 'float64'])
 @pytest.mark.parametrize("op", [operator.add, operator.and_, operator.eq])
 def test_apply_mixed_dtypes(dtype_a, dtype_b, op):
     """

From 70932d5d25886677b22583470dad6ee6221616b0 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 7 Feb 2022 12:33:29 -0800
Subject: [PATCH 03/20] add shared pairwise numeric dtype parametrization

---
 python/cudf/cudf/testing/_utils.py                 | 11 +++++++++++
 python/cudf/cudf/tests/test_binops.py              | 12 +++++-------
 .../cudf/cudf/tests/test_extension_compilation.py  | 14 +++++++++++---
 python/cudf/cudf/tests/test_udf_masked_ops.py      | 13 +++++++------
 4 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index 41dac26edf8..ac472eb4ec2 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -4,6 +4,7 @@
 from collections.abc import Mapping, Sequence
 from contextlib import contextmanager
 from decimal import Decimal
+from tkinter import NUMERIC
 
 import cupy
 import numpy as np
@@ -11,6 +12,8 @@
 import pytest
 from pandas import testing as tm
 
+import itertools
+
 import cudf
 from cudf._lib.null_mask import bitmask_allocation_size_bytes
 from cudf.core.column.datetime import _numpy_to_pandas_conversion
@@ -321,3 +324,11 @@ def does_not_raise():
 
 def xfail_param(param, **kwargs):
     return pytest.param(param, marks=pytest.mark.xfail(**kwargs))
+
+deduped_numeric_dtype_tests = pytest.mark.parametrize(
+    "left_dtype,right_dtype", list(
+        itertools.combinations_with_replacement(
+                NUMERIC_TYPES, 2
+            )
+        )
+    )
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index f461bacb909..2a499494a77 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -5,7 +5,7 @@
 import decimal
 import operator
 import random
-from itertools import product
+from itertools import combinations, product, combinations_with_replacement
 
 import cupy as cp
 import numpy as np
@@ -219,12 +219,10 @@ def test_series_compare(cmpop, obj_class, dtype):
 def _series_compare_nulls_typegen():
     tests = []
 
-    numeric_type_subset = ['uint64', 'int64', 'float64', 'bool']
-
-    tests += list(product(DATETIME_TYPES, DATETIME_TYPES))
-    tests += list(product(TIMEDELTA_TYPES, TIMEDELTA_TYPES))
-    tests += list(product(numeric_type_subset, numeric_type_subset))
-    tests += list(product(STRING_TYPES, STRING_TYPES))
+    tests += list(combinations_with_replacement(DATETIME_TYPES, 2))
+    tests += list(combinations_with_replacement(DATETIME_TYPES, 2))
+    tests += list(combinations_with_replacement(NUMERIC_TYPES, 2))
+    tests += list(combinations_with_replacement(STRING_TYPES, 2))
 
     return tests
 
diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py
index 47c9448cf63..1af79cadacf 100644
--- a/python/cudf/cudf/tests/test_extension_compilation.py
+++ b/python/cudf/cudf/tests/test_extension_compilation.py
@@ -5,10 +5,16 @@
 from numba import cuda, types
 from numba.cuda import compile_ptx
 
+from cudf.testing._utils import deduped_numeric_dtype_tests
+
+
+import numpy as np
 from cudf import NA
 from cudf.core.udf.api import Masked
 from cudf.core.udf.typing import MaskedType
 
+from numba.np.numpy_support import from_dtype
+
 arith_ops = (
     operator.add,
     operator.sub,
@@ -159,19 +165,21 @@ def func(x):
 
 
 @pytest.mark.parametrize("op", ops)
-@pytest.mark.parametrize("ty1", number_types, ids=number_ids)
-@pytest.mark.parametrize("ty2", number_types, ids=number_ids)
+@deduped_numeric_dtype_tests
 @pytest.mark.parametrize(
     "masked",
     ((False, True), (True, False), (True, True)),
     ids=("um", "mu", "mm"),
 )
-def test_compile_arith_masked_ops(op, ty1, ty2, masked):
+def test_compile_arith_masked_ops(op, left_dtype, right_dtype, masked):
     def func(x, y):
         return op(x, y)
 
     cc = (7, 5)
 
+    ty1 = from_dtype(np.dtype(left_dtype))
+    ty2 = from_dtype(np.dtype(right_dtype))
+
     if masked[0]:
         ty1 = MaskedType(ty1)
     if masked[1]:
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index 25185a75424..88da7ce518c 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -5,6 +5,8 @@
 import pytest
 from numba import cuda
 
+from cudf.testing._utils import deduped_numeric_dtype_tests
+
 import cudf
 from cudf.core.scalar import NA
 from cudf.core.udf._ops import (
@@ -238,10 +240,9 @@ def func(row):
     run_masked_udf_test(func, gdf, check_dtype=False)
 
 
-@pytest.mark.parametrize("dtype_a", ['uint64', 'int64', 'float64'])
-@pytest.mark.parametrize("dtype_b", ['uint64', 'int64', 'float64'])
+@deduped_numeric_dtype_tests
 @pytest.mark.parametrize("op", [operator.add, operator.and_, operator.eq])
-def test_apply_mixed_dtypes(dtype_a, dtype_b, op):
+def test_apply_mixed_dtypes(left_dtype, right_dtype, op):
     """
     Test that operations can be performed between columns
     of different dtypes and return a column with the correct
@@ -251,7 +252,7 @@ def test_apply_mixed_dtypes(dtype_a, dtype_b, op):
     # First perform the op on two dummy data on host, if numpy can
     # safely type cast, we should expect it to work in udf too.
     try:
-        op(getattr(np, dtype_a)(0), getattr(np, dtype_b)(42))
+        op(getattr(np, left_dtype)(0), getattr(np, right_dtype)(42))
     except TypeError:
         pytest.skip("Operation is unsupported for corresponding dtype.")
 
@@ -261,8 +262,8 @@ def func(row):
         return op(x, y)
 
     gdf = cudf.DataFrame({"a": [1.5, None, 3, None], "b": [4, 5, None, None]})
-    gdf["a"] = gdf["a"].astype(dtype_a)
-    gdf["b"] = gdf["b"].astype(dtype_b)
+    gdf["a"] = gdf["a"].astype(left_dtype)
+    gdf["b"] = gdf["b"].astype(right_dtype)
 
     run_masked_udf_test(func, gdf, check_dtype=False)
 

From b94cce4ebdcdca0259f5d3626ed9e30f1da983dc Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 7 Feb 2022 12:34:39 -0800
Subject: [PATCH 04/20] remove stray tkinter import added by IDE

---
 python/cudf/cudf/testing/_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index ac472eb4ec2..139aff0494a 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -4,7 +4,6 @@
 from collections.abc import Mapping, Sequence
 from contextlib import contextmanager
 from decimal import Decimal
-from tkinter import NUMERIC
 
 import cupy
 import numpy as np

From 67e3994c6bf33f8e140322477b4b9eb9851f64e6 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 7 Feb 2022 12:37:48 -0800
Subject: [PATCH 05/20] style

---
 python/cudf/cudf/testing/_utils.py                  | 13 +++++--------
 python/cudf/cudf/tests/test_binops.py               |  2 +-
 .../cudf/cudf/tests/test_extension_compilation.py   |  9 +++------
 python/cudf/cudf/tests/test_repr.py                 |  2 +-
 python/cudf/cudf/tests/test_string.py               |  5 +----
 python/cudf/cudf/tests/test_udf_masked_ops.py       |  8 +++++---
 6 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index 139aff0494a..ac21d30d55e 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -1,5 +1,6 @@
 # Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
+import itertools
 import re
 from collections.abc import Mapping, Sequence
 from contextlib import contextmanager
@@ -11,8 +12,6 @@
 import pytest
 from pandas import testing as tm
 
-import itertools
-
 import cudf
 from cudf._lib.null_mask import bitmask_allocation_size_bytes
 from cudf.core.column.datetime import _numpy_to_pandas_conversion
@@ -324,10 +323,8 @@ def does_not_raise():
 def xfail_param(param, **kwargs):
     return pytest.param(param, marks=pytest.mark.xfail(**kwargs))
 
+
 deduped_numeric_dtype_tests = pytest.mark.parametrize(
-    "left_dtype,right_dtype", list(
-        itertools.combinations_with_replacement(
-                NUMERIC_TYPES, 2
-            )
-        )
-    )
+    "left_dtype,right_dtype",
+    list(itertools.combinations_with_replacement(NUMERIC_TYPES, 2)),
+)
diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index 2a499494a77..1051c2c4191 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -5,7 +5,7 @@
 import decimal
 import operator
 import random
-from itertools import combinations, product, combinations_with_replacement
+from itertools import combinations, combinations_with_replacement, product
 
 import cupy as cp
 import numpy as np
diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py
index 1af79cadacf..13b6dbbdb3d 100644
--- a/python/cudf/cudf/tests/test_extension_compilation.py
+++ b/python/cudf/cudf/tests/test_extension_compilation.py
@@ -1,19 +1,16 @@
 import operator
 
 import cupy as cp
+import numpy as np
 import pytest
 from numba import cuda, types
 from numba.cuda import compile_ptx
+from numba.np.numpy_support import from_dtype
 
-from cudf.testing._utils import deduped_numeric_dtype_tests
-
-
-import numpy as np
 from cudf import NA
 from cudf.core.udf.api import Masked
 from cudf.core.udf.typing import MaskedType
-
-from numba.np.numpy_support import from_dtype
+from cudf.testing._utils import deduped_numeric_dtype_tests
 
 arith_ops = (
     operator.add,
diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py
index fae9a1b4d59..b2f2240c41a 100644
--- a/python/cudf/cudf/tests/test_repr.py
+++ b/python/cudf/cudf/tests/test_repr.py
@@ -13,7 +13,7 @@
 from cudf.testing import _utils as utils
 from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes
 
-repr_categories =  ["int64" , "float64", "str", "category", "datetime64[ns]"]
+repr_categories = ["int64", "float64", "str", "category", "datetime64[ns]"]
 
 
 @pytest.mark.parametrize("dtype", repr_categories)
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index b959a5c9015..ca2948ef9f4 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -532,10 +532,7 @@ def _cat_convert_seq_to_cudf(others):
 @pytest.mark.parametrize("sep", [None, "", " ", "|", ",", "|||"])
 @pytest.mark.parametrize("na_rep", [None, "", "null", "a"])
 @pytest.mark.parametrize(
-    "index",
-    [
-        ["1", "2", "3", "4", "5"],
-    ],
+    "index", [["1", "2", "3", "4", "5"],],
 )
 def test_string_cat(ps_gs, others, sep, na_rep, index):
     ps, gs = ps_gs
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index 88da7ce518c..687ab9ab725 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -5,8 +5,6 @@
 import pytest
 from numba import cuda
 
-from cudf.testing._utils import deduped_numeric_dtype_tests
-
 import cudf
 from cudf.core.scalar import NA
 from cudf.core.udf._ops import (
@@ -16,7 +14,11 @@
     unary_ops,
 )
 from cudf.core.udf.utils import precompiled
-from cudf.testing._utils import NUMERIC_TYPES, _decimal_series, assert_eq
+from cudf.testing._utils import (
+    _decimal_series,
+    assert_eq,
+    deduped_numeric_dtype_tests,
+)
 
 
 def run_masked_udf_test(func, data, args=(), **kwargs):

From e05d8bc8b0e5eaa350f2a0aee7bf1127702d1094 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 7 Feb 2022 12:43:15 -0800
Subject: [PATCH 06/20] style

---
 python/cudf/cudf/tests/test_binops.py | 2 +-
 python/cudf/cudf/tests/test_string.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index 1051c2c4191..6fc5ad458b9 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -5,7 +5,7 @@
 import decimal
 import operator
 import random
-from itertools import combinations, combinations_with_replacement, product
+from itertools import combinations_with_replacement, product
 
 import cupy as cp
 import numpy as np
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index ca2948ef9f4..c3a8a3a604b 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -532,7 +532,7 @@ def _cat_convert_seq_to_cudf(others):
 @pytest.mark.parametrize("sep", [None, "", " ", "|", ",", "|||"])
 @pytest.mark.parametrize("na_rep", [None, "", "null", "a"])
 @pytest.mark.parametrize(
-    "index", [["1", "2", "3", "4", "5"],],
+    "index", [["1", "2", "3", "4", "5"]],
 )
 def test_string_cat(ps_gs, others, sep, na_rep, index):
     ps, gs = ps_gs

From f367e0194983f756e591a353685f678369f428f3 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 7 Feb 2022 14:31:02 -0800
Subject: [PATCH 07/20] remove unused parameters

---
 python/cudf/cudf/tests/test_repr.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py
index b2f2240c41a..34248818e7a 100644
--- a/python/cudf/cudf/tests/test_repr.py
+++ b/python/cudf/cudf/tests/test_repr.py
@@ -85,9 +85,7 @@ def test_full_series(nrows, dtype):
 
 
 @pytest.mark.parametrize("dtype", repr_categories)
-@pytest.mark.parametrize("nrows", [0, 1, 10, 20, 21])
-@pytest.mark.parametrize("ncols", [0, 1, 10, 20, 21])
-def test_full_dataframe_20(dtype, nrows, ncols):
+def test_full_dataframe_20(dtype):
     size = 20
     pdf = pd.DataFrame(
         {idx: np.random.randint(0, 100, size) for idx in range(size)}

From d298d4a7744193f5787d833fec61575d90f4f4f1 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Tue, 8 Feb 2022 09:11:25 -0600
Subject: [PATCH 08/20] Update python/cudf/cudf/tests/test_binops.py

Co-authored-by: Michael Wang <isVoid@users.noreply.github.com>
---
 python/cudf/cudf/tests/test_binops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index f5ba65bd3d5..7ad5fd743e4 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -219,7 +219,7 @@ def _series_compare_nulls_typegen():
     tests = []
 
     tests += list(combinations_with_replacement(DATETIME_TYPES, 2))
-    tests += list(combinations_with_replacement(DATETIME_TYPES, 2))
+    tests += list(combinations_with_replacement(TIMEDELTA_TYPES, 2))
     tests += list(combinations_with_replacement(NUMERIC_TYPES, 2))
     tests += list(combinations_with_replacement(STRING_TYPES, 2))
 

From 6e9a0ef0438a3afb6613b4dca47da742736e219b Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 8 Feb 2022 07:12:17 -0800
Subject: [PATCH 09/20] address reviews: rename to numeric_dtypes_pairwise

---
 python/cudf/cudf/testing/_utils.py                   | 2 +-
 python/cudf/cudf/tests/test_extension_compilation.py | 4 ++--
 python/cudf/cudf/tests/test_udf_masked_ops.py        | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index 5e632f59509..63410a814e3 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -333,7 +333,7 @@ def xfail_param(param, **kwargs):
     return pytest.param(param, marks=pytest.mark.xfail(**kwargs))
 
 
-deduped_numeric_dtype_tests = pytest.mark.parametrize(
+numeric_dtypes_pairwise = pytest.mark.parametrize(
     "left_dtype,right_dtype",
     list(itertools.combinations_with_replacement(NUMERIC_TYPES, 2)),
 )
diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py
index 13b6dbbdb3d..d77e9936092 100644
--- a/python/cudf/cudf/tests/test_extension_compilation.py
+++ b/python/cudf/cudf/tests/test_extension_compilation.py
@@ -10,7 +10,7 @@
 from cudf import NA
 from cudf.core.udf.api import Masked
 from cudf.core.udf.typing import MaskedType
-from cudf.testing._utils import deduped_numeric_dtype_tests
+from cudf.testing._utils import numeric_dtypes_pairwise
 
 arith_ops = (
     operator.add,
@@ -162,7 +162,7 @@ def func(x):
 
 
 @pytest.mark.parametrize("op", ops)
-@deduped_numeric_dtype_tests
+@numeric_dtypes_pairwise
 @pytest.mark.parametrize(
     "masked",
     ((False, True), (True, False), (True, True)),
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index 687ab9ab725..89344211fc4 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -17,7 +17,7 @@
 from cudf.testing._utils import (
     _decimal_series,
     assert_eq,
-    deduped_numeric_dtype_tests,
+    numeric_dtypes_pairwise,
 )
 
 
@@ -242,7 +242,7 @@ def func(row):
     run_masked_udf_test(func, gdf, check_dtype=False)
 
 
-@deduped_numeric_dtype_tests
+@numeric_dtypes_pairwise
 @pytest.mark.parametrize("op", [operator.add, operator.and_, operator.eq])
 def test_apply_mixed_dtypes(left_dtype, right_dtype, op):
     """

From e4a98d0227b33c13d932a945763b4eb45ff08df0 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 8 Feb 2022 07:21:19 -0800
Subject: [PATCH 10/20] reduce redundant from_pandas() calls in ORC tests

---
 python/cudf/cudf/tests/test_orc.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index 7a7a07ba7fa..623098741a9 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -385,11 +385,13 @@ def test_orc_writer(datadir, tmpdir, reference_file, columns, compression):
         else:
             print(type(excpr).__name__)
 
-    expect = orcfile.read(columns=columns).to_pandas()
-    cudf.from_pandas(expect).to_orc(gdf_fname.strpath, compression=compression)
-    got = pa.orc.ORCFile(gdf_fname).read(columns=columns).to_pandas()
+    expect = cudf.from_pandas(orcfile.read(columns=columns).to_pandas())
+    expect.to_orc(gdf_fname.strpath, compression=compression)
+    got = cudf.from_pandas(
+        pa.orc.ORCFile(gdf_fname).read(columns=columns).to_pandas()
+    )
 
-    assert_frame_equal(cudf.from_pandas(expect), cudf.from_pandas(got))
+    assert_frame_equal(expect, got)
 
 
 @pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"])
@@ -406,11 +408,11 @@ def test_orc_writer_statistics_frequency(datadir, tmpdir, stats_freq):
         else:
             print(type(excpr).__name__)
 
-    expect = orcfile.read().to_pandas()
-    cudf.from_pandas(expect).to_orc(gdf_fname.strpath, statistics=stats_freq)
-    got = pa.orc.ORCFile(gdf_fname).read().to_pandas()
+    expect = cudf.from_pandas(orcfile.read().to_pandas())
+    expect.to_orc(gdf_fname.strpath, statistics=stats_freq)
+    got = cudf.from_pandas(pa.orc.ORCFile(gdf_fname).read().to_pandas())
 
-    assert_frame_equal(cudf.from_pandas(expect), cudf.from_pandas(got))
+    assert_frame_equal(expect, got)
 
 
 @pytest.mark.parametrize("stats_freq", ["NONE", "STRIPE", "ROWGROUP"])

From 289a13bb2faa8f8ada8e051dbdc5ce633e43c4f3 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 8 Feb 2022 10:21:01 -0800
Subject: [PATCH 11/20] combine repr tests

---
 python/cudf/cudf/tests/test_repr.py | 34 ++++++++++++++---------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py
index 34248818e7a..01e33896b84 100644
--- a/python/cudf/cudf/tests/test_repr.py
+++ b/python/cudf/cudf/tests/test_repr.py
@@ -13,7 +13,15 @@
 from cudf.testing import _utils as utils
 from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes
 
-repr_categories = ["int64", "float64", "str", "category", "datetime64[ns]"]
+repr_categories = [
+    "bool",
+    "uint16",
+    "int64",
+    "float64",
+    "str",
+    "category",
+    "datetime64[ns]",
+]
 
 
 @pytest.mark.parametrize("dtype", repr_categories)
@@ -84,23 +92,11 @@ def test_full_series(nrows, dtype):
     pd.reset_option("display.max_rows")
 
 
+@pytest.mark.parametrize("nrows", [5, 10, 15])
+@pytest.mark.parametrize("ncols", [5, 10, 15])
+@pytest.mark.parametrize("size", [20, 21])
 @pytest.mark.parametrize("dtype", repr_categories)
-def test_full_dataframe_20(dtype):
-    size = 20
-    pdf = pd.DataFrame(
-        {idx: np.random.randint(0, 100, size) for idx in range(size)}
-    ).astype(dtype)
-    gdf = cudf.from_pandas(pdf)
-    assert pdf.__repr__() == gdf.__repr__()
-    assert pdf._repr_html_() == gdf._repr_html_()
-    assert pdf._repr_latex_() == gdf._repr_latex_()
-
-
-@pytest.mark.parametrize("dtype", repr_categories)
-@pytest.mark.parametrize("nrows", [9, 21 / 2, 11, 21 - 1])
-@pytest.mark.parametrize("ncols", [9, 21 / 2, 11, 21 - 1])
-def test_full_dataframe_21(dtype, nrows, ncols):
-    size = 21
+def test_full_dataframe_20(dtype, size, nrows, ncols):
     pdf = pd.DataFrame(
         {idx: np.random.randint(0, 100, size) for idx in range(size)}
     ).astype(dtype)
@@ -108,7 +104,11 @@ def test_full_dataframe_21(dtype, nrows, ncols):
 
     pd.options.display.max_rows = int(nrows)
     pd.options.display.max_columns = int(ncols)
+
     assert pdf.__repr__() == gdf.__repr__()
+    assert pdf._repr_html_() == gdf._repr_html_()
+    assert pdf._repr_latex_() == gdf._repr_latex_()
+
     pd.reset_option("display.max_rows")
     pd.reset_option("display.max_columns")
 

From 2e25ed2aa6482f6bb109f14085592f0eb8d04d3b Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Tue, 8 Feb 2022 12:23:31 -0600
Subject: [PATCH 12/20] Update python/cudf/cudf/tests/test_indexing.py

Co-authored-by: GALI PREM SAGAR <sagarprem75@gmail.com>
---
 python/cudf/cudf/tests/test_indexing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index ed837c92549..8caabd47572 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -1296,7 +1296,6 @@ def test_loc_datetime_index(sli, is_dataframe):
     [
         cudf.DataFrame({"a": range(100000)}),
         cudf.DataFrame({"a": range(100000), "b": range(100000)}),
-        cudf.DataFrame({"a": range(20), "b": range(20)}),
         cudf.DataFrame(
             {
                 "a": range(20),

From 9393bd023a34ec87d921aacf13fa2fe95037bb0a Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 8 Feb 2022 12:19:14 -0800
Subject: [PATCH 13/20] update copyrights

---
 python/cudf/cudf/testing/_utils.py                              | 2 +-
 python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py | 2 +-
 python/cudf/cudf/tests/test_csv.py                              | 2 +-
 python/cudf/cudf/tests/test_extension_compilation.py            | 1 +
 python/cudf/cudf/tests/test_indexing.py                         | 2 +-
 python/cudf/cudf/tests/test_repr.py                             | 2 +-
 python/cudf/cudf/tests/test_udf_masked_ops.py                   | 1 +
 7 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index 63410a814e3..00b22b2a490 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 import itertools
 import re
diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
index 72d976f0958..0bf1813a06c 100644
--- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
+++ b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
diff --git a/python/cudf/cudf/tests/test_csv.py b/python/cudf/cudf/tests/test_csv.py
index f49532e28f7..f3d69e1745e 100644
--- a/python/cudf/cudf/tests/test_csv.py
+++ b/python/cudf/cudf/tests/test_csv.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2021, NVIDIA CORPORATION.
+# Copyright (c) 2018-2022, NVIDIA CORPORATION.
 
 import gzip
 import os
diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py
index d77e9936092..328e7b89840 100644
--- a/python/cudf/cudf/tests/test_extension_compilation.py
+++ b/python/cudf/cudf/tests/test_extension_compilation.py
@@ -1,3 +1,4 @@
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 import operator
 
 import cupy as cp
diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index 8caabd47572..9265812ac14 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
 from itertools import combinations
 
diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py
index 01e33896b84..21874e013f8 100644
--- a/python/cudf/cudf/tests/test_repr.py
+++ b/python/cudf/cudf/tests/test_repr.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2021, NVIDIA CORPORATION.
+# Copyright (c) 2019-2022, NVIDIA CORPORATION.
 
 import textwrap
 
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index 89344211fc4..99acb6dc744 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -1,3 +1,4 @@
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 import math
 import operator
 

From a20102ee2413011a3f8aaa6bbca46ce8a6bcbb04 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Wed, 9 Feb 2022 15:02:31 -0600
Subject: [PATCH 14/20] Apply suggestions from code review

Co-authored-by: Bradley Dice <bdice@bradleydice.com>
---
 python/cudf/cudf/testing/_utils.py            | 2 +-
 python/cudf/cudf/tests/test_udf_masked_ops.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
index 00b22b2a490..efaf8a228b6 100644
--- a/python/cudf/cudf/testing/_utils.py
+++ b/python/cudf/cudf/testing/_utils.py
@@ -333,7 +333,7 @@ def xfail_param(param, **kwargs):
     return pytest.param(param, marks=pytest.mark.xfail(**kwargs))
 
 
-numeric_dtypes_pairwise = pytest.mark.parametrize(
+parametrize_numeric_dtypes_pairwise = pytest.mark.parametrize(
     "left_dtype,right_dtype",
     list(itertools.combinations_with_replacement(NUMERIC_TYPES, 2)),
 )
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index 99acb6dc744..57a777446a9 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -255,7 +255,7 @@ def test_apply_mixed_dtypes(left_dtype, right_dtype, op):
     # First perform the op on two dummy data on host, if numpy can
     # safely type cast, we should expect it to work in udf too.
     try:
-        op(getattr(np, left_dtype)(0), getattr(np, right_dtype)(42))
+        op(np.dtype(left_dtype).type(0), np.dtype(right_dtype).type(42))
     except TypeError:
         pytest.skip("Operation is unsupported for corresponding dtype.")
 

From 9c800cd9c0fc455ed114a3f2b943f1e723204975 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 9 Feb 2022 13:06:17 -0800
Subject: [PATCH 15/20] address reviews

---
 python/cudf/cudf/tests/test_binops.py         | 14 ++++----
 python/cudf/cudf/tests/test_dataframe.py      | 36 +++++++++++++++++++
 .../cudf/tests/test_extension_compilation.py  |  4 +--
 python/cudf/cudf/tests/test_indexing.py       | 35 +++++++++---------
 python/cudf/cudf/tests/test_parquet.py        |  4 +--
 python/cudf/cudf/tests/test_repr.py           | 16 ++++-----
 python/cudf/cudf/tests/test_udf_masked_ops.py |  4 +--
 7 files changed, 72 insertions(+), 41 deletions(-)

diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
index 7ad5fd743e4..02ca7a0cd58 100644
--- a/python/cudf/cudf/tests/test_binops.py
+++ b/python/cudf/cudf/tests/test_binops.py
@@ -216,14 +216,12 @@ def test_series_compare(cmpop, obj_class, dtype):
 
 
 def _series_compare_nulls_typegen():
-    tests = []
-
-    tests += list(combinations_with_replacement(DATETIME_TYPES, 2))
-    tests += list(combinations_with_replacement(TIMEDELTA_TYPES, 2))
-    tests += list(combinations_with_replacement(NUMERIC_TYPES, 2))
-    tests += list(combinations_with_replacement(STRING_TYPES, 2))
-
-    return tests
+    return [
+        *combinations_with_replacement(DATETIME_TYPES, 2),
+        *combinations_with_replacement(TIMEDELTA_TYPES, 2),
+        *combinations_with_replacement(NUMERIC_TYPES, 2),
+        *combinations_with_replacement(STRING_TYPES, 2),
+    ]
 
 
 @pytest.mark.parametrize("cmpop", _cmpops)
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index b3305a50031..3e0990880a4 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -6688,6 +6688,24 @@ def test_dataframe_info_null_counts():
             -6.88918237,
             -7.00001,
         ],
+        [
+            1.987654321,
+            2.987654321,
+            3.987654321,
+            0.1221,
+            2.1221,
+            0.112121,
+            -21.1212,
+        ],
+        [
+            -1.987654321,
+            -2.987654321,
+            -3.987654321,
+            -0.1221,
+            -2.1221,
+            -0.112121,
+            21.1212,
+        ],
     ],
 )
 @pytest.mark.parametrize(
@@ -6712,6 +6730,24 @@ def test_dataframe_info_null_counts():
             -6.88918237,
             -7.00001,
         ],
+        [
+            1.987654321,
+            2.987654321,
+            3.987654321,
+            0.1221,
+            2.1221,
+            0.112121,
+            -21.1212,
+        ],
+        [
+            -1.987654321,
+            -2.987654321,
+            -3.987654321,
+            -0.1221,
+            -2.1221,
+            -0.112121,
+            21.1212,
+        ],
     ],
 )
 @pytest.mark.parametrize("rtol", [0, 0.01, 1e-05, 1e-08, 5e-1, 50.12])
diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py
index 328e7b89840..692f40873d7 100644
--- a/python/cudf/cudf/tests/test_extension_compilation.py
+++ b/python/cudf/cudf/tests/test_extension_compilation.py
@@ -11,7 +11,7 @@
 from cudf import NA
 from cudf.core.udf.api import Masked
 from cudf.core.udf.typing import MaskedType
-from cudf.testing._utils import numeric_dtypes_pairwise
+from cudf.testing._utils import parametrize_numeric_dtypes_pairwise
 
 arith_ops = (
     operator.add,
@@ -163,7 +163,7 @@ def func(x):
 
 
 @pytest.mark.parametrize("op", ops)
-@numeric_dtypes_pairwise
+@parametrize_numeric_dtypes_pairwise
 @pytest.mark.parametrize(
     "masked",
     ((False, True), (True, False), (True, True)),
diff --git a/python/cudf/cudf/tests/test_indexing.py b/python/cudf/cudf/tests/test_indexing.py
index 9265812ac14..19d7c8a10ab 100644
--- a/python/cudf/cudf/tests/test_indexing.py
+++ b/python/cudf/cudf/tests/test_indexing.py
@@ -1292,27 +1292,27 @@ def test_loc_datetime_index(sli, is_dataframe):
 
 
 @pytest.mark.parametrize(
-    "gdf",
+    "gdf_kwargs",
     [
-        cudf.DataFrame({"a": range(100000)}),
-        cudf.DataFrame({"a": range(100000), "b": range(100000)}),
-        cudf.DataFrame(
-            {
+        {"data": {"a": range(100000)}},
+        {"data": {"a": range(100000), "b": range(100000)}},
+        {
+            "data": {
                 "a": range(20),
                 "b": range(20),
                 "c": ["abc", "def", "xyz", "def", "pqr"] * 4,
             }
-        ),
-        cudf.DataFrame(index=[1, 2, 3]),
-        cudf.DataFrame(index=range(100000)),
-        cudf.DataFrame(columns=["a", "b", "c", "d"]),
-        cudf.DataFrame(columns=["a"], index=range(100000)),
-        cudf.DataFrame(columns=["a", "col2", "...col n"], index=range(100000)),
-        cudf.DataFrame(index=cudf.Series(range(100000)).astype("str")),
-        cudf.DataFrame(
-            columns=["a", "b", "c", "d"],
-            index=cudf.Series(range(100000)).astype("str"),
-        ),
+        },
+        {"index": [1, 2, 3]},
+        {"index": range(100000)},
+        {"columns": ["a", "b", "c", "d"]},
+        {"columns": ["a"], "index": range(100000)},
+        {"columns": ["a", "col2", "...col n"], "index": range(100000)},
+        {"index": cudf.Series(range(100000)).astype("str")},
+        {
+            "columns": ["a", "b", "c", "d"],
+            "index": cudf.Series(range(100000)).astype("str"),
+        },
     ],
 )
 @pytest.mark.parametrize(
@@ -1327,7 +1327,8 @@ def test_loc_datetime_index(sli, is_dataframe):
         slice(6),
     ],
 )
-def test_dataframe_sliced(gdf, slice):
+def test_dataframe_sliced(gdf_kwargs, slice):
+    gdf = cudf.DataFrame(**gdf_kwargs)
     pdf = gdf.to_pandas()
 
     actual = gdf[slice]
diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py
index 98c2c9b8d32..7feaa400446 100644
--- a/python/cudf/cudf/tests/test_parquet.py
+++ b/python/cudf/cudf/tests/test_parquet.py
@@ -1105,7 +1105,7 @@ def test_parquet_reader_list_large_multi_rowgroup_nulls(tmpdir):
     assert_eq(expect, got)
 
 
-@pytest.mark.parametrize("skip", range(0, 10))
+@pytest.mark.parametrize("skip", [0, 1, 5, 10])
 def test_parquet_reader_list_skiprows(skip, tmpdir):
     num_rows = 10
     src = pd.DataFrame(
@@ -1124,7 +1124,7 @@ def test_parquet_reader_list_skiprows(skip, tmpdir):
     assert_eq(expect, got, check_dtype=False)
 
 
-@pytest.mark.parametrize("skip", range(0, 10))
+@pytest.mark.parametrize("skip", [0, 1, 5, 10])
 def test_parquet_reader_list_num_rows(skip, tmpdir):
     num_rows = 20
     src = pd.DataFrame(
diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py
index 21874e013f8..8f2e4811e36 100644
--- a/python/cudf/cudf/tests/test_repr.py
+++ b/python/cudf/cudf/tests/test_repr.py
@@ -14,7 +14,6 @@
 from cudf.utils.dtypes import np_dtypes_to_pandas_dtypes
 
 repr_categories = [
-    "bool",
     "uint16",
     "int64",
     "float64",
@@ -102,15 +101,12 @@ def test_full_dataframe_20(dtype, size, nrows, ncols):
     ).astype(dtype)
     gdf = cudf.from_pandas(pdf)
 
-    pd.options.display.max_rows = int(nrows)
-    pd.options.display.max_columns = int(ncols)
-
-    assert pdf.__repr__() == gdf.__repr__()
-    assert pdf._repr_html_() == gdf._repr_html_()
-    assert pdf._repr_latex_() == gdf._repr_latex_()
-
-    pd.reset_option("display.max_rows")
-    pd.reset_option("display.max_columns")
+    with pd.option_context(
+        "display.max_rows", int(nrows), "display.max_columns", int(ncols)
+    ):
+        assert repr(pdf) == repr(gdf)
+        assert pdf._repr_html_() == gdf._repr_html_()
+        assert pdf._repr_latex_() == gdf._repr_latex_()
 
 
 @given(
diff --git a/python/cudf/cudf/tests/test_udf_masked_ops.py b/python/cudf/cudf/tests/test_udf_masked_ops.py
index 57a777446a9..faaea6eec08 100644
--- a/python/cudf/cudf/tests/test_udf_masked_ops.py
+++ b/python/cudf/cudf/tests/test_udf_masked_ops.py
@@ -18,7 +18,7 @@
 from cudf.testing._utils import (
     _decimal_series,
     assert_eq,
-    numeric_dtypes_pairwise,
+    parametrize_numeric_dtypes_pairwise,
 )
 
 
@@ -243,7 +243,7 @@ def func(row):
     run_masked_udf_test(func, gdf, check_dtype=False)
 
 
-@numeric_dtypes_pairwise
+@parametrize_numeric_dtypes_pairwise
 @pytest.mark.parametrize("op", [operator.add, operator.and_, operator.eq])
 def test_apply_mixed_dtypes(left_dtype, right_dtype, op):
     """

From 025c69db773c9f973f3a3df7fd4d949de2acdb70 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 9 Feb 2022 13:30:55 -0800
Subject: [PATCH 16/20] temporarily revert test_cudf_isclose data additions

---
 python/cudf/cudf/tests/test_dataframe.py | 36 ------------------------
 1 file changed, 36 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 3e0990880a4..b3305a50031 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -6688,24 +6688,6 @@ def test_dataframe_info_null_counts():
             -6.88918237,
             -7.00001,
         ],
-        [
-            1.987654321,
-            2.987654321,
-            3.987654321,
-            0.1221,
-            2.1221,
-            0.112121,
-            -21.1212,
-        ],
-        [
-            -1.987654321,
-            -2.987654321,
-            -3.987654321,
-            -0.1221,
-            -2.1221,
-            -0.112121,
-            21.1212,
-        ],
     ],
 )
 @pytest.mark.parametrize(
@@ -6730,24 +6712,6 @@ def test_dataframe_info_null_counts():
             -6.88918237,
             -7.00001,
         ],
-        [
-            1.987654321,
-            2.987654321,
-            3.987654321,
-            0.1221,
-            2.1221,
-            0.112121,
-            -21.1212,
-        ],
-        [
-            -1.987654321,
-            -2.987654321,
-            -3.987654321,
-            -0.1221,
-            -2.1221,
-            -0.112121,
-            21.1212,
-        ],
     ],
 )
 @pytest.mark.parametrize("rtol", [0, 0.01, 1e-05, 1e-08, 5e-1, 50.12])

From 60a0a927d3e197e37c88418eaa413361b9dd4126 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Wed, 9 Feb 2022 13:32:15 -0800
Subject: [PATCH 17/20] update copyright year in test_reshape.py

---
 python/cudf/cudf/tests/test_reshape.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py
index 8fc3b8012e4..2efa781c506 100644
--- a/python/cudf/cudf/tests/test_reshape.py
+++ b/python/cudf/cudf/tests/test_reshape.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 
 import re
 

From 227be3c579271ea63996eca5708337404b8eb815 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Mon, 14 Feb 2022 07:53:07 -0800
Subject: [PATCH 18/20] redo test_cudf_isclose parameterization

---
 python/cudf/cudf/tests/test_dataframe.py | 49 ++++++++++++------------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index b3305a50031..67230fa1171 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -3,6 +3,7 @@
 import array as arr
 import datetime
 import io
+import itertools
 import operator
 import random
 import re
@@ -6666,10 +6667,11 @@ def test_dataframe_info_null_counts():
     assert str_cmp == actual_string
 
 
-@pytest.mark.parametrize(
-    "data1",
-    [
+def cudf_isclose_data():
+    # generate pairs of data for isclsoe
+    data_list = [
         [1, 2, 3, 4, 5, 6, 7],
+        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
         [
             1.9876543,
             2.9876654,
@@ -6688,32 +6690,29 @@ def test_dataframe_info_null_counts():
             -6.88918237,
             -7.00001,
         ],
-    ],
-)
-@pytest.mark.parametrize(
-    "data2",
-    [
-        [1, 2, 3, 4, 5, 6, 7],
         [
-            1.9876543,
-            2.9876654,
-            3.9876543,
-            4.1234587,
-            5.23,
-            6.88918237,
-            7.00001,
+            1.987654321,
+            2.987654321,
+            3.987654321,
+            0.1221,
+            2.1221,
+            0.112121,
+            -21.1212,
         ],
         [
-            -1.9876543,
-            -2.9876654,
-            -3.9876543,
-            -4.1234587,
-            -5.23,
-            -6.88918237,
-            -7.00001,
+            -1.987654321,
+            -2.987654321,
+            -3.987654321,
+            -0.1221,
+            -2.1221,
+            -0.112121,
+            21.1212,
         ],
-    ],
-)
+    ]
+    return list(itertools.combinations_with_replacement(data_list, 2))
+
+
+@pytest.mark.parametrize("data1,data2", cudf_isclose_data())
 @pytest.mark.parametrize("rtol", [0, 0.01, 1e-05, 1e-08, 5e-1, 50.12])
 @pytest.mark.parametrize("atol", [0, 0.01, 1e-05, 1e-08, 50.12])
 def test_cudf_isclose(data1, data2, rtol, atol):

From ecdb9860d3410250dc4ac7949b5858af7091f92a Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 15 Feb 2022 06:32:50 -0800
Subject: [PATCH 19/20] revert changes to test_cudf_isclose

---
 python/cudf/cudf/tests/test_dataframe.py | 57 ++++++++++++++++++++----
 1 file changed, 48 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 67230fa1171..fb173bc0eab 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -3,7 +3,6 @@
 import array as arr
 import datetime
 import io
-import itertools
 import operator
 import random
 import re
@@ -6667,9 +6666,9 @@ def test_dataframe_info_null_counts():
     assert str_cmp == actual_string
 
 
-def cudf_isclose_data():
-    # generate pairs of data for isclsoe
-    data_list = [
+@pytest.mark.parametrize(
+    "data1",
+    [
         [1, 2, 3, 4, 5, 6, 7],
         [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
         [
@@ -6708,11 +6707,51 @@ def cudf_isclose_data():
             -0.112121,
             21.1212,
         ],
-    ]
-    return list(itertools.combinations_with_replacement(data_list, 2))
-
-
-@pytest.mark.parametrize("data1,data2", cudf_isclose_data())
+    ],
+)
+@pytest.mark.parametrize(
+    "data2",
+    [
+        [1, 2, 3, 4, 5, 6, 7],
+        [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
+        [
+            1.9876543,
+            2.9876654,
+            3.9876543,
+            4.1234587,
+            5.23,
+            6.88918237,
+            7.00001,
+        ],
+        [
+            -1.9876543,
+            -2.9876654,
+            -3.9876543,
+            -4.1234587,
+            -5.23,
+            -6.88918237,
+            -7.00001,
+        ],
+        [
+            1.987654321,
+            2.987654321,
+            3.987654321,
+            0.1221,
+            2.1221,
+            0.112121,
+            -21.1212,
+        ],
+        [
+            -1.987654321,
+            -2.987654321,
+            -3.987654321,
+            -0.1221,
+            -2.1221,
+            -0.112121,
+            21.1212,
+        ],
+    ],
+)
 @pytest.mark.parametrize("rtol", [0, 0.01, 1e-05, 1e-08, 5e-1, 50.12])
 @pytest.mark.parametrize("atol", [0, 0.01, 1e-05, 1e-08, 50.12])
 def test_cudf_isclose(data1, data2, rtol, atol):

From 62d56be1fbe9aab315542c51de58407d0362a699 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <brmiller@nvidia.com>
Date: Tue, 15 Feb 2022 06:43:33 -0800
Subject: [PATCH 20/20] restore 2021 start year in avro test copyright header

---
 python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
index 0bf1813a06c..9eb01ae31b4 100644
--- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
+++ b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.