From ee105fe1fbd2139304bb9e3d3a359d4268984312 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Thu, 1 Apr 2021 19:10:10 -0500
Subject: [PATCH 1/3] Revert dask versioning of concat dispatch (#7823)

Dependent on : https://github.com/dask/dask/pull/7500

With the changes in above dask PR we would not have to version the concat dispatch in `dask-cudf`.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - https://github.com/jakirkham

URL: https://github.com/rapidsai/cudf/pull/7823
---
 python/dask_cudf/dask_cudf/backends.py | 61 ++++++++------------------
 1 file changed, 19 insertions(+), 42 deletions(-)

diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py
index 66b06acc858..0570654fde3 100644
--- a/python/dask_cudf/dask_cudf/backends.py
+++ b/python/dask_cudf/dask_cudf/backends.py
@@ -1,13 +1,10 @@
 # Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
-from distutils.version import LooseVersion
-
 import cupy as cp
 import numpy as np
 import pandas as pd
 import pyarrow as pa
 
-import dask
 from dask.dataframe.categorical import categorical_dtype_dispatch
 from dask.dataframe.core import get_parallel_type, make_meta, meta_nonempty
 from dask.dataframe.methods import (
@@ -31,7 +28,6 @@
 get_parallel_type.register(cudf.DataFrame, lambda _: DataFrame)
 get_parallel_type.register(cudf.Series, lambda _: Series)
 get_parallel_type.register(cudf.Index, lambda _: Index)
-DASK_VERSION = LooseVersion(dask.__version__)
 
 
 @meta_nonempty.register(cudf.Index)
@@ -205,45 +201,26 @@ def make_meta_object(x, index=None):
     raise TypeError(f"Don't know how to create metadata from {x}")
 
 
-if DASK_VERSION > "2021.03.1":
-
-    @concat_dispatch.register((cudf.DataFrame, cudf.Series, cudf.Index))
-    def concat_cudf(
-        dfs,
-        axis=0,
-        join="outer",
-        uniform=False,
-        filter_warning=True,
-        sort=None,
-        ignore_index=False,
-        **kwargs,
-    ):
-        assert join == "outer"
-
-        ignore_order = kwargs.get("ignore_order", False)
-        if ignore_order:
-            raise NotImplementedError(
-                "ignore_order parameter is not yet supported in dask-cudf"
-            )
-
-        return cudf.concat(dfs, axis=axis, ignore_index=ignore_index)
-
-
-else:
-
-    @concat_dispatch.register((cudf.DataFrame, cudf.Series, cudf.Index))
-    def concat_cudf(
-        dfs,
-        axis=0,
-        join="outer",
-        uniform=False,
-        filter_warning=True,
-        sort=None,
-        ignore_index=False,
-    ):
-        assert join == "outer"
+@concat_dispatch.register((cudf.DataFrame, cudf.Series, cudf.Index))
+def concat_cudf(
+    dfs,
+    axis=0,
+    join="outer",
+    uniform=False,
+    filter_warning=True,
+    sort=None,
+    ignore_index=False,
+    **kwargs,
+):
+    assert join == "outer"
+
+    ignore_order = kwargs.get("ignore_order", False)
+    if ignore_order:
+        raise NotImplementedError(
+            "ignore_order parameter is not yet supported in dask-cudf"
+        )
 
-        return cudf.concat(dfs, axis=axis, ignore_index=ignore_index)
+    return cudf.concat(dfs, axis=axis, ignore_index=ignore_index)
 
 
 @categorical_dtype_dispatch.register((cudf.DataFrame, cudf.Series, cudf.Index))

From 11358ce259e63452983be0d97748dcb4a1534049 Mon Sep 17 00:00:00 2001
From: Ashwin Srinath <3190405+shwina@users.noreply.github.com>
Date: Thu, 1 Apr 2021 21:00:19 -0400
Subject: [PATCH 2/3] Constrain dask and distributed versions to 2021.3.1
 (#7825)

Authors:
  - Ashwin Srinath (https://github.com/shwina)

Approvers:
  - Keith Kraus (https://github.com/kkraus14)
  - Mike Wendt (https://github.com/mike-wendt)

URL: https://github.com/rapidsai/cudf/pull/7825
---
 conda/environments/cudf_dev_cuda10.1.yml | 4 ++--
 conda/environments/cudf_dev_cuda10.2.yml | 4 ++--
 conda/environments/cudf_dev_cuda11.0.yml | 4 ++--
 conda/environments/cudf_dev_cuda11.1.yml | 4 ++--
 conda/environments/cudf_dev_cuda11.2.yml | 4 ++--
 conda/recipes/custreamz/meta.yaml        | 2 +-
 conda/recipes/dask-cudf/meta.yaml        | 8 ++++----
 python/custreamz/dev_requirements.txt    | 4 ++--
 python/dask_cudf/dev_requirements.txt    | 6 +++---
 python/dask_cudf/setup.py                | 8 ++++----
 10 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/conda/environments/cudf_dev_cuda10.1.yml b/conda/environments/cudf_dev_cuda10.1.yml
index 2ec8af368f2..1e7fcbdc8ff 100644
--- a/conda/environments/cudf_dev_cuda10.1.yml
+++ b/conda/environments/cudf_dev_cuda10.1.yml
@@ -42,8 +42,8 @@ dependencies:
   - mypy=0.782
   - typing_extensions
   - pre_commit
-  - dask>=2021.3.1
-  - distributed>=2.22.0
+  - dask==2021.3.1
+  - distributed>=2.22.0,<=2021.3.1
   - streamz
   - dlpack
   - arrow-cpp=1.0.1
diff --git a/conda/environments/cudf_dev_cuda10.2.yml b/conda/environments/cudf_dev_cuda10.2.yml
index 64dcc458196..105a89bf902 100644
--- a/conda/environments/cudf_dev_cuda10.2.yml
+++ b/conda/environments/cudf_dev_cuda10.2.yml
@@ -42,8 +42,8 @@ dependencies:
   - mypy=0.782
   - typing_extensions
   - pre_commit
-  - dask>=2021.3.1
-  - distributed>=2.22.0
+  - dask==2021.3.1
+  - distributed>=2.22.0,<=2021.3.1
   - streamz
   - dlpack
   - arrow-cpp=1.0.1
diff --git a/conda/environments/cudf_dev_cuda11.0.yml b/conda/environments/cudf_dev_cuda11.0.yml
index f464738eaa8..bf0f0d236b1 100644
--- a/conda/environments/cudf_dev_cuda11.0.yml
+++ b/conda/environments/cudf_dev_cuda11.0.yml
@@ -42,8 +42,8 @@ dependencies:
   - mypy=0.782
   - typing_extensions
   - pre_commit
-  - dask>=2021.3.1
-  - distributed>=2.22.0
+  - dask==2021.3.1
+  - distributed>=2.22.0,<=2021.3.1
   - streamz
   - dlpack
   - arrow-cpp=1.0.1
diff --git a/conda/environments/cudf_dev_cuda11.1.yml b/conda/environments/cudf_dev_cuda11.1.yml
index fae94974502..a1169539b4a 100644
--- a/conda/environments/cudf_dev_cuda11.1.yml
+++ b/conda/environments/cudf_dev_cuda11.1.yml
@@ -42,8 +42,8 @@ dependencies:
   - mypy=0.782
   - typing_extensions
   - pre_commit
-  - dask>=2021.3.1
-  - distributed>=2.22.0
+  - dask==2021.3.1
+  - distributed>=2.22.0,<=2021.3.1
   - streamz
   - dlpack
   - arrow-cpp=1.0.1
diff --git a/conda/environments/cudf_dev_cuda11.2.yml b/conda/environments/cudf_dev_cuda11.2.yml
index fa0a69ad1ea..0952b4ea338 100644
--- a/conda/environments/cudf_dev_cuda11.2.yml
+++ b/conda/environments/cudf_dev_cuda11.2.yml
@@ -42,8 +42,8 @@ dependencies:
   - mypy=0.782
   - typing_extensions
   - pre_commit
-  - dask>=2021.3.1
-  - distributed>=2.22.0
+  - dask==2021.3.1
+  - distributed>=2.22.0,<=2021.3.1
   - streamz
   - dlpack
   - arrow-cpp=1.0.1
diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml
index ffda6d0c3c6..4b763813001 100644
--- a/conda/recipes/custreamz/meta.yaml
+++ b/conda/recipes/custreamz/meta.yaml
@@ -29,7 +29,7 @@ requirements:
     - streamz 
     - cudf {{ version }}
     - dask >=2.22.0
-    - distributed >=2.22.0
+    - distributed >=2.22.0,<=2021.3.1
     - python-confluent-kafka
     - cudf_kafka {{ version }}
 
diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml
index 66bffdfd61e..38bcfd469b9 100644
--- a/conda/recipes/dask-cudf/meta.yaml
+++ b/conda/recipes/dask-cudf/meta.yaml
@@ -23,13 +23,13 @@ requirements:
   host:
     - python
     - cudf {{ version }}
-    - dask>=2021.3.1
-    - distributed >=2.22.0
+    - dask==2021.3.1
+    - distributed >=2.22.0,<=2021.3.1
   run:
     - python
     - cudf {{ version }}
-    - dask>=2021.3.1
-    - distributed >=2.22.0
+    - dask==2021.3.1
+    - distributed >=2.22.0,<=2021.3.1
   
 test:
   requires:
diff --git a/python/custreamz/dev_requirements.txt b/python/custreamz/dev_requirements.txt
index 7e300376f4e..61b1b8cd7a1 100644
--- a/python/custreamz/dev_requirements.txt
+++ b/python/custreamz/dev_requirements.txt
@@ -3,8 +3,8 @@
 flake8==3.8.3
 black==19.10b0
 isort==5.0.7
-dask>=2021.3.1
-distributed>=2.22.0
+dask==2021.3.1
+distributed>=2.22.0,<=2021.3.1
 streamz
 python-confluent-kafka
 pytest
diff --git a/python/dask_cudf/dev_requirements.txt b/python/dask_cudf/dev_requirements.txt
index 39da0da3012..b19501845d1 100644
--- a/python/dask_cudf/dev_requirements.txt
+++ b/python/dask_cudf/dev_requirements.txt
@@ -1,7 +1,7 @@
 # Copyright (c) 2021, NVIDIA CORPORATION.
 
-dask>=2021.3.1
-distributed>=2.22.0
+dask==2021.3.1
+distributed>=2.22.0,<=2021.3.1
 fsspec>=0.6.0
 numba>=0.49.0,!=0.51.0
 numpy
@@ -11,4 +11,4 @@ setuptools
 wheel
 flake8==3.8.3
 black==19.10b0
-isort==5.0.7
\ No newline at end of file
+isort==5.0.7
diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py
index 1318701b845..caf9bb6bbb0 100644
--- a/python/dask_cudf/setup.py
+++ b/python/dask_cudf/setup.py
@@ -10,8 +10,8 @@
 
 install_requires = [
     "cudf",
-    "dask>=2021.3.1",
-    "distributed>=2.22.0",
+    "dask==2021.3.1",
+    "distributed>=2.22.0,<=2021.3.1",
     "fsspec>=0.6.0",
     "numpy",
     "pandas>=1.0,<1.3.0dev0",
@@ -23,8 +23,8 @@
         "pandas>=1.0,<1.3.0dev0",
         "pytest",
         "numba>=0.49.0,!=0.51.0",
-        "dask>=2021.3.1",
-        "distributed>=2.22.0",
+        "dask==2021.3.1",
+        "distributed>=2.22.0,<=2021.3.1",
     ]
 }
 

From dee9238dffc73c25d7109cab8735e607ba9e474b Mon Sep 17 00:00:00 2001
From: ChrisJar <chris.jarrett.0@gmail.com>
Date: Thu, 1 Apr 2021 21:56:51 -0500
Subject: [PATCH 3/3] Enable join on decimal columns (#7764)

This enables joins on decimal columns with the same precision and scale.

Closes #7497
Depends on #7788

Authors:
  - https://github.com/ChrisJar

Approvers:
  - Keith Kraus (https://github.com/kkraus14)
  - Ashwin Srinath (https://github.com/shwina)
  - https://github.com/brandon-b-miller
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/7764
---
 python/cudf/cudf/_lib/replace.pyx           |   2 +-
 python/cudf/cudf/core/column/decimal.py     |  14 +-
 python/cudf/cudf/core/join/_join_helpers.py |   8 ++
 python/cudf/cudf/tests/test_joining.py      | 135 +++++++++++++++++++-
 4 files changed, 155 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/_lib/replace.pyx b/python/cudf/cudf/_lib/replace.pyx
index 2732142dd15..cdedd3ac022 100644
--- a/python/cudf/cudf/_lib/replace.pyx
+++ b/python/cudf/cudf/_lib/replace.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index 4fee5060fe4..d9e4610832d 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -1,7 +1,7 @@
 # Copyright (c) 2021, NVIDIA CORPORATION.
 
 from decimal import Decimal
-from typing import cast
+from typing import cast, Any
 
 import cupy as cp
 import numpy as np
@@ -170,6 +170,18 @@ def sum_of_squares(
             "sum_of_squares", skipna=skipna, dtype=dtype, min_count=min_count
         )
 
+    def fillna(
+        self, value: Any = None, method: str = None, dtype: Dtype = None
+    ):
+        """Fill null values with ``value``.
+
+        Returns a copy with null filled.
+        """
+        result = libcudf.replace.replace_nulls(
+            input_col=self, replacement=value, method=method, dtype=dtype
+        )
+        return self._copy_type_metadata(result)
+
 
 def _binop_scale(l_dtype, r_dtype, op):
     # This should at some point be hooked up to libcudf's
diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 3807f408369..5e15ddfc359 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -97,6 +97,14 @@ def _match_join_keys(
     if pd.api.types.is_dtype_equal(ltype, rtype):
         return lcol, rcol
 
+    if isinstance(ltype, cudf.Decimal64Dtype) or isinstance(
+        rtype, cudf.Decimal64Dtype
+    ):
+        raise TypeError(
+            "Decimal columns can only be merged with decimal columns "
+            "of the same precision and scale"
+        )
+
     if (np.issubdtype(ltype, np.number)) and (np.issubdtype(rtype, np.number)):
         common_type = (
             max(ltype, rtype)
diff --git a/python/cudf/cudf/tests/test_joining.py b/python/cudf/cudf/tests/test_joining.py
index 9164bfe98d1..2dae2bf1e97 100644
--- a/python/cudf/cudf/tests/test_joining.py
+++ b/python/cudf/cudf/tests/test_joining.py
@@ -6,7 +6,7 @@
 
 import cudf
 from cudf.core._compat import PANDAS_GE_120
-from cudf.core.dtypes import CategoricalDtype
+from cudf.core.dtypes import CategoricalDtype, Decimal64Dtype
 from cudf.tests.utils import (
     INTEGER_TYPES,
     NUMERIC_TYPES,
@@ -89,7 +89,7 @@ def assert_join_results_equal(expect, got, how, **kwargs):
         ):  # can't sort_values() on a df without columns
             return assert_eq(expect, got, **kwargs)
 
-        return assert_eq(
+        assert_eq(
             expect.sort_values(expect.columns.to_list()).reset_index(
                 drop=True
             ),
@@ -1152,6 +1152,137 @@ def test_typecast_on_join_overflow_unsafe(dtypes):
         merged = lhs.merge(rhs, on="a", how="left")  # noqa: F841
 
 
+@pytest.mark.parametrize(
+    "dtype",
+    [Decimal64Dtype(5, 2), Decimal64Dtype(7, 5), Decimal64Dtype(12, 7)],
+)
+def test_decimal_typecast_inner(dtype):
+    other_data = ["a", "b", "c", "d", "e"]
+
+    join_data_l = cudf.Series(["1.6", "9.5", "7.2", "8.7", "2.3"]).astype(
+        dtype
+    )
+    join_data_r = cudf.Series(["1.6", "9.5", "7.2", "4.5", "2.3"]).astype(
+        dtype
+    )
+
+    gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data})
+    gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data})
+
+    exp_join_data = ["1.6", "9.5", "7.2", "2.3"]
+    exp_other_data = ["a", "b", "c", "e"]
+
+    exp_join_col = cudf.Series(exp_join_data).astype(dtype)
+
+    expected = cudf.DataFrame(
+        {
+            "join_col": exp_join_col,
+            "B_x": exp_other_data,
+            "B_y": exp_other_data,
+        }
+    )
+
+    got = gdf_l.merge(gdf_r, on="join_col", how="inner")
+
+    assert_join_results_equal(expected, got, how="inner")
+    assert_eq(dtype, got["join_col"].dtype)
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [Decimal64Dtype(7, 3), Decimal64Dtype(9, 5), Decimal64Dtype(14, 10)],
+)
+def test_decimal_typecast_left(dtype):
+    other_data = ["a", "b", "c", "d"]
+
+    join_data_l = cudf.Series(["95.05", "384.26", "74.22", "1456.94"]).astype(
+        dtype
+    )
+    join_data_r = cudf.Series(
+        ["95.05", "62.4056", "74.22", "1456.9472"]
+    ).astype(dtype)
+
+    gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data})
+    gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data})
+
+    exp_join_data = ["95.05", "74.22", "384.26", "1456.94"]
+    exp_other_data_x = ["a", "c", "b", "d"]
+    exp_other_data_y = ["a", "c", None, None]
+
+    exp_join_col = cudf.Series(exp_join_data).astype(dtype)
+
+    expected = cudf.DataFrame(
+        {
+            "join_col": exp_join_col,
+            "B_x": exp_other_data_x,
+            "B_y": exp_other_data_y,
+        }
+    )
+
+    got = gdf_l.merge(gdf_r, on="join_col", how="left")
+
+    assert_join_results_equal(expected, got, how="left")
+    assert_eq(dtype, got["join_col"].dtype)
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [Decimal64Dtype(7, 3), Decimal64Dtype(10, 5), Decimal64Dtype(18, 9)],
+)
+def test_decimal_typecast_outer(dtype):
+    other_data = ["a", "b", "c"]
+    join_data_l = cudf.Series(["741.248", "1029.528", "3627.292"]).astype(
+        dtype
+    )
+    join_data_r = cudf.Series(["9284.103", "1029.528", "948.637"]).astype(
+        dtype
+    )
+    gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data})
+    gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data})
+    exp_join_data = ["9284.103", "948.637", "1029.528", "741.248", "3627.292"]
+    exp_other_data_x = [None, None, "b", "a", "c"]
+    exp_other_data_y = ["a", "c", "b", None, None]
+    exp_join_col = cudf.Series(exp_join_data).astype(dtype)
+    expected = cudf.DataFrame(
+        {
+            "join_col": exp_join_col,
+            "B_x": exp_other_data_x,
+            "B_y": exp_other_data_y,
+        }
+    )
+    got = gdf_l.merge(gdf_r, on="join_col", how="outer")
+
+    assert_join_results_equal(expected, got, how="outer")
+    assert_eq(dtype, got["join_col"].dtype)
+
+
+@pytest.mark.parametrize(
+    "dtype_l", [Decimal64Dtype(7, 3), Decimal64Dtype(9, 5)],
+)
+@pytest.mark.parametrize(
+    "dtype_r", [Decimal64Dtype(8, 3), Decimal64Dtype(11, 6)],
+)
+def test_mixed_decimal_typecast(dtype_l, dtype_r):
+    other_data = ["a", "b", "c", "d"]
+
+    join_data_l = cudf.Series(["95.05", "34.6", "74.22", "14.94"]).astype(
+        dtype_r
+    )
+    join_data_r = cudf.Series(["95.05", "62.4056", "74.22", "1.42"]).astype(
+        dtype_l
+    )
+
+    gdf_l = cudf.DataFrame({"join_col": join_data_l, "B": other_data})
+    gdf_r = cudf.DataFrame({"join_col": join_data_r, "B": other_data})
+
+    with pytest.raises(
+        TypeError,
+        match="Decimal columns can only be merged with decimal columns "
+        "of the same precision and scale",
+    ):
+        gdf_l.merge(gdf_r, on="join_col", how="inner")
+
+
 @pytest.mark.parametrize(
     "dtype_l",
     ["datetime64[s]", "datetime64[ms]", "datetime64[us]", "datetime64[ns]"],