From 8bf0dd677799dc170d613e827a28f56e1422ec25 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Fri, 22 Jan 2021 18:41:34 -0800
Subject: [PATCH 01/23] fix in-place update of data and add Series.update

---
 python/cudf/cudf/core/dataframe.py       |   2 +-
 python/cudf/cudf/core/series.py          | 104 +++++++++++++++++++++++
 python/cudf/cudf/tests/test_dataframe.py |  13 +--
 python/cudf/cudf/tests/test_series.py    |  37 ++++++++
 4 files changed, 146 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index f9b61a60830..e625db5b0c5 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1545,7 +1545,7 @@ def update(
             if mask.all():
                 continue
 
-            self[col] = this.where(mask, that)
+            self[col].where(mask, that, inplace=True)
 
     def __add__(self, other):
         return self._apply_op("__add__", other)
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 147262be08d..68424546bf4 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2419,6 +2419,110 @@ def replace(
 
         return self._mimic_inplace(result, inplace=inplace)
 
+    def update(self, other):
+        """
+        Modify Series in place using values from passed Series.
+        Uses non-NA values from passed Series to make updates. Aligns
+        on index.
+
+        Parameters
+        ----------
+        other : Series, or object coercible into Series
+
+        Examples
+        --------
+        >>> import cudf
+        >>> s = cudf.Series([1, 2, 3])
+        >>> s
+        0    1
+        1    2
+        2    3
+        dtype: int64
+        >>> s.update(cudf.Series([4, 5, 6]))
+        >>> s
+        0    4
+        1    5
+        2    6
+        dtype: int64
+        >>> s = cudf.Series(['a', 'b', 'c'])
+        >>> s
+        0    a
+        1    b
+        2    c
+        dtype: object
+        >>> s.update(cudf.Series(['d', 'e'], index=[0, 2]))
+        >>> s
+        0    d
+        1    b
+        2    e
+        dtype: object
+        >>> s = cudf.Series([1, 2, 3])
+        >>> s
+        0    1
+        1    2
+        2    3
+        dtype: int64
+        >>> s.update(cudf.Series([4, 5, 6, 7, 8]))
+        >>> s
+        0    4
+        1    5
+        2    6
+        dtype: int64
+
+        If ``other`` contains NaNs the corresponding values are not updated
+        in the original Series.
+
+        >>> s = cudf.Series([1, 2, 3])
+        >>> s
+        0    1
+        1    2
+        2    3
+        dtype: int64
+        >>> s.update(cudf.Series([4, np.nan, 6], nan_as_null=False))
+        >>> s
+        0    4
+        1    2
+        2    6
+        dtype: int64
+
+        ``other`` can also be a non-Series object type
+        that is coercible into a Series
+
+        >>> s = cudf.Series([1, 2, 3])
+        >>> s
+        0    1
+        1    2
+        2    3
+        dtype: int64
+        >>> s.update([4, np.nan, 6])
+        >>> s
+        0    4
+        1    2
+        2    6
+        dtype: int64
+        >>> s = cudf.Series([1, 2, 3])
+        >>> s
+        0    1
+        1    2
+        2    3
+        dtype: int64
+        >>> s.update({1: 9})
+        >>> s
+        0    1
+        1    9
+        2    3
+        dtype: int64
+        """
+
+        if not isinstance(other, cudf.Series):
+            other = cudf.Series(other)
+
+        if not self.index.equals(other.index):
+            other = other.reindex(index=self.index)
+        mask = other.notna()
+
+        self.mask(mask, other, inplace=True)
+
     def reverse(self):
         """Reverse the Series
         """
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index f41714ec1ad..8967b4f299d 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8175,9 +8175,6 @@ def test_agg_for_dataframe_with_string_columns(aggs):
 @pytest.mark.parametrize(
     "overwrite", [True, False],
 )
-@pytest.mark.parametrize(
-    "filter_func", [None],
-)
 @pytest.mark.parametrize(
     "errors", ["ignore"],
 )
@@ -8222,19 +8219,17 @@ def test_agg_for_dataframe_with_string_columns(aggs):
         },
     ],
 )
-def test_update_for_dataframes(
-    data, data2, join, overwrite, filter_func, errors
-):
+def test_update_for_dataframes(data, data2, join, overwrite, errors):
     pdf = pd.DataFrame(data)
     gdf = gd.DataFrame(data)
 
     other_pd = pd.DataFrame(data2)
     other_gd = gd.DataFrame(data2)
 
-    expect = pdf.update(other_pd, join, overwrite, filter_func, errors)
-    got = gdf.update(other_gd, join, overwrite, filter_func, errors)
+    pdf.update(other_pd, join, overwrite, errors)
+    gdf.update(other_gd, join, overwrite, errors)
 
-    assert_eq(expect, got)
+    assert_eq(pdf, gdf)
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index 980dcb5a13b..a97b632a07c 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2020-2021, NVIDIA CORPORATION.
+
 import operator
 import re
 from string import ascii_letters, digits
@@ -913,3 +914,39 @@ def custom_add_func(sr, val):
         lfunc_args_and_kwargs=([(custom_add_func, "val")], {"val": 11}),
         rfunc_args_and_kwargs=([(custom_add_func, "val")], {"val": 11}),
     )
+
+
+@pytest.mark.parametrize(
+    "data",
+    [cudf.Series([1, 2, 3]), cudf.Series([10, 11, 12], index=[1, 2, 3])],
+)
+@pytest.mark.parametrize(
+    "other",
+    [
+        cudf.Series([4, 5, 6]),
+        cudf.Series([4, 5, 6, 7, 8]),
+        cudf.Series([4, np.nan, 6], nan_as_null=False),
+        [4, np.nan, 6],
+        {1: 9},
+    ],
+)
+def test_series_update(data, other):
+    gs = data.copy(deep=True)
+    if isinstance(other, cudf.Series):
+        g_other = other.copy(deep=True)
+        p_other = g_other.to_pandas()
+    else:
+        g_other = other
+        p_other = other
+
+    ps = gs.to_pandas()
+
+    gs_column_before = gs._column
+    gs.update(g_other)
+    gs_column_after = gs._column
+
+    assert_eq(gs_column_before.to_array(), gs_column_after.to_array())
+
+    ps.update(p_other)
+
+    assert_eq(gs, ps)

From c87de031660961fee8a875ce0db391b9a25cde90 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Mon, 1 Feb 2021 17:39:48 -0800
Subject: [PATCH 02/23] fix `where` type-casting logic and `update` logic

---
 python/cudf/cudf/_lib/copying.pyx        |   4 +-
 python/cudf/cudf/core/dataframe.py       |   8 +-
 python/cudf/cudf/core/frame.py           | 245 ++++++++++++++++-------
 python/cudf/cudf/tests/test_dataframe.py |   6 +-
 python/cudf/cudf/tests/test_replace.py   |  27 ++-
 python/cudf/cudf/tests/test_series.py    |  13 ++
 python/cudf/cudf/utils/dtypes.py         |   9 +-
 7 files changed, 226 insertions(+), 86 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index ad798a73ed2..9f56297edaa 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -554,11 +554,11 @@ def copy_if_else(object lhs, object rhs, Column boolean_mask):
             return _copy_if_else_column_column(lhs, rhs, boolean_mask)
         else:
             return _copy_if_else_column_scalar(
-                lhs, as_device_scalar(rhs, lhs.dtype), boolean_mask)
+                lhs, as_device_scalar(rhs), boolean_mask)
     else:
         if isinstance(rhs, Column):
             return _copy_if_else_scalar_column(
-                as_device_scalar(lhs, rhs.dtype), rhs, boolean_mask)
+                as_device_scalar(lhs), rhs, boolean_mask)
         else:
             if lhs is None and rhs is None:
                 return lhs
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 7e98bcfb156..7a6f1c9d52c 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1547,8 +1547,9 @@ def update(
         if not self.index.equals(other.index):
             other = other.reindex(self.index, axis=0)
 
-        for col in self.columns:
-            this = self[col]
+        source_df = self.copy(deep=False)
+        for col in source_df.columns:
+            this = source_df[col]
             that = other[col]
 
             if errors == "raise":
@@ -1565,8 +1566,9 @@ def update(
             # don't overwrite columns unnecessarily
             if mask.all():
                 continue
+            source_df[col] = source_df[col].where(mask, that)
 
-            self[col].where(mask, that, inplace=True)
+        self._mimic_inplace(source_df, inplace=True)
 
     def __add__(self, other):
         return self._apply_op("__add__", other)
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 5bc5675e1e6..96845755c17 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -738,29 +738,7 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1):
 
         return self._mimic_inplace(output, inplace=inplace)
 
-    def _normalize_scalars(self, other):
-        """
-        Try to normalizes scalar values as per self dtype
-        """
-        if (
-            other is not None
-            and (isinstance(other, float) and not np.isnan(other))
-        ) and (self.dtype.type(other) != other):
-            raise TypeError(
-                f"Cannot safely cast non-equivalent "
-                f"{type(other).__name__} to {self.dtype.name}"
-            )
-
-        return (
-            self.dtype.type(other)
-            if (
-                other is not None
-                and (isinstance(other, float) and not np.isnan(other))
-            )
-            else other
-        )
-
-    def _normalize_columns_and_scalars_type(self, other):
+    def _normalize_columns_and_scalars_type(self, other, inplace=False):
         """
         Try to normalize the other's dtypes as per self.
 
@@ -785,34 +763,57 @@ def _normalize_columns_and_scalars_type(self, other):
         if isinstance(self, cudf.DataFrame) and isinstance(
             other, cudf.DataFrame
         ):
-            return [
-                other[self_col].astype(self._data[self_col].dtype)._column
-                for self_col in self._data.names
-            ]
+            source_df = self.copy()
+            other_df = other.copy()
+            for self_col in source_df._data.names:
+                source_col, other_col = _check_and_cast_columns(
+                    source_col=source_df._data[self_col],
+                    other_col=other_df._data[self_col],
+                    inplace=inplace,
+                )
+                source_df._data[self_col] = source_col
+                other_df._data[self_col] = other_col
+            return source_df, other_df
 
         elif isinstance(self, (cudf.Series, cudf.Index)) and not is_scalar(
             other
         ):
             other = as_column(other)
-            return other.astype(self.dtype)
-
+            input_col = self._data[self.name]
+            return _check_and_cast_columns(
+                source_col=input_col, other_col=other, inplace=inplace
+            )
         else:
             # Handles scalar or list/array like scalars
             if isinstance(self, (cudf.Series, cudf.Index)) and is_scalar(
                 other
             ):
-                return self._normalize_scalars(other)
+                input_col = self._data[self.name]
+                return _check_and_cast_columns_with_scalar(
+                    source_col=self._data[self.name],
+                    other_scalar=other,
+                    inplace=inplace,
+                )
 
             elif isinstance(self, cudf.DataFrame):
-                out = []
                 if is_scalar(other):
                     other = [other for i in range(len(self._data.names))]
-                out = [
-                    self[in_col_name]._normalize_scalars(sclr)
-                    for in_col_name, sclr in zip(self._data.names, other)
-                ]
 
-                return out
+                source_df = self.copy()
+                others = []
+                for col_name, other_sclr in zip(self._data.names, other):
+
+                    (
+                        source_col,
+                        other_scalar,
+                    ) = _check_and_cast_columns_with_scalar(
+                        source_col=source_df._data[col_name],
+                        other_scalar=other_sclr,
+                        inplace=inplace,
+                    )
+                    source_df._data[col_name] = source_col
+                    others.append(other_scalar)
+                return source_df, others
             else:
                 raise ValueError(
                     f"Inappropriate input {type(self)} "
@@ -897,27 +898,33 @@ def where(self, cond, other=None, inplace=False):
                 # as `cond` has no column names.
                 cond.columns = self.columns
 
-            other = self._normalize_columns_and_scalars_type(other)
+            source_df, others = self._normalize_columns_and_scalars_type(other)
+            if isinstance(other, Frame):
+                others = others._data.columns
+
             out_df = cudf.DataFrame(index=self.index)
-            if len(self._columns) != len(other):
+            if len(self._columns) != len(others):
                 raise ValueError(
                     """Replacement list length or number of dataframe columns
                     should be equal to Number of columns of dataframe"""
                 )
-
-            for column_name, other_column in zip(self._data.names, other):
-                input_col = self._data[column_name]
+            for i, column_name in enumerate(self._data.names):
+                input_col = source_df._data[column_name]
+                other_column = others[i]
                 if column_name in cond._data:
                     if isinstance(
                         input_col, cudf.core.column.CategoricalColumn
                     ):
-                        if np.isscalar(other_column):
+                        if is_scalar(other_column):
                             try:
                                 other_column = input_col._encode(other_column)
                             except ValueError:
                                 # When other is not present in categories,
                                 # fill with Null.
                                 other_column = None
+                            other_column = cudf.Scalar(
+                                other_column, dtype=input_col.codes.dtype
+                            )
                         elif hasattr(other_column, "codes"):
                             other_column = other_column.codes
                         input_col = input_col.codes
@@ -952,45 +959,49 @@ def where(self, cond, other=None, inplace=False):
             return self._mimic_inplace(out_df, inplace=inplace)
 
         else:
-
             if isinstance(other, cudf.DataFrame):
                 raise NotImplementedError(
                     "cannot align with a higher dimensional Frame"
                 )
-
-            other = self._normalize_columns_and_scalars_type(other)
-
+            input_col = self._data[self.name]
             cond = as_column(cond)
             if len(cond) != len(self):
                 raise ValueError(
                     """Array conditional must be same shape as self"""
                 )
-            input_col = self._data[self.name]
-            if isinstance(input_col, cudf.core.column.CategoricalColumn):
-                if np.isscalar(other):
-                    try:
-                        other = input_col._encode(other)
-                    except ValueError:
-                        # When other is not present in categories,
-                        # fill with Null.
-                        other = None
-                elif hasattr(other, "codes"):
-                    other = other.codes
-
-                input_col = input_col.codes
-
-            result = libcudf.copying.copy_if_else(input_col, other, cond)
-
-            if is_categorical_dtype(self.dtype):
-                result = build_categorical_column(
-                    categories=self._data[self.name].categories,
-                    codes=as_column(result.base_data, dtype=result.dtype),
-                    mask=result.base_mask,
-                    size=result.size,
-                    offset=result.offset,
-                    ordered=self._data[self.name].ordered,
+            if cond.all():
+                result = input_col
+            else:
+                input_col, other = self._normalize_columns_and_scalars_type(
+                    other, inplace
                 )
 
+                if isinstance(input_col, cudf.core.column.CategoricalColumn):
+                    if is_scalar(other):
+                        try:
+                            other = input_col._encode(other)
+                        except ValueError:
+                            # When other is not present in categories,
+                            # fill with Null.
+                            other = None
+                        other = cudf.Scalar(other, dtype=input_col.codes.dtype)
+                    elif hasattr(other, "codes"):
+                        other = other.codes
+
+                    input_col = input_col.codes
+
+                result = libcudf.copying.copy_if_else(input_col, other, cond)
+
+                if is_categorical_dtype(self.dtype):
+                    result = build_categorical_column(
+                        categories=self._data[self.name].categories,
+                        codes=as_column(result.base_data, dtype=result.dtype),
+                        mask=result.base_mask,
+                        size=result.size,
+                        offset=result.offset,
+                        ordered=self._data[self.name].ordered,
+                    )
+
             if isinstance(self, cudf.Index):
                 from cudf.core.index import as_index
 
@@ -3746,3 +3757,97 @@ def _reassign_categories(categories, cols, col_idxs):
                 offset=cols[name].offset,
                 size=cols[name].size,
             )
+
+
+def _normalize_scalars(col, other):
+    """
+    Try to normalizes scalar values as per col dtype
+    """
+    if (
+        other is not None
+        and (isinstance(other, float) and not np.isnan(other))
+    ) and (col.dtype.type(other) != other):
+        raise TypeError(
+            f"Cannot safely cast non-equivalent "
+            f"{type(other).__name__} to {col.dtype.name}"
+        )
+
+    return (
+        col.dtype.type(other)
+        if (
+            other is not None
+            and (isinstance(other, float) and not np.isnan(other))
+        )
+        else other
+    )
+
+
+def _check_and_cast_columns(source_col, other_col, inplace):
+    """
+    Returns type-casted columns of `source_col` & `other_col`
+    based on `inplace` parameter.
+    """
+    if cudf.utils.dtypes.is_categorical_dtype(source_col.dtype):
+        return source_col, other_col
+    elif cudf.utils.dtypes.is_mixed_with_object_dtype(source_col, other_col):
+        raise TypeError(
+            "cudf does not support mixed types, please type-cast "
+            "the column of dataframe/series and other "
+            "to same dtypes."
+        )
+    if inplace:
+        if not source_col.can_cast_safely(other_col.dtype):
+            warnings.warn(
+                f"Type-casting from {other_col.dtype} "
+                f"to {source_col.dtype}, there could be potential data loss"
+            )
+        return source_col, other_col.astype(source_col.dtype)
+    else:
+        common_dtype = cudf.utils.dtypes.find_common_type(
+            [source_col.dtype, other_col.dtype]
+        )
+        return source_col.astype(common_dtype), other_col.astype(common_dtype)
+
+
+def _check_and_cast_columns_with_scalar(source_col, other_scalar, inplace):
+    """
+    Returns type-casted column `source_col` & scalar `other_scalar`
+    based on `inplace` parameter.
+    """
+    if cudf.utils.dtypes.is_categorical_dtype(source_col.dtype):
+        return source_col, other_scalar
+
+    device_scalar = cudf.Scalar(
+        _normalize_scalars(source_col, other_scalar),
+        dtype=source_col.dtype if other_scalar is None else None,
+    )
+
+    if other_scalar is None:
+        return source_col, device_scalar
+    elif cudf.utils.dtypes.is_mixed_with_object_dtype(
+        device_scalar, source_col
+    ):
+        raise TypeError(
+            "cudf does not support mixed types, please type-cast "
+            "the column of dataframe/series and other "
+            "to same dtypes."
+        )
+    if inplace:
+        if not np.can_cast(device_scalar, source_col.dtype):
+            warnings.warn(
+                f"Type-casting from {device_scalar.dtype} "
+                f"to {source_col.dtype}, there could be potential data loss"
+            )
+        return source_col, device_scalar.astype(source_col.dtype)
+    else:
+        if pd.api.types.is_numeric_dtype(source_col.dtype) and np.can_cast(
+            other_scalar, source_col.dtype
+        ):
+            common_dtype = source_col.dtype
+        else:
+            common_dtype = cudf.utils.dtypes.find_common_type(
+                [source_col.dtype, np.min_scalar_type(other_scalar)]
+            )
+
+        source_col = source_col.astype(common_dtype)
+        return source_col, cudf.Scalar(other_scalar, dtype=common_dtype)
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 8967b4f299d..3ac2b4fc918 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8226,10 +8226,10 @@ def test_update_for_dataframes(data, data2, join, overwrite, errors):
     other_pd = pd.DataFrame(data2)
     other_gd = gd.DataFrame(data2)
 
-    pdf.update(other_pd, join, overwrite, errors)
-    gdf.update(other_gd, join, overwrite, errors)
+    pdf.update(other=other_pd, join=join, overwrite=overwrite, errors=errors)
+    gdf.update(other=other_gd, join=join, overwrite=overwrite, errors=errors)
 
-    assert_eq(pdf, gdf)
+    assert_eq(pdf, gdf, check_dtype=False)
 
 
 @pytest.mark.parametrize(
diff --git a/python/cudf/cudf/tests/test_replace.py b/python/cudf/cudf/tests/test_replace.py
index 5338761372f..b2468ea990f 100644
--- a/python/cudf/cudf/tests/test_replace.py
+++ b/python/cudf/cudf/tests/test_replace.py
@@ -610,25 +610,40 @@ def test_series_where(data_dtype, fill_value):
             sr.where(sr > 0, fill_value)
     else:
         # Cast back to original dtype as pandas automatically upcasts
-        expect = psr.where(psr > 0, fill_value).astype(psr.dtype)
+        expect = psr.where(psr > 0, fill_value)
         got = sr.where(sr > 0, fill_value)
-        assert_eq(expect, got)
+        # pandas returns 'float16' dtype, which is not supported in cudf
+        assert_eq(
+            expect,
+            got,
+            check_dtype=False if expect.dtype.kind in ("f") else True,
+        )
 
     if sr.dtype.type(fill_value) != fill_value:
         with pytest.raises(TypeError):
             sr.where(sr < 0, fill_value)
     else:
-        expect = psr.where(psr < 0, fill_value).astype(psr.dtype)
+        expect = psr.where(psr < 0, fill_value)
         got = sr.where(sr < 0, fill_value)
-        assert_eq(expect, got)
+        # pandas returns 'float16' dtype, which is not supported in cudf
+        assert_eq(
+            expect,
+            got,
+            check_dtype=False if expect.dtype.kind in ("f") else True,
+        )
 
     if sr.dtype.type(fill_value) != fill_value:
         with pytest.raises(TypeError):
             sr.where(sr == 0, fill_value)
     else:
-        expect = psr.where(psr == 0, fill_value).astype(psr.dtype)
+        expect = psr.where(psr == 0, fill_value)
         got = sr.where(sr == 0, fill_value)
-        assert_eq(expect, got)
+        # pandas returns 'float16' dtype, which is not supported in cudf
+        assert_eq(
+            expect,
+            got,
+            check_dtype=False if expect.dtype.kind in ("f") else True,
+        )
 
 
 @pytest.mark.parametrize("fill_value", [100, 100.0, 100.5])
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index 5b4b260b9a3..c86e537f740 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -971,3 +971,16 @@ def test_fillna_with_nan(data, nan_as_null, fill_value):
     actual = gs.fillna(fill_value)
 
     assert_eq(expected, actual)
+
+
+def test_series_mask_mixed_dtypes_error():
+    s = cudf.Series(["a", "b", "c"])
+    with pytest.raises(
+        TypeError,
+        match=re.escape(
+            "cudf does not support mixed types, please type-cast "
+            "the column of dataframe/series and other "
+            "to same dtypes."
+        ),
+    ):
+        s.where([True, False, True], [1, 2, 3])
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index d49b4abd399..d6d34be192e 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -318,7 +318,7 @@ def to_cudf_compatible_scalar(val, dtype=None):
     if not is_scalar(val):
         raise ValueError(
             f"Cannot convert value of type {type(val).__name__} "
-            " to cudf scalar"
+            "to cudf scalar"
         )
 
     if isinstance(val, (np.ndarray, cp.ndarray)) and val.ndim == 0:
@@ -624,7 +624,12 @@ def find_common_type(dtypes):
         dtypes = dtypes - td_dtypes
         dtypes.add(np.result_type(*td_dtypes))
 
-    return np.find_common_type(list(dtypes), [])
+    common_dtype = np.find_common_type(list(dtypes), [])
+    if common_dtype == np.dtype("float16"):
+        # cuDF does not support float16 dtype
+        return np.dtype("float32")
+    else:
+        return common_dtype
 
 
 # Type dispatch loops similar to what are found in `np.add.types`

From f05bda4ad58c9dcb6543af975268c2e9fbf97fe4 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Tue, 2 Feb 2021 16:36:26 -0800
Subject: [PATCH 03/23] move `where`-related internal APIs

---
 python/cudf/cudf/core/__init__.py           |   2 +-
 python/cudf/cudf/core/frame.py              | 192 ++------------------
 python/cudf/cudf/core/internals/__init__.py |   0
 python/cudf/cudf/core/internals/where.py    | 181 ++++++++++++++++++
 4 files changed, 194 insertions(+), 181 deletions(-)
 create mode 100644 python/cudf/cudf/core/internals/__init__.py
 create mode 100644 python/cudf/cudf/core/internals/where.py

diff --git a/python/cudf/cudf/core/__init__.py b/python/cudf/cudf/core/__init__.py
index 91a369c31f8..a71f15dd95b 100644
--- a/python/cudf/cudf/core/__init__.py
+++ b/python/cudf/cudf/core/__init__.py
@@ -1,6 +1,6 @@
 # Copyright (c) 2018-2020, NVIDIA CORPORATION.
 
-from cudf.core import buffer, column, column_accessor, common
+from cudf.core import buffer, column, column_accessor, common, internals
 from cudf.core.buffer import Buffer
 from cudf.core.dataframe import DataFrame, from_pandas, merge
 from cudf.core.index import (
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index d3c3c18050e..ebe91d12012 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -20,6 +20,7 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf.core.column import as_column, build_categorical_column, column_empty
+from cudf.core.internals import where as where_internals
 from cudf.utils.dtypes import (
     is_categorical_dtype,
     is_column_like,
@@ -737,88 +738,6 @@ def clip(self, lower=None, upper=None, inplace=False, axis=1):
 
         return self._mimic_inplace(output, inplace=inplace)
 
-    def _normalize_columns_and_scalars_type(self, other, inplace=False):
-        """
-        Try to normalize the other's dtypes as per self.
-
-        Parameters
-        ----------
-
-        self : Can be a DataFrame or Series or Index
-        other : Can be a DataFrame, Series, Index, Array
-            like object or a scalar value
-
-            if self is DataFrame, other can be only a
-            scalar or array like with size of number of columns
-            in DataFrame or a DataFrame with same dimension
-
-            if self is Series, other can be only a scalar or
-            a series like with same length as self
-
-        Returns:
-        --------
-        A dataframe/series/list/scalar form of normalized other
-        """
-        if isinstance(self, cudf.DataFrame) and isinstance(
-            other, cudf.DataFrame
-        ):
-            source_df = self.copy()
-            other_df = other.copy()
-            for self_col in source_df._data.names:
-                source_col, other_col = _check_and_cast_columns(
-                    source_col=source_df._data[self_col],
-                    other_col=other_df._data[self_col],
-                    inplace=inplace,
-                )
-                source_df._data[self_col] = source_col
-                other_df._data[self_col] = other_col
-            return source_df, other_df
-
-        elif isinstance(self, (cudf.Series, cudf.Index)) and not is_scalar(
-            other
-        ):
-            other = as_column(other)
-            input_col = self._data[self.name]
-            return _check_and_cast_columns(
-                source_col=input_col, other_col=other, inplace=inplace
-            )
-        else:
-            # Handles scalar or list/array like scalars
-            if isinstance(self, (cudf.Series, cudf.Index)) and is_scalar(
-                other
-            ):
-                input_col = self._data[self.name]
-                return _check_and_cast_columns_with_scalar(
-                    source_col=self._data[self.name],
-                    other_scalar=other,
-                    inplace=inplace,
-                )
-
-            elif isinstance(self, cudf.DataFrame):
-                if is_scalar(other):
-                    other = [other for i in range(len(self._data.names))]
-
-                source_df = self.copy()
-                others = []
-                for col_name, other_sclr in zip(self._data.names, other):
-
-                    (
-                        source_col,
-                        other_scalar,
-                    ) = _check_and_cast_columns_with_scalar(
-                        source_col=source_df._data[col_name],
-                        other_scalar=other_sclr,
-                        inplace=inplace,
-                    )
-                    source_df._data[col_name] = source_col
-                    others.append(other_scalar)
-                return source_df, others
-            else:
-                raise ValueError(
-                    f"Inappropriate input {type(self)} "
-                    f"and other {type(other)} combination"
-                )
-
     def where(self, cond, other=None, inplace=False):
         """
         Replace values where the condition is False.
@@ -897,7 +816,12 @@ def where(self, cond, other=None, inplace=False):
                 # as `cond` has no column names.
                 cond.columns = self.columns
 
-            source_df, others = self._normalize_columns_and_scalars_type(other)
+            (
+                source_df,
+                others,
+            ) = where_internals._normalize_columns_and_scalars_type(
+                self, other
+            )
             if isinstance(other, Frame):
                 others = others._data.columns
 
@@ -971,8 +895,11 @@ def where(self, cond, other=None, inplace=False):
             if cond.all():
                 result = input_col
             else:
-                input_col, other = self._normalize_columns_and_scalars_type(
-                    other, inplace
+                (
+                    input_col,
+                    other,
+                ) = where_internals._normalize_columns_and_scalars_type(
+                    self, other, inplace
                 )
 
                 if isinstance(input_col, cudf.core.column.CategoricalColumn):
@@ -2725,7 +2652,6 @@ def searchsorted(
         array([4, 4, 4, 0], dtype=int32)
         """
         # Call libcudf++ search_sorted primitive
-        from cudf.utils.dtypes import is_scalar
 
         scalar_flag = None
         if is_scalar(values):
@@ -3862,100 +3788,6 @@ def _reassign_categories(categories, cols, col_idxs):
             )
 
 
-def _normalize_scalars(col, other):
-    """
-    Try to normalizes scalar values as per col dtype
-    """
-    if (
-        other is not None
-        and (isinstance(other, float) and not np.isnan(other))
-    ) and (col.dtype.type(other) != other):
-        raise TypeError(
-            f"Cannot safely cast non-equivalent "
-            f"{type(other).__name__} to {col.dtype.name}"
-        )
-
-    return (
-        col.dtype.type(other)
-        if (
-            other is not None
-            and (isinstance(other, float) and not np.isnan(other))
-        )
-        else other
-    )
-
-
-def _check_and_cast_columns(source_col, other_col, inplace):
-    """
-    Returns type-casted columns of `source_col` & `other_col`
-    based on `inplace` parameter.
-    """
-    if cudf.utils.dtypes.is_categorical_dtype(source_col.dtype):
-        return source_col, other_col
-    elif cudf.utils.dtypes.is_mixed_with_object_dtype(source_col, other_col):
-        raise TypeError(
-            "cudf does not support mixed types, please type-cast "
-            "the column of dataframe/series and other "
-            "to same dtypes."
-        )
-    if inplace:
-        if not source_col.can_cast_safely(other_col.dtype):
-            warnings.warn(
-                f"Type-casting from {other_col.dtype} "
-                f"to {source_col.dtype}, there could be potential data loss"
-            )
-        return source_col, other_col.astype(source_col.dtype)
-    else:
-        common_dtype = cudf.utils.dtypes.find_common_type(
-            [source_col.dtype, other_col.dtype]
-        )
-        return source_col.astype(common_dtype), other_col.astype(common_dtype)
-
-
-def _check_and_cast_columns_with_scalar(source_col, other_scalar, inplace):
-    """
-    Returns type-casted column `source_col` & scalar `other_scalar`
-    based on `inplace` parameter.
-    """
-    if cudf.utils.dtypes.is_categorical_dtype(source_col.dtype):
-        return source_col, other_scalar
-
-    device_scalar = cudf.Scalar(
-        _normalize_scalars(source_col, other_scalar),
-        dtype=source_col.dtype if other_scalar is None else None,
-    )
-
-    if other_scalar is None:
-        return source_col, device_scalar
-    elif cudf.utils.dtypes.is_mixed_with_object_dtype(
-        device_scalar, source_col
-    ):
-        raise TypeError(
-            "cudf does not support mixed types, please type-cast "
-            "the column of dataframe/series and other "
-            "to same dtypes."
-        )
-    if inplace:
-        if not np.can_cast(device_scalar, source_col.dtype):
-            warnings.warn(
-                f"Type-casting from {device_scalar.dtype} "
-                f"to {source_col.dtype}, there could be potential data loss"
-            )
-        return source_col, device_scalar.astype(source_col.dtype)
-    else:
-        if pd.api.types.is_numeric_dtype(source_col.dtype) and np.can_cast(
-            other_scalar, source_col.dtype
-        ):
-            common_dtype = source_col.dtype
-        else:
-            common_dtype = cudf.utils.dtypes.find_common_type(
-                [source_col.dtype, np.min_scalar_type(other_scalar)]
-            )
-
-        source_col = source_col.astype(common_dtype)
-        return source_col, cudf.Scalar(other_scalar, dtype=common_dtype)
-
-
 def _is_series(obj):
     """
     Checks if the `obj` is of type `cudf.Series`
diff --git a/python/cudf/cudf/core/internals/__init__.py b/python/cudf/cudf/core/internals/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/cudf/cudf/core/internals/where.py b/python/cudf/cudf/core/internals/where.py
new file mode 100644
index 00000000000..e51be89507e
--- /dev/null
+++ b/python/cudf/cudf/core/internals/where.py
@@ -0,0 +1,181 @@
+import warnings
+
+import numpy as np
+import pandas as pd
+
+import cudf
+
+
+def _normalize_scalars(col, other):
+    """
+    Try to normalizes scalar values as per col dtype
+    """
+    if (
+        other is not None
+        and (isinstance(other, float) and not np.isnan(other))
+    ) and (col.dtype.type(other) != other):
+        raise TypeError(
+            f"Cannot safely cast non-equivalent "
+            f"{type(other).__name__} to {col.dtype.name}"
+        )
+
+    return (
+        col.dtype.type(other)
+        if (
+            other is not None
+            and (isinstance(other, float) and not np.isnan(other))
+        )
+        else other
+    )
+
+
+def _check_and_cast_columns(source_col, other_col, inplace):
+    """
+    Returns type-casted columns of `source_col` & `other_col`
+    based on `inplace` parameter.
+    """
+    if cudf.utils.dtypes.is_categorical_dtype(source_col.dtype):
+        return source_col, other_col
+    elif cudf.utils.dtypes.is_mixed_with_object_dtype(source_col, other_col):
+        raise TypeError(
+            "cudf does not support mixed types, please type-cast "
+            "the column of dataframe/series and other "
+            "to same dtypes."
+        )
+    if inplace:
+        if not source_col.can_cast_safely(other_col.dtype):
+            warnings.warn(
+                f"Type-casting from {other_col.dtype} "
+                f"to {source_col.dtype}, there could be potential data loss"
+            )
+        return source_col, other_col.astype(source_col.dtype)
+    else:
+        common_dtype = cudf.utils.dtypes.find_common_type(
+            [source_col.dtype, other_col.dtype]
+        )
+        return source_col.astype(common_dtype), other_col.astype(common_dtype)
+
+
+def _check_and_cast_columns_with_scalar(source_col, other_scalar, inplace):
+    """
+    Returns type-casted column `source_col` & scalar `other_scalar`
+    based on `inplace` parameter.
+    """
+    if cudf.utils.dtypes.is_categorical_dtype(source_col.dtype):
+        return source_col, other_scalar
+
+    device_scalar = cudf.Scalar(
+        _normalize_scalars(source_col, other_scalar),
+        dtype=source_col.dtype if other_scalar is None else None,
+    )
+
+    if other_scalar is None:
+        return source_col, device_scalar
+    elif cudf.utils.dtypes.is_mixed_with_object_dtype(
+        device_scalar, source_col
+    ):
+        raise TypeError(
+            "cudf does not support mixed types, please type-cast "
+            "the column of dataframe/series and other "
+            "to same dtypes."
+        )
+    if inplace:
+        if not np.can_cast(device_scalar, source_col.dtype):
+            warnings.warn(
+                f"Type-casting from {device_scalar.dtype} "
+                f"to {source_col.dtype}, there could be potential data loss"
+            )
+        return source_col, device_scalar.astype(source_col.dtype)
+    else:
+        if pd.api.types.is_numeric_dtype(source_col.dtype) and np.can_cast(
+            other_scalar, source_col.dtype
+        ):
+            common_dtype = source_col.dtype
+        else:
+            common_dtype = cudf.utils.dtypes.find_common_type(
+                [source_col.dtype, np.min_scalar_type(other_scalar)]
+            )
+
+        source_col = source_col.astype(common_dtype)
+        return source_col, cudf.Scalar(other_scalar, dtype=common_dtype)
+
+
+def _normalize_columns_and_scalars_type(frame, other, inplace=False):
+    """
+    Try to normalize the other's dtypes as per frame.
+
+    Parameters
+    ----------
+
+    frame : Can be a DataFrame or Series or Index
+    other : Can be a DataFrame, Series, Index, Array
+        like object or a scalar value
+
+        if frame is DataFrame, other can be only a
+        scalar or array like with size of number of columns
+        in DataFrame or a DataFrame with same dimension
+
+        if frame is Series, other can be only a scalar or
+        a series like with same length as frame
+
+    Returns:
+    --------
+    A dataframe/series/list/scalar form of normalized other
+    """
+    if isinstance(frame, cudf.DataFrame) and isinstance(other, cudf.DataFrame):
+        source_df = frame.copy()
+        other_df = other.copy()
+        for self_col in source_df._data.names:
+            source_col, other_col = _check_and_cast_columns(
+                source_col=source_df._data[self_col],
+                other_col=other_df._data[self_col],
+                inplace=inplace,
+            )
+            source_df._data[self_col] = source_col
+            other_df._data[self_col] = other_col
+        return source_df, other_df
+
+    elif isinstance(
+        frame, (cudf.Series, cudf.Index)
+    ) and not cudf.utils.dtypes.is_scalar(other):
+        other = cudf.core.column.as_column(other)
+        input_col = frame._data[frame.name]
+        return _check_and_cast_columns(
+            source_col=input_col, other_col=other, inplace=inplace
+        )
+    else:
+        # Handles scalar or list/array like scalars
+        if isinstance(
+            frame, (cudf.Series, cudf.Index)
+        ) and cudf.utils.dtypes.is_scalar(other):
+            input_col = frame._data[frame.name]
+            return _check_and_cast_columns_with_scalar(
+                source_col=frame._data[frame.name],
+                other_scalar=other,
+                inplace=inplace,
+            )
+
+        elif isinstance(frame, cudf.DataFrame):
+            if cudf.utils.dtypes.is_scalar(other):
+                other = [other for i in range(len(frame._data.names))]
+
+            source_df = frame.copy()
+            others = []
+            for col_name, other_sclr in zip(frame._data.names, other):
+
+                (
+                    source_col,
+                    other_scalar,
+                ) = _check_and_cast_columns_with_scalar(
+                    source_col=source_df._data[col_name],
+                    other_scalar=other_sclr,
+                    inplace=inplace,
+                )
+                source_df._data[col_name] = source_col
+                others.append(other_scalar)
+            return source_df, others
+        else:
+            raise ValueError(
+                f"Inappropriate input {type(frame)} "
+                f"and other {type(other)} combination"
+            )

From 5e7cf4e224e18da9c2c5afc4f3fd11e8fe84281b Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 3 Feb 2021 16:57:10 -0800
Subject: [PATCH 04/23] move where core logic to where.py

---
 python/cudf/cudf/core/frame.py           | 148 +----------------
 python/cudf/cudf/core/internals/where.py | 194 +++++++++++++++++++++++
 2 files changed, 197 insertions(+), 145 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index ebe91d12012..7d5d50796a0 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -791,151 +791,9 @@ def where(self, cond, other=None, inplace=False):
         dtype: int64
         """
 
-        if isinstance(self, cudf.DataFrame):
-            if hasattr(cond, "__cuda_array_interface__"):
-                cond = cudf.DataFrame(
-                    cond, columns=self._data.names, index=self.index
-                )
-            elif not isinstance(cond, cudf.DataFrame):
-                cond = self.from_pandas(pd.DataFrame(cond))
-
-            common_cols = set(self._data.names).intersection(
-                set(cond._data.names)
-            )
-            if len(common_cols) > 0:
-                # If `self` and `cond` are having unequal index,
-                # then re-index `cond`.
-                if not self.index.equals(cond.index):
-                    cond = cond.reindex(self.index)
-            else:
-                if cond.shape != self.shape:
-                    raise ValueError(
-                        """Array conditional must be same shape as self"""
-                    )
-                # Setting `self` column names to `cond`
-                # as `cond` has no column names.
-                cond.columns = self.columns
-
-            (
-                source_df,
-                others,
-            ) = where_internals._normalize_columns_and_scalars_type(
-                self, other
-            )
-            if isinstance(other, Frame):
-                others = others._data.columns
-
-            out_df = cudf.DataFrame(index=self.index)
-            if len(self._columns) != len(others):
-                raise ValueError(
-                    """Replacement list length or number of dataframe columns
-                    should be equal to Number of columns of dataframe"""
-                )
-            for i, column_name in enumerate(self._data.names):
-                input_col = source_df._data[column_name]
-                other_column = others[i]
-                if column_name in cond._data:
-                    if isinstance(
-                        input_col, cudf.core.column.CategoricalColumn
-                    ):
-                        if is_scalar(other_column):
-                            try:
-                                other_column = input_col._encode(other_column)
-                            except ValueError:
-                                # When other is not present in categories,
-                                # fill with Null.
-                                other_column = None
-                            other_column = cudf.Scalar(
-                                other_column, dtype=input_col.codes.dtype
-                            )
-                        elif hasattr(other_column, "codes"):
-                            other_column = other_column.codes
-                        input_col = input_col.codes
-
-                    result = libcudf.copying.copy_if_else(
-                        input_col, other_column, cond._data[column_name]
-                    )
-
-                    if isinstance(
-                        self._data[column_name],
-                        cudf.core.column.CategoricalColumn,
-                    ):
-                        result = build_categorical_column(
-                            categories=self._data[column_name].categories,
-                            codes=as_column(
-                                result.base_data, dtype=result.dtype
-                            ),
-                            mask=result.base_mask,
-                            size=result.size,
-                            offset=result.offset,
-                            ordered=self._data[column_name].ordered,
-                        )
-                else:
-                    from cudf._lib.null_mask import MaskState, create_null_mask
-
-                    out_mask = create_null_mask(
-                        len(input_col), state=MaskState.ALL_NULL
-                    )
-                    result = input_col.set_mask(out_mask)
-                out_df[column_name] = self[column_name].__class__(result)
-
-            return self._mimic_inplace(out_df, inplace=inplace)
-
-        else:
-            if isinstance(other, cudf.DataFrame):
-                raise NotImplementedError(
-                    "cannot align with a higher dimensional Frame"
-                )
-            input_col = self._data[self.name]
-            cond = as_column(cond)
-            if len(cond) != len(self):
-                raise ValueError(
-                    """Array conditional must be same shape as self"""
-                )
-            if cond.all():
-                result = input_col
-            else:
-                (
-                    input_col,
-                    other,
-                ) = where_internals._normalize_columns_and_scalars_type(
-                    self, other, inplace
-                )
-
-                if isinstance(input_col, cudf.core.column.CategoricalColumn):
-                    if is_scalar(other):
-                        try:
-                            other = input_col._encode(other)
-                        except ValueError:
-                            # When other is not present in categories,
-                            # fill with Null.
-                            other = None
-                        other = cudf.Scalar(other, dtype=input_col.codes.dtype)
-                    elif hasattr(other, "codes"):
-                        other = other.codes
-
-                    input_col = input_col.codes
-
-                result = libcudf.copying.copy_if_else(input_col, other, cond)
-
-                if is_categorical_dtype(self.dtype):
-                    result = build_categorical_column(
-                        categories=self._data[self.name].categories,
-                        codes=as_column(result.base_data, dtype=result.dtype),
-                        mask=result.base_mask,
-                        size=result.size,
-                        offset=result.offset,
-                        ordered=self._data[self.name].ordered,
-                    )
-
-            if isinstance(self, cudf.Index):
-                from cudf.core.index import as_index
-
-                result = as_index(result, name=self.name)
-            else:
-                result = self._copy_construct(data=result)
-
-            return self._mimic_inplace(result, inplace=inplace)
+        return where_internals.where(
+            frame=self, cond=cond, other=other, inplace=inplace
+        )
 
     def mask(self, cond, other=None, inplace=False):
         """
diff --git a/python/cudf/cudf/core/internals/where.py b/python/cudf/cudf/core/internals/where.py
index e51be89507e..3fc2f87f299 100644
--- a/python/cudf/cudf/core/internals/where.py
+++ b/python/cudf/cudf/core/internals/where.py
@@ -179,3 +179,197 @@ def _normalize_columns_and_scalars_type(frame, other, inplace=False):
                 f"Inappropriate input {type(frame)} "
                 f"and other {type(other)} combination"
             )
+
+
+def where(frame, cond, other=None, inplace=False):
+    """
+    Replace values where the condition is False.
+
+    Parameters
+    ----------
+    cond : bool Series/DataFrame, array-like
+        Where cond is True, keep the original value.
+        Where False, replace with corresponding value from other.
+        Callables are not supported.
+    other: scalar, list of scalars, Series/DataFrame
+        Entries where cond is False are replaced with
+        corresponding value from other. Callables are not
+        supported. Default is None.
+
+        DataFrame expects only Scalar or array like with scalars or
+        dataframe with same dimension as frame.
+
+        Series expects only scalar or series like with same length
+    inplace : bool, default False
+        Whether to perform the operation in place on the data.
+
+    Returns
+    -------
+    Same type as caller
+
+    Examples
+    --------
+    >>> import cudf
+    >>> df = cudf.DataFrame({"A":[1, 4, 5], "B":[3, 5, 8]})
+    >>> df.where(df % 2 == 0, [-1, -1])
+        A  B
+    0 -1 -1
+    1  4 -1
+    2 -1  8
+
+    >>> ser = cudf.Series([4, 3, 2, 1, 0])
+    >>> ser.where(ser > 2, 10)
+    0     4
+    1     3
+    2    10
+    3    10
+    4    10
+    dtype: int64
+    >>> ser.where(ser > 2)
+    0       4
+    1       3
+    2    <NA>
+    3    <NA>
+    4    <NA>
+    dtype: int64
+    """
+
+    if isinstance(frame, cudf.DataFrame):
+        if hasattr(cond, "__cuda_array_interface__"):
+            cond = cudf.DataFrame(
+                cond, columns=frame._data.names, index=frame.index
+            )
+        elif not isinstance(cond, cudf.DataFrame):
+            cond = frame.from_pandas(pd.DataFrame(cond))
+
+        common_cols = set(frame._data.names).intersection(
+            set(cond._data.names)
+        )
+        if len(common_cols) > 0:
+            # If `frame` and `cond` are having unequal index,
+            # then re-index `cond`.
+            if not frame.index.equals(cond.index):
+                cond = cond.reindex(frame.index)
+        else:
+            if cond.shape != frame.shape:
+                raise ValueError(
+                    """Array conditional must be same shape as self"""
+                )
+            # Setting `frame` column names to `cond`
+            # as `cond` has no column names.
+            cond.columns = frame.columns
+
+        (source_df, others,) = _normalize_columns_and_scalars_type(
+            frame, other
+        )
+        if isinstance(other, cudf.core.frame.Frame):
+            others = others._data.columns
+
+        out_df = cudf.DataFrame(index=frame.index)
+        if len(frame._columns) != len(others):
+            raise ValueError(
+                """Replacement list length or number of dataframe columns
+                should be equal to Number of columns of dataframe"""
+            )
+        for i, column_name in enumerate(frame._data.names):
+            input_col = source_df._data[column_name]
+            other_column = others[i]
+            if column_name in cond._data:
+                if isinstance(input_col, cudf.core.column.CategoricalColumn):
+                    if cudf.utils.dtypes.is_scalar(other_column):
+                        try:
+                            other_column = input_col._encode(other_column)
+                        except ValueError:
+                            # When other is not present in categories,
+                            # fill with Null.
+                            other_column = None
+                        other_column = cudf.Scalar(
+                            other_column, dtype=input_col.codes.dtype
+                        )
+                    elif hasattr(other_column, "codes"):
+                        other_column = other_column.codes
+                    input_col = input_col.codes
+
+                result = cudf._lib.copying.copy_if_else(
+                    input_col, other_column, cond._data[column_name]
+                )
+
+                if isinstance(
+                    frame._data[column_name],
+                    cudf.core.column.CategoricalColumn,
+                ):
+                    result = cudf.core.column.build_categorical_column(
+                        categories=frame._data[column_name].categories,
+                        codes=cudf.core.column.as_column(
+                            result.base_data, dtype=result.dtype
+                        ),
+                        mask=result.base_mask,
+                        size=result.size,
+                        offset=result.offset,
+                        ordered=frame._data[column_name].ordered,
+                    )
+            else:
+                from cudf._lib.null_mask import MaskState, create_null_mask
+
+                out_mask = create_null_mask(
+                    len(input_col), state=MaskState.ALL_NULL
+                )
+                result = input_col.set_mask(out_mask)
+            out_df[column_name] = frame[column_name].__class__(result)
+
+        return frame._mimic_inplace(out_df, inplace=inplace)
+
+    else:
+        if isinstance(other, cudf.DataFrame):
+            raise NotImplementedError(
+                "cannot align with a higher dimensional Frame"
+            )
+        input_col = frame._data[frame.name]
+        cond = cudf.core.column.as_column(cond)
+        if len(cond) != len(frame):
+            raise ValueError(
+                """Array conditional must be same shape as self"""
+            )
+        if cond.all():
+            result = input_col
+        else:
+            (input_col, other,) = _normalize_columns_and_scalars_type(
+                frame, other, inplace
+            )
+
+            if isinstance(input_col, cudf.core.column.CategoricalColumn):
+                if cudf.utils.dtypes.is_scalar(other):
+                    try:
+                        other = input_col._encode(other)
+                    except ValueError:
+                        # When other is not present in categories,
+                        # fill with Null.
+                        other = None
+                    other = cudf.Scalar(other, dtype=input_col.codes.dtype)
+                elif hasattr(other, "codes"):
+                    other = other.codes
+
+                input_col = input_col.codes
+
+            result = cudf._lib.copying.copy_if_else(input_col, other, cond)
+
+            if cudf.utils.dtypes.is_categorical_dtype(frame.dtype):
+                result = cudf.core.column.build_categorical_column(
+                    categories=frame._data[frame.name].categories,
+                    codes=cudf.core.column.as_column(
+                        result.base_data, dtype=result.dtype
+                    ),
+                    mask=result.base_mask,
+                    size=result.size,
+                    offset=result.offset,
+                    ordered=frame._data[frame.name].ordered,
+                )
+
+        if isinstance(frame, cudf.Index):
+            from cudf.core.index import as_index
+
+            result = as_index(result, name=frame.name)
+        else:
+            result = frame._copy_construct(data=result)
+
+        return frame._mimic_inplace(result, inplace=inplace)

From 8dc1b9e3a008cbf217efd6dce6dec21aeae05154 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Thu, 25 Mar 2021 11:30:42 -0500
Subject: [PATCH 05/23] Apply suggestions from code review

Co-authored-by: Keith Kraus <kkraus@nvidia.com>
---
 python/cudf/cudf/core/internals/where.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/internals/where.py b/python/cudf/cudf/core/internals/where.py
index 3fc2f87f299..4e1d404c874 100644
--- a/python/cudf/cudf/core/internals/where.py
+++ b/python/cudf/cudf/core/internals/where.py
@@ -8,7 +8,7 @@
 
 def _normalize_scalars(col, other):
     """
-    Try to normalizes scalar values as per col dtype
+    Try to normalize scalar values as per col dtype
     """
     if (
         other is not None

From 7b4079edff3fc594e66e2ca98dd05730d3c97ab8 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Thu, 25 Mar 2021 10:28:49 -0700
Subject: [PATCH 06/23] use _column_names instead of columns

---
 python/cudf/cudf/core/dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 4dd266e2fc9..608a04b84cf 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1659,7 +1659,7 @@ def update(
             other = other.reindex(self.index, axis=0)
 
         source_df = self.copy(deep=False)
-        for col in source_df.columns:
+        for col in source_df._column_names:
             this = source_df[col]
             that = other[col]
 

From aae8f0b453617d6be2187ce496baf216e1f5cc92 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Thu, 25 Mar 2021 10:34:56 -0700
Subject: [PATCH 07/23] add copyright header to internals/where.py

---
 python/cudf/cudf/core/internals/where.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/cudf/cudf/core/internals/where.py b/python/cudf/cudf/core/internals/where.py
index 4e1d404c874..a005d7b7625 100644
--- a/python/cudf/cudf/core/internals/where.py
+++ b/python/cudf/cudf/core/internals/where.py
@@ -1,3 +1,5 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+
 import warnings
 
 import numpy as np

From 467957c119bcc0e2b1fc2f569c2eefdedf937d7a Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Thu, 25 Mar 2021 11:48:45 -0700
Subject: [PATCH 08/23] address reviews: tighten type checks and drop local imports in where.py

---
 python/cudf/cudf/core/internals/where.py | 29 ++++++++++--------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/python/cudf/cudf/core/internals/where.py b/python/cudf/cudf/core/internals/where.py
index a005d7b7625..9ca8fb0af84 100644
--- a/python/cudf/cudf/core/internals/where.py
+++ b/python/cudf/cudf/core/internals/where.py
@@ -12,10 +12,9 @@ def _normalize_scalars(col, other):
     """
     Try to normalize scalar values as per col dtype
     """
-    if (
-        other is not None
-        and (isinstance(other, float) and not np.isnan(other))
-    ) and (col.dtype.type(other) != other):
+    if (isinstance(other, float) and not np.isnan(other)) and (
+        col.dtype.type(other) != other
+    ):
         raise TypeError(
             f"Cannot safely cast non-equivalent "
             f"{type(other).__name__} to {col.dtype.name}"
@@ -23,10 +22,7 @@ def _normalize_scalars(col, other):
 
     return (
         col.dtype.type(other)
-        if (
-            other is not None
-            and (isinstance(other, float) and not np.isnan(other))
-        )
+        if (isinstance(other, float) and not np.isnan(other))
         else other
     )
 
@@ -288,7 +284,9 @@ def where(frame, cond, other=None, inplace=False):
                         other_column = cudf.Scalar(
                             other_column, dtype=input_col.codes.dtype
                         )
-                    elif hasattr(other_column, "codes"):
+                    elif isinstance(
+                        other_column, cudf.core.column.CategoricalColumn
+                    ):
                         other_column = other_column.codes
                     input_col = input_col.codes
 
@@ -311,10 +309,9 @@ def where(frame, cond, other=None, inplace=False):
                         ordered=frame._data[column_name].ordered,
                     )
             else:
-                from cudf._lib.null_mask import MaskState, create_null_mask
-
-                out_mask = create_null_mask(
-                    len(input_col), state=MaskState.ALL_NULL
+                out_mask = cudf._lib.null_mask.create_null_mask(
+                    len(input_col),
+                    state=cudf._lib.null_mask.MaskState.ALL_NULL,
                 )
                 result = input_col.set_mask(out_mask)
             out_df[column_name] = frame[column_name].__class__(result)
@@ -348,7 +345,7 @@ def where(frame, cond, other=None, inplace=False):
                         # fill with Null.
                         other = None
                     other = cudf.Scalar(other, dtype=input_col.codes.dtype)
-                elif hasattr(other, "codes"):
+                elif isinstance(other, cudf.core.column.CategoricalColumn):
                     other = other.codes
 
                 input_col = input_col.codes
@@ -368,9 +365,7 @@ def where(frame, cond, other=None, inplace=False):
                 )
 
         if isinstance(frame, cudf.Index):
-            from cudf.core.index import as_index
-
-            result = as_index(result, name=frame.name)
+            result = cudf.Index(result, name=frame.name)
         else:
             result = frame._copy_construct(data=result)
 

From 679ffb4f0d4de5cbd4ca735044ab79bce2fbaccf Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Thu, 25 Mar 2021 11:56:03 -0700
Subject: [PATCH 09/23] address reviews: rename core.internals to core._internals

---
 python/cudf/cudf/core/__init__.py                           | 2 +-
 python/cudf/cudf/core/{internals => _internals}/__init__.py | 0
 python/cudf/cudf/core/{internals => _internals}/where.py    | 0
 python/cudf/cudf/core/frame.py                              | 2 +-
 4 files changed, 2 insertions(+), 2 deletions(-)
 rename python/cudf/cudf/core/{internals => _internals}/__init__.py (100%)
 rename python/cudf/cudf/core/{internals => _internals}/where.py (100%)

diff --git a/python/cudf/cudf/core/__init__.py b/python/cudf/cudf/core/__init__.py
index a71f15dd95b..0ca9e4f6124 100644
--- a/python/cudf/cudf/core/__init__.py
+++ b/python/cudf/cudf/core/__init__.py
@@ -1,6 +1,6 @@
 # Copyright (c) 2018-2020, NVIDIA CORPORATION.
 
-from cudf.core import buffer, column, column_accessor, common, internals
+from cudf.core import _internals, buffer, column, column_accessor, common
 from cudf.core.buffer import Buffer
 from cudf.core.dataframe import DataFrame, from_pandas, merge
 from cudf.core.index import (
diff --git a/python/cudf/cudf/core/internals/__init__.py b/python/cudf/cudf/core/_internals/__init__.py
similarity index 100%
rename from python/cudf/cudf/core/internals/__init__.py
rename to python/cudf/cudf/core/_internals/__init__.py
diff --git a/python/cudf/cudf/core/internals/where.py b/python/cudf/cudf/core/_internals/where.py
similarity index 100%
rename from python/cudf/cudf/core/internals/where.py
rename to python/cudf/cudf/core/_internals/where.py
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 602960bba97..5e541eed17e 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -19,8 +19,8 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf._typing import ColumnLike, DataFrameOrSeries
+from cudf.core._internals import where as where_internals
 from cudf.core.column import as_column, build_categorical_column, column_empty
-from cudf.core.internals import where as where_internals
 from cudf.utils.dtypes import (
     is_categorical_dtype,
     is_column_like,

From 85d79283e17b48dd05de467706e486fb80402501 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Mon, 29 Mar 2021 15:43:24 -0700
Subject: [PATCH 10/23] return a cudf scalar

---
 python/cudf/cudf/core/_internals/where.py | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index 9ca8fb0af84..bfe6200e474 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -20,11 +20,7 @@ def _normalize_scalars(col, other):
             f"{type(other).__name__} to {col.dtype.name}"
         )
 
-    return (
-        col.dtype.type(other)
-        if (isinstance(other, float) and not np.isnan(other))
-        else other
-    )
+    return cudf.Scalar(other, dtype=col.dtype if other is None else None)
 
 
 def _check_and_cast_columns(source_col, other_col, inplace):
@@ -62,10 +58,7 @@ def _check_and_cast_columns_with_scalar(source_col, other_scalar, inplace):
     if cudf.utils.dtypes.is_categorical_dtype(source_col.dtype):
         return source_col, other_scalar
 
-    device_scalar = cudf.Scalar(
-        _normalize_scalars(source_col, other_scalar),
-        dtype=source_col.dtype if other_scalar is None else None,
-    )
+    device_scalar = _normalize_scalars(source_col, other_scalar)
 
     if other_scalar is None:
         return source_col, device_scalar
@@ -121,8 +114,8 @@ def _normalize_columns_and_scalars_type(frame, other, inplace=False):
     A dataframe/series/list/scalar form of normalized other
     """
     if isinstance(frame, cudf.DataFrame) and isinstance(other, cudf.DataFrame):
-        source_df = frame.copy()
-        other_df = other.copy()
+        source_df = frame.copy(deep=False)
+        other_df = other.copy(deep=False)
         for self_col in source_df._data.names:
             source_col, other_col = _check_and_cast_columns(
                 source_col=source_df._data[self_col],
@@ -157,7 +150,7 @@ def _normalize_columns_and_scalars_type(frame, other, inplace=False):
             if cudf.utils.dtypes.is_scalar(other):
                 other = [other for i in range(len(frame._data.names))]
 
-            source_df = frame.copy()
+            source_df = frame.copy(deep=False)
             others = []
             for col_name, other_sclr in zip(frame._data.names, other):
 

From 2764a855dfd69baaa96afaa721632a11039193eb Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Mon, 29 Mar 2021 17:22:18 -0700
Subject: [PATCH 11/23] add cudf can_cast utility

---
 python/cudf/cudf/core/_internals/where.py | 10 +++--
 python/cudf/cudf/utils/dtypes.py          | 48 +++++++++++++++++++++++
 2 files changed, 54 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index bfe6200e474..0132c168176 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -71,16 +71,18 @@ def _check_and_cast_columns_with_scalar(source_col, other_scalar, inplace):
             "to same dtypes."
         )
     if inplace:
-        if not np.can_cast(device_scalar, source_col.dtype):
+        if not cudf.utils.dtypes.can_cast(
+            device_scalar.dtype, source_col.dtype
+        ):
             warnings.warn(
                 f"Type-casting from {device_scalar.dtype} "
                 f"to {source_col.dtype}, there could be potential data loss"
             )
         return source_col, device_scalar.astype(source_col.dtype)
     else:
-        if pd.api.types.is_numeric_dtype(source_col.dtype) and np.can_cast(
-            other_scalar, source_col.dtype
-        ):
+        if pd.api.types.is_numeric_dtype(
+            source_col.dtype
+        ) and cudf.utils.dtypes.can_cast(other_scalar, source_col.dtype):
             common_dtype = source_col.dtype
         else:
             common_dtype = cudf.utils.dtypes.find_common_type(
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index a1eb2212ac2..afebfb6a00d 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -637,6 +637,11 @@ def find_common_type(dtypes):
     # Aggregate same types
     dtypes = set(dtypes)
 
+    if any(is_decimal_dtype(dtype) for dtype in dtypes):
+        raise NotImplementedError(
+            "DecimalDtype is not yet supported in find_common_type"
+        )
+
     # Corner case 1:
     # Resort to np.result_type to handle "M" and "m" types separately
     dt_dtypes = set(filter(lambda t: is_datetime_dtype(t), dtypes))
@@ -659,6 +664,49 @@ def find_common_type(dtypes):
         return common_dtype
 
 
+def can_cast(from_dtype, to_dtype):
+    """
+    Utility function to determine if we can cast
+    from `from_dtype` to `to_dtype`. This function primarily calls
+    `np.can_cast` but with some special handling around
+    cudf specific dtypes.
+
+    Every cudf-specific branch returns a bool; a target dtype that a
+    branch does not recognise yields ``False``.
+    """
+    cudf_dtypes = cudf.core.dtypes
+    # NumPy kinds (int, float, uint, unicode, object) that this helper
+    # treats as interchangeable with Decimal64Dtype.
+    decimal_kinds = {"i", "f", "u", "U", "O"}
+
+    if isinstance(from_dtype, cudf_dtypes.Decimal64Dtype):
+        if isinstance(to_dtype, cudf_dtypes.Decimal64Dtype):
+            return True
+        # Any non-NumPy target (e.g. categorical/list) is not castable;
+        # return False explicitly rather than falling through to None.
+        return (
+            isinstance(to_dtype, np.dtype) and to_dtype.kind in decimal_kinds
+        )
+    elif isinstance(from_dtype, np.dtype):
+        if isinstance(to_dtype, np.dtype):
+            return np.can_cast(from_dtype, to_dtype)
+        elif isinstance(to_dtype, cudf_dtypes.Decimal64Dtype):
+            return from_dtype.kind in decimal_kinds
+        # CategoricalDtype is looked up in `cudf.core.dtypes`, like every
+        # other cudf dtype checked in this function.
+        return isinstance(to_dtype, cudf_dtypes.CategoricalDtype)
+    elif isinstance(from_dtype, cudf_dtypes.ListDtype):
+        return isinstance(to_dtype, cudf_dtypes.ListDtype)
+    elif isinstance(from_dtype, cudf_dtypes.CategoricalDtype):
+        if isinstance(to_dtype, cudf_dtypes.CategoricalDtype):
+            return True
+        elif isinstance(to_dtype, np.dtype):
+            return np.can_cast(from_dtype._categories.dtype, to_dtype)
+        return False
+    else:
+        return np.can_cast(from_dtype, to_dtype)
+
+
 # Type dispatch loops similar to what are found in `np.add.types`
 # In NumPy, whether or not an op can be performed between two
 # operands is determined by checking to see if NumPy has a c/c++

From 9362195c917bc2dff9fecd2fef131ed750ad86b2 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Mon, 29 Mar 2021 17:25:40 -0700
Subject: [PATCH 12/23] add type annotations

---
 python/cudf/cudf/core/_internals/where.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index 0132c168176..dbda318aca1 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -6,9 +6,12 @@
 import pandas as pd
 
 import cudf
+from cudf._typing import ScalarLike
 
 
-def _normalize_scalars(col, other):
+def _normalize_scalars(
+    col: cudf.core.column.ColumnBase, other: ScalarLike
+) -> cudf.Scalar:
     """
     Try to normalize scalar values as per col dtype
     """

From f7ca26896dd67f270ca8c9e2d6ffb1a3a09d0169 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Mon, 29 Mar 2021 18:37:07 -0700
Subject: [PATCH 13/23] add typing for _check_and_cast_columns

---
 python/cudf/cudf/core/_internals/where.py | 17 ++++++++++++++---
 python/cudf/cudf/core/column/column.py    |  3 +++
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index dbda318aca1..1247a371117 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2021, NVIDIA CORPORATION.
 
 import warnings
+from typing import Any, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -26,7 +27,11 @@ def _normalize_scalars(
     return cudf.Scalar(other, dtype=col.dtype if other is None else None)
 
 
-def _check_and_cast_columns(source_col, other_col, inplace):
+def _check_and_cast_columns(
+    source_col: cudf.core.column.ColumnBase,
+    other_col: cudf.core.column.ColumnBase,
+    inplace: bool,
+) -> Tuple[cudf.core.column.ColumnBase, cudf.core.column.ColumnBase]:
     """
     Returns type-casted columns of `source_col` & `other_col`
     based on `inplace` parameter.
@@ -53,7 +58,11 @@ def _check_and_cast_columns(source_col, other_col, inplace):
         return source_col.astype(common_dtype), other_col.astype(common_dtype)
 
 
-def _check_and_cast_columns_with_scalar(source_col, other_scalar, inplace):
+def _check_and_cast_columns_with_scalar(
+    source_col: cudf.core.column.ColumnBase,
+    other_scalar: ScalarLike,
+    inplace: bool,
+) -> Tuple[cudf.core.column.ColumnBase, ScalarLike]:
     """
     Returns type-casted column `source_col` & scalar `other_scalar`
     based on `inplace` parameter.
@@ -96,7 +105,9 @@ def _check_and_cast_columns_with_scalar(source_col, other_scalar, inplace):
         return source_col, cudf.Scalar(other_scalar, dtype=common_dtype)
 
 
-def _normalize_columns_and_scalars_type(frame, other, inplace=False):
+def _normalize_columns_and_scalars_type(
+    frame: cudf.core.frame.Frame, other: Any, inplace: bool = False
+) -> Tuple[Union[cudf.core.frame.Frame, cudf.core.column.ColumnBase], Any]:
     """
     Try to normalize the other's dtypes as per frame.
 
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index dd06d97d105..7a5253a6b5d 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1309,6 +1309,9 @@ def corr(self, other: ColumnBase):
             f"cannot perform corr with types {self.dtype}, {other.dtype}"
         )
 
+    def can_cast_safely(self, to_dtype: Dtype) -> bool:
+        raise NotImplementedError()
+
     def nans_to_nulls(self: T) -> T:
         if self.dtype.kind == "f":
             newmask = libcudf.transform.nans_to_nulls(self)

From 21c2ac63c6a70596b5d1aacc2d78543eb91cb711 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Mon, 29 Mar 2021 19:04:57 -0700
Subject: [PATCH 14/23] add typing in where

---
 python/cudf/cudf/core/_internals/where.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index 1247a371117..46b5874b528 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -107,7 +107,10 @@ def _check_and_cast_columns_with_scalar(
 
 def _normalize_columns_and_scalars_type(
     frame: cudf.core.frame.Frame, other: Any, inplace: bool = False
-) -> Tuple[Union[cudf.core.frame.Frame, cudf.core.column.ColumnBase], Any]:
+) -> Tuple[
+    Union[cudf.core.frame.Frame, cudf.core.column.ColumnBase],
+    Union[cudf.core.frame.Frame, ScalarLike],
+]:
     """
     Try to normalize the other's dtypes as per frame.
 
@@ -188,7 +191,9 @@ def _normalize_columns_and_scalars_type(
             )
 
 
-def where(frame, cond, other=None, inplace=False):
+def where(
+    frame, cond, other=None, inplace=False,
+):
     """
     Replace values where the condition is False.
 

From d770d61d5cf2fc6387f9262302af0728d0857f49 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Mon, 29 Mar 2021 20:54:40 -0700
Subject: [PATCH 15/23] typing

---
 python/cudf/cudf/core/_internals/where.py | 92 +++++++++++++----------
 python/cudf/cudf/core/frame.py            | 20 ++---
 2 files changed, 56 insertions(+), 56 deletions(-)

diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index 46b5874b528..e47572b72db 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -1,18 +1,22 @@
 # Copyright (c) 2021, NVIDIA CORPORATION.
 
 import warnings
-from typing import Any, Tuple, Union
+from typing import Any, Optional, Tuple, Union, cast
 
 import numpy as np
 import pandas as pd
 
 import cudf
-from cudf._typing import ScalarLike
+from cudf._typing import ColumnLike, ScalarLike
+from cudf.core.column import ColumnBase
+from cudf.core.dataframe import DataFrame
+from cudf.core.frame import Frame
+from cudf.core.index import Index
+from cudf.core.scalar import Scalar
+from cudf.core.series import Series
 
 
-def _normalize_scalars(
-    col: cudf.core.column.ColumnBase, other: ScalarLike
-) -> cudf.Scalar:
+def _normalize_scalars(col: ColumnBase, other: ScalarLike) -> Scalar:
     """
     Try to normalize scalar values as per col dtype
     """
@@ -24,14 +28,12 @@ def _normalize_scalars(
             f"{type(other).__name__} to {col.dtype.name}"
         )
 
-    return cudf.Scalar(other, dtype=col.dtype if other is None else None)
+    return Scalar(other, dtype=col.dtype if other is None else None)
 
 
 def _check_and_cast_columns(
-    source_col: cudf.core.column.ColumnBase,
-    other_col: cudf.core.column.ColumnBase,
-    inplace: bool,
-) -> Tuple[cudf.core.column.ColumnBase, cudf.core.column.ColumnBase]:
+    source_col: ColumnBase, other_col: ColumnBase, inplace: bool,
+) -> Tuple[ColumnBase, ColumnBase]:
     """
     Returns type-casted columns of `source_col` & `other_col`
     based on `inplace` parameter.
@@ -59,10 +61,8 @@ def _check_and_cast_columns(
 
 
 def _check_and_cast_columns_with_scalar(
-    source_col: cudf.core.column.ColumnBase,
-    other_scalar: ScalarLike,
-    inplace: bool,
-) -> Tuple[cudf.core.column.ColumnBase, ScalarLike]:
+    source_col: ColumnBase, other_scalar: ScalarLike, inplace: bool,
+) -> Tuple[ColumnBase, ScalarLike]:
     """
     Returns type-casted column `source_col` & scalar `other_scalar`
     based on `inplace` parameter.
@@ -102,14 +102,13 @@ def _check_and_cast_columns_with_scalar(
             )
 
         source_col = source_col.astype(common_dtype)
-        return source_col, cudf.Scalar(other_scalar, dtype=common_dtype)
+        return source_col, Scalar(other_scalar, dtype=common_dtype)
 
 
 def _normalize_columns_and_scalars_type(
-    frame: cudf.core.frame.Frame, other: Any, inplace: bool = False
+    frame: Union[Series, Index, DataFrame], other: Any, inplace: bool = False,
 ) -> Tuple[
-    Union[cudf.core.frame.Frame, cudf.core.column.ColumnBase],
-    Union[cudf.core.frame.Frame, ScalarLike],
+    Union[Series, Index, DataFrame, ColumnLike], Any,
 ]:
     """
     Try to normalize the other's dtypes as per frame.
@@ -132,7 +131,7 @@ def _normalize_columns_and_scalars_type(
     --------
     A dataframe/series/list/scalar form of normalized other
     """
-    if isinstance(frame, cudf.DataFrame) and isinstance(other, cudf.DataFrame):
+    if isinstance(frame, DataFrame) and isinstance(other, DataFrame):
         source_df = frame.copy(deep=False)
         other_df = other.copy(deep=False)
         for self_col in source_df._data.names:
@@ -146,7 +145,7 @@ def _normalize_columns_and_scalars_type(
         return source_df, other_df
 
     elif isinstance(
-        frame, (cudf.Series, cudf.Index)
+        frame, (Series, Index)
     ) and not cudf.utils.dtypes.is_scalar(other):
         other = cudf.core.column.as_column(other)
         input_col = frame._data[frame.name]
@@ -155,9 +154,9 @@ def _normalize_columns_and_scalars_type(
         )
     else:
         # Handles scalar or list/array like scalars
-        if isinstance(
-            frame, (cudf.Series, cudf.Index)
-        ) and cudf.utils.dtypes.is_scalar(other):
+        if isinstance(frame, (Series, Index)) and cudf.utils.dtypes.is_scalar(
+            other
+        ):
             input_col = frame._data[frame.name]
             return _check_and_cast_columns_with_scalar(
                 source_col=frame._data[frame.name],
@@ -165,7 +164,7 @@ def _normalize_columns_and_scalars_type(
                 inplace=inplace,
             )
 
-        elif isinstance(frame, cudf.DataFrame):
+        elif isinstance(frame, DataFrame):
             if cudf.utils.dtypes.is_scalar(other):
                 other = [other for i in range(len(frame._data.names))]
 
@@ -192,8 +191,11 @@ def _normalize_columns_and_scalars_type(
 
 
 def where(
-    frame, cond, other=None, inplace=False,
-):
+    frame: Union[Series, Index, DataFrame],
+    cond: Any,
+    other: Any = None,
+    inplace: bool = False,
+) -> Optional[Union[Frame]]:
     """
     Replace values where the condition is False.
 
@@ -222,14 +224,14 @@ def where(
     Examples
     --------
     >>> import cudf
-    >>> df = cudf.DataFrame({"A":[1, 4, 5], "B":[3, 5, 8]})
+    >>> df = DataFrame({"A":[1, 4, 5], "B":[3, 5, 8]})
     >>> df.where(df % 2 == 0, [-1, -1])
         A  B
     0 -1 -1
     1  4 -1
     2 -1  8
 
-    >>> ser = cudf.Series([4, 3, 2, 1, 0])
+    >>> ser = Series([4, 3, 2, 1, 0])
     >>> ser.where(ser > 2, 10)
     0     4
     1     3
@@ -246,12 +248,12 @@ def where(
     dtype: int64
     """
 
-    if isinstance(frame, cudf.DataFrame):
+    if isinstance(frame, DataFrame):
         if hasattr(cond, "__cuda_array_interface__"):
-            cond = cudf.DataFrame(
+            cond = DataFrame(
                 cond, columns=frame._data.names, index=frame.index
             )
-        elif not isinstance(cond, cudf.DataFrame):
+        elif not isinstance(cond, DataFrame):
             cond = frame.from_pandas(pd.DataFrame(cond))
 
         common_cols = set(frame._data.names).intersection(
@@ -274,10 +276,10 @@ def where(
         (source_df, others,) = _normalize_columns_and_scalars_type(
             frame, other
         )
-        if isinstance(other, cudf.core.frame.Frame):
+        if isinstance(other, Frame):
             others = others._data.columns
 
-        out_df = cudf.DataFrame(index=frame.index)
+        out_df = DataFrame(index=frame.index)
         if len(frame._columns) != len(others):
             raise ValueError(
                 """Replacement list length or number of dataframe columns
@@ -295,7 +297,7 @@ def where(
                             # When other is not present in categories,
                             # fill with Null.
                             other_column = None
-                        other_column = cudf.Scalar(
+                        other_column = Scalar(
                             other_column, dtype=input_col.codes.dtype
                         )
                     elif isinstance(
@@ -333,7 +335,7 @@ def where(
         return frame._mimic_inplace(out_df, inplace=inplace)
 
     else:
-        if isinstance(other, cudf.DataFrame):
+        if isinstance(other, DataFrame):
             raise NotImplementedError(
                 "cannot align with a higher dimensional Frame"
             )
@@ -358,7 +360,7 @@ def where(
                         # When other is not present in categories,
                         # fill with Null.
                         other = None
-                    other = cudf.Scalar(other, dtype=input_col.codes.dtype)
+                    other = Scalar(other, dtype=input_col.codes.dtype)
                 elif isinstance(other, cudf.core.column.CategoricalColumn):
                     other = other.codes
 
@@ -366,20 +368,28 @@ def where(
 
             result = cudf._lib.copying.copy_if_else(input_col, other, cond)
 
-            if cudf.utils.dtypes.is_categorical_dtype(frame.dtype):
+            if isinstance(
+                frame._data[frame.name], cudf.core.column.CategoricalColumn
+            ):
                 result = cudf.core.column.build_categorical_column(
-                    categories=frame._data[frame.name].categories,
+                    categories=cast(
+                        cudf.core.column.CategoricalColumn,
+                        frame._data[frame.name],
+                    ).categories,
                     codes=cudf.core.column.as_column(
                         result.base_data, dtype=result.dtype
                     ),
                     mask=result.base_mask,
                     size=result.size,
                     offset=result.offset,
-                    ordered=frame._data[frame.name].ordered,
+                    ordered=cast(
+                        cudf.core.column.CategoricalColumn,
+                        frame._data[frame.name],
+                    ).ordered,
                 )
 
-        if isinstance(frame, cudf.Index):
-            result = cudf.Index(result, name=frame.name)
+        if isinstance(frame, Index):
+            result = Index(result, name=frame.name)
         else:
             result = frame._copy_construct(data=result)
 
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 5e541eed17e..a03502bebde 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -6,7 +6,7 @@
 import functools
 import warnings
 from collections import OrderedDict, abc as abc
-from typing import TYPE_CHECKING, Any, Dict, Tuple, TypeVar, Union, overload
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, TypeVar, Union
 
 import cupy
 import numpy as np
@@ -14,7 +14,6 @@
 import pyarrow as pa
 from nvtx import annotate
 from pandas.api.types import is_dict_like, is_dtype_equal
-from typing_extensions import Literal
 
 import cudf
 from cudf import _lib as libcudf
@@ -53,19 +52,9 @@ class Frame(libcudf.table.Table):
     def _from_table(cls, table: Frame):
         return cls(table._data, index=table._index)
 
-    @overload
-    def _mimic_inplace(self, result: Frame) -> Frame:
-        ...
-
-    @overload
-    def _mimic_inplace(self, result: Frame, inplace: Literal[True]):
-        ...
-
-    @overload
-    def _mimic_inplace(self, result: Frame, inplace: Literal[False]) -> Frame:
-        ...
-
-    def _mimic_inplace(self, result, inplace=False):
+    def _mimic_inplace(
+        self: T, result: Frame, inplace: bool = False
+    ) -> Optional[Frame]:
         if inplace:
             for col in self._data:
                 if col in result._data:
@@ -74,6 +63,7 @@ def _mimic_inplace(self, result, inplace=False):
                     )
             self._data = result._data
             self._index = result._index
+            return None
         else:
             return result
 

From db25a599bb5866b292f60a18f16312108ffc472b Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Mon, 29 Mar 2021 21:30:51 -0700
Subject: [PATCH 16/23] refactor

---
 python/cudf/cudf/core/_internals/__init__.py |  1 +
 python/cudf/cudf/core/_internals/where.py    | 11 +++++------
 python/cudf/cudf/core/frame.py               |  3 +--
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/core/_internals/__init__.py b/python/cudf/cudf/core/_internals/__init__.py
index e69de29bb2d..ed3e30fd65d 100644
--- a/python/cudf/cudf/core/_internals/__init__.py
+++ b/python/cudf/cudf/core/_internals/__init__.py
@@ -0,0 +1 @@
+from cudf.core._internals.where import where
diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index e47572b72db..2310e613901 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -12,11 +12,10 @@
 from cudf.core.dataframe import DataFrame
 from cudf.core.frame import Frame
 from cudf.core.index import Index
-from cudf.core.scalar import Scalar
 from cudf.core.series import Series
 
 
-def _normalize_scalars(col: ColumnBase, other: ScalarLike) -> Scalar:
+def _normalize_scalars(col: ColumnBase, other: ScalarLike) -> ScalarLike:
     """
     Try to normalize scalar values as per col dtype
     """
@@ -28,7 +27,7 @@ def _normalize_scalars(col: ColumnBase, other: ScalarLike) -> Scalar:
             f"{type(other).__name__} to {col.dtype.name}"
         )
 
-    return Scalar(other, dtype=col.dtype if other is None else None)
+    return cudf.Scalar(other, dtype=col.dtype if other is None else None)
 
 
 def _check_and_cast_columns(
@@ -102,7 +101,7 @@ def _check_and_cast_columns_with_scalar(
             )
 
         source_col = source_col.astype(common_dtype)
-        return source_col, Scalar(other_scalar, dtype=common_dtype)
+        return source_col, cudf.Scalar(other_scalar, dtype=common_dtype)
 
 
 def _normalize_columns_and_scalars_type(
@@ -297,7 +296,7 @@ def where(
                             # When other is not present in categories,
                             # fill with Null.
                             other_column = None
-                        other_column = Scalar(
+                        other_column = cudf.Scalar(
                             other_column, dtype=input_col.codes.dtype
                         )
                     elif isinstance(
@@ -360,7 +359,7 @@ def where(
                         # When other is not present in categories,
                         # fill with Null.
                         other = None
-                    other = Scalar(other, dtype=input_col.codes.dtype)
+                    other = cudf.Scalar(other, dtype=input_col.codes.dtype)
                 elif isinstance(other, cudf.core.column.CategoricalColumn):
                     other = other.codes
 
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index a03502bebde..eeac98c3332 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -18,7 +18,6 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf._typing import ColumnLike, DataFrameOrSeries
-from cudf.core._internals import where as where_internals
 from cudf.core.column import as_column, build_categorical_column, column_empty
 from cudf.utils.dtypes import (
     is_categorical_dtype,
@@ -836,7 +835,7 @@ def where(self, cond, other=None, inplace=False):
         dtype: int64
         """
 
-        return where_internals.where(
+        return cudf.core._internals.where(
             frame=self, cond=cond, other=other, inplace=inplace
         )
 

From e5b140a918670670d9a24e69d6130723c0bcc76f Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Tue, 30 Mar 2021 09:15:52 -0700
Subject: [PATCH 17/23] remove duplicated logic and squash into single method

---
 python/cudf/cudf/core/_internals/where.py | 103 +++++++++-------------
 python/cudf/cudf/core/column/column.py    |   3 -
 2 files changed, 44 insertions(+), 62 deletions(-)

diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index 2310e613901..32544618e40 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -30,78 +30,63 @@ def _normalize_scalars(col: ColumnBase, other: ScalarLike) -> ScalarLike:
     return cudf.Scalar(other, dtype=col.dtype if other is None else None)
 
 
-def _check_and_cast_columns(
-    source_col: ColumnBase, other_col: ColumnBase, inplace: bool,
-) -> Tuple[ColumnBase, ColumnBase]:
-    """
-    Returns type-casted columns of `source_col` & `other_col`
-    based on `inplace` parameter.
-    """
-    if cudf.utils.dtypes.is_categorical_dtype(source_col.dtype):
-        return source_col, other_col
-    elif cudf.utils.dtypes.is_mixed_with_object_dtype(source_col, other_col):
-        raise TypeError(
-            "cudf does not support mixed types, please type-cast "
-            "the column of dataframe/series and other "
-            "to same dtypes."
-        )
-    if inplace:
-        if not source_col.can_cast_safely(other_col.dtype):
-            warnings.warn(
-                f"Type-casting from {other_col.dtype} "
-                f"to {source_col.dtype}, there could be potential data loss"
-            )
-        return source_col, other_col.astype(source_col.dtype)
-    else:
-        common_dtype = cudf.utils.dtypes.find_common_type(
-            [source_col.dtype, other_col.dtype]
-        )
-        return source_col.astype(common_dtype), other_col.astype(common_dtype)
-
-
-def _check_and_cast_columns_with_scalar(
-    source_col: ColumnBase, other_scalar: ScalarLike, inplace: bool,
-) -> Tuple[ColumnBase, ScalarLike]:
+def _check_and_cast_columns_with_other(
+    source_col: ColumnBase,
+    other: Union[ScalarLike, ColumnBase],
+    inplace: bool,
+) -> Tuple[ColumnBase, Union[ScalarLike, ColumnBase]]:
     """
     Returns type-casted column `source_col` & scalar `other_scalar`
     based on `inplace` parameter.
     """
     if cudf.utils.dtypes.is_categorical_dtype(source_col.dtype):
-        return source_col, other_scalar
+        return source_col, other
 
-    device_scalar = _normalize_scalars(source_col, other_scalar)
+    if cudf.utils.dtypes.is_scalar(other):
+        device_obj = _normalize_scalars(source_col, other)
+    else:
+        device_obj = other
 
-    if other_scalar is None:
-        return source_col, device_scalar
-    elif cudf.utils.dtypes.is_mixed_with_object_dtype(
-        device_scalar, source_col
-    ):
+    if other is None:
+        return source_col, device_obj
+    elif cudf.utils.dtypes.is_mixed_with_object_dtype(device_obj, source_col):
         raise TypeError(
             "cudf does not support mixed types, please type-cast "
             "the column of dataframe/series and other "
             "to same dtypes."
         )
     if inplace:
-        if not cudf.utils.dtypes.can_cast(
-            device_scalar.dtype, source_col.dtype
-        ):
+        if not cudf.utils.dtypes.can_cast(device_obj.dtype, source_col.dtype):
             warnings.warn(
-                f"Type-casting from {device_scalar.dtype} "
+                f"Type-casting from {device_obj.dtype} "
                 f"to {source_col.dtype}, there could be potential data loss"
             )
-        return source_col, device_scalar.astype(source_col.dtype)
+        return source_col, device_obj.astype(source_col.dtype)
     else:
-        if pd.api.types.is_numeric_dtype(
-            source_col.dtype
-        ) and cudf.utils.dtypes.can_cast(other_scalar, source_col.dtype):
+        if (
+            cudf.utils.dtypes.is_scalar(other)
+            and pd.api.types.is_numeric_dtype(source_col.dtype)
+            and cudf.utils.dtypes.can_cast(other, source_col.dtype)
+        ):
             common_dtype = source_col.dtype
+            return (
+                source_col.astype(common_dtype),
+                cudf.Scalar(other, dtype=common_dtype),
+            )
         else:
             common_dtype = cudf.utils.dtypes.find_common_type(
-                [source_col.dtype, np.min_scalar_type(other_scalar)]
+                [
+                    source_col.dtype,
+                    np.min_scalar_type(other)
+                    if cudf.utils.dtypes.is_scalar(other)
+                    else other.dtype,
+                ]
             )
-
-        source_col = source_col.astype(common_dtype)
-        return source_col, cudf.Scalar(other_scalar, dtype=common_dtype)
+            if cudf.utils.dtypes.is_scalar(device_obj):
+                device_obj = cudf.Scalar(other, dtype=common_dtype)
+            else:
+                device_obj = device_obj.astype(common_dtype)
+            return source_col.astype(common_dtype), device_obj
 
 
 def _normalize_columns_and_scalars_type(
@@ -134,9 +119,9 @@ def _normalize_columns_and_scalars_type(
         source_df = frame.copy(deep=False)
         other_df = other.copy(deep=False)
         for self_col in source_df._data.names:
-            source_col, other_col = _check_and_cast_columns(
+            source_col, other_col = _check_and_cast_columns_with_other(
                 source_col=source_df._data[self_col],
-                other_col=other_df._data[self_col],
+                other=other_df._data[self_col],
                 inplace=inplace,
             )
             source_df._data[self_col] = source_col
@@ -148,8 +133,8 @@ def _normalize_columns_and_scalars_type(
     ) and not cudf.utils.dtypes.is_scalar(other):
         other = cudf.core.column.as_column(other)
         input_col = frame._data[frame.name]
-        return _check_and_cast_columns(
-            source_col=input_col, other_col=other, inplace=inplace
+        return _check_and_cast_columns_with_other(
+            source_col=input_col, other=other, inplace=inplace
         )
     else:
         # Handles scalar or list/array like scalars
@@ -157,9 +142,9 @@ def _normalize_columns_and_scalars_type(
             other
         ):
             input_col = frame._data[frame.name]
-            return _check_and_cast_columns_with_scalar(
+            return _check_and_cast_columns_with_other(
                 source_col=frame._data[frame.name],
-                other_scalar=other,
+                other=other,
                 inplace=inplace,
             )
 
@@ -174,9 +159,9 @@ def _normalize_columns_and_scalars_type(
                 (
                     source_col,
                     other_scalar,
-                ) = _check_and_cast_columns_with_scalar(
+                ) = _check_and_cast_columns_with_other(
                     source_col=source_df._data[col_name],
-                    other_scalar=other_sclr,
+                    other=other_sclr,
                     inplace=inplace,
                 )
                 source_df._data[col_name] = source_col
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 10529c70d0f..e59b395ec0f 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1319,9 +1319,6 @@ def corr(self, other: ColumnBase):
             f"cannot perform corr with types {self.dtype}, {other.dtype}"
         )
 
-    def can_cast_safely(self, to_dtype: Dtype) -> bool:
-        raise NotImplementedError()
-
     def nans_to_nulls(self: T) -> T:
         if self.dtype.kind == "f":
             newmask = libcudf.transform.nans_to_nulls(self)

From 95d409b448cdd1f827f51fb5cf1de3889cc91e57 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Tue, 30 Mar 2021 09:29:39 -0700
Subject: [PATCH 18/23] use _column_names

---
 python/cudf/cudf/core/_internals/where.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index 32544618e40..94ad760ceab 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -118,7 +118,7 @@ def _normalize_columns_and_scalars_type(
     if isinstance(frame, DataFrame) and isinstance(other, DataFrame):
         source_df = frame.copy(deep=False)
         other_df = other.copy(deep=False)
-        for self_col in source_df._data.names:
+        for self_col in source_df._column_names:
             source_col, other_col = _check_and_cast_columns_with_other(
                 source_col=source_df._data[self_col],
                 other=other_df._data[self_col],
@@ -150,11 +150,11 @@ def _normalize_columns_and_scalars_type(
 
         elif isinstance(frame, DataFrame):
             if cudf.utils.dtypes.is_scalar(other):
-                other = [other for i in range(len(frame._data.names))]
+                other = [other for i in range(len(frame._column_names))]
 
             source_df = frame.copy(deep=False)
             others = []
-            for col_name, other_sclr in zip(frame._data.names, other):
+            for col_name, other_sclr in zip(frame._column_names, other):
 
                 (
                     source_col,
@@ -235,13 +235,13 @@ def where(
     if isinstance(frame, DataFrame):
         if hasattr(cond, "__cuda_array_interface__"):
             cond = DataFrame(
-                cond, columns=frame._data.names, index=frame.index
+                cond, columns=frame._column_names, index=frame.index
             )
         elif not isinstance(cond, DataFrame):
             cond = frame.from_pandas(pd.DataFrame(cond))
 
-        common_cols = set(frame._data.names).intersection(
-            set(cond._data.names)
+        common_cols = set(frame._column_names).intersection(
+            set(cond._column_names)
         )
         if len(common_cols) > 0:
             # If `frame` and `cond` are having unequal index,
@@ -269,7 +269,7 @@ def where(
                 """Replacement list length or number of dataframe columns
                 should be equal to Number of columns of dataframe"""
             )
-        for i, column_name in enumerate(frame._data.names):
+        for i, column_name in enumerate(frame._column_names):
             input_col = source_df._data[column_name]
             other_column = others[i]
             if column_name in cond._data:

From 349fff5c027b1c5c4c21bb3dabcdd30db683eb80 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Tue, 30 Mar 2021 09:46:40 -0700
Subject: [PATCH 19/23] handle different shape

---
 python/cudf/cudf/core/_internals/where.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index 94ad760ceab..584232f932c 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -210,7 +210,7 @@ def where(
     >>> import cudf
     >>> df = DataFrame({"A":[1, 4, 5], "B":[3, 5, 8]})
     >>> df.where(df % 2 == 0, [-1, -1])
-        A  B
+       A  B
     0 -1 -1
     1  4 -1
     2 -1  8
@@ -237,6 +237,11 @@ def where(
             cond = DataFrame(
                 cond, columns=frame._column_names, index=frame.index
             )
+        elif (
+            hasattr(cond, "__array_interface__")
+            and cond.__array_interface__["shape"] != frame.shape
+        ):
+            raise ValueError("conditional must be same shape as self")
         elif not isinstance(cond, DataFrame):
             cond = frame.from_pandas(pd.DataFrame(cond))
 

From 17a581e0354ed40456a7b8f3a0029f3f55e2467f Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Tue, 30 Mar 2021 10:20:18 -0700
Subject: [PATCH 20/23] address reviews

---
 python/cudf/cudf/_lib/copying.pyx            |  2 +-
 python/cudf/cudf/core/__init__.py            |  2 +-
 python/cudf/cudf/core/_internals/__init__.py |  2 +
 python/cudf/cudf/core/_internals/where.py    | 84 ++++++++++----------
 python/cudf/cudf/utils/dtypes.py             | 12 ++-
 5 files changed, 53 insertions(+), 49 deletions(-)

diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx
index 6150c6110b9..8f93866612e 100644
--- a/python/cudf/cudf/_lib/copying.pyx
+++ b/python/cudf/cudf/_lib/copying.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 
 import pandas as pd
 
diff --git a/python/cudf/cudf/core/__init__.py b/python/cudf/cudf/core/__init__.py
index 0ca9e4f6124..59173cc0247 100644
--- a/python/cudf/cudf/core/__init__.py
+++ b/python/cudf/cudf/core/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
 
 from cudf.core import _internals, buffer, column, column_accessor, common
 from cudf.core.buffer import Buffer
diff --git a/python/cudf/cudf/core/_internals/__init__.py b/python/cudf/cudf/core/_internals/__init__.py
index ed3e30fd65d..53d186def85 100644
--- a/python/cudf/cudf/core/_internals/__init__.py
+++ b/python/cudf/cudf/core/_internals/__init__.py
@@ -1 +1,3 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+
 from cudf.core._internals.where import where
diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index 584232f932c..566691d08be 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -56,7 +56,7 @@ def _check_and_cast_columns_with_other(
             "to same dtypes."
         )
     if inplace:
-        if not cudf.utils.dtypes.can_cast(device_obj.dtype, source_col.dtype):
+        if not cudf.utils.dtypes._can_cast(device_obj.dtype, source_col.dtype):
             warnings.warn(
                 f"Type-casting from {device_obj.dtype} "
                 f"to {source_col.dtype}, there could be potential data loss"
@@ -66,7 +66,7 @@ def _check_and_cast_columns_with_other(
         if (
             cudf.utils.dtypes.is_scalar(other)
             and pd.api.types.is_numeric_dtype(source_col.dtype)
-            and cudf.utils.dtypes.can_cast(other, source_col.dtype)
+            and cudf.utils.dtypes._can_cast(other, source_col.dtype)
         ):
             common_dtype = source_col.dtype
             return (
@@ -334,48 +334,46 @@ def where(
             raise ValueError(
                 """Array conditional must be same shape as self"""
             )
-        if cond.all():
-            result = input_col
-        else:
-            (input_col, other,) = _normalize_columns_and_scalars_type(
-                frame, other, inplace
-            )
 
-            if isinstance(input_col, cudf.core.column.CategoricalColumn):
-                if cudf.utils.dtypes.is_scalar(other):
-                    try:
-                        other = input_col._encode(other)
-                    except ValueError:
-                        # When other is not present in categories,
-                        # fill with Null.
-                        other = None
-                    other = cudf.Scalar(other, dtype=input_col.codes.dtype)
-                elif isinstance(other, cudf.core.column.CategoricalColumn):
-                    other = other.codes
-
-                input_col = input_col.codes
-
-            result = cudf._lib.copying.copy_if_else(input_col, other, cond)
-
-            if isinstance(
-                frame._data[frame.name], cudf.core.column.CategoricalColumn
-            ):
-                result = cudf.core.column.build_categorical_column(
-                    categories=cast(
-                        cudf.core.column.CategoricalColumn,
-                        frame._data[frame.name],
-                    ).categories,
-                    codes=cudf.core.column.as_column(
-                        result.base_data, dtype=result.dtype
-                    ),
-                    mask=result.base_mask,
-                    size=result.size,
-                    offset=result.offset,
-                    ordered=cast(
-                        cudf.core.column.CategoricalColumn,
-                        frame._data[frame.name],
-                    ).ordered,
-                )
+        (input_col, other,) = _normalize_columns_and_scalars_type(
+            frame, other, inplace
+        )
+
+        if isinstance(input_col, cudf.core.column.CategoricalColumn):
+            if cudf.utils.dtypes.is_scalar(other):
+                try:
+                    other = input_col._encode(other)
+                except ValueError:
+                    # When other is not present in categories,
+                    # fill with Null.
+                    other = None
+                other = cudf.Scalar(other, dtype=input_col.codes.dtype)
+            elif isinstance(other, cudf.core.column.CategoricalColumn):
+                other = other.codes
+
+            input_col = input_col.codes
+
+        result = cudf._lib.copying.copy_if_else(input_col, other, cond)
+
+        if isinstance(
+            frame._data[frame.name], cudf.core.column.CategoricalColumn
+        ):
+            result = cudf.core.column.build_categorical_column(
+                categories=cast(
+                    cudf.core.column.CategoricalColumn,
+                    frame._data[frame.name],
+                ).categories,
+                codes=cudf.core.column.as_column(
+                    result.base_data, dtype=result.dtype
+                ),
+                mask=result.base_mask,
+                size=result.size,
+                offset=result.offset,
+                ordered=cast(
+                    cudf.core.column.CategoricalColumn,
+                    frame._data[frame.name],
+                ).ordered,
+            )
 
         if isinstance(frame, Index):
             result = Index(result, name=frame.name)
diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index afebfb6a00d..e501f202754 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -664,13 +664,15 @@ def find_common_type(dtypes):
         return common_dtype
 
 
-def can_cast(from_dtype, to_dtype):
+def _can_cast(from_dtype, to_dtype):
     """
     Utility function to determine if we can cast
     from `from_dtype` to `to_dtype`. This function primarily calls
     `np.can_cast` but with some special handling around
     cudf specific dtypes.
     """
+    # TODO : Add precision & scale checking for
+    # decimal types in future
     if isinstance(from_dtype, cudf.core.dtypes.Decimal64Dtype):
         if isinstance(to_dtype, cudf.core.dtypes.Decimal64Dtype):
             return True
@@ -680,7 +682,7 @@ def can_cast(from_dtype, to_dtype):
             else:
                 return False
     elif isinstance(from_dtype, np.dtype):
-        if isinstance(to_dtype, np.dtype):
+        if isinstance(to_dtype, (np.dtype, type)):
             return np.can_cast(from_dtype, to_dtype)
         elif isinstance(to_dtype, cudf.core.dtypes.Decimal64Dtype):
             if from_dtype.kind in {"i", "f", "u", "U", "O"}:
@@ -692,14 +694,16 @@ def can_cast(from_dtype, to_dtype):
         else:
             return False
     elif isinstance(from_dtype, cudf.core.dtypes.ListDtype):
+        # TODO: Add level based checks too once casting of
+        # list columns is supported
         if isinstance(to_dtype, cudf.core.dtypes.ListDtype):
-            return True
+            return np.can_cast(from_dtype.leaf_type, to_dtype.leaf_type)
         else:
             return False
     elif isinstance(from_dtype, cudf.core.dtypes.CategoricalDtype):
         if isinstance(to_dtype, cudf.core.dtypes.CategoricalDtype):
             return True
-        elif isinstance(to_dtype, np.dtype):
+        elif isinstance(to_dtype, (np.dtype, type)):
             return np.can_cast(from_dtype._categories.dtype, to_dtype)
         else:
             return False

From 16f973838b8550f2383b54190144f4f99b3587e6 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Tue, 30 Mar 2021 12:37:41 -0700
Subject: [PATCH 21/23] use cudf utility for is_numerical_dtype

---
 python/cudf/cudf/core/_internals/where.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index 566691d08be..1fdc907875e 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -65,7 +65,7 @@ def _check_and_cast_columns_with_other(
     else:
         if (
             cudf.utils.dtypes.is_scalar(other)
-            and pd.api.types.is_numeric_dtype(source_col.dtype)
+            and cudf.utils.dtypes.is_numerical_dtype(source_col.dtype)
             and cudf.utils.dtypes._can_cast(other, source_col.dtype)
         ):
             common_dtype = source_col.dtype

From 4268d65f19e950c7bea700a6a38f4884b771f198 Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 31 Mar 2021 09:37:35 -0700
Subject: [PATCH 22/23] handle generic types

---
 python/cudf/cudf/utils/dtypes.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index e501f202754..2b92b4d1f10 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -681,7 +681,8 @@ def _can_cast(from_dtype, to_dtype):
                 return True
             else:
                 return False
-    elif isinstance(from_dtype, np.dtype):
+    elif isinstance(from_dtype, (np.dtype, type)):
+        from_dtype = np.dtype(from_dtype)
         if isinstance(to_dtype, (np.dtype, type)):
             return np.can_cast(from_dtype, to_dtype)
         elif isinstance(to_dtype, cudf.core.dtypes.Decimal64Dtype):

From 05965dd4bb1884aa659f2d992f756b4453a602cf Mon Sep 17 00:00:00 2001
From: galipremsagar <sagarprem75@gmail.com>
Date: Wed, 31 Mar 2021 09:40:06 -0700
Subject: [PATCH 23/23] refactor

---
 python/cudf/cudf/utils/dtypes.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py
index 2b92b4d1f10..be2b1bca2e0 100644
--- a/python/cudf/cudf/utils/dtypes.py
+++ b/python/cudf/cudf/utils/dtypes.py
@@ -671,6 +671,11 @@ def _can_cast(from_dtype, to_dtype):
     `np.can_cast` but with some special handling around
     cudf specific dtypes.
     """
+    if isinstance(from_dtype, type):
+        from_dtype = np.dtype(from_dtype)
+    if isinstance(to_dtype, type):
+        to_dtype = np.dtype(to_dtype)
+
     # TODO : Add precision & scale checking for
     # decimal types in future
     if isinstance(from_dtype, cudf.core.dtypes.Decimal64Dtype):
@@ -681,9 +686,8 @@ def _can_cast(from_dtype, to_dtype):
                 return True
             else:
                 return False
-    elif isinstance(from_dtype, (np.dtype, type)):
-        from_dtype = np.dtype(from_dtype)
-        if isinstance(to_dtype, (np.dtype, type)):
+    elif isinstance(from_dtype, np.dtype):
+        if isinstance(to_dtype, np.dtype):
             return np.can_cast(from_dtype, to_dtype)
         elif isinstance(to_dtype, cudf.core.dtypes.Decimal64Dtype):
             if from_dtype.kind in {"i", "f", "u", "U", "O"}:
@@ -704,7 +708,7 @@ def _can_cast(from_dtype, to_dtype):
     elif isinstance(from_dtype, cudf.core.dtypes.CategoricalDtype):
         if isinstance(to_dtype, cudf.core.dtypes.CategoricalDtype):
             return True
-        elif isinstance(to_dtype, (np.dtype, type)):
+        elif isinstance(to_dtype, np.dtype):
             return np.can_cast(from_dtype._categories.dtype, to_dtype)
         else:
             return False