From bd111e171db019e60c3ebcd8471710916a329103 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Mon, 15 Mar 2021 23:52:09 -0700
Subject: [PATCH 01/15] Add DataFrame.explode and Series.explode with tests

---
 python/cudf/cudf/_lib/cpp/lists/explode.pxd | 13 ++++++
 python/cudf/cudf/_lib/lists.pyx             | 25 ++++++++++-
 python/cudf/cudf/core/dataframe.py          | 50 +++++++++++++++++++++
 python/cudf/cudf/core/frame.py              | 23 +++++++++-
 python/cudf/cudf/core/series.py             | 42 +++++++++++++++++
 python/cudf/cudf/tests/test_dataframe.py    | 28 ++++++++++++
 python/cudf/cudf/tests/test_series.py       | 21 +++++++++
 7 files changed, 200 insertions(+), 2 deletions(-)
 create mode 100644 python/cudf/cudf/_lib/cpp/lists/explode.pxd

diff --git a/python/cudf/cudf/_lib/cpp/lists/explode.pxd b/python/cudf/cudf/_lib/cpp/lists/explode.pxd
new file mode 100644
index 00000000000..cd2d44d2e42
--- /dev/null
+++ b/python/cudf/cudf/_lib/cpp/lists/explode.pxd
@@ -0,0 +1,13 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+
+from cudf._lib.cpp.table.table cimport table
+from cudf._lib.cpp.table.table_view cimport table_view
+from cudf._lib.cpp.types cimport size_type
+
+cdef extern from "cudf/lists/explode.hpp" namespace "cudf" nogil:
+    cdef unique_ptr[table] explode_outer(
+        const table_view,
+        size_type explode_column_idx,
+    ) except +
diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx
index aba13580912..56b89e9244c 100644
--- a/python/cudf/cudf/_lib/lists.pyx
+++ b/python/cudf/cudf/_lib/lists.pyx
@@ -6,12 +6,19 @@ from libcpp.utility cimport move
 from cudf._lib.cpp.lists.count_elements cimport (
     count_elements as cpp_count_elements
 )
+from cudf._lib.cpp.lists.explode cimport (
+    explode_outer as cpp_explode_outer
+)
 from cudf._lib.cpp.lists.lists_column_view cimport lists_column_view
 from cudf._lib.cpp.column.column_view cimport column_view
 from cudf._lib.cpp.column.column cimport column
 
-from cudf._lib.column cimport Column
+from cudf._lib.cpp.table.table cimport table
+from cudf._lib.cpp.table.table_view cimport table_view
+from cudf._lib.cpp.types cimport size_type
 
+from cudf._lib.column cimport Column
+from cudf._lib.table cimport Table
 
 from cudf.core.dtypes import ListDtype
 
@@ -32,3 +39,19 @@ def count_elements(Column col):
 
     result = Column.from_unique_ptr(move(c_result))
     return result
+
+
+def explode_outer(Table tbl, int explode_column_idx):
+    cdef table_view c_table_view = tbl.view()
+    cdef size_type c_explode_column_idx = explode_column_idx
+
+    cdef unique_ptr[table] c_result
+
+    with nogil:
+        c_result = move(cpp_explode_outer(c_table_view, c_explode_column_idx))
+
+    return Table.from_unique_ptr(
+        move(c_result),
+        column_names=tbl._column_names,
+        index_names=tbl._index_names
+    )
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 25f57748765..0a1a8d7ba0c 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -7709,6 +7709,56 @@ def equals(self, other):
                 return False
         return super().equals(other)
 
+    def explode(self, column, ignore_index=False):
+        """
+        Transform each element of a list-like to a row, replicating index
+        values.
+
+        Parameters
+        ----------
+        column : str or tuple
+            Column to explode. Now only supports one column
+        ignore_index : bool, default False
+            If True, the resulting index will be labeled 0, 1, …, n - 1.
+
+        Returns
+        -------
+        DataFrame
+
+        Notes
+        -------
+        In cudf, empty lists `[]` are mapped to nulls, as opposed to `nan` in
+        Pandas.
+
+        Examples
+        -------
+        >>> import cudf
+        >>> cudf.DataFrame(
+                {"a": [[1, 2, 3], [], None, [4, 5]], "b": [11, 22, 33, 44]}
+            )
+                   a   b
+        0  [1, 2, 3]  11
+        1         []  22
+        2       None  33
+        3     [4, 5]  44
+        >>> df.explode('a')
+              a   b
+        0     1  11
+        0     2  11
+        0     3  11
+        1  <NA>  22
+        2  <NA>  33
+        3     4  44
+        3     5  44
+        """
+        if column not in self._column_names:
+            raise KeyError(column)
+
+        explode_num = self._column_names.index(column)
+        return super()._explode(
+            explode_num, None if ignore_index else self.index
+        )
+
     _accessors = set()  # type: Set[Any]
 
 
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index fab5936f94d..22c47213138 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -6,7 +6,16 @@
 import functools
 import warnings
 from collections import OrderedDict, abc as abc
-from typing import TYPE_CHECKING, Any, Dict, Tuple, TypeVar, Union, overload
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Optional,
+    Tuple,
+    TypeVar,
+    Union,
+    overload,
+)
 
 import cupy
 import numpy as np
@@ -573,6 +582,18 @@ def equals(self, other, **kwargs):
         else:
             return self._index.equals(other._index)
 
+    def _explode(self, explode_column_num: int, index: Optional[cudf.Index]):
+        if index is not None:
+            explode_column_num += index.nlevels
+        res_tbl = libcudf.lists.explode_outer(
+            cudf._lib.table.Table(self._data, index=index), explode_column_num
+        )
+
+        res = self.__class__._from_table(res_tbl)
+        if index is not None:
+            res.index.names = index.names
+        return res
+
     def _get_columns_by_label(self, labels, downcast):
         """
         Returns columns of the Frame specified by `labels`
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 5e7121c0488..4aaf2c0f94d 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -6364,6 +6364,48 @@ def keys(self):
         """
         return self.index
 
+    def explode(self, ignore_index=False):
+        """
+        Transform each element of a list-like to a row, replicating index
+        values.
+
+        Parameters
+        ----------
+        ignore_index : bool, default False
+            If True, the resulting index will be labeled 0, 1, …, n - 1.
+
+        Returns
+        -------
+        DataFrame
+
+        Notes
+        -------
+        In cudf, empty lists `[]` are mapped to nulls, as opposed to `nan` in
+        Pandas.
+
+        Examples
+        -------
+        >>> import cudf
+        >>> s = cudf.Series([[1, 2, 3], [], None, [4, 5]])
+        >>> s
+        0    [1, 2, 3]
+        1           []
+        2         None
+        3       [4, 5]
+        dtype: list
+        >>> s.explode()
+        0       1
+        0       2
+        0       3
+        1    <NA>
+        2    <NA>
+        3       4
+        3       5
+        dtype: int64
+        """
+
+        return super()._explode(0, None if ignore_index else self.index)
+
     _accessors = set()  # type: Set[Any]
 
 
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 77548b95277..e2e6c469949 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8442,3 +8442,31 @@ def test_rename_for_level_is_None_MC():
     got = gdf.rename(columns={"a": "f"}, level=None)
 
     assert_eq(expect, got)
+
+
+@pytest.mark.parametrize("ignore_index", [True, False])
+@pytest.mark.parametrize(
+    "p_index",
+    [
+        None,
+        ["ia", "ib", "ic", "id", "ie"],
+        pd.MultiIndex.from_tuples(
+            [(0, "a"), (0, "b"), (0, "c"), (1, "a"), (1, "b")]
+        ),
+    ],
+)
+def test_explode(ignore_index, p_index):
+    gdf = cudf.DataFrame(
+        {
+            "a": [[1, 2, 3], None, [4], [], [5, 6]],
+            "b": [11, 22, 33, 44, 55],
+            "c": ["a", "e", "i", "o", "u"],
+        },
+        index=p_index,
+    )
+    pdf = gdf.to_pandas(nullable=True)
+
+    expect = pdf.explode("a", ignore_index).fillna(pd.NA)
+    got = gdf.explode("a", ignore_index)
+
+    assert_eq(got, expect, check_dtype=False)
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index a1b4236719d..d8531657177 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1118,3 +1118,24 @@ def test_series_drop_raises():
     actual = gs.drop("p", errors="ignore")
 
     assert_eq(actual, expect)
+
+
+@pytest.mark.parametrize("ignore_index", [True, False])
+@pytest.mark.parametrize(
+    "p_index",
+    [
+        None,
+        ["ia", "ib", "ic", "id", "ie"],
+        pd.MultiIndex.from_tuples(
+            [(0, "a"), (0, "b"), (0, "c"), (1, "a"), (1, "b")]
+        ),
+    ],
+)
+def test_explode(ignore_index, p_index):
+    gdf = cudf.Series([[1, 2, 3], None, [4], [], [5, 6]], index=p_index)
+    pdf = gdf.to_pandas(nullable=True)
+
+    expect = pdf.explode(ignore_index)
+    got = gdf.explode(ignore_index)
+
+    assert_eq(expect, got, check_dtype=False)

From 2c543c6a4f9b9241efa25f7c7a57c7009dc6a95e Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Tue, 16 Mar 2021 00:45:24 -0700
Subject: [PATCH 02/15] Cleaning stale docstrings

---
 python/cudf/cudf/core/dataframe.py | 5 -----
 python/cudf/cudf/core/series.py    | 5 -----
 2 files changed, 10 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 0a1a8d7ba0c..23030bcb95e 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -7725,11 +7725,6 @@ def explode(self, column, ignore_index=False):
         -------
         DataFrame
 
-        Notes
-        -------
-        In cudf, empty lists `[]` are mapped to nulls, as opposed to `nan` in
-        Pandas.
-
         Examples
         -------
         >>> import cudf
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 4aaf2c0f94d..f93acab0818 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -6378,11 +6378,6 @@ def explode(self, ignore_index=False):
         -------
         DataFrame
 
-        Notes
-        -------
-        In cudf, empty lists `[]` are mapped to nulls, as opposed to `nan` in
-        Pandas.
-
         Examples
         -------
         >>> import cudf

From 6cc0feedc4e111f1bdd7ec7a854e6dfdcdaa8dae Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Tue, 16 Mar 2021 23:00:00 -0700
Subject: [PATCH 03/15] remove fillna in test code

---
 python/cudf/cudf/tests/test_dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index e2e6c469949..d172dbaaaaa 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8466,7 +8466,7 @@ def test_explode(ignore_index, p_index):
     )
     pdf = gdf.to_pandas(nullable=True)
 
-    expect = pdf.explode("a", ignore_index).fillna(pd.NA)
+    expect = pdf.explode("a", ignore_index)
     got = gdf.explode("a", ignore_index)
 
     assert_eq(got, expect, check_dtype=False)

From 87683896a934f4f5c2110b01135fa2b6b93e7286 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Tue, 16 Mar 2021 23:01:55 -0700
Subject: [PATCH 04/15] Fix DataFrame.explode docstring

---
 python/cudf/cudf/core/dataframe.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 23030bcb95e..24c138561da 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -7717,7 +7717,7 @@ def explode(self, column, ignore_index=False):
         Parameters
         ----------
         column : str or tuple
-            Column to explode. Now only supports one column
+            Column to explode.
         ignore_index : bool, default False
             If True, the resulting index will be labeled 0, 1, …, n - 1.
 
@@ -7729,8 +7729,7 @@ def explode(self, column, ignore_index=False):
         -------
         >>> import cudf
         >>> cudf.DataFrame(
-                {"a": [[1, 2, 3], [], None, [4, 5]], "b": [11, 22, 33, 44]}
-            )
+                {"a": [[1, 2, 3], [], None, [4, 5]], "b": [11, 22, 33, 44]})
                    a   b
         0  [1, 2, 3]  11
         1         []  22

From 7aa72e44e6c6ed188a34e10ddcfbcfe05c5e4557 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Wed, 17 Mar 2021 11:01:12 -0700
Subject: [PATCH 05/15] Pass ignore_index directly to _explode

---
 python/cudf/cudf/core/dataframe.py |  4 +---
 python/cudf/cudf/core/frame.py     | 30 ++++++++++++------------------
 python/cudf/cudf/core/series.py    |  2 +-
 3 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 24c138561da..778bb45437f 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -7749,9 +7749,7 @@ def explode(self, column, ignore_index=False):
             raise KeyError(column)
 
         explode_num = self._column_names.index(column)
-        return super()._explode(
-            explode_num, None if ignore_index else self.index
-        )
+        return super()._explode(explode_num, ignore_index)
 
     _accessors = set()  # type: Set[Any]
 
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 22c47213138..80532e9f0bd 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -6,16 +6,7 @@
 import functools
 import warnings
 from collections import OrderedDict, abc as abc
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Dict,
-    Optional,
-    Tuple,
-    TypeVar,
-    Union,
-    overload,
-)
+from typing import TYPE_CHECKING, Any, Dict, Tuple, TypeVar, Union, overload
 
 import cupy
 import numpy as np
@@ -582,16 +573,19 @@ def equals(self, other, **kwargs):
         else:
             return self._index.equals(other._index)
 
-    def _explode(self, explode_column_num: int, index: Optional[cudf.Index]):
-        if index is not None:
-            explode_column_num += index.nlevels
-        res_tbl = libcudf.lists.explode_outer(
-            cudf._lib.table.Table(self._data, index=index), explode_column_num
-        )
+    def _explode(self, explode_column_num: int, ignore_index: bool):
+        if ignore_index:
+            tmp_index, self._index = self._index, None
+        elif self._index is not None:
+            explode_column_num += self._index.nlevels
 
+        res_tbl = libcudf.lists.explode_outer(self, explode_column_num)
         res = self.__class__._from_table(res_tbl)
-        if index is not None:
-            res.index.names = index.names
+
+        if ignore_index:
+            self._index = tmp_index
+        elif self._index is not None:
+            res.index.names = self._index.names
         return res
 
     def _get_columns_by_label(self, labels, downcast):
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index f93acab0818..0abffb50f85 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -6399,7 +6399,7 @@ def explode(self, ignore_index=False):
         dtype: int64
         """
 
-        return super()._explode(0, None if ignore_index else self.index)
+        return super()._explode(0, ignore_index)
 
     _accessors = set()  # type: Set[Any]
 

From 859f0f68ec97465f4a3b649b92fb5c42666f9444 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Wed, 17 Mar 2021 14:29:28 -0700
Subject: [PATCH 06/15] Handle no-op case for non-list columns

---
 python/cudf/cudf/core/dataframe.py       |  3 +--
 python/cudf/cudf/core/frame.py           | 10 ++++++++-
 python/cudf/cudf/core/series.py          |  2 +-
 python/cudf/cudf/tests/test_dataframe.py | 26 ++++++++++++++++--------
 python/cudf/cudf/tests/test_series.py    | 15 +++++++++++---
 5 files changed, 40 insertions(+), 16 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 778bb45437f..e0c3c69bbea 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -7748,8 +7748,7 @@ def explode(self, column, ignore_index=False):
         if column not in self._column_names:
             raise KeyError(column)
 
-        explode_num = self._column_names.index(column)
-        return super()._explode(explode_num, ignore_index)
+        return super()._explode(column, ignore_index)
 
     _accessors = set()  # type: Set[Any]
 
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 80532e9f0bd..1795ceca2bc 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -23,6 +23,7 @@
 from cudf.utils.dtypes import (
     is_categorical_dtype,
     is_column_like,
+    is_list_dtype,
     is_numerical_dtype,
     is_scalar,
     min_scalar_type,
@@ -573,7 +574,14 @@ def equals(self, other, **kwargs):
         else:
             return self._index.equals(other._index)
 
-    def _explode(self, explode_column_num: int, ignore_index: bool):
+    def _explode(self, explode_column: Any, ignore_index: bool):
+        if not is_list_dtype(self._data[explode_column].dtype):
+            copy = self.copy()
+            if ignore_index:
+                copy._index = cudf.RangeIndex(copy._num_rows)
+            return copy
+
+        explode_column_num = self._column_names.index(explode_column)
         if ignore_index:
             tmp_index, self._index = self._index, None
         elif self._index is not None:
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 0abffb50f85..ffba9267430 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -6399,7 +6399,7 @@ def explode(self, ignore_index=False):
         dtype: int64
         """
 
-        return super()._explode(0, ignore_index)
+        return super()._explode(self._column_names[0], ignore_index)
 
     _accessors = set()  # type: Set[Any]
 
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index d172dbaaaaa..d965f997645 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8444,6 +8444,21 @@ def test_rename_for_level_is_None_MC():
     assert_eq(expect, got)
 
 
+@pytest.mark.parametrize(
+    "data",
+    [
+        {
+            "a": [[1, 2, 3], None, [4], [], [5, 6]],
+            "b": [11, 22, 33, 44, 55],
+            "c": ["a", "e", "i", "o", "u"],
+        },  # nested
+        {
+            "a": [1, 2, 3, 4, 5],
+            "b": [11, 22, 33, 44, 55],
+            "c": ["a", "e", "i", "o", "u"],
+        },  # non-nested
+    ],
+)
 @pytest.mark.parametrize("ignore_index", [True, False])
 @pytest.mark.parametrize(
     "p_index",
@@ -8455,15 +8470,8 @@ def test_rename_for_level_is_None_MC():
         ),
     ],
 )
-def test_explode(ignore_index, p_index):
-    gdf = cudf.DataFrame(
-        {
-            "a": [[1, 2, 3], None, [4], [], [5, 6]],
-            "b": [11, 22, 33, 44, 55],
-            "c": ["a", "e", "i", "o", "u"],
-        },
-        index=p_index,
-    )
+def test_explode(data, ignore_index, p_index):
+    gdf = cudf.DataFrame(data, index=p_index,)
     pdf = gdf.to_pandas(nullable=True)
 
     expect = pdf.explode("a", ignore_index)
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index d8531657177..bbdeb55b9ee 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1120,6 +1120,10 @@ def test_series_drop_raises():
     assert_eq(actual, expect)
 
 
+@pytest.mark.parametrize(
+    "data",
+    [[[1, 2, 3], None, [4], [], [5, 6]], [1, 2, 3, 4, 5]],  # non-nested
+)
 @pytest.mark.parametrize("ignore_index", [True, False])
 @pytest.mark.parametrize(
     "p_index",
@@ -1131,11 +1135,16 @@ def test_series_drop_raises():
         ),
     ],
 )
-def test_explode(ignore_index, p_index):
-    gdf = cudf.Series([[1, 2, 3], None, [4], [], [5, 6]], index=p_index)
+def test_explode(data, ignore_index, p_index):
+    gdf = cudf.Series(data, index=p_index)
     pdf = gdf.to_pandas(nullable=True)
 
     expect = pdf.explode(ignore_index)
     got = gdf.explode(ignore_index)
 
-    assert_eq(expect, got, check_dtype=False)
+    if data == [1, 2, 3, 4, 5] and ignore_index and p_index is not None:
+        # https://github.com/pandas-dev/pandas/issues/40487
+        with pytest.raises(AssertionError, match="different"):
+            assert_eq(expect, got, check_dtype=False)
+    else:
+        assert_eq(expect, got, check_dtype=False)

From fe085f6a0b0fa2b651ff2b96bc58523e9a91b431 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Wed, 17 Mar 2021 16:06:01 -0700
Subject: [PATCH 07/15] Account for multi-level column names

---
 python/cudf/cudf/core/frame.py           |  3 ++
 python/cudf/cudf/tests/test_dataframe.py | 49 ++++++++++++++++--------
 python/cudf/cudf/tests/test_series.py    |  4 +-
 3 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 1795ceca2bc..f34e375103b 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -590,6 +590,9 @@ def _explode(self, explode_column: Any, ignore_index: bool):
         res_tbl = libcudf.lists.explode_outer(self, explode_column_num)
         res = self.__class__._from_table(res_tbl)
 
+        res._data.multiindex = self._data.multiindex
+        res._data._level_names = self._data._level_names
+
         if ignore_index:
             self._index = tmp_index
         elif self._index is not None:
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index d965f997645..9f426c1bc82 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8447,16 +8447,33 @@ def test_rename_for_level_is_None_MC():
 @pytest.mark.parametrize(
     "data",
     [
-        {
-            "a": [[1, 2, 3], None, [4], [], [5, 6]],
-            "b": [11, 22, 33, 44, 55],
-            "c": ["a", "e", "i", "o", "u"],
-        },  # nested
-        {
-            "a": [1, 2, 3, 4, 5],
-            "b": [11, 22, 33, 44, 55],
-            "c": ["a", "e", "i", "o", "u"],
-        },  # non-nested
+        [
+            [[1, 2, 3], 11, "a"],
+            [None, 22, "e"],
+            [[4], 33, "i"],
+            [[], 44, "o"],
+            [[5, 6], 55, "u"],
+        ],  # nested
+        [
+            [1, 11, "a"],
+            [2, 22, "e"],
+            [3, 33, "i"],
+            [4, 44, "o"],
+            [5, 55, "u"],
+        ],  # non-nested
+    ],
+)
+@pytest.mark.parametrize(
+    ("labels", "label_to_explode"),
+    [
+        (None, 0),
+        (pd.Index(["a", "b", "c"]), "a"),
+        (
+            pd.MultiIndex.from_tuples(
+                [(0, "a"), (0, "b"), (1, "a")], names=["l0", "l1"]
+            ),
+            (0, "a"),
+        ),
     ],
 )
 @pytest.mark.parametrize("ignore_index", [True, False])
@@ -8470,11 +8487,11 @@ def test_rename_for_level_is_None_MC():
         ),
     ],
 )
-def test_explode(data, ignore_index, p_index):
-    gdf = cudf.DataFrame(data, index=p_index,)
-    pdf = gdf.to_pandas(nullable=True)
+def test_explode(data, labels, ignore_index, p_index, label_to_explode):
+    pdf = pd.DataFrame(data, index=p_index, columns=labels)
+    gdf = cudf.from_pandas(pdf)
 
-    expect = pdf.explode("a", ignore_index)
-    got = gdf.explode("a", ignore_index)
+    expect = pdf.explode(label_to_explode, ignore_index)
+    got = gdf.explode(label_to_explode, ignore_index)
 
-    assert_eq(got, expect, check_dtype=False)
+    assert_eq(expect, got, check_dtype=False)
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index bbdeb55b9ee..5c583fc58fc 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1136,8 +1136,8 @@ def test_series_drop_raises():
     ],
 )
 def test_explode(data, ignore_index, p_index):
-    gdf = cudf.Series(data, index=p_index)
-    pdf = gdf.to_pandas(nullable=True)
+    pdf = pd.Series(data, index=p_index, name="someseries")
+    gdf = cudf.from_pandas(pdf)
 
     expect = pdf.explode(ignore_index)
     got = gdf.explode(ignore_index)

From ca419cc19d5bb86957587c959a9b09197e2520c9 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Wed, 17 Mar 2021 16:10:36 -0700
Subject: [PATCH 08/15] Doc for _explode

---
 python/cudf/cudf/core/frame.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index f34e375103b..d3cf798af82 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -575,6 +575,12 @@ def equals(self, other, **kwargs):
             return self._index.equals(other._index)
 
     def _explode(self, explode_column: Any, ignore_index: bool):
+        """Helper function for `explode` in Series and Dataframe.
+        if the designated column to explode is non-nested, a copy
+        of the frame is returned. Otherwise, if ignore_index is
+        set, the original index is not exploded and will use
+        a `RangeIndex` instead.
+        """
         if not is_list_dtype(self._data[explode_column].dtype):
             copy = self.copy()
             if ignore_index:

From 9d7e4a12602c24bb5d050f33dffaa17f37b60412 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Wed, 17 Mar 2021 21:14:23 -0700
Subject: [PATCH 09/15] Better handling of index column

---
 python/cudf/cudf/_lib/lists.pyx |  9 ++++++---
 python/cudf/cudf/core/frame.py  | 14 ++++++--------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/python/cudf/cudf/_lib/lists.pyx b/python/cudf/cudf/_lib/lists.pyx
index 56b89e9244c..0f0ee35556a 100644
--- a/python/cudf/cudf/_lib/lists.pyx
+++ b/python/cudf/cudf/_lib/lists.pyx
@@ -1,5 +1,6 @@
 # Copyright (c) 2021, NVIDIA CORPORATION.
 
+from libcpp cimport bool
 from libcpp.memory cimport unique_ptr, shared_ptr, make_shared
 from libcpp.utility cimport move
 
@@ -41,8 +42,10 @@ def count_elements(Column col):
     return result
 
 
-def explode_outer(Table tbl, int explode_column_idx):
-    cdef table_view c_table_view = tbl.view()
+def explode_outer(Table tbl, int explode_column_idx, bool ignore_index=False):
+    cdef table_view c_table_view = (
+        tbl.data_view() if ignore_index else tbl.view()
+    )
     cdef size_type c_explode_column_idx = explode_column_idx
 
     cdef unique_ptr[table] c_result
@@ -53,5 +56,5 @@ def explode_outer(Table tbl, int explode_column_idx):
     return Table.from_unique_ptr(
         move(c_result),
         column_names=tbl._column_names,
-        index_names=tbl._index_names
+        index_names=None if ignore_index else tbl._index_names
     )
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index d3cf798af82..eaa27c2fd75 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -579,7 +579,7 @@ def _explode(self, explode_column: Any, ignore_index: bool):
         if the designated column to explode is non-nested, a copy
         of the frame is returned. Otherwise, if ignore_index is
         set, the original index is not exploded and will use
-        a `RangeIndex` instead.
+        a `RangeIndex`.
         """
         if not is_list_dtype(self._data[explode_column].dtype):
             copy = self.copy()
@@ -588,20 +588,18 @@ def _explode(self, explode_column: Any, ignore_index: bool):
             return copy
 
         explode_column_num = self._column_names.index(explode_column)
-        if ignore_index:
-            tmp_index, self._index = self._index, None
-        elif self._index is not None:
+        if not ignore_index and self._index is not None:
             explode_column_num += self._index.nlevels
 
-        res_tbl = libcudf.lists.explode_outer(self, explode_column_num)
+        res_tbl = libcudf.lists.explode_outer(
+            self, explode_column_num, ignore_index
+        )
         res = self.__class__._from_table(res_tbl)
 
         res._data.multiindex = self._data.multiindex
         res._data._level_names = self._data._level_names
 
-        if ignore_index:
-            self._index = tmp_index
-        elif self._index is not None:
+        if not ignore_index and self._index is not None:
             res.index.names = self._index.names
         return res
 

From c09f8151e440e80217b4be6440dda31a570a6005 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Wed, 17 Mar 2021 21:39:39 -0700
Subject: [PATCH 10/15] Rev: avoid extra index copy when ignore_index=True

---
 python/cudf/cudf/core/frame.py | 39 +++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index eaa27c2fd75..fa10c406b56 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -581,27 +581,32 @@ def _explode(self, explode_column: Any, ignore_index: bool):
         set, the original index is not exploded and will use
         a `RangeIndex`.
         """
-        if not is_list_dtype(self._data[explode_column].dtype):
-            copy = self.copy()
-            if ignore_index:
-                copy._index = cudf.RangeIndex(copy._num_rows)
-            return copy
+        if (
+            isinstance(self, (cudf.Series, cudf.DataFrame))
+            and self._index is not None
+        ):
+            if not is_list_dtype(self._data[explode_column].dtype):
+                data = self._data.copy(deep=True)
+                idx = None if ignore_index else self._index.copy(deep=True)
+                return self.__class__._from_data(data, index=idx)
 
-        explode_column_num = self._column_names.index(explode_column)
-        if not ignore_index and self._index is not None:
-            explode_column_num += self._index.nlevels
+            explode_column_num = self._column_names.index(explode_column)
+            if not ignore_index:
+                explode_column_num += self._index.nlevels
 
-        res_tbl = libcudf.lists.explode_outer(
-            self, explode_column_num, ignore_index
-        )
-        res = self.__class__._from_table(res_tbl)
+            res_tbl = libcudf.lists.explode_outer(
+                self, explode_column_num, ignore_index
+            )
+            res = self.__class__._from_table(res_tbl)
 
-        res._data.multiindex = self._data.multiindex
-        res._data._level_names = self._data._level_names
+            res._data.multiindex = self._data.multiindex
+            res._data._level_names = self._data._level_names
 
-        if not ignore_index and self._index is not None:
-            res.index.names = self._index.names
-        return res
+            if not ignore_index:
+                res.index.names = self._index.names
+            return res
+        else:
+            raise NotImplementedError("_explode is not implemented for Index.")
 
     def _get_columns_by_label(self, labels, downcast):
         """

From b33a6a04b78dc82a347d4acae38daa9590e24641 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Wed, 17 Mar 2021 21:58:40 -0700
Subject: [PATCH 11/15] Remove stale comments

---
 python/cudf/cudf/tests/test_series.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index 5c583fc58fc..23d348b1e68 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1122,7 +1122,7 @@ def test_series_drop_raises():
 
 @pytest.mark.parametrize(
     "data",
-    [[[1, 2, 3], None, [4], [], [5, 6]], [1, 2, 3, 4, 5]],  # non-nested
+    [[[1, 2, 3], None, [4], [], [5, 6]], [1, 2, 3, 4, 5]],
 )
 @pytest.mark.parametrize("ignore_index", [True, False])
 @pytest.mark.parametrize(

From 25409c1498453bf147f5e3f53cf8f674498df0e7 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Wed, 17 Mar 2021 22:03:07 -0700
Subject: [PATCH 12/15] style

---
 python/cudf/cudf/tests/test_series.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index 23d348b1e68..beda14934ca 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1121,8 +1121,7 @@ def test_series_drop_raises():
 
 
 @pytest.mark.parametrize(
-    "data",
-    [[[1, 2, 3], None, [4], [], [5, 6]], [1, 2, 3, 4, 5]],
+    "data", [[[1, 2, 3], None, [4], [], [5, 6]], [1, 2, 3, 4, 5]],
 )
 @pytest.mark.parametrize("ignore_index", [True, False])
 @pytest.mark.parametrize(

From 767d3e2d975e0ed7be92d2f50e8cf2039900fe4c Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Wed, 17 Mar 2021 23:02:01 -0700
Subject: [PATCH 13/15] Move copy case 1-level up

---
 python/cudf/cudf/core/dataframe.py |  5 +++++
 python/cudf/cudf/core/frame.py     | 36 ++++++++++--------------------
 python/cudf/cudf/core/series.py    |  4 ++++
 3 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index e0c3c69bbea..86b4b3259a5 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -7748,6 +7748,11 @@ def explode(self, column, ignore_index=False):
         if column not in self._column_names:
             raise KeyError(column)
 
+        if not is_list_dtype(self._data[column].dtype):
+            data = self._data.copy(deep=True)
+            idx = None if ignore_index else self._index.copy(deep=True)
+            return self.__class__._from_data(data, index=idx)
+
         return super()._explode(column, ignore_index)
 
     _accessors = set()  # type: Set[Any]
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index fa10c406b56..e589eab3775 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -23,7 +23,6 @@
 from cudf.utils.dtypes import (
     is_categorical_dtype,
     is_column_like,
-    is_list_dtype,
     is_numerical_dtype,
     is_scalar,
     min_scalar_type,
@@ -581,32 +580,21 @@ def _explode(self, explode_column: Any, ignore_index: bool):
         set, the original index is not exploded and will use
         a `RangeIndex`.
         """
-        if (
-            isinstance(self, (cudf.Series, cudf.DataFrame))
-            and self._index is not None
-        ):
-            if not is_list_dtype(self._data[explode_column].dtype):
-                data = self._data.copy(deep=True)
-                idx = None if ignore_index else self._index.copy(deep=True)
-                return self.__class__._from_data(data, index=idx)
+        explode_column_num = self._column_names.index(explode_column)
+        if not ignore_index and self._index is not None:
+            explode_column_num += self._index.nlevels
 
-            explode_column_num = self._column_names.index(explode_column)
-            if not ignore_index:
-                explode_column_num += self._index.nlevels
-
-            res_tbl = libcudf.lists.explode_outer(
-                self, explode_column_num, ignore_index
-            )
-            res = self.__class__._from_table(res_tbl)
+        res_tbl = libcudf.lists.explode_outer(
+            self, explode_column_num, ignore_index
+        )
+        res = self.__class__._from_table(res_tbl)
 
-            res._data.multiindex = self._data.multiindex
-            res._data._level_names = self._data._level_names
+        res._data.multiindex = self._data.multiindex
+        res._data._level_names = self._data._level_names
 
-            if not ignore_index:
-                res.index.names = self._index.names
-            return res
-        else:
-            raise NotImplementedError("_explode is not implemented for Index.")
+        if not ignore_index and self._index is not None:
+            res.index.names = self._index.names
+        return res
 
     def _get_columns_by_label(self, labels, downcast):
         """
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index ffba9267430..93a2260f338 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -6398,6 +6398,10 @@ def explode(self, ignore_index=False):
         3       5
         dtype: int64
         """
+        if not is_list_dtype(self._column.dtype):
+            data = self._data.copy(deep=True)
+            idx = None if ignore_index else self._index.copy(deep=True)
+            return self.__class__._from_data(data, index=idx)
 
         return super()._explode(self._column_names[0], ignore_index)
 

From 7cb25e7957bce8dc7ef09323732b7b0a7466c5e3 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Thu, 18 Mar 2021 10:48:27 -0700
Subject: [PATCH 14/15] Rev: _explode doc fix

---
 python/cudf/cudf/core/frame.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index e589eab3775..bfcc2d125db 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -574,11 +574,10 @@ def equals(self, other, **kwargs):
             return self._index.equals(other._index)
 
     def _explode(self, explode_column: Any, ignore_index: bool):
-        """Helper function for `explode` in Series and Dataframe.
-        if the designated column to explode is non-nested, a copy
-        of the frame is returned. Otherwise, if ignore_index is
-        set, the original index is not exploded and will use
-        a `RangeIndex`.
+        """Helper function for `explode` in `Series` and `DataFrame`, explodes
+        a specified nested column. Other columns' corresponding rows are
+        duplicated. If ignore_index is set, the original index is not exploded
+        and will be replaced with a `RangeIndex`.
         """
         explode_column_num = self._column_names.index(explode_column)
         if not ignore_index and self._index is not None:

From 6ce751aaf7691736de1c4dd397ad06d34fe4fcc4 Mon Sep 17 00:00:00 2001
From: Michael Wang <isVoid@users.noreply.github.com>
Date: Thu, 18 Mar 2021 10:49:25 -0700
Subject: [PATCH 15/15] Rev: small docstrings bug fix

Co-authored-by: GALI PREM SAGAR <sagarprem75@gmail.com>
---
 python/cudf/cudf/core/dataframe.py | 2 +-
 python/cudf/cudf/core/series.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 86b4b3259a5..4414b9324d6 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -7726,7 +7726,7 @@ def explode(self, column, ignore_index=False):
         DataFrame
 
         Examples
-        -------
+        --------
         >>> import cudf
         >>> cudf.DataFrame(
                 {"a": [[1, 2, 3], [], None, [4, 5]], "b": [11, 22, 33, 44]})
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 93a2260f338..0c356d33606 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -6379,7 +6379,7 @@ def explode(self, ignore_index=False):
         DataFrame
 
         Examples
-        -------
+        --------
         >>> import cudf
         >>> s = cudf.Series([[1, 2, 3], [], None, [4, 5]])
         >>> s