From 2f584913243079221746480dca951ce7d8c3f244 Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <kirui.sheilah@gmail.com>
Date: Wed, 2 Dec 2020 12:49:58 -0800
Subject: [PATCH 01/14] implementing update() function

---
 python/cudf/cudf/core/dataframe.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 107d2d20e38..8cc8f3690a3 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1434,6 +1434,10 @@ def add(self, other, axis="columns", level=None, fill_value=None):
 
         return self._apply_op("add", other, fill_value)
 
+
+    def update():
+
+        
     def __add__(self, other):
         return self._apply_op("__add__", other)
 

From 9c2b1aced1a2f161c2417dc17f60965d3cef3c5a Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <kirui.sheilah@gmail.com>
Date: Tue, 8 Dec 2020 21:30:43 -0800
Subject: [PATCH 02/14] stash changes to switch branch

---
 python/cudf/cudf/core/dataframe.py       | 1 +
 python/cudf/cudf/tests/test_dataframe.py | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 8cc8f3690a3..1a4c18147d0 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1436,6 +1436,7 @@ def add(self, other, axis="columns", level=None, fill_value=None):
 
 
     def update():
+        pass
 
         
     def __add__(self, other):
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 69eb70e7201..b08e2347753 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -7995,3 +7995,7 @@ def test_dataframe_from_pandas_duplicate_columns():
         ValueError, match="Duplicate column names are not allowed"
     ):
         gd.from_pandas(pdf)
+
+
+
+def test_dataframe_for_update_function(data,other,overwrite, errors):

From 7d31eb52f51092c8c2a6fa8926663ebf2c382dc3 Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <kirui.sheilah@gmail.com>
Date: Tue, 8 Dec 2020 21:32:01 -0800
Subject: [PATCH 03/14] stash changes to switch branch

---
 python/cudf/cudf/core/dataframe.py       | 33 ++++++++++++++++++++++--
 python/cudf/cudf/tests/test_dataframe.py |  1 +
 2 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 1a4c18147d0..85722e5ee19 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1435,8 +1435,37 @@ def add(self, other, axis="columns", level=None, fill_value=None):
         return self._apply_op("add", other, fill_value)
 
 
-    def update():
-        pass
+def update(self, other, join="left", overwrite=True, filter_func=None, errors="ignore"):
+    # TODO: Support other joins
+    if join != "left":  # pragma: no cover
+        raise NotImplementedError("Only left join is supported")
+    if errors not in ["ignore", "raise"]:
+        raise ValueError("The parameter errors must be either 'ignore' or 'raise'")
+
+    other = other.reindex(df.index, axis=0)
+    other = other.reindex(df.columns, axis=1)
+
+    for col in df.columns:
+        this = df[col]
+        that = other[col]
+
+        if errors == "raise":
+            mask_this = that.notna()
+            mask_that = this.notna()
+            if any(mask_this & mask_that):
+                raise ValueError("Data overlaps.")
+
+        if overwrite:
+            mask = that.isna()
+        else:
+            mask = this.notna()
+
+        # don't overwrite columns unnecessarily
+        if mask.all():
+            continue
+
+        df.loc[mask, col] = this[mask]
+        df.loc[~mask, col] = that[~mask]
 
         
     def __add__(self, other):
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index b08e2347753..077f00730f8 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -7999,3 +7999,4 @@ def test_dataframe_from_pandas_duplicate_columns():
 
 
 def test_dataframe_for_update_function(data,other,overwrite, errors):
+    

From b72a8d59ce37509647fb05b9bf66f3d4bf45bdff Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <kirui.sheilah@gmail.com>
Date: Mon, 14 Dec 2020 20:46:08 -0800
Subject: [PATCH 04/14] Made edits to update()

---
 python/cudf/cudf/core/dataframe.py       | 102 ++++++++++++++++-------
 python/cudf/cudf/tests/test_dataframe.py |  34 +++++++-
 2 files changed, 106 insertions(+), 30 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 85722e5ee19..d7d56f74edb 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1435,37 +1435,81 @@ def add(self, other, axis="columns", level=None, fill_value=None):
         return self._apply_op("add", other, fill_value)
 
 
-def update(self, other, join="left", overwrite=True, filter_func=None, errors="ignore"):
-    # TODO: Support other joins
-    if join != "left":  # pragma: no cover
-        raise NotImplementedError("Only left join is supported")
-    if errors not in ["ignore", "raise"]:
-        raise ValueError("The parameter errors must be either 'ignore' or 'raise'")
-
-    other = other.reindex(df.index, axis=0)
-    other = other.reindex(df.columns, axis=1)
-
-    for col in df.columns:
-        this = df[col]
-        that = other[col]
-
-        if errors == "raise":
-            mask_this = that.notna()
-            mask_that = this.notna()
-            if any(mask_this & mask_that):
-                raise ValueError("Data overlaps.")
-
-        if overwrite:
-            mask = that.isna()
-        else:
-            mask = this.notna()
+    def update(self, other, join="left", overwrite=True, filter_func=None, errors="ignore"):
+        """
+        Modify a DataFrame in place using non-NA values from another DataFrame.
+
+        Aligns on indices. There is no return value.
+
+        Parameters
+        ----------
+
+        other : DataFrame, or object coercible into a DataFrame
+            Should have at least one matching index/column label with the 
+            original DataFrame.If a Series is passed, its name attribute must
+            be set, and that will be used as the column name to align with the 
+            original DataFrame.
+
+        join : {‘left’}, default ‘left’
+            Only left join is implemented, keeping the index and 
+            columns of the original object.
+
+        overwrite : {True, False}, default True
+            How to handle non-NA values for overlapping keys:
+            True: overwrite original DataFrame’s values with values from other.
+            False: only update values that are NA in the original DataFrame.
+
+        filter_func : callable(1d-array) -> bool 1d-array, optional
+            Can choose to replace values other than NA. 
+            Return True for values that should be updated.
+
+        errors : {‘raise’, ‘ignore’}, default ‘ignore’
+            If ‘raise’, will raise a ValueError if the DataFrame and other both contain non-NA data in the same place.
+
+
+        Returns
+        -------
+        None : method directly changes calling object
+          
+        Raises
+        -------
+        ValueError
+            - When ``errors``= 'raise' and there’s overlapping non-NA data.
+            - When ``errors`` is not either 'ignore' or 'raise'
+
+        NotImplementedError
+            - If ``join`` != ‘left’
+
+        """
+        # TODO: Support other joins
+        if join != "left": 
+            raise NotImplementedError("Only left join is supported")
+        if errors not in ["ignore", "raise"]:
+            raise ValueError("The parameter errors must be either 'ignore' or 'raise'")
+
+        other = other.reindex(self.index, axis=0)
+        other = other.reindex(self.columns, axis=1)
+        for col in self.columns: 
+            original = self[col] 
+            new = other[col]  
+
+            if errors == "raise":
+                mask_original = original.notna() 
+                mask_new = new.notna()
+                if any(mask_original & mask_new):
+                    raise ValueError("Data overlaps.") 
+
+            if overwrite: 
+                mask = new.isna() #overwrite original with values from other 
+            else:
+                mask = original.notna() #only update values that are na in the original 
 
-        # don't overwrite columns unnecessarily
-        if mask.all():
-            continue
+            # don't overwrite columns unnecessarily
+            if mask.all():
+                continue
 
-        df.loc[mask, col] = this[mask]
-        df.loc[~mask, col] = that[~mask]
+            self.loc[mask, col] = original[mask]
+            self.loc[~mask, col] = newr[~mask]
 
         
     def __add__(self, other):
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 077f00730f8..8d025342bc2 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -7997,6 +7997,38 @@ def test_dataframe_from_pandas_duplicate_columns():
         gd.from_pandas(pdf)
 
 
+@pytest.mark.parametrize("join",["left", "right"],)
+@pytest.mark.parametrize("overwrite",[True, False],)
+@pytest.mark.parametrize("errors",["raise", "ignore"],)
+@pytest.mark.parametrize(
+    "data",
+    [
+        {"a": [1, 2, 3], "b": [3, 4, 5]},
+        {"a": [1.0, 2.0, 3.0], "b": [3.0, 4.0, 5.0]},
+        {"a": ["n", "p", "q"],"b": ["t", "l", "s"]},
+        {"a": [False, False, True], "b": [True, True, False]},
+        {"a": [2.0, np.nan, 4.0], "b": [np.nan, np.nan, np.nan]},
+        {"a": [np.nan, np.nan, np.nan], "b": [np.nan, np.nan, np.nan]},
+    ],
+)
+@pytest.mark.parametrize(
+    "data2",
+    [
+        {"a": [7, 5, 8], "b": [2.0, 7.0, 9.0]},
+        {"a": ["yp", "zn", "uf"],"b": ["md", "rl", "sv"]},
+        {"a": [True, False, True], "b":[3.0, 4.0, 5.0], "c": [False, True, False]},
+        {"a": [np.nan, np.nan, np.nan], "b": [np.nan, np.nan, np.nan]},
+        {"a": [np.nan, 2.0, False], "b": [2, np.nan, "pl"], "c": [np.nan, np.nan, np.nan]},
+    ],
+)
+def test_update_for_dataframes(data, data2, join, overwrite, errors):
+    pdf = pd.DataFrame(data)
+    gdf = gd.DataFrame(data)
+
+    other_pd = pd.DataFrame(data2)
+    other_gd = gd.DataFrame(data2)
 
-def test_dataframe_for_update_function(data,other,overwrite, errors):
+    expect = pdf.update(other_pd, join, overwrite, errors)
+    got = gdf.update(other_gd, join, overwrite, errors)
     
+    assert_eq(expect, got)

From a2bbab352d1538a96c0c8df0e3e401858014b84b Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <kirui.sheilah@gmail.com>
Date: Mon, 14 Dec 2020 23:01:51 -0800
Subject: [PATCH 05/14] Made edits to update()

---
 python/cudf/cudf/core/dataframe.py       | 20 +++++++++++++++-----
 python/cudf/cudf/tests/test_dataframe.py |  9 +++++----
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index d7d56f74edb..3608837587a 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1435,7 +1435,9 @@ def add(self, other, axis="columns", level=None, fill_value=None):
         return self._apply_op("add", other, fill_value)
 
 
-    def update(self, other, join="left", overwrite=True, filter_func=None, errors="ignore"):
+    def update(
+        self, other, join="left", overwrite=True, filter_func=None, errors="ignore"
+        ):
         """
         Modify a DataFrame in place using non-NA values from another DataFrame.
 
@@ -1487,20 +1489,28 @@ def update(self, other, join="left", overwrite=True, filter_func=None, errors="i
         if errors not in ["ignore", "raise"]:
             raise ValueError("The parameter errors must be either 'ignore' or 'raise'")
 
+        if not isinstance(other, DataFrame):
+            other = DataFrame(other)
+
         other = other.reindex(self.index, axis=0)
         other = other.reindex(self.columns, axis=1)
         for col in self.columns: 
-            original = self[col] 
+            original = self[col]
             new = other[col]  
-
+#             if filter_func is not None:
+#                 with np.errstate(all="ignore"):
+# >                   mask = ~filter_func(original) | new.isna()
+            # else:
+            
             if errors == "raise":
-                mask_original = original.notna() 
-                mask_new = new.notna()
+                mask_original = new.notna() 
+                mask_new = original.notna()
                 if any(mask_original & mask_new):
                     raise ValueError("Data overlaps.") 
 
             if overwrite: 
                 mask = new.isna() #overwrite original with values from other 
+
             else:
                 mask = original.notna() #only update values that are na in the original 
 
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 8d025342bc2..d2c675856c4 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -7999,13 +7999,14 @@ def test_dataframe_from_pandas_duplicate_columns():
 
 @pytest.mark.parametrize("join",["left", "right"],)
 @pytest.mark.parametrize("overwrite",[True, False],)
+@pytest.mark.parametrize("filter_func",[None],)
 @pytest.mark.parametrize("errors",["raise", "ignore"],)
 @pytest.mark.parametrize(
     "data",
     [
         {"a": [1, 2, 3], "b": [3, 4, 5]},
         {"a": [1.0, 2.0, 3.0], "b": [3.0, 4.0, 5.0]},
-        {"a": ["n", "p", "q"],"b": ["t", "l", "s"]},
+        #{"a": ["n", "p", "q"],"b": ["t", "l", "s"]},
         {"a": [False, False, True], "b": [True, True, False]},
         {"a": [2.0, np.nan, 4.0], "b": [np.nan, np.nan, np.nan]},
         {"a": [np.nan, np.nan, np.nan], "b": [np.nan, np.nan, np.nan]},
@@ -8015,13 +8016,13 @@ def test_dataframe_from_pandas_duplicate_columns():
     "data2",
     [
         {"a": [7, 5, 8], "b": [2.0, 7.0, 9.0]},
-        {"a": ["yp", "zn", "uf"],"b": ["md", "rl", "sv"]},
+        #{"a": ["yp", "zn", "uf"],"b": ["md", "rl", "sv"]},
         {"a": [True, False, True], "b":[3.0, 4.0, 5.0], "c": [False, True, False]},
         {"a": [np.nan, np.nan, np.nan], "b": [np.nan, np.nan, np.nan]},
-        {"a": [np.nan, 2.0, False], "b": [2, np.nan, "pl"], "c": [np.nan, np.nan, np.nan]},
+        {"a": [np.nan, 2.0, False], "b": [2, np.nan, 5.0], "c": [np.nan, np.nan, np.nan]},
     ],
 )
-def test_update_for_dataframes(data, data2, join, overwrite, errors):
+def test_update_for_dataframes(data, data2, join, overwrite, filter_func, errors):
     pdf = pd.DataFrame(data)
     gdf = gd.DataFrame(data)
 

From 73c36832c01e3dce66cf1ba40087519c058f5cfe Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <kirui.sheilah@gmail.com>
Date: Wed, 16 Dec 2020 14:07:24 -0800
Subject: [PATCH 06/14] edits

---
 python/cudf/cudf/core/dataframe.py       | 34 +++++++++++-------------
 python/cudf/cudf/tests/test_dataframe.py |  9 +++----
 2 files changed, 18 insertions(+), 25 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 3608837587a..8dffd2a2260 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1435,9 +1435,7 @@ def add(self, other, axis="columns", level=None, fill_value=None):
         return self._apply_op("add", other, fill_value)
 
 
-    def update(
-        self, other, join="left", overwrite=True, filter_func=None, errors="ignore"
-        ):
+    def update(self, other, join="left", overwrite=True, filter_func=None, errors="ignore"):
         """
         Modify a DataFrame in place using non-NA values from another DataFrame.
 
@@ -1488,38 +1486,36 @@ def update(
             raise NotImplementedError("Only left join is supported")
         if errors not in ["ignore", "raise"]:
             raise ValueError("The parameter errors must be either 'ignore' or 'raise'")
+        if filter_func != None:
+            raise NotImplementedError("filter_func is not supported yet")
 
         if not isinstance(other, DataFrame):
             other = DataFrame(other)
 
         other = other.reindex(self.index, axis=0)
         other = other.reindex(self.columns, axis=1)
+
         for col in self.columns: 
-            original = self[col]
-            new = other[col]  
-#             if filter_func is not None:
-#                 with np.errstate(all="ignore"):
-# >                   mask = ~filter_func(original) | new.isna()
-            # else:
+            this = df[col]
+            that = other[col]  
             
             if errors == "raise":
-                mask_original = new.notna() 
-                mask_new = original.notna()
-                if any(mask_original & mask_new):
-                    raise ValueError("Data overlaps.") 
-
-            if overwrite: 
-                mask = new.isna() #overwrite original with values from other 
+                mask_this = that.notna()
+                mask_that = this.notna()
+                if any(mask_this & mask_that):
+                    raise ValueError("Data overlaps.")
 
+            if overwrite:
+                mask = that.isna()
             else:
-                mask = original.notna() #only update values that are na in the original 
+                mask = this.notna()
 
             # don't overwrite columns unnecessarily
             if mask.all():
                 continue
 
-            self.loc[mask, col] = original[mask]
-            self.loc[~mask, col] = newr[~mask]
+            df.loc[mask, col] = this[mask]
+            df.loc[~mask, col] = that[~mask]
 
         
     def __add__(self, other):
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index d2c675856c4..770472c7d39 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -7999,14 +7999,12 @@ def test_dataframe_from_pandas_duplicate_columns():
 
 @pytest.mark.parametrize("join",["left", "right"],)
 @pytest.mark.parametrize("overwrite",[True, False],)
-@pytest.mark.parametrize("filter_func",[None],)
 @pytest.mark.parametrize("errors",["raise", "ignore"],)
 @pytest.mark.parametrize(
     "data",
     [
         {"a": [1, 2, 3], "b": [3, 4, 5]},
         {"a": [1.0, 2.0, 3.0], "b": [3.0, 4.0, 5.0]},
-        #{"a": ["n", "p", "q"],"b": ["t", "l", "s"]},
         {"a": [False, False, True], "b": [True, True, False]},
         {"a": [2.0, np.nan, 4.0], "b": [np.nan, np.nan, np.nan]},
         {"a": [np.nan, np.nan, np.nan], "b": [np.nan, np.nan, np.nan]},
@@ -8016,13 +8014,12 @@ def test_dataframe_from_pandas_duplicate_columns():
     "data2",
     [
         {"a": [7, 5, 8], "b": [2.0, 7.0, 9.0]},
-        #{"a": ["yp", "zn", "uf"],"b": ["md", "rl", "sv"]},
-        {"a": [True, False, True], "b":[3.0, 4.0, 5.0], "c": [False, True, False]},
+        {"a": [True, False, True], "b":[3.0, 4.0, 5.0]},
         {"a": [np.nan, np.nan, np.nan], "b": [np.nan, np.nan, np.nan]},
-        {"a": [np.nan, 2.0, False], "b": [2, np.nan, 5.0], "c": [np.nan, np.nan, np.nan]},
+        {"a": [np.nan, 2.0, False], "b": [2, np.nan, 5.0]}, 
     ],
 )
-def test_update_for_dataframes(data, data2, join, overwrite, filter_func, errors):
+def test_update_for_dataframes(data, data2, join, overwrite, errors):
     pdf = pd.DataFrame(data)
     gdf = gd.DataFrame(data)
 

From 32730a0ab768c8ea501841e904361ac1c9ecd21a Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <kirui.sheilah@gmail.com>
Date: Thu, 17 Dec 2020 17:59:20 -0800
Subject: [PATCH 07/14] All tests passing. ready for review

---
 python/cudf/cudf/core/dataframe.py       |  8 ++---
 python/cudf/cudf/tests/test_dataframe.py | 37 ++++++++++++++++++++----
 2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 50f5d4d0902..b6abf67d8b9 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1516,13 +1516,13 @@ def update(self, other, join="left", overwrite=True, filter_func=None, errors="i
         other = other.reindex(self.columns, axis=1)
 
         for col in self.columns: 
-            this = df[col]
+            this = self[col]
             that = other[col]  
             
             if errors == "raise":
                 mask_this = that.notna()
                 mask_that = this.notna()
-                if any(mask_this & mask_that):
+                if ((mask_this & mask_that).any()):
                     raise ValueError("Data overlaps.")
 
             if overwrite:
@@ -1534,8 +1534,8 @@ def update(self, other, join="left", overwrite=True, filter_func=None, errors="i
             if mask.all():
                 continue
 
-            df.loc[mask, col] = this[mask]
-            df.loc[~mask, col] = that[~mask]
+            self.loc[mask, col] = this[mask]
+            self.loc[~mask, col] = that[~mask]
 
         
     def __add__(self, other):
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 02d674bc4a1..dafd5f856b5 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8178,9 +8178,10 @@ def test_agg_for_dataframe_with_string_columns(aggs):
         gdf.agg(aggs)
 
 
-@pytest.mark.parametrize("join",["left", "right"],)
+@pytest.mark.parametrize("join",["left"],)
 @pytest.mark.parametrize("overwrite",[True, False],)
-@pytest.mark.parametrize("errors",["raise", "ignore"],)
+@pytest.mark.parametrize("filter_func",[None],)
+@pytest.mark.parametrize("errors",["ignore"],)
 @pytest.mark.parametrize(
     "data",
     [
@@ -8197,17 +8198,41 @@ def test_agg_for_dataframe_with_string_columns(aggs):
         {"a": [7, 5, 8], "b": [2.0, 7.0, 9.0]},
         {"a": [True, False, True], "b":[3.0, 4.0, 5.0]},
         {"a": [np.nan, np.nan, np.nan], "b": [np.nan, np.nan, np.nan]},
-        {"a": [np.nan, 2.0, False], "b": [2, np.nan, 5.0]}, 
+        {"a": [np.nan, 2.0, np.nan], "b": [2, np.nan, 5.0]}, 
     ],
 )
-def test_update_for_dataframes(data, data2, join, overwrite, errors):
+def test_update_for_dataframes(data, data2, join, overwrite, filter_func, errors):
     pdf = pd.DataFrame(data)
     gdf = gd.DataFrame(data)
 
     other_pd = pd.DataFrame(data2)
     other_gd = gd.DataFrame(data2)
 
-    expect = pdf.update(other_pd, join, overwrite, errors)
-    got = gdf.update(other_gd, join, overwrite, errors)
+    expect = pdf.update(other_pd, join, overwrite, filter_func, errors)
+    got = gdf.update(other_gd, join, overwrite, filter_func, errors)
     
     assert_eq(expect, got)
+
+@pytest.mark.parametrize("join",["right"],)
+def test_update_for_right_join(join):
+    pdf = pd.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]})
+    gdf = gd.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]})
+
+    other_pd = pd.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]})
+    other_gd = gd.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]})
+
+    with pytest.raises(
+        NotImplementedError,
+        match="Only left join is supported"
+    ):
+        gdf.update(other_gd,join)
+
+@pytest.mark.parametrize("errors",["raise"],)
+def test_update_for_data_overlap(errors):
+    pdf = pd.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]})
+    gdf = gd.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]})
+
+    other_pd = pd.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]})
+    other_gd = gd.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]})
+
+    assert_exceptions_equal(lambda: pdf.update(other_pd,errors), lambda: gdf.update(other_gd,errors))
\ No newline at end of file

From 7e05ed59becd69fff6fe38e1e267c79afda2a9ae Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <kirui.sheilah@gmail.com>
Date: Tue, 5 Jan 2021 19:59:03 -0800
Subject: [PATCH 08/14] Addressed Prem's review comments

---
 python/cudf/cudf/core/dataframe.py | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index b6abf67d8b9..34776957edf 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1463,10 +1463,9 @@ def update(self, other, join="left", overwrite=True, filter_func=None, errors="i
 
         Parameters
         ----------
-
         other : DataFrame, or object coercible into a DataFrame
             Should have at least one matching index/column label with the 
-            original DataFrame.If a Series is passed, its name attribute must
+            original DataFrame. If a Series is passed, its name attribute must
             be set, and that will be used as the column name to align with the 
             original DataFrame.
 
@@ -1479,8 +1478,8 @@ def update(self, other, join="left", overwrite=True, filter_func=None, errors="i
             True: overwrite original DataFrame’s values with values from other.
             False: only update values that are NA in the original DataFrame.
 
-        filter_func : callable(1d-array) -> bool 1d-array, optional
-            Can choose to replace values other than NA. 
+        filter_func : None
+            filter_func is not supported yet
             Return True for values that should be updated.
 
         errors : {‘raise’, ‘ignore’}, default ‘ignore’
@@ -1504,7 +1503,7 @@ def update(self, other, join="left", overwrite=True, filter_func=None, errors="i
         # TODO: Support other joins
         if join != "left": 
             raise NotImplementedError("Only left join is supported")
-        if errors not in ["ignore", "raise"]:
+        if errors not in {"ignore", "raise"}:
             raise ValueError("The parameter errors must be either 'ignore' or 'raise'")
         if filter_func != None:
             raise NotImplementedError("filter_func is not supported yet")
@@ -1512,8 +1511,9 @@ def update(self, other, join="left", overwrite=True, filter_func=None, errors="i
         if not isinstance(other, DataFrame):
             other = DataFrame(other)
 
-        other = other.reindex(self.index, axis=0)
-        other = other.reindex(self.columns, axis=1)
+        if self.index != self.columns:
+            other = other.reindex(self.index, axis=0)
+            other = other.reindex(self.columns, axis=1)
 
         for col in self.columns: 
             this = self[col]
@@ -1534,9 +1534,8 @@ def update(self, other, join="left", overwrite=True, filter_func=None, errors="i
             if mask.all():
                 continue
 
-            self.loc[mask, col] = this[mask]
-            self.loc[~mask, col] = that[~mask]
-
+            self[col] = this.where(mask, that)
+            
         
     def __add__(self, other):
         return self._apply_op("__add__", other)

From b923e16cb7a70373dcb12c58b9633c5c552f8e0c Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <kirui.sheilah@gmail.com>
Date: Fri, 8 Jan 2021 12:30:00 -0800
Subject: [PATCH 09/14] Addressed review comments. Ready for review

---
 python/cudf/cudf/core/dataframe.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index ad4d8646500..8261a842e54 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1513,9 +1513,10 @@ def update(self, other, join="left", overwrite=True, filter_func=None, errors="i
         if not isinstance(other, DataFrame):
             other = DataFrame(other)
 
-        if self.index != self.columns:
-            other = other.reindex(self.index, axis=0)
+        if self.columns.any() != other.columns.any():
             other = other.reindex(self.columns, axis=1)
+        if self.index.any() != other.index.any():
+            other = other.reindex(self.index, axis=0)
 
         for col in self.columns: 
             this = self[col]

From 388b6f90b0343eaf46fd5d562176ebe157a95b56 Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <kirui.sheilah@gmail.com>
Date: Mon, 11 Jan 2021 14:18:09 -0800
Subject: [PATCH 10/14] addressed review comments

---
 python/cudf/cudf/core/dataframe.py       | 44 +++++++++-------
 python/cudf/cudf/tests/test_dataframe.py | 64 ++++++++++++++++--------
 2 files changed, 69 insertions(+), 39 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 8261a842e54..f502ed3c22c 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1456,8 +1456,14 @@ def add(self, other, axis="columns", level=None, fill_value=None):
 
         return self._apply_op("add", other, fill_value)
 
-
-    def update(self, other, join="left", overwrite=True, filter_func=None, errors="ignore"):
+    def update(
+        self,
+        other,
+        join="left",
+        overwrite=True,
+        filter_func=None,
+        errors="ignore",
+    ):
         """
         Modify a DataFrame in place using non-NA values from another DataFrame.
 
@@ -1466,13 +1472,13 @@ def update(self, other, join="left", overwrite=True, filter_func=None, errors="i
         Parameters
         ----------
         other : DataFrame, or object coercible into a DataFrame
-            Should have at least one matching index/column label with the 
+            Should have at least one matching index/column label with the
             original DataFrame. If a Series is passed, its name attribute must
-            be set, and that will be used as the column name to align with the 
+            be set, and that will be used as the column name to align with the
             original DataFrame.
 
         join : {‘left’}, default ‘left’
-            Only left join is implemented, keeping the index and 
+            Only left join is implemented, keeping the index and
             columns of the original object.
 
         overwrite : {True, False}, default True
@@ -1485,13 +1491,14 @@ def update(self, other, join="left", overwrite=True, filter_func=None, errors="i
             Return True for values that should be updated.
 
         errors : {‘raise’, ‘ignore’}, default ‘ignore’
-            If ‘raise’, will raise a ValueError if the DataFrame and other both contain non-NA data in the same place.
+            If ‘raise’, will raise a ValueError if the DataFrame and other
+            both contain non-NA data in the same place.
 
 
         Returns
         -------
         None : method directly changes calling object
-          
+
         Raises
         -------
         ValueError
@@ -1503,29 +1510,31 @@ def update(self, other, join="left", overwrite=True, filter_func=None, errors="i
 
         """
         # TODO: Support other joins
-        if join != "left": 
+        if join != "left":
             raise NotImplementedError("Only left join is supported")
         if errors not in {"ignore", "raise"}:
-            raise ValueError("The parameter errors must be either 'ignore' or 'raise'")
-        if filter_func != None:
+            raise ValueError(
+                "The parameter errors must be either 'ignore' or 'raise'"
+            )
+        if filter_func is not None:
             raise NotImplementedError("filter_func is not supported yet")
 
         if not isinstance(other, DataFrame):
             other = DataFrame(other)
 
-        if self.columns.any() != other.columns.any():
+        if not self.columns.equals(other.columns):
             other = other.reindex(self.columns, axis=1)
-        if self.index.any() != other.index.any():
+        if not self.index.equals(other.index):
             other = other.reindex(self.index, axis=0)
 
-        for col in self.columns: 
+        for col in self.columns:
             this = self[col]
-            that = other[col]  
-            
+            that = other[col]
+
             if errors == "raise":
                 mask_this = that.notna()
                 mask_that = this.notna()
-                if ((mask_this & mask_that).any()):
+                if (mask_this & mask_that).any():
                     raise ValueError("Data overlaps.")
 
             if overwrite:
@@ -1538,8 +1547,7 @@ def update(self, other, join="left", overwrite=True, filter_func=None, errors="i
                 continue
 
             self[col] = this.where(mask, that)
-            
-        
+
     def __add__(self, other):
         return self._apply_op("__add__", other)
 
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index f9036312bd6..b62dab59272 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8193,30 +8193,45 @@ def test_agg_for_dataframe_with_string_columns(aggs):
         gdf.agg(aggs)
 
 
-@pytest.mark.parametrize("join",["left"],)
-@pytest.mark.parametrize("overwrite",[True, False],)
-@pytest.mark.parametrize("filter_func",[None],)
-@pytest.mark.parametrize("errors",["ignore"],)
+@pytest.mark.parametrize(
+    "join", ["left"],
+)
+@pytest.mark.parametrize(
+    "overwrite", [True, False],
+)
+@pytest.mark.parametrize(
+    "filter_func", [None],
+)
+@pytest.mark.parametrize(
+    "errors", ["ignore"],
+)
 @pytest.mark.parametrize(
     "data",
     [
         {"a": [1, 2, 3], "b": [3, 4, 5]},
-        {"a": [1.0, 2.0, 3.0], "b": [3.0, 4.0, 5.0]},
-        {"a": [False, False, True], "b": [True, True, False]},
-        {"a": [2.0, np.nan, 4.0], "b": [np.nan, np.nan, np.nan]},
-        {"a": [np.nan, np.nan, np.nan], "b": [np.nan, np.nan, np.nan]},
+        {"e": [1.0, 2.0, 3.0], "d": [3.0, 4.0, 5.0]},
+        {"b": [4.0, 3.0, 1.0], "c": [2.0, 5.0, 8.0]},
+        {"c": [True, False, False], "d": [False, True, True]},
+        {"g": [2.0, np.nan, 4.0], "n": [np.nan, np.nan, np.nan]},
+        {"b": [np.nan, np.nan, 3.0], "c": [6.0, np.nan, np.nan]},
+        {"d": [np.nan, np.nan, np.nan], "e": [np.nan, np.nan, np.nan]},
+        {"b": [np.nan, np.nan, np.nan], "c": [np.nan, np.nan, np.nan]},
     ],
 )
 @pytest.mark.parametrize(
     "data2",
     [
+        {"b": [3, 5, 6], "e": [8, 2, 1]},
         {"a": [7, 5, 8], "b": [2.0, 7.0, 9.0]},
-        {"a": [True, False, True], "b":[3.0, 4.0, 5.0]},
+        {"c": [True, False, True], "d": [3.0, 4.0, 5.0]},
+        {"e": [False, False, True], "g": [True, True, False]},
         {"a": [np.nan, np.nan, np.nan], "b": [np.nan, np.nan, np.nan]},
-        {"a": [np.nan, 2.0, np.nan], "b": [2, np.nan, 5.0]}, 
+        {"b": [np.nan, 2.0, np.nan], "c": [2, np.nan, 5.0]},
     ],
 )
-def test_update_for_dataframes(data, data2, join, overwrite, filter_func, errors):
+def test_update_for_dataframes(
+    data, data2, join, overwrite, filter_func, errors
+):
     pdf = pd.DataFrame(data)
     gdf = gd.DataFrame(data)
 
@@ -8225,24 +8240,26 @@ def test_update_for_dataframes(data, data2, join, overwrite, filter_func, errors
 
     expect = pdf.update(other_pd, join, overwrite, filter_func, errors)
     got = gdf.update(other_gd, join, overwrite, filter_func, errors)
-    
+
     assert_eq(expect, got)
 
-@pytest.mark.parametrize("join",["right"],)
+
+@pytest.mark.parametrize(
+    "join", ["right"],
+)
 def test_update_for_right_join(join):
-    pdf = pd.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]})
     gdf = gd.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]})
-
-    other_pd = pd.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]})
     other_gd = gd.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]})
 
     with pytest.raises(
-        NotImplementedError,
-        match="Only left join is supported"
+        NotImplementedError, match="Only left join is supported"
     ):
-        gdf.update(other_gd,join)
+        gdf.update(other_gd, join)
 
-@pytest.mark.parametrize("errors",["raise"],)
+
+@pytest.mark.parametrize(
+    "errors", ["raise"],
+)
 def test_update_for_data_overlap(errors):
     pdf = pd.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]})
     gdf = gd.DataFrame({"a": [1, 2, 3], "b": [3.0, 4.0, 5.0]})
@@ -8250,4 +8267,9 @@ def test_update_for_data_overlap(errors):
     other_pd = pd.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]})
     other_gd = gd.DataFrame({"a": [1, np.nan, 3], "b": [np.nan, 2.0, 5.0]})
 
-    assert_exceptions_equal(lambda: pdf.update(other_pd,errors), lambda: gdf.update(other_gd,errors))
\ No newline at end of file
+    assert_exceptions_equal(
+        lfunc=pdf.update,
+        rfunc=gdf.update,
+        lfunc_args_and_kwargs=([other_pd, errors], {}),
+        rfunc_args_and_kwargs=([other_gd, errors], {}),
+    )

From c6d6d597ca5dfc3af728159b88e139d30efe19e7 Mon Sep 17 00:00:00 2001
From: Sheilah Kirui <kirui.sheilah@gmail.com>
Date: Tue, 12 Jan 2021 16:06:03 -0800
Subject: [PATCH 11/14] added test for unequal indices

---
 python/cudf/cudf/core/dataframe.py       |  2 +-
 python/cudf/cudf/tests/test_dataframe.py | 29 +++++++++++++++++++-----
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index f502ed3c22c..47eba33cd24 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1477,7 +1477,7 @@ def update(
             be set, and that will be used as the column name to align with the
             original DataFrame.
 
-        join : {‘left’}, default ‘left’
+        join : {`left`}, default `left`
             Only left join is implemented, keeping the index and
             columns of the original object.
 
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index b62dab59272..e3ac8520547 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8210,23 +8210,40 @@ def test_agg_for_dataframe_with_string_columns(aggs):
     [
         {"a": [1, 2, 3], "b": [3, 4, 5]},
         {"e": [1.0, 2.0, 3.0], "d": [3.0, 4.0, 5.0]},
-        {"b": [4.0, 3.0, 1.0], "c": [2.0, 5.0, 8.0]},
         {"c": [True, False, False], "d": [False, True, True]},
         {"g": [2.0, np.nan, 4.0], "n": [np.nan, np.nan, np.nan]},
-        {"b": [np.nan, np.nan, 3.0], "c": [6.0, np.nan, np.nan]},
         {"d": [np.nan, np.nan, np.nan], "e": [np.nan, np.nan, np.nan]},
-        {"b": [np.nan, np.nan, np.nan], "c": [np.nan, np.nan, np.nan]},
+        {"a": [1.0, 2, 3], "b": pd.Series([4.0, 8.0, 3.0], index=[1, 2, 3])},
+        {
+            "d": [1.0, 2.0, 3.0],
+            "c": pd.Series([np.nan, np.nan, np.nan], index=[1, 2, 3]),
+        },
+        {
+            "a": [False, True, False],
+            "b": pd.Series([1.0, 2.0, np.nan], index=[1, 2, 3]),
+        },
+        {
+            "a": [np.nan, np.nan, np.nan],
+            "e": pd.Series([np.nan, np.nan, np.nan], index=[1, 2, 3]),
+        },
     ],
 )
 @pytest.mark.parametrize(
     "data2",
     [
         {"b": [3, 5, 6], "e": [8, 2, 1]},
-        {"a": [7, 5, 8], "b": [2.0, 7.0, 9.0]},
         {"c": [True, False, True], "d": [3.0, 4.0, 5.0]},
         {"e": [False, False, True], "g": [True, True, False]},
-        {"a": [np.nan, np.nan, np.nan], "b": [np.nan, np.nan, np.nan]},
-        {"b": [np.nan, 2.0, np.nan], "c": [2, np.nan, 5.0]},
+        {"g": [np.nan, np.nan, np.nan], "c": [np.nan, np.nan, np.nan]},
+        {"a": [7, 5, 8], "b": pd.Series([2.0, 7.0, 9.0], index=[0, 1, 2])},
+        {
+            "b": [np.nan, 2.0, np.nan],
+            "c": pd.Series([2, np.nan, 5.0], index=[2, 3, 4]),
+        },
+        {
+            "a": [True, np.nan, True],
+            "d": pd.Series([False, True, np.nan], index=[0, 1, 3]),
+        },
     ],
 )
 def test_update_for_dataframes(

From d4a34a6395e0ac797f24fa0aab219ff1aee34b07 Mon Sep 17 00:00:00 2001
From: skirui-source <kirui.sheilah@gmail.com>
Date: Tue, 19 Jan 2021 10:29:03 -0800
Subject: [PATCH 12/14] Fixed changes in Docstrings

---
 python/cudf/cudf/core/dataframe.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 47eba33cd24..fd2f8be08f7 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1477,7 +1477,7 @@ def update(
             be set, and that will be used as the column name to align with the
             original DataFrame.
 
-        join : {`left`}, default `left`
+        join : {'left'}, default 'left'
             Only left join is implemented, keeping the index and
             columns of the original object.
 
@@ -1488,10 +1488,10 @@ def update(
 
         filter_func : None
             filter_func is not supported yet
-            Return True for values that should be updated.
+            Return True for values that should be updated.S
 
-        errors : {‘raise’, ‘ignore’}, default ‘ignore’
-            If ‘raise’, will raise a ValueError if the DataFrame and other
+        errors : {'raise', 'ignore'}, default 'ignore'
+            If 'raise', will raise a ValueError if the DataFrame and other
             both contain non-NA data in the same place.
 
 
@@ -1502,12 +1502,11 @@ def update(
         Raises
         -------
         ValueError
-            - When ``errors``= 'raise' and there’s overlapping non-NA data.
+            - When ``errors`` = 'raise' and there's overlapping non-NA data.
             - When ``errors`` is not either 'ignore' or 'raise'
 
         NotImplementedError
-            - If ``join`` != ‘left’
-
+            - If ``join`` != 'left'
         """
         # TODO: Support other joins
         if join != "left":

From cf2648b041ad5a416dd1be85f25f9f78c766dd2c Mon Sep 17 00:00:00 2001
From: skirui-source <kirui.sheilah@gmail.com>
Date: Wed, 20 Jan 2021 23:33:20 -0800
Subject: [PATCH 13/14] fixed typo

---
 python/cudf/cudf/core/dataframe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index c26fb3c5d4a..534e6adb5b3 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1483,7 +1483,7 @@ def update(
 
         overwrite : {True, False}, default True
             How to handle non-NA values for overlapping keys:
-            True: overwrite original DataFrame’s values with values from other.
+            True: overwrite original DataFrame's values with values from other.
             False: only update values that are NA in the original DataFrame.
 
         filter_func : None

From b5ad50136a01324b89e9387a00055299bafdcfb5 Mon Sep 17 00:00:00 2001
From: skirui-source <kirui.sheilah@gmail.com>
Date: Thu, 21 Jan 2021 10:13:33 -0800
Subject: [PATCH 14/14] Recovered two deleted tests

---
 python/cudf/cudf/tests/test_dataframe.py | 58 ++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 4f91ac86003..f41714ec1ad 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -8266,3 +8266,61 @@ def test_update_for_data_overlap(errors):
         lfunc_args_and_kwargs=([other_pd, errors], {}),
         rfunc_args_and_kwargs=([other_gd, errors], {}),
     )
+
+
+@pytest.mark.parametrize(
+    "gdf",
+    [
+        gd.DataFrame({"a": [[1], [2], [3]]}),
+        gd.DataFrame(
+            {
+                "left-a": [0, 1, 2],
+                "a": [[1], None, [3]],
+                "right-a": ["abc", "def", "ghi"],
+            }
+        ),
+        gd.DataFrame(
+            {
+                "left-a": [[], None, None],
+                "a": [[1], None, [3]],
+                "right-a": ["abc", "def", "ghi"],
+            }
+        ),
+    ],
+)
+def test_dataframe_roundtrip_arrow_list_dtype(gdf):
+    table = gdf.to_arrow()
+    expected = gd.DataFrame.from_arrow(table)
+
+    assert_eq(gdf, expected)
+
+
+@pytest.mark.parametrize(
+    "gdf",
+    [
+        gd.DataFrame({"a": [{"one": 3, "two": 4, "three": 10}]}),
+        gd.DataFrame(
+            {
+                "left-a": [0, 1, 2],
+                "a": [{"x": 0.23, "y": 43}, None, {"x": 23.9, "y": 4.3}],
+                "right-a": ["abc", "def", "ghi"],
+            }
+        ),
+        gd.DataFrame(
+            {
+                "left-a": [{"a": 1}, None, None],
+                "a": [
+                    {"one": 324, "two": 23432, "three": 324},
+                    None,
+                    {"one": 3.24, "two": 1, "three": 324},
+                ],
+                "right-a": ["abc", "def", "ghi"],
+            }
+        ),
+    ],
+)
+def test_dataframe_roundtrip_arrow_struct_dtype(gdf):
+    table = gdf.to_arrow()
+    expected = gd.DataFrame.from_arrow(table)
+
+    assert_eq(gdf, expected)