Closes #1600 - SegArray.to_ndarray() returns list (#1612)

* Updated SegArray.to_ndarray to return a NumPy ndarray instead of a list. Updated DataFrame.to_pandas to convert SegArray columns to lists, because columns in a pandas DataFrame need to be 1-D.
* Added dtype=object when building the ndarray to indicate that the ragged (array-of-arrays) format we are providing is intentional.
Bears-R-Us · Jul 21, 2022 · 14d98cc · 14d98cc
1 parent bf3ed39
commit 14d98cc
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 11 deletions.
diff --git a/arkouda/dataframe.py b/arkouda/dataframe.py
@@ -510,9 +510,9 @@ def __str__(self):
         # Get units that make the most sense.
         if self._bytes < 1024:
             mem = self.memory_usage(unit="B")
-        elif self._bytes < 1024 ** 2:
+        elif self._bytes < 1024**2:
             mem = self.memory_usage(unit="KB")
-        elif self._bytes < 1024 ** 3:
+        elif self._bytes < 1024**3:
             mem = self.memory_usage(unit="MB")
         else:
             mem = self.memory_usage(unit="GB")
@@ -899,9 +899,9 @@ def info(self):
         # Get units that make the most sense.
         if self._bytes < 1024:
             mem = self.memory_usage(unit="B")
-        elif self._bytes < 1024 ** 2:
+        elif self._bytes < 1024**2:
             mem = self.memory_usage(unit="KB")
-        elif self._bytes < 1024 ** 3:
+        elif self._bytes < 1024**3:
             mem = self.memory_usage(unit="MB")
         else:
             mem = self.memory_usage(unit="GB")
@@ -980,9 +980,7 @@ def _rename_column(
         return None
 
     @typechecked
-    def _rename_index(
-        self, mapper: Union[Callable, Dict], inplace: bool = False
-    ) -> Optional[DataFrame]:
+    def _rename_index(self, mapper: Union[Callable, Dict], inplace: bool = False) -> Optional[DataFrame]:
         """
         Rename indexes within the dataframe
 
@@ -1397,12 +1395,16 @@ def to_pandas(self, datalimit=maxTransferBytes, retain_index=False):
             warn(msg, UserWarning)
             return None
 
-        # Proceed with conversion if possible, ignore index column
+        # Proceed with conversion if possible
         pandas_data = {}
         for key in self._columns:
             val = self[key]
             try:
-                pandas_data[key] = val.to_ndarray()
+                # in order for proper pandas functionality, SegArrays must be seen as 1d
+                # and therefore need to be converted to list
+                pandas_data[key] = (
+                    val.to_ndarray() if not isinstance(val, SegArray) else val.to_ndarray().tolist()
+                )
             except TypeError:
                 raise IndexError("Bad index type or format.")
 

diff --git a/arkouda/segarray.py b/arkouda/segarray.py
@@ -2,6 +2,8 @@
 
 from warnings import warn
 
+import numpy as np  # type: ignore
+
 from arkouda.dtypes import bool as akbool
 from arkouda.dtypes import int64 as akint64
 from arkouda.dtypes import isSupportedInt, str_
@@ -680,7 +682,7 @@ def to_ndarray(self):
         --------
         >>> segarr = ak.SegArray(ak.array([0, 4, 7]), ak.arange(12))
         >>> segarr.to_ndarray()
-        array([[1, 2, 3, 4], [5, 6, 7], [8, 9, 10, 11, 12]])
+        array([array([1, 2, 3, 4]), array([5, 6, 7]), array([8, 9, 10, 11, 12])])
         >>> type(segarr.to_ndarray())
         numpy.ndarray
         """
@@ -689,7 +691,7 @@ def to_ndarray(self):
         arr = [ndvals[start:end] for start, end in zip(ndsegs, ndsegs[1:])]
         if self.size > 0:
             arr.append(ndvals[ndsegs[-1] :])
-        return arr
+        return np.array(arr, dtype=object)
 
     def sum(self, x=None):
         if x is None: