From 14d98cc479af4ea456021fdeb8642502f83e5434 Mon Sep 17 00:00:00 2001 From: Ethan-DeBandi99 <16845933+Ethan-DeBandi99@users.noreply.github.com> Date: Thu, 21 Jul 2022 14:16:49 -0400 Subject: [PATCH] Closes #1600 - `SegArray.to_ndarray()` returns `list` (#1612) * Updated `SegArray.to_ndarray` to return a NumPy `ndarray` instead of a `list`. Updated `DataFrame.to_pandas` to convert SegArray columns to a list, because columns in a pandas DataFrame need to be 1-D. * Added `dtype=object` to the `np.array` call to indicate that we want to keep the format we are providing. --- arkouda/dataframe.py | 20 +++++++++++--------- arkouda/segarray.py | 6 ++++-- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/arkouda/dataframe.py b/arkouda/dataframe.py index 0c5e3a1d7b..89f721b7d8 100644 --- a/arkouda/dataframe.py +++ b/arkouda/dataframe.py @@ -510,9 +510,9 @@ def __str__(self): # Get units that make the most sense. if self._bytes < 1024: mem = self.memory_usage(unit="B") - elif self._bytes < 1024 ** 2: + elif self._bytes < 1024**2: mem = self.memory_usage(unit="KB") - elif self._bytes < 1024 ** 3: + elif self._bytes < 1024**3: mem = self.memory_usage(unit="MB") else: mem = self.memory_usage(unit="GB") @@ -899,9 +899,9 @@ def info(self): # Get units that make the most sense. 
if self._bytes < 1024: mem = self.memory_usage(unit="B") - elif self._bytes < 1024 ** 2: + elif self._bytes < 1024**2: mem = self.memory_usage(unit="KB") - elif self._bytes < 1024 ** 3: + elif self._bytes < 1024**3: mem = self.memory_usage(unit="MB") else: mem = self.memory_usage(unit="GB") @@ -980,9 +980,7 @@ def _rename_column( return None @typechecked - def _rename_index( - self, mapper: Union[Callable, Dict], inplace: bool = False - ) -> Optional[DataFrame]: + def _rename_index(self, mapper: Union[Callable, Dict], inplace: bool = False) -> Optional[DataFrame]: """ Rename indexes within the dataframe @@ -1397,12 +1395,16 @@ def to_pandas(self, datalimit=maxTransferBytes, retain_index=False): warn(msg, UserWarning) return None - # Proceed with conversion if possible, ignore index column + # Proceed with conversion if possible pandas_data = {} for key in self._columns: val = self[key] try: - pandas_data[key] = val.to_ndarray() + # in order for proper pandas functionality, SegArrays must be seen as 1d + # and therefore need to be converted to list + pandas_data[key] = ( + val.to_ndarray() if not isinstance(val, SegArray) else val.to_ndarray().tolist() + ) except TypeError: raise IndexError("Bad index type or format.") diff --git a/arkouda/segarray.py b/arkouda/segarray.py index a0d198dd38..cb4b669aff 100644 --- a/arkouda/segarray.py +++ b/arkouda/segarray.py @@ -2,6 +2,8 @@ from warnings import warn +import numpy as np # type: ignore + from arkouda.dtypes import bool as akbool from arkouda.dtypes import int64 as akint64 from arkouda.dtypes import isSupportedInt, str_ @@ -680,7 +682,7 @@ def to_ndarray(self): -------- >>> segarr = ak.SegArray(ak.array([0, 4, 7]), ak.arange(12)) >>> segarr.to_ndarray() - array([[1, 2, 3, 4], [5, 6, 7], [8, 9, 10, 11, 12]]) + array([array([0, 1, 2, 3]), array([4, 5, 6]), array([7, 8, 9, 10, 11])], dtype=object) >>> type(segarr.to_ndarray()) numpy.ndarray """ @@ -689,7 +691,7 @@ def to_ndarray(self): arr = [ndvals[start:end] for start, end in 
zip(ndsegs, ndsegs[1:])] if self.size > 0: arr.append(ndvals[ndsegs[-1] :]) - return arr + return np.array(arr, dtype=object) def sum(self, x=None): if x is None: