Skip to content

Commit

Permalink
Closes Bears-R-Us#3510: Series.to_pandas to handle categoricals
Browse files Browse the repository at this point in the history
  • Loading branch information
ajpotts committed Jul 18, 2024
1 parent 9b376bf commit aa288c2
Showing 1 changed file with 63 additions and 39 deletions.
102 changes: 63 additions & 39 deletions arkouda/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from arkouda.index import Index, MultiIndex
from arkouda.numeric import cast as akcast
from arkouda.numeric import isnan, value_counts
from arkouda.segarray import SegArray
from arkouda.pdarrayclass import (
RegistrationError,
any,
Expand All @@ -27,6 +26,7 @@
)
from arkouda.pdarraycreation import arange, array, full, zeros
from arkouda.pdarraysetops import argsort, concatenate, in1d, indexof1d
from arkouda.segarray import SegArray
from arkouda.strings import Strings
from arkouda.util import convert_if_categorical, get_callback, is_float

Expand Down Expand Up @@ -133,11 +133,16 @@ class Series:
@typechecked
def __init__(
self,
data: Union[Tuple, List, groupable_element_type, Series, SegArray],
data: Union[Tuple, List, groupable_element_type, Series, SegArray, pd.Series, pd.Categorical],
name=None,
index: Optional[Union[pdarray, Strings, Tuple, List, Index]] = None,
):

if isinstance(data, pd.Categorical):
values = Categorical(data)

self.registered_name: Optional[str] = None

if index is None and isinstance(data, (tuple, list)) and len(data) == 2:
# handles the previous `ar_tuple` case
if not isinstance(data[0], (pdarray, Index, Strings, Categorical, list, tuple)):
Expand All @@ -146,6 +151,13 @@ def __init__(
raise TypeError("values must be a pdarray, Strings, SegArray, or Categorical")
self.values = data[1] if not isinstance(data[1], Series) else data[1].values
self.index = Index.factory(index) if index else Index.factory(data[0])
elif isinstance(data, pd.Series):
if isinstance(data.values, pd.Categorical):
self.values = Categorical(data.values)
else:
self.values = array(data.values)
self.index = Index(data.index)
self.name = data.name
elif isinstance(data, tuple) and len(data) != 2:
raise TypeError("Series initialization requries a tuple of (index, values)")
else:
Expand All @@ -162,7 +174,10 @@ def __init__(
raise ValueError(
"Index size does not match data size: {} != {}".format(self.index.size, self.values.size)
)
self.name = name
if name is None and isinstance(data, (Series, pd.Series)):
self.name = data.name
else:
self.name = name
self.size = self.index.size

def __len__(self):
Expand Down Expand Up @@ -737,16 +752,21 @@ def to_pandas(self) -> pd.Series:
import copy

idx = self.index.to_pandas()
val = convert_if_categorical(self.values)
# pandas errors when ndarray formatted like a segarray is
# passed into Series but works when it's just a list of lists
vals_on_client = val.to_list() if isinstance(val, SegArray) else val.to_ndarray()

if isinstance(self.values, Categorical):
val = self.values.to_pandas()
elif isinstance(self.values, SegArray):
# pandas errors when ndarray formatted like a segarray is
# passed into Series but works when it's just a list of lists
val = self.values.to_list()
else:
val = self.values.to_ndarray()

if isinstance(self.name, str):
name = copy.copy(self.name)
return pd.Series(vals_on_client, index=idx, name=name)
return pd.Series(val, index=idx, name=name)
else:
return pd.Series(vals_on_client, index=idx)
return pd.Series(val, index=idx)

def to_markdown(self, mode="wt", index=True, tablefmt="grid", storage_options=None, **kwargs):
r"""
Expand Down Expand Up @@ -917,46 +937,50 @@ def register(self, user_defined_name: str):
"objType": self.objType,
"num_idxs": 1,
"idx_names": [
(
json.dumps(
{
"codes": self.index.values.codes.name,
"categories": self.index.values.categories.name,
"NA_codes": self.index.values._akNAcode.name,
**(
{"permutation": self.index.values.permutation.name}
if self.index.values.permutation is not None
else {}
),
**(
{"segments": self.index.values.segments.name}
if self.index.values.segments is not None
else {}
),
}
)
if isinstance(self.index.values, Categorical)
else self.index.values.name
)
],
"idx_types": [self.index.values.objType],
"values": (
json.dumps(
{
"codes": self.index.values.codes.name,
"categories": self.index.values.categories.name,
"NA_codes": self.index.values._akNAcode.name,
"codes": self.values.codes.name,
"categories": self.values.categories.name,
"NA_codes": self.values._akNAcode.name,
**(
{"permutation": self.index.values.permutation.name}
if self.index.values.permutation is not None
{"permutation": self.values.permutation.name}
if self.values.permutation is not None
else {}
),
**(
{"segments": self.index.values.segments.name}
if self.index.values.segments is not None
{"segments": self.values.segments.name}
if self.values.segments is not None
else {}
),
}
)
if isinstance(self.index.values, Categorical)
else self.index.values.name
],
"idx_types": [self.index.values.objType],
"values": json.dumps(
{
"codes": self.values.codes.name,
"categories": self.values.categories.name,
"NA_codes": self.values._akNAcode.name,
**(
{"permutation": self.values.permutation.name}
if self.values.permutation is not None
else {}
),
**(
{"segments": self.values.segments.name}
if self.values.segments is not None
else {}
),
}
)
if isinstance(self.values, Categorical)
else self.values.name,
if isinstance(self.values, Categorical)
else self.values.name
),
"val_type": self.values.objType,
},
)
Expand Down

0 comments on commit aa288c2

Please sign in to comment.