Skip to content

Commit

Permalink
Closes #1363: ak.Series with only values and more robust typechecking
Browse files Browse the repository at this point in the history
This PR (closes #1363):

- Deprecates the `ar_tuple` parameter inside of the `__init__` method.
- Adjusts the tests to use the other 2 parameters `(index, data)` when instantiating.
  • Loading branch information
jeichert60 committed May 24, 2022
1 parent aa51dd8 commit 2d14d7f
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 46 deletions.
54 changes: 36 additions & 18 deletions arkouda/series.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typeguard import typechecked
from typing import Union
from arkouda.pdarrayclass import pdarray, argmaxk, attach_pdarray
from arkouda.pdarraycreation import arange, array, zeros
from arkouda.pdarraysetops import argsort, concatenate, in1d
from arkouda.index import Index
from arkouda.groupbyclass import GroupBy
from arkouda.groupbyclass import GroupBy, groupable_element_type
from arkouda.dtypes import int64, float64
from arkouda.numeric import value_counts, cast as akcast
from arkouda.util import get_callback
Expand All @@ -17,6 +19,8 @@
import numpy as np # type: ignore
from warnings import warn

import warnings

__all__ = [
"Series",
]
Expand Down Expand Up @@ -75,28 +79,36 @@ class Series:
Parameters
----------
ar_tuple : 2-tuple of arkouda arrays with the first being the grouping key(s) and the
second being the value. The grouping key(s) will be the index of the series.
ar_tuple : tuple, list.
2-tuple of arkouda arrays with the first being the grouping key(s) and the
second being the value. The grouping key(s) will be the index of the series.
This will be deprecated in future releases.
index : pdarray
an array of indexes associated with the data array.
If not provided (None), it defaults to a range of ints whose size matches the size of the data.
data : pdarray, Strings, Categorical
a 1D array. Must be provided (not None) if ar_tuple is not provided.
"""

def __init__(self, ar_tuple=None, data=None, index=None):
@typechecked
def __init__(self, ar_tuple: Union[tuple, list] = None, index: pdarray = None,
data: groupable_element_type = None):
if ar_tuple is not None:
self.index = Index.factory(ar_tuple[0])
self.values = ar_tuple[1]
warnings.warn('ar_tuple will be deprecated in future releases', DeprecationWarning)
elif data is None:
raise TypeError("ar_tuple and data cannot both be null")

else:
if not isinstance(data, (pdarray, Strings, Categorical)):
data = array(data)
self.values = data
self.index = Index.factory(index) if index is not None else Index(arange(self.values.size))

if index is None:
index = arange(data.size)
self.index = Index.factory(index)
if self.index.size != self.values.size:
raise ValueError("Index and data must have same length")
raise ValueError("Index size does not match data size")
self.size = self.index.size

def __len__(self):
Expand Down Expand Up @@ -196,7 +208,7 @@ def locate(self, key):
else:
# scalar value
idx = self.index == key
return Series((self.index[idx], self.values[idx]))
return Series(index=self.index.index[idx], data=self.values[idx])

@classmethod
def _make_binop(cls, operator):
Expand Down Expand Up @@ -233,7 +245,12 @@ def add(self, b):
index = self.index.concat(b.index).index

values = concatenate([self.values, b.values], ordered=False)
return Series(GroupBy(index).sum(values))

vals = GroupBy(index).sum(values)
idx = vals[0]
val = vals[1]
return Series(data=val, index=idx)
# return Series(GroupBy(index).sum(values))

def topn(self, n=10):
""" Return the top values of the series
Expand All @@ -252,7 +269,7 @@ def topn(self, n=10):
idx = argmaxk(v, n)
idx = idx[-1:-n - 1:-1]

return Series((k[idx], v[idx]))
return Series(index=k.index[idx], data=v[idx])

def sort_index(self, ascending=True):
""" Sort the series by its index
Expand All @@ -263,7 +280,7 @@ def sort_index(self, ascending=True):
"""

idx = self.index.argsort(ascending=ascending)
return Series((self.index[idx], self.values[idx]))
return Series(index=self.index.index[idx], data=self.values[idx])

def sort_values(self, ascending=True):
""" Sort the series numerically
Expand All @@ -282,19 +299,19 @@ def sort_values(self, ascending=True):
idx = argsort(self.values)[arange(self.values.size - 1, -1, -1)]
else:
idx = argsort(self.values)
return Series((self.index[idx], self.values[idx]))
return Series(index=self.index.index[idx], data=self.values[idx])

def tail(self, n=10):
    """Return the last n values of the series.

    Parameters
    ----------
    n : int
        Number of trailing entries to keep (default 10).

    Returns
    -------
    Series
        A new Series built from the last ``n`` index entries and the
        matching last ``n`` values.
    """
    # NOTE(review): n == 0 produces slice(0, None), which presumably selects
    # the whole series rather than an empty one -- confirm intended behavior.
    idx_series = self.index[-n:]
    # The sliced index entries are the index and the sliced values are the
    # data, matching head(), sort_index() and sort_values(). Passing them
    # the other way round swaps the two whenever index and values differ.
    return Series(index=idx_series.index, data=self.values[-n:])

def head(self, n=10):
"""Return the first n values of the series"""

idx_series = (self.index[0:n])
return Series((idx_series, self.values[0:n]))
return Series(index=idx_series.index, data=self.values[0:n])

def to_pandas(self):
"""Convert the series to a local PANDAS series"""
Expand All @@ -316,7 +333,8 @@ def value_counts(self, sort=True):
"""

dtype = get_callback(self.values)
s = Series(value_counts(self.values))
vc = value_counts(self.values)
s = Series(index=vc[0], data=vc[1])
if sort:
s = s.sort_values(ascending=False)
s.index.set_dtype(dtype)
Expand Down Expand Up @@ -477,7 +495,7 @@ def concat(arrays, axis=0, index_labels=None, value_labels=None):
for other in arrays[1:]:
idx = idx.concat(other.index)
v = concatenate([v, other.values], ordered=True)
retval = Series((idx, v))
retval = Series(index=idx.index, data=v)

return retval

Expand Down Expand Up @@ -513,7 +531,7 @@ def pdconcat(arrays, axis=0, labels=None):

cols = []
for col in arrays:
cols.append(pd.Series(col.values.to_ndarray(), index=idx))
cols.append(pd.Series(data=col.values.to_ndarray(), index=idx))
retval = pd.concat(cols, axis=1)
if labels is not None:
retval.columns = labels
Expand Down
87 changes: 59 additions & 28 deletions tests/series_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,39 @@
class SeriesTest(ArkoudaTest):

def test_series_creation(self):
ar_tuple = (ak.arange(3), ak.array(['A', 'B', 'C']))
s = ak.Series(ar_tuple=ar_tuple)
# Both data and index are supplied
v = ak.array(['A', 'B', 'C'])
i = ak.arange(3)
s = ak.Series(data=v, index=i)

self.assertIsInstance(s, ak.Series)
self.assertIsInstance(s.index, ak.Index)

# Just data is supplied
s = ak.Series(data=v)

self.assertIsInstance(s, ak.Series)
self.assertIsInstance(s.index, ak.Index)

# Just index is supplied
with self.assertRaises(TypeError):
s = ak.Series(index=i)

# No arguments are supplied
with self.assertRaises(TypeError):
s = ak.Series()

with self.assertRaises(ValueError):
s = ak.Series(data=ak.arange(3), index=ak.arange(6))

def test_lookup(self):
ar_tuple = (ak.arange(3), ak.array(['A', 'B', 'C']))
s = ak.Series(ar_tuple=ar_tuple)
with self.assertWarns(DeprecationWarning):
s = ak.Series(ar_tuple=ar_tuple)

def test_lookup(self):
v = ak.array(['A', 'B', 'C'])
i = ak.arange(3)
s = ak.Series(data=v, index=i)

l = s.locate(1)
self.assertIsInstance(l, ak.Series)
Expand All @@ -36,8 +54,9 @@ def test_lookup(self):
self.assertEqual(l.values[1], 'C')

def test_shape(self):
ar_tuple = (ak.arange(3), ak.array(['A', 'B', 'C']))
s = ak.Series(ar_tuple=ar_tuple)
v = ak.array(['A', 'B', 'C'])
i = ak.arange(3)
s = ak.Series(data=v, index=i)

l, = s.shape
self.assertEqual(l, 3)
Expand All @@ -46,9 +65,11 @@ def test_add(self):
ar_tuple = (ak.arange(3), ak.arange(3))
ar_tuple_add = (ak.arange(3, 6, 1), ak.arange(3, 6, 1))

s = ak.Series(ar_tuple=ar_tuple)
i = ak.arange(3)
v = ak.arange(3, 6, 1)
s = ak.Series(data=i, index=i)

s_add = ak.Series(ar_tuple=ar_tuple_add)
s_add = ak.Series(data=v, index=v)

added = s.add(s_add)

Expand All @@ -59,32 +80,36 @@ def test_add(self):
self.assertIn(i, val_list)

def test_topn(self):
ar_tuple = (ak.arange(3), ak.arange(3))
s = ak.Series(ar_tuple=ar_tuple)
v = ak.arange(3)
i = ak.arange(3)
s = ak.Series(data=v, index=i)

top = s.topn(2)
self.assertListEqual(top.index.to_pandas().tolist(), [2, 1])
self.assertListEqual(top.values.to_ndarray().tolist(), [2, 1])

def test_sort_idx(self):
ar_tuple = (ak.array([3, 1, 4, 0, 2]), ak.arange(5))
s = ak.Series(ar_tuple=ar_tuple)
v = ak.arange(5)
i = ak.array([3, 1, 4, 0, 2])
s = ak.Series(data=v, index=i)

sorted = s.sort_index()
self.assertListEqual(sorted.index.to_pandas().tolist(), [i for i in range(5)])
self.assertListEqual(sorted.values.to_ndarray().tolist(), [3, 1, 4, 0, 2])

def test_sort_value(self):
ar_tuple = (ak.arange(5), ak.array([3, 1, 4, 0, 2]))
s = ak.Series(ar_tuple=ar_tuple)
v = ak.array([3, 1, 4, 0, 2])
i = ak.arange(5)
s = ak.Series(data=v, index=i)

sorted = s.sort_values()
self.assertListEqual(sorted.index.to_pandas().tolist(), [3, 1, 4, 0, 2])
self.assertListEqual(sorted.values.to_ndarray().tolist(), [i for i in range(5)])

def test_head_tail(self):
ar_tuple = (ak.arange(5), ak.arange(5))
s = ak.Series(ar_tuple=ar_tuple)
v = ak.arange(5)
i = ak.arange(5)
s = ak.Series(data=v, index=i)

head = s.head(2)
self.assertListEqual(head.index.to_pandas().tolist(), [0, 1])
Expand All @@ -95,8 +120,9 @@ def test_head_tail(self):
self.assertListEqual(tail.values.to_ndarray().tolist(), [2, 3, 4])

def test_value_counts(self):
ar_tuple = (ak.arange(5), ak.array([0, 0, 1, 2, 2]))
s = ak.Series(ar_tuple=ar_tuple)
v = ak.array([0, 0, 1, 2, 2])
i = ak.arange(5)
s = ak.Series(data=v, index=i)

c = s.value_counts()
self.assertListEqual(c.index.to_pandas().tolist(), [0, 2, 1])
Expand All @@ -107,11 +133,13 @@ def test_value_counts(self):
self.assertListEqual(c.values.to_ndarray().tolist(), [2, 2, 1])

def test_concat(self):
ar_tuple = (ak.arange(5), ak.arange(5))
s = ak.Series(ar_tuple=ar_tuple)
v = ak.arange(5)
i = ak.arange(5)
s = ak.Series(data=v, index=i)

ar_tuple_2 = (ak.arange(5, 11, 1), ak.arange(5, 11, 1))
s2 = ak.Series(ar_tuple_2)
v = ak.arange(5, 11, 1)
i = ak.arange(5, 11, 1)
s2 = ak.Series(data=v, index=i)

c = ak.Series.concat([s, s2])
self.assertListEqual(c.index.to_pandas().tolist(), [i for i in range(11)])
Expand All @@ -125,19 +153,22 @@ def test_concat(self):
self.assertTrue(((ref_df == df.to_pandas()).all()).all())

def test_pdconcat(self):
ar_tuple = (ak.arange(5), ak.arange(5))
s = ak.Series(ar_tuple=ar_tuple)
v = ak.arange(5)
i = ak.arange(5)
s = ak.Series(data=v, index=i)

ar_tuple_2 = (ak.arange(5, 11, 1), ak.arange(5, 11, 1))
s2 = ak.Series(ar_tuple_2)
v = ak.arange(5, 11, 1)
i = ak.arange(5, 11, 1)
s2 = ak.Series(data=v, index=i)

c = ak.Series.pdconcat([s, s2])
self.assertIsInstance(c, pd.Series)
self.assertListEqual(c.index.tolist(), [i for i in range(11)])
self.assertListEqual(c.values.tolist(), [i for i in range(11)])

ar_tuple_2 = (ak.arange(5, 10, 1), ak.arange(5, 10, 1))
s2 = ak.Series(ar_tuple_2)
v = ak.arange(5, 10, 1)
i = ak.arange(5, 10, 1)
s2 = ak.Series(data=v, index=i)

df = ak.Series.pdconcat([s, s2], axis=1)
self.assertIsInstance(df, pd.DataFrame)
Expand Down

0 comments on commit 2d14d7f

Please sign in to comment.