-
Notifications
You must be signed in to change notification settings - Fork 915
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement cudf.MultiIndex.from_arrays
#14740
Changes from all commits
019029e
56c6784
3848647
66e854b
a3aeeb9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ | |
from cudf.core._compat import PANDAS_GE_150 | ||
from cudf.core.frame import Frame | ||
from cudf.core.index import BaseIndex, _lexsorted_equal_range, as_index | ||
from cudf.utils.dtypes import is_column_like | ||
from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate | ||
from cudf.utils.utils import NotIterable, _external_only_api, _is_same_name | ||
|
||
|
@@ -1226,6 +1227,7 @@ def from_tuples(cls, tuples, names=None): | |
|
||
See Also | ||
-------- | ||
MultiIndex.from_arrays : Convert list of arrays to MultiIndex. | ||
MultiIndex.from_product : Make a MultiIndex from cartesian product | ||
of iterables. | ||
MultiIndex.from_frame : Make a MultiIndex from a DataFrame. | ||
|
@@ -1335,6 +1337,7 @@ def from_frame(cls, df, names=None): | |
|
||
See Also | ||
-------- | ||
MultiIndex.from_arrays : Convert list of arrays to MultiIndex. | ||
MultiIndex.from_tuples : Convert list of tuples to MultiIndex. | ||
MultiIndex.from_product : Make a MultiIndex from cartesian product | ||
of iterables. | ||
|
@@ -1429,6 +1432,66 @@ def from_product(cls, arrays, names=None): | |
pdi = pd.MultiIndex.from_product(arrays, names=names) | ||
return cls.from_pandas(pdi) | ||
|
||
@classmethod | ||
@_cudf_nvtx_annotate | ||
def from_arrays( | ||
cls, | ||
arrays, | ||
sortorder=None, | ||
names=None, | ||
) -> MultiIndex: | ||
""" | ||
Convert arrays to MultiIndex. | ||
|
||
Parameters | ||
---------- | ||
arrays : list / sequence of array-likes | ||
Each array-like gives one level's value for each data point. | ||
len(arrays) is the number of levels. | ||
sortorder : optional int | ||
Not yet supported | ||
names : list / sequence of str, optional | ||
Names for the levels in the index. | ||
|
||
Returns | ||
------- | ||
MultiIndex | ||
|
||
See Also | ||
-------- | ||
MultiIndex.from_tuples : Convert list of tuples to MultiIndex. | ||
MultiIndex.from_product : Make a MultiIndex from cartesian product | ||
of iterables. | ||
MultiIndex.from_frame : Make a MultiIndex from a DataFrame. | ||
|
||
Examples | ||
-------- | ||
>>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] | ||
>>> cudf.MultiIndex.from_arrays(arrays, names=('number', 'color')) | ||
MultiIndex([(1, 'red'), | ||
(1, 'blue'), | ||
(2, 'red'), | ||
(2, 'blue')], | ||
names=['number', 'color']) | ||
""" | ||
# Imported here due to circular import | ||
from cudf.core.algorithms import factorize | ||
|
||
error_msg = "Input must be a list / sequence of array-likes." | ||
if not is_list_like(arrays): | ||
raise TypeError(error_msg) | ||
codes = [] | ||
levels = [] | ||
for array in arrays: | ||
if not (is_list_like(array) or is_column_like(array)): | ||
raise TypeError(error_msg) | ||
code, level = factorize(array, sort=True) | ||
codes.append(code) | ||
levels.append(level) | ||
Comment on lines +1485 to +1490
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

As a functional exercise, this can be rewritten as:

    code_levels = map(functools.partial(factorize, sort=True), array)
    codes, levels = [x[0] for x in code_levels], [x[1] for x in code_levels]

Error checking can also be functional:

    if not all(is_list_like(arr) or is_column_like(arr) for arr in arrays):
        raise TypeError(error_msg)

There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

Ya, my idea was just to do the validation + factorization in the same loop. |
||
return cls( | ||
codes=codes, levels=levels, sortorder=sortorder, names=names | ||
) | ||
|
||
@_cudf_nvtx_annotate | ||
def _poplevels(self, level): | ||
""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Missing `sortorder` docstring in parameters.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch. Added