Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closes #1337 - Adding support to generic attach for categorical and segarray #1342

Merged
merged 3 commits into from
May 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 56 additions & 1 deletion arkouda/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typeguard import typechecked
from arkouda.client import generic_msg
from arkouda.strings import Strings
from arkouda.pdarrayclass import pdarray, RegistrationError, unregister_pdarray_by_name
from arkouda.pdarrayclass import pdarray, RegistrationError, unregister_pdarray_by_name, create_pdarray
from arkouda.groupbyclass import unique, GroupBy, broadcast
from arkouda.pdarraysetops import in1d, concatenate
from arkouda.pdarraycreation import zeros_like, arange, array, zeros, ones
Expand Down Expand Up @@ -238,6 +238,61 @@ def set_categories(self, new_categories, NAvalue=None):
new_codes = code_mapping[self.codes]
return self.__class__.from_codes(new_codes, new_categories, NAvalue=NAvalue)

@staticmethod
def from_return_msg(repMsg):
    """
    Return a categorical instance pointing to components created by the arkouda server.
    The user should not call this function directly.

    Parameters
    ----------
    repMsg : str
        "+" delimited string containing the categories and codes details,
        optionally followed by the permutation and/or segments details

    Returns
    -------
    categorical
        A categorical representing a set of strings and pdarray components on the server

    Raises
    ------
    RuntimeError
        Raised if a server-side error is thrown in the process of creating
        the categorical instance
    ValueError
        Raised if the first optional component is neither a permutation
        nor a segments array
    """
    # repParts[0] is "categorical". Used by the generic attach method to identify the
    # response message as a Categorical
    repParts = repMsg.split("+")

    # The categories are a Strings object, which is described by two consecutive fields
    stringsMsg = f"{repParts[1]}+{repParts[2]}"
    parts = {
        "categories": Strings.from_return_msg(stringsMsg),
        "codes": create_pdarray(repParts[3]),
    }

    # Everything after the codes is optional, so only read an index once the
    # message is known to be long enough to contain it.
    if len(repParts) > 4:
        optionalName = repParts[4].split()[1]
        if ".permutation" in optionalName:
            parts["permutation"] = create_pdarray(repParts[4])
        elif ".segments" in optionalName:
            parts["segments"] = create_pdarray(repParts[4])
        else:
            raise ValueError(f"Unknown field, {optionalName}, found in Categorical.")

    if len(repParts) > 5:
        parts["segments"] = create_pdarray(repParts[5])

    # To get the name split the message into Categories, Codes, Permutation, Segments
    # then split the categories into its components, Name being second: name.categories
    # split the name on . and take the first half to get the given name
    # for example repParts[1] = "created user_defined_name.categories"
    name = repParts[1].split()[1].split(".")[0]

    c = Categorical(None, **parts)  # Call constructor with unpacked kwargs
    c.name = name  # Update our name
    return c

def to_ndarray(self) -> np.ndarray:
"""
Convert the array to a np.ndarray, transferring array data from
Expand Down
34 changes: 33 additions & 1 deletion arkouda/segarray.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from arkouda.pdarrayclass import pdarray, is_sorted, attach_pdarray
from arkouda.pdarrayclass import pdarray, is_sorted, attach_pdarray, create_pdarray
from arkouda.numeric import cumsum
from arkouda.dtypes import isSupportedInt
from arkouda.dtypes import int64 as akint64
Expand Down Expand Up @@ -128,6 +128,7 @@ def __init__(self, segments, values, copy=False, lengths=None, grouping=None):
else:
self.grouping = grouping


@classmethod
def from_multi_array(cls, m):
"""
Expand Down Expand Up @@ -232,6 +233,37 @@ def concat(cls, x, axis=0, ordered=True):
else:
raise ValueError("Supported values for axis are 0 (vertical concat) or 1 (horizontal concat)")

@staticmethod
def from_return_msg(repMsg) -> SegArray:
    """
    Build a SegArray from a reply message produced by the arkouda server.
    The user should not call this function directly.

    Parameters
    ----------
    repMsg : str
        "+" delimited string whose fields after the leading type tag hold
        the segments, values, and lengths details, in that order

    Returns
    -------
    SegArray
        A SegArray wrapping the pdarray components described by the message

    Raises
    ------
    RuntimeError
        Raised if a server-side error is thrown in the process of creating
        the SegArray instance
    """
    # Field 0 is the "segarray" tag the generic attach method uses to recognise
    # the reply; the three component descriptions follow it.
    fields = repMsg.split("+")
    segments, values, lengths = (create_pdarray(fields[idx]) for idx in (1, 2, 3))
    return SegArray(segments, values, lengths=lengths)

def copy(self):
"""
Return a deep copy.
Expand Down
26 changes: 17 additions & 9 deletions arkouda/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
import numpy as np # type: ignore
import h5py #type: ignore
import os
from typing import Union, Mapping
from typing import Union, Mapping, cast

from arkouda import __version__, Strings
from arkouda import __version__, Strings, SegArray
from arkouda.client_dtypes import BitVector, BitVectorizer, IPv4
from arkouda.timeclass import Datetime, Timedelta
from arkouda.pdarrayclass import attach_pdarray, pdarray, create_pdarray
Expand Down Expand Up @@ -282,13 +282,21 @@ def convert_if_categorical(values):
return values


def attach(name: str, dtype: str = "infer"):
    """
    Attaches to a known element name. If a type is passed, the server will use that type
    to pull the corresponding parts, otherwise the server will try to infer the type

    Parameters
    ----------
    name : str
        The name the element is known by on the server
    dtype : str
        The type sent to the server with the request; defaults to "infer"

    Returns
    -------
    Categorical, SegArray, Strings, or pdarray
        The client-side object matching the kind of reply the server sent
    """
    repMsg = cast(str, generic_msg(cmd="genericAttach", args=f"{dtype}+{name}"))

    # Categorical and SegArray replies are "+" delimited and lead with a type tag
    objType = repMsg.split("+")[0]
    if objType == "categorical":
        return Categorical.from_return_msg(repMsg)
    if objType == "segarray":
        return SegArray.from_return_msg(repMsg)

    # Any other reply is whitespace delimited; its third token is read as the dtype
    attachedDtype = repMsg.split()[2]
    if attachedDtype == "str":
        return Strings.from_return_msg(repMsg)
    return create_pdarray(repMsg)
stress-tess marked this conversation as resolved.
Show resolved Hide resolved
18 changes: 18 additions & 0 deletions src/MultiTypeSymbolTable.chpl
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,24 @@ module MultiTypeSymbolTable
}

}

/*
Reports whether a sym entry is mapped to the provided string.

:arg name: name of the entry to look up
:type name: string

:returns: bool, true if the table holds an entry for the name, false otherwise
*/
proc contains(name: string): bool {
    return tab.contains(name);
}
}

/**
Expand Down
Loading