Skip to content

Commit

Permalink
1406 Fix bug in ak.lookup (#1407)
Browse files Browse the repository at this point in the history
* account for ordering of keys in lookup

* test for lookup function
  • Loading branch information
reuster986 authored May 19, 2022
1 parent aba0b41 commit 6122a56
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 1 deletion.
11 changes: 10 additions & 1 deletion arkouda/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def in1dmulti(a, b, assume_unique=False, symmetric=False):
return atruth


def lookup(keys, values, arguments, fillvalue=-1):
def lookup(keys, values, arguments, fillvalue=-1, keys_from_unique=False):
"""
Apply the function defined by the mapping keys --> values to arguments.
Expand All @@ -187,6 +187,9 @@ def lookup(keys, values, arguments, fillvalue=-1):
(or tuple of dtypes, for a sequence) as keys.
fillvalue : scalar
The default value to return for arguments not in keys.
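keys_from_unique : bool
If True, keys are assumed to be the output of ak.unique, e.g. the
.unique_keys attribute of a GroupBy instance, and are used as given.
If False (the default), keys are first grouped to verify that they are
unique (a NonUniqueError is raised otherwise), and keys and values are
reordered to match the grouped key order before the lookup.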
Returns
-------
Expand Down Expand Up @@ -219,6 +222,12 @@ def lookup(keys, values, arguments, fillvalue=-1):
(array(['twenty', 'twenty', 'twenty']),
array(['four', 'one', 'two']))
"""
if not keys_from_unique:
keyg = GroupBy(keys)
if keyg.size != keyg.ngroups:
raise NonUniqueError("Function keys must be unique.")
keys = keyg.unique_keys
values = values[keyg.permutation]
if isinstance(values, Categorical):
codes = lookup(keys, values.codes, arguments, fillvalue=values._NAcode)
return Categorical.from_codes(codes, values.categories, NAvalue=values.NAvalue)
Expand Down
21 changes: 21 additions & 0 deletions tests/join_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,27 @@ def test_inner_join(self):
with self.assertRaises(ValueError):
l, r = ak.join.inner_join(left, right, wherefunc=ak.intersect1d, whereargs=(ak.arange(10), ak.arange(5)))

def test_lookup(self):
    """
    Exercise ak.lookup with simple keys, compound keys, keys supplied in
    reversed order, and non-unique keys (which must raise NonUniqueError).
    """
    keys = ak.arange(5)
    values = 10 * keys
    args = ak.array([5, 3, 1, 4, 2, 3, 1, 0])
    # 5 is not one of the keys, so it is expected to map to the fill value
    ans = np.array([-1, 30, 10, 40, 20, 30, 10, 0])

    # Simple lookup with int keys
    # Also test shortcut for unique-ordered keys
    res = ak.lookup(keys, values, args, fillvalue=-1, keys_from_unique=True)
    self.assertTrue((res.to_ndarray() == ans).all())

    # Compound lookup with (str, int) keys
    res2 = ak.lookup(
        (ak.cast(keys, ak.str_), keys),
        values,
        (ak.cast(args, ak.str_), args),
        fillvalue=-1,
    )
    self.assertTrue((res2.to_ndarray() == ans).all())

    # Keys not in uniqued order
    res3 = ak.lookup(keys[::-1], values[::-1], args, fillvalue=-1)
    self.assertTrue((res3.to_ndarray() == ans).all())

    # Non-unique keys should raise error.
    # Build the fixtures outside the assertRaises context so that only the
    # lookup call itself is allowed to raise, and use separate names so the
    # fixtures above are not rebound.
    dup_keys = ak.arange(10) % 5
    dup_values = 10 * dup_keys
    with self.assertRaises(ak.NonUniqueError):
        ak.lookup(dup_keys, dup_values, args)

def test_error_handling(self):
"""
Tests error TypeError and ValueError handling
Expand Down

0 comments on commit 6122a56

Please sign in to comment.