Skip to content

Commit

Permalink
Feature/pandas api isin (#30)
Browse files Browse the repository at this point in the history
code, test and documentation of isin
---------

Co-authored-by: marcosvm13 <[email protected]>
Co-authored-by: cperezln <[email protected]>
  • Loading branch information
3 people committed Mar 11, 2024
1 parent d448911 commit f90f42b
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 0 deletions.
99 changes: 99 additions & 0 deletions docs/user-guide/advanced/Pandas_API.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2659,6 +2659,105 @@
"Example Table."
]
},
{
"cell_type": "markdown",
"id": "7f08eb84",
"metadata": {},
"source": [
"## Comparison\n",
"\n",
"### Table.isin()\n",
"\n",
"```\n",
"Table.isin(\n",
" values\n",
")\n",
"```\n",
"\n",
"Whether each element in the Table is contained in values.\n",
"\n",
"**Parameters:**\n",
"\n",
"| Name | Type | Description | Default |\n",
"| :--------------: | :---------------------------------: | :-------------------------------------------------------------------------- | :------: |\n",
"| values | Union[List, dict, Table, KeyedTable] | The values to test against. The result will only be true at a location if all the labels match. If values is a list, a location is true when its element appears in the list. If values is a dict, the keys must be the column names, which must match. If values is a Table or KeyedTable, then both the index and column labels must match. | _required_ |\n",
"\n",
"\n",
"**Returns:**\n",
"\n",
"| Type | Description |\n",
"| :-----------------------: | :---------------------------------------------- |\n",
"| Table | Boolean type Table/KeyedTable showing whether each element in the Table is contained in values.|\n",
"\n",
"**Examples:**\n",
"\n",
"Example Table."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6e453c8",
"metadata": {},
"outputs": [],
"source": [
"tab = kx.Table(data={'x': list(range(3)), 'y': [\"A\", \"B\", \"C\"]})"
]
},
{
"cell_type": "markdown",
"id": "aadd23c1",
"metadata": {},
"source": [
"Find if element \"A\" or 1 is in the table:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d41d40e0",
"metadata": {},
"outputs": [],
"source": [
"tab.isin([\"A\", 1])"
]
},
{
"cell_type": "markdown",
"id": "cff856fe",
"metadata": {},
"source": [
"Find if element \"A\" is in column \"y\":"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bccf59d9",
"metadata": {},
"outputs": [],
"source": [
"tab.isin({\"y\": [\"A\"]})"
]
},
{
"cell_type": "markdown",
"id": "ed704cce",
"metadata": {},
"source": [
"Find if element \"A\" is in the first position of the \"y\" column:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "41840cc0",
"metadata": {},
"outputs": [],
"source": [
"tab.isin(kx.Table(data={\"y\":[\"A\"]}))"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down
47 changes: 47 additions & 0 deletions src/pykx/pandas_api/pandas_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,53 @@ def abs(self, numeric_only=False):
tab = _get_numeric_only_subtable(self)
return q.abs(tab)

@api_return
def isin(self, values):
    """Return a boolean table marking which elements are contained in `values`.

    Mirrors the contract of `pandas.DataFrame.isin`.

    Parameters:
        values: A list, a dict keyed by column name, a Table or a KeyedTable
            to test the elements of this table against.

    Returns:
        A boolean table with the same columns as this table. When this
        table is keyed, the result is re-indexed with the original key
        columns.
    """
    # Keyed-ness is detected from the type name rather than isinstance.
    self_is_keyed = 'KeyedTable' in str(type(self))
    values_is_keyed = 'KeyedTable' in str(type(values))

    # q lambda: a table with the columns of x in which every cell is 0b.
    make_all_false = q("""{u:(cols x);
v:(count[u],count[x])#0b;
flip u!v}""")

    # Keyed values against an unkeyed table: report no matches at all.
    if values_is_keyed and not self_is_keyed:
        return make_all_false(self)

    data = self
    pad_rows = 0
    if self_is_keyed:
        index_cols = q.key(data)
        if values_is_keyed:
            # Keep only the rows whose key occurs in both tables; pad_rows
            # is the number of remaining rows (never below 0), which the
            # final lambda appends as 0b entries.
            pad_rows, data = q("""{n_rows:max 0, count[x]-
count rows:(key y) inter key x;
(n_rows; x each rows)}""", data, values)
            values = q.value(values)
        else:
            data = q.value(data)

    # A table (type 98h) is flipped into a column dictionary and flagged;
    # anything else passes through unchanged with the flag cleared.
    lookup, lookup_is_table = q("""{$[98h = type x;
(flip x; 1b);
(x; 0b)]}""", values)

    if self_is_keyed and not values_is_keyed and lookup_is_table:
        # Keyed table against an unkeyed table: report no matches.
        mask = make_all_false(data)
    else:
        # Per column: pick that column's entry when values is a dictionary,
        # drop candidates whose type differs from a typed column, then
        # compare position by position (table values, padded with 0b) or
        # by membership (list/dict values).
        mask = q("""{ [table; values; is_tab; n_rows]
flip (cols table)!
{[col_name; tab; values; v_is_tab; n_rows]
col: tab col_name;
ltype: .Q.ty col;
values: $[99h~type values; values col_name; values];
$[v_is_tab or ltype=" "; ;
values@:where (lower ltype) = .Q.t abs type each values];
$[0 = count values;
(n_rows + count[col])#0b;
$[v_is_tab;
$[any ltype = (" ";"C"); ~'; =]
[mlen#col;mlen#values],
(n_rows + max 0,count[col]-
mlen: min count[values],
count[col])#0b;
any $[any ltype = (" ";"C"); ~/:\:; =\:][values;col]
]]}[; table; values; is_tab; n_rows]
each cols table}""", data, lookup, lookup_is_table, pad_rows)

    if self_is_keyed:
        return mask.set_index(index_cols)
    return mask

@convert_result
def all(self, axis=0, bool_only=False, skipna=True):
res, cols = preparse_computations(self, axis, skipna, bool_only=bool_only)
Expand Down
24 changes: 24 additions & 0 deletions tests/test_pandas_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2109,6 +2109,30 @@ def test_keyed_loc_fixes(q):
mkt['k1']


def test_pandas_isin(kx):
    """Check Table.isin and KeyedTable.isin against pandas.DataFrame.isin.

    Every combination of an unkeyed/keyed table with list, dict, Table and
    KeyedTable values is compared with the pandas result computed on the
    converted DataFrame.
    """
    tab = kx.q("""([] k1: 0n 1. 0n 2. 0n;
k2: ("A";" ";"B";" ";"A");
k3: (`a;1.;`c;5;`d))""")
    keyed_tab = kx.q("""([`a`b`c`d`e]
k1: 0n 1. 0n 2. 0n;
k2: ("A";" ";"B";" ";"A");
k3: (`a;1.;`c;5;`d))""")

    list_value = kx.q('(`a;1.;"A")')
    tab_value = kx.q('([] k1: 1. 2. 3.; k2: ("A";"B";"C"))')
    dict_value = {"k1": [1., 2., 3.]}
    keyed_tab_value = kx.q('([`a`b] k1: 1. 2.; k2: ("A";"B"))')

    # Unkeyed table against each kind of values.
    assert tab.isin(list_value).pd().equals(tab.pd().isin(list_value.py()))
    assert tab.isin(tab_value).pd().equals(tab.pd().isin(tab_value.pd()))
    assert tab.isin(dict_value).pd().equals(tab.pd().isin(dict_value))
    # The pandas reference must receive a DataFrame so that it applies
    # index/column label alignment instead of treating a PyKX object as a
    # generic list-like.
    assert tab.isin(keyed_tab_value).pd().equals(tab.pd().isin(keyed_tab_value.pd()))

    # Keyed table against each kind of values.
    assert keyed_tab.isin(list_value).pd().equals(keyed_tab.pd().isin(list_value.py()))
    assert keyed_tab.isin(dict_value).pd().equals(keyed_tab.pd().isin(dict_value))
    assert keyed_tab.isin(keyed_tab_value).pd().equals(keyed_tab.pd().isin(keyed_tab_value.pd()))
    assert keyed_tab.isin(tab_value).pd().equals(keyed_tab.pd().isin(tab_value.pd()))


def test_pandas_count(q):
tab = q('([] k1: 0n 2 0n 2 0n ; k2: (`a;`;`b;`;`c))')
df = tab.pd()
Expand Down

0 comments on commit f90f42b

Please sign in to comment.