Skip to content

Commit

Permalink
Support {:list, numeric_type} for Series.frequencies/1 (#788)
Browse files Browse the repository at this point in the history
This is related to #787, but does not fully fix that issue, because Polars
can only compute frequencies for series of dtype `{:list, numeric_dtype}`.
  • Loading branch information
philss authored Dec 27, 2023
1 parent e5c3e58 commit 207538c
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 11 deletions.
20 changes: 9 additions & 11 deletions lib/explorer/polars_backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,8 @@ defmodule Explorer.PolarsBackend.Series do
# True when `n` is one of the atoms :nan, :infinity or :neg_infinity.
defguardp is_non_finite(n) when n in [:nan, :infinity, :neg_infinity]
# True when `n` is a number or one of the atoms accepted by is_non_finite/1.
defguardp is_numeric(n) when is_number(n) or is_non_finite(n)

@integer_types [
{:s, 8},
{:s, 16},
{:s, 32},
{:s, 64},
{:u, 8},
{:u, 16},
{:u, 32},
{:u, 64}
]
# Dtype lists are read from Explorer.Shared at compile time rather than
# being spelled out in this module.
@integer_types Explorer.Shared.integer_types()
# Used by frequencies/1 to check the inner dtype of list series.
@numeric_types Explorer.Shared.numeric_types()

# Conversion

Expand Down Expand Up @@ -456,7 +448,13 @@ defmodule Explorer.PolarsBackend.Series do
# Delegates to the native `:s_n_distinct` operation via Shared.apply_series/2.
def n_distinct(series) do
  Shared.apply_series(series, :s_n_distinct)
end

@impl true
def frequencies(series) do
# Rejects list series whose inner dtype is not one of @numeric_types by
# raising an ArgumentError that names the offending dtype, so such series
# never reach the general clause below.
def frequencies(%Series{dtype: {:list, inner} = list_dtype})
    when inner not in @numeric_types do
  message =
    "frequencies/1 only works with series of lists of numeric types, but #{Explorer.Shared.dtype_to_string(list_dtype)} was given"

  raise ArgumentError, message
end

def frequencies(%Series{} = series) do
Shared.apply(:s_frequencies, [series.data])
|> Shared.create_dataframe()
|> DataFrame.rename(["values", "counts"])
Expand Down
1 change: 1 addition & 0 deletions native/explorer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ features = [
"decompress-fast",
"describe",
"dtype-full",
"group_by_list",
"ipc",
"ipc_streaming",
"lazy",
Expand Down
54 changes: 54 additions & 0 deletions test/explorer/series_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -5168,4 +5168,58 @@ defmodule Explorer.SeriesTest do
]
end
end

describe "frequencies/1" do
test "integer" do
  freqs =
    [1, 2, 3, 1, 3, 4, 1, 5, 6, 1, 2]
    |> Series.from_list()
    |> Series.frequencies()

  # Both result columns are expected to be signed 64-bit integers here.
  assert Series.dtype(freqs[:values]) == {:s, 64}
  assert Series.dtype(freqs[:counts]) == {:s, 64}

  # The value 1 occurs four times, 2 and 3 twice, the rest once.
  assert Explorer.DataFrame.to_columns(freqs, atom_keys: true) == %{
           values: [1, 2, 3, 4, 5, 6],
           counts: [4, 2, 2, 1, 1, 1]
         }
end

test "string" do
  result = Series.frequencies(Series.from_list(["a", "a", "b", "c", "c", "c"]))

  # Values keep the string dtype; counts come back as signed 64-bit integers.
  assert Series.dtype(result[:values]) == :string
  assert Series.dtype(result[:counts]) == {:s, 64}

  # "c" appears three times, "a" twice and "b" once.
  assert Explorer.DataFrame.to_columns(result, atom_keys: true) == %{
           values: ["c", "a", "b"],
           counts: [3, 2, 1]
         }
end

test "list of integer" do
  rows = [[1, 2], [3, 1, 3], [4, 1], [5, 6], [1, 2], [4, 1]]

  table =
    rows
    |> Series.from_list()
    |> Series.frequencies()

  # The values column keeps the list dtype of the input series.
  assert Series.dtype(table[:values]) == {:list, {:s, 64}}
  assert Series.dtype(table[:counts]) == {:s, 64}

  # [1, 2] and [4, 1] each occur twice; the remaining lists occur once.
  assert Explorer.DataFrame.to_columns(table, atom_keys: true) == %{
           values: [[1, 2], [4, 1], [3, 1, 3], [5, 6]],
           counts: [2, 2, 1, 1]
         }
end

# A series of dtype list[string] must be rejected: frequencies/1 accepts
# list series only when the inner dtype is numeric.
test "list of string" do
  s = Series.from_list([["a"], ["a", "b"], ["c"], ["c"], ["c"]])

  assert_raise ArgumentError,
               "frequencies/1 only works with series of lists of numeric types, but list[string] was given",
               fn -> Series.frequencies(s) end
end
end
end

0 comments on commit 207538c

Please sign in to comment.