Skip to content

Commit

Permalink
Add mode (#453)
Browse files Browse the repository at this point in the history
  • Loading branch information
cigrainger authored Nov 12, 2023
1 parent 9950c0b commit e0c02a4
Show file tree
Hide file tree
Showing 15 changed files with 200 additions and 28 deletions.
42 changes: 30 additions & 12 deletions flake.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@
buildInputs = with pkgs; [
act
clang
beam.packages.erlangR25.elixir_1_14
erlangR25
gdb
elixir
erlangR26
libiconv
openssl
pkg-config
Expand All @@ -40,6 +39,7 @@
cmake
] ++ lib.optionals stdenv.isDarwin [
darwin.apple_sdk.frameworks.Security
darwin.apple_sdk.frameworks.SystemConfiguration
];
shellHook = ''
mkdir -p .nix-mix
Expand Down
2 changes: 2 additions & 0 deletions lib/explorer/backend/lazy_series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ defmodule Explorer.Backend.LazySeries do
argmax: 1,
mean: 1,
median: 1,
mode: 1,
n_distinct: 1,
variance: 1,
standard_deviation: 1,
Expand Down Expand Up @@ -149,6 +150,7 @@ defmodule Explorer.Backend.LazySeries do
:argmax,
:mean,
:median,
:mode,
:variance,
:standard_deviation,
:count,
Expand Down
1 change: 1 addition & 0 deletions lib/explorer/backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ defmodule Explorer.Backend.Series do
@callback argmax(s) :: number() | non_finite() | lazy_s() | nil
@callback mean(s) :: float() | non_finite() | lazy_s() | nil
@callback median(s) :: float() | non_finite() | lazy_s() | nil
# Unlike the neighbouring aggregations, `mode` is specified to return a series
# rather than a scalar or nil: the public `Explorer.Series.mode/1` docs note
# that ties yield multiple values.
@callback mode(s) :: s | lazy_s()
@callback variance(s) :: float() | non_finite() | lazy_s() | nil
@callback standard_deviation(s) :: float() | non_finite() | lazy_s() | nil
@callback quantile(s, float()) ::
Expand Down
1 change: 1 addition & 0 deletions lib/explorer/polars_backend/expression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ defmodule Explorer.PolarsBackend.Expression do
mean: 1,
median: 1,
min: 1,
mode: 1,
n_distinct: 1,
nil_count: 1,
not_equal: 2,
Expand Down
1 change: 1 addition & 0 deletions lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,7 @@ defmodule Explorer.PolarsBackend.Native do
# Elixir-side placeholders for native series functions. Each clause carries the
# name and arity of its native counterpart (for example, `s_mode` is declared as
# a Rustler NIF in native/explorer/src/series.rs).
# NOTE(review): `err()` is defined outside this excerpt — presumably it raises
# when the native library has not been loaded; confirm.
def s_median(_s), do: err()
def s_product(_s), do: err()
def s_min(_s), do: err()
def s_mode(_s), do: err()
def s_multiply(_s, _other), do: err()
def s_n_chunks(_s), do: err()
def s_n_distinct(_s), do: err()
Expand Down
3 changes: 3 additions & 0 deletions lib/explorer/polars_backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@ defmodule Explorer.PolarsBackend.Series do
@impl true
def median(series) do
  # Hands the series to Shared.apply_series/2 with the `:s_median` function name.
  Shared.apply_series(series, :s_median)
end

@impl true
def mode(series) do
  # Hands the series to Shared.apply_series/2 with the `:s_mode` function name.
  Shared.apply_series(series, :s_mode)
end

@impl true
def variance(series) do
  # Hands the series to Shared.apply_series/2 with the `:s_variance` function name.
  Shared.apply_series(series, :s_variance)
end

Expand Down
40 changes: 40 additions & 0 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2164,6 +2164,46 @@ defmodule Explorer.Series do

def mean(%Series{dtype: dtype}), do: dtype_error("mean/1", dtype, [:integer, :float])

@doc """
Gets the most common value(s) of the series.

This function will return multiple values when there's a tie, which is why the
result is always a series rather than a single value.

## Supported dtypes

All except `:list`.

## Examples

    iex> s = Explorer.Series.from_list([1, 2, 2, nil])
    iex> Explorer.Series.mode(s)
    #Explorer.Series<
      Polars[1]
      integer [2]
    >

    iex> s = Explorer.Series.from_list(["a", "b", "b", "c"])
    iex> Explorer.Series.mode(s)
    #Explorer.Series<
      Polars[1]
      string ["b"]
    >

When several values are tied, all of them are returned:

    s = Explorer.Series.from_list([1.0, 2.0, 2.0, 3.0, 3.0])
    Explorer.Series.mode(s)
    #Explorer.Series<
      Polars[2]
      float [2.0, 3.0]
    >
"""
@doc type: :aggregation
# The backend callback is specified as returning a (lazy) series and never nil,
# so the public spec does not advertise nil either.
@spec mode(series :: Series.t()) :: Series.t()
# List dtypes are rejected up front; the error lists the non-list dtypes as the
# supported ones.
def mode(%Series{dtype: {:list, _} = dtype}),
  do: dtype_error("mode/1", dtype, Shared.non_list_types())

# Every other dtype is dispatched to the series' backend implementation.
def mode(%Series{} = series),
  do: Shared.apply_impl(series, :mode)

@doc """
Gets the median value of the series.
Expand Down
16 changes: 10 additions & 6 deletions lib/explorer/shared.ex
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,16 @@ defmodule Explorer.Shared do
within lists inside.
"""
def dtypes do
non_list_dtypes = [
non_list_dtypes = non_list_types()
list_dtypes = for dtype <- non_list_dtypes, do: {:list, dtype}
non_list_dtypes ++ list_dtypes
end

@doc """
Non-list dtypes.
"""
def non_list_types,
do: [
:binary,
:boolean,
:category,
Expand All @@ -26,11 +35,6 @@ defmodule Explorer.Shared do
{:duration, :nanosecond}
]

list_dtypes = for dtype <- non_list_dtypes, do: {:list, dtype}

non_list_dtypes ++ list_dtypes
end

@doc """
Supported datetime dtypes.
"""
Expand Down
17 changes: 10 additions & 7 deletions native/explorer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,13 @@ thiserror = "1"
smartstring = "1"

# Deps necessary for cloud features.
tokio = { version = "1.33", default-features = false, features = ["rt"], optional = true }
tokio-util = { version = "0.7", default-features = false, features = ["io", "io-util"], optional = true }
tokio = { version = "1.33", default-features = false, features = [
"rt",
], optional = true }
tokio-util = { version = "0.7", default-features = false, features = [
"io",
"io-util",
], optional = true }
object_store = { version = "0.7", default-features = false, optional = true }

# MiMalloc won't compile on Windows with the GCC compiler.
Expand Down Expand Up @@ -52,6 +57,7 @@ features = [
"lazy",
"lazy_regex",
"log",
"mode",
"parquet",
"performant",
"pivot",
Expand All @@ -71,15 +77,12 @@ features = [
"product",
"peaks",
"moment",
"rank"
"rank",
]

[dependencies.polars-ops]
version = "0.34"
features = [
"ewma",
"cum_agg"
]
features = ["ewma", "cum_agg"]

[dependencies.polars-algo]
version = "0.34"
Expand Down
7 changes: 7 additions & 0 deletions native/explorer/src/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,13 @@ pub fn expr_median(expr: ExExpr) -> ExExpr {
ExExpr::new(expr.median())
}

// Builds the `mode` expression for `expr`: the inner expression is obtained via
// `clone_inner()`, `mode()` is applied to it, and the result is wrapped in a
// fresh `ExExpr`.
#[rustler::nif]
pub fn expr_mode(expr: ExExpr) -> ExExpr {
    ExExpr::new(expr.clone_inner().mode())
}

#[rustler::nif]
pub fn expr_product(expr: ExExpr) -> ExExpr {
let expr = expr.clone_inner();
Expand Down
2 changes: 2 additions & 0 deletions native/explorer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ rustler::init!(
expr_mean,
expr_median,
expr_min,
expr_mode,
expr_n_distinct,
expr_nil_count,
expr_quantile,
Expand Down Expand Up @@ -366,6 +367,7 @@ rustler::init!(
s_correlation,
s_covariance,
s_min,
s_mode,
s_multiply,
s_n_distinct,
s_name,
Expand Down
8 changes: 8 additions & 0 deletions native/explorer/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -937,6 +937,14 @@ pub fn s_median(env: Env, s: ExSeries) -> Result<Term, ExplorerError> {
}
}

// Computes the mode of a series; scheduled as a dirty-CPU NIF.
// Delegates to `mode::mode`: a successful result is wrapped in a new `ExSeries`,
// and a failure is converted into `ExplorerError` through `Into`.
#[rustler::nif(schedule = "DirtyCpu")]
pub fn s_mode(s: ExSeries) -> Result<ExSeries, ExplorerError> {
    mode::mode(&s).map(ExSeries::new).map_err(Into::into)
}

#[rustler::nif(schedule = "DirtyCpu")]
pub fn s_product(s: ExSeries) -> Result<ExSeries, ExplorerError> {
match s.dtype() {
Expand Down
12 changes: 12 additions & 0 deletions test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -3526,6 +3526,18 @@ defmodule Explorer.DataFrameTest do
}
end

test "mode/1" do
  # Summarise the iris dataset per species, taking the mode of petal_width.
  grouped = DF.group_by(Datasets.iris(), :species)
  summary = DF.summarise(grouped, petal_width_mode: mode(petal_width))

  expected = %{
    "species" => ["Iris-setosa", "Iris-versicolor", "Iris-virginica"],
    "petal_width_mode" => [[0.2], [1.3], [1.8]]
  }

  assert DF.to_columns(summary) == expected
end

test "argmax/1 and argmin/1" do
df =
DF.new(
Expand Down
Loading

0 comments on commit e0c02a4

Please sign in to comment.