Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Series.skew/2 #614

Merged
merged 7 commits into from
Jun 15, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/explorer/backend/lazy_series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ defmodule Explorer.Backend.LazySeries do
last: 1,
count: 1,
nil_count: 1,
skew: 2,
# Strings
contains: 2,
trim_leading: 1,
Expand Down Expand Up @@ -130,7 +131,8 @@ defmodule Explorer.Backend.LazySeries do
:size,
:first,
:last,
:n_distinct
:n_distinct,
:skew
]

@window_fun_operations [:window_max, :window_mean, :window_min, :window_sum]
Expand Down
1 change: 1 addition & 0 deletions lib/explorer/backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ defmodule Explorer.Backend.Series do
number() | non_finite() | Date.t() | NaiveDateTime.t() | lazy_s() | nil
@callback nil_count(s) :: number() | lazy_s()
@callback product(s) :: float() | non_finite() | lazy_s() | nil
@callback skew(s, bias? :: boolean()) :: float() | non_finite() | lazy_s() | nil

# Cumulative

Expand Down
3 changes: 2 additions & 1 deletion lib/explorer/polars_backend/expression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ defmodule Explorer.PolarsBackend.Expression do
subtract: 2,
sum: 1,
unordered_distinct: 1,
variance: 1
variance: 1,
skew: 2
]

@first_only_expressions [
Expand Down
1 change: 1 addition & 0 deletions lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ defmodule Explorer.PolarsBackend.Native do
def s_cumulative_min(_s, _reverse), do: err()
def s_cumulative_sum(_s, _reverse), do: err()
def s_cumulative_product(_s, _reverse), do: err()
# Stub for the native `s_skew` NIF (series skewness); `_bias` is the boolean bias flag.
def s_skew(_s, _bias), do: err()
def s_distinct(_s), do: err()
def s_divide(_s, _other), do: err()
def s_dtype(_s), do: err()
Expand Down
4 changes: 4 additions & 0 deletions lib/explorer/polars_backend/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,10 @@ defmodule Explorer.PolarsBackend.Series do
@impl true
def product(series), do: Shared.apply_series(series, :s_product)

# Skewness of `series`, delegated to the native `:s_skew` function.
# `bias?` is forwarded unchanged as the only extra argument.
@impl true
def skew(series, bias?) do
  Shared.apply_series(series, :s_skew, [bias?])
end

# Cumulative

@impl true
Expand Down
51 changes: 51 additions & 0 deletions lib/explorer/series.ex
Original file line number Diff line number Diff line change
Expand Up @@ -1864,6 +1864,57 @@ defmodule Explorer.Series do
def quantile(%Series{dtype: dtype}, _),
do: dtype_error("quantile/2", dtype, [:integer, :float, :date, :time, :datetime])

@doc """
Compute the sample skewness of a data set.
guarilha marked this conversation as resolved.
Show resolved Hide resolved

For normally distributed data, the skewness should be about zero.
For unimodal continuous distributions, a skewness value greater
than zero means that there is more weight in the right tail of the
distribution. The function skewtest can be used to determine if
guarilha marked this conversation as resolved.
Show resolved Hide resolved
the skewness value is close enough to zero, statistically speaking.

See scipy.stats for more information.
guarilha marked this conversation as resolved.
Show resolved Hide resolved

## Supported dtypes

* `:integer`
* `:float`

## Examples

iex> s = Explorer.Series.from_list([1, 2, 3, 4, 5, 23])
iex> Explorer.Series.skew(s)
1.6727687946848508

iex> s = Explorer.Series.from_list([1, 2, 3, 4, 5, 23])
iex> Explorer.Series.skew(s, bias: false)
2.2905330058490514

iex> s = Explorer.Series.from_list([1, 2, 3, nil, 1])
iex> Explorer.Series.skew(s, bias: false)
0.8545630383279712

iex> s = Explorer.Series.from_list([1, 2, 3, nil, 1])
iex> Explorer.Series.skew(s)
0.49338220021815865

iex> s = Explorer.Series.from_list([true, false, true])
iex> Explorer.Series.skew(s, bias: false)
** (ArgumentError) Explorer.Series.skew/2 not implemented for dtype :boolean. Valid dtypes are [:integer, :float]
"""
@doc type: :aggregation
# Aggregation: the result is a scalar (float, non-finite atom, or nil), not a series.
@spec skew(series :: Series.t(), opts :: Keyword.t()) :: float() | non_finite() | nil
def skew(series, opts \\ [])

# Only numeric dtypes are accepted. The native implementation handles just
# integer and float series, so any other dtype (including dates) must fall
# through to the dtype error clause below instead of reaching the backend.
def skew(%Series{dtype: dtype} = series, opts) when dtype in [:integer, :float] do
  # Rejects unknown options and defaults `:bias` to `true`.
  opts = Keyword.validate!(opts, bias: true)
  apply_series(series, :skew, [opts[:bias]])
end

# Any non-numeric dtype raises an ArgumentError listing the supported dtypes.
def skew(%Series{dtype: dtype}, _),
  do: dtype_error("skew/2", dtype, [:integer, :float])

# Cumulative

@doc """
Expand Down
3 changes: 2 additions & 1 deletion native/explorer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ features = [
"strings",
"round_series",
"ewma",
"product"
"product",
"moment"
]

[dependencies.polars-ops]
Expand Down
6 changes: 6 additions & 0 deletions native/explorer/src/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,12 @@ pub fn expr_quantile(expr: ExExpr, quantile: f64) -> ExExpr {
ExExpr::new(expr.quantile(quantile.into(), strategy))
}

/// Builds a lazy expression that computes the skewness of `data`.
///
/// `bias` is passed straight through to the underlying expression's `skew`.
#[rustler::nif]
pub fn expr_skew(data: ExExpr, bias: bool) -> ExExpr {
    ExExpr::new(data.clone_inner().skew(bias))
}

#[rustler::nif]
pub fn expr_alias(expr: ExExpr, name: &str) -> ExExpr {
let expr = expr.clone_inner();
Expand Down
2 changes: 2 additions & 0 deletions native/explorer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ rustler::init!(
expr_sum,
expr_variance,
expr_product,
expr_skew,
// window expressions
expr_cumulative_max,
expr_cumulative_min,
Expand Down Expand Up @@ -306,6 +307,7 @@ rustler::init!(
s_mean,
s_median,
s_product,
s_skew,
s_min,
s_multiply,
s_n_distinct,
Expand Down
10 changes: 10 additions & 0 deletions native/explorer/src/series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -773,6 +773,16 @@ pub fn s_standard_deviation(env: Env, s: ExSeries) -> Result<Term, ExplorerError
}
}

#[rustler::nif(schedule = "DirtyCpu")]
pub fn s_skew(env: Env, s: ExSeries, bias: bool) -> Result<Term, ExplorerError> {
match s.dtype() {
DataType::Float64 => Ok(s.skew(bias)?.encode(env)),
DataType::Int64 => Ok(s.skew(bias)?.encode(env)),
// DataType::Float64 => Ok(term_from_optional_float(s.skew(bias), env)),
dt => panic!("skew/2 not implemented for {dt:?}"),
}
}

fn term_from_optional_float(option: Option<f64>, env: Env<'_>) -> Term<'_> {
match option {
Some(float) => encoding::term_from_float(float, env),
Expand Down
22 changes: 22 additions & 0 deletions test/explorer/series_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -3546,6 +3546,28 @@ defmodule Explorer.SeriesTest do
end
end

describe "skew/2" do
  # `assert_in_delta/3` checks the absolute difference. The previous
  # `value - expected < delta` form also passed for any value *smaller* than
  # the expected one, which hid a wrong expectation in the first test.

  test "returns the skew of an integer series (default is bias: true)" do
    s = Series.from_list([1, 2, 3, nil, 1])
    assert_in_delta Series.skew(s), 0.49338220021815865, 1.0e-4
  end

  test "returns the skew of an integer series with nils (bias false)" do
    s = Series.from_list([1, 2, 3, nil, 1])
    assert_in_delta Series.skew(s, bias: false), 0.8545630383279712, 1.0e-4
  end

  test "returns the skew of a float series" do
    s = Series.from_list([1.0, 2.0, 3.0, nil, 1.0])
    assert_in_delta Series.skew(s, bias: true), 0.49338220021815865, 1.0e-4
  end

  test "returns the skew of an integer series (bias true)" do
    s = Series.from_list([1, 2, 3, 4, 5, 23])
    assert_in_delta Series.skew(s, bias: true), 1.6727687946848508, 1.0e-4
  end

  test "returns the skew of an integer series (bias false)" do
    s = Series.from_list([1, 2, 3, 4, 5, 23])
    assert_in_delta Series.skew(s, bias: false), 2.2905330058490514, 1.0e-4
  end

  test "raises for unsupported dtypes" do
    s = Series.from_list([true, false, true])
    assert_raise ArgumentError, fn -> Series.skew(s) end
  end
end

describe "variance/1" do
test "variance of an integer series" do
s = Series.from_list([1, 2, nil, 3])
Expand Down