Skip to content

Commit

Permalink
add map as expression
Browse files Browse the repository at this point in the history
  • Loading branch information
lkarthee committed Feb 11, 2024
1 parent 521fef0 commit 1a8fc05
Show file tree
Hide file tree
Showing 6 changed files with 89 additions and 6 deletions.
41 changes: 41 additions & 0 deletions lib/explorer/data_frame.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2852,6 +2852,47 @@ defmodule Explorer.DataFrame do

Explorer.Backend.Series.new(lazy_s, {:duration, precision})

# A plain (non-struct) map becomes a lazy struct expression with one field per
# map entry. Each value must be a series or one of the supported scalars:
# nil, number, string, boolean, Date, NaiveDateTime or Explorer.Duration.
map = %{} when not is_struct(map) ->
  {series_list, dtype_list} =
    Enum.reduce(map, {[], []}, fn {name, series_or_scalars}, {sl, dl} ->
      series =
        case series_or_scalars do
          %Series{} ->
            series_or_scalars.data

          nil ->
            LazySeries.new(:lazy, [nil], :null)

          number when is_number(number) ->
            dtype = if is_integer(number), do: {:s, 64}, else: {:f, 64}
            LazySeries.new(:lazy, [number], dtype)

          string when is_binary(string) ->
            LazySeries.new(:lazy, [string], :string)

          boolean when is_boolean(boolean) ->
            LazySeries.new(:lazy, [boolean], :boolean)

          date = %Date{} ->
            LazySeries.new(:lazy, [date], :date)

          datetime = %NaiveDateTime{} ->
            LazySeries.new(:lazy, [datetime], {:datetime, :nanosecond})

          duration = %Explorer.Duration{precision: precision} ->
            LazySeries.new(:lazy, [duration], {:duration, precision})

          # Fail with a descriptive ArgumentError instead of an opaque
          # CaseClauseError when a map value is of an unsupported type.
          unsupported ->
            raise ArgumentError,
                  "expecting a series or scalar value for key #{inspect(name)} of the map, " <>
                    "but instead got #{inspect(unsupported)}"
        end

      # Field names are stored as strings; atom keys are converted.
      name = if is_atom(name), do: Atom.to_string(name), else: name
      {[{name, series} | sl], [{name, series.dtype} | dl]}
    end)

  map = Enum.into(series_list, %{})
  # The struct dtype lists its {name, dtype} pairs in sorted order.
  dtype_list = Enum.sort(dtype_list)

  lazy_s = LazySeries.new(:lazy, [map], {:struct, dtype_list})
  Explorer.Backend.Series.new(lazy_s, {:struct, dtype_list})

other ->
raise ArgumentError,
"expecting a lazy series or scalar value, but instead got #{inspect(other)}"
Expand Down
9 changes: 9 additions & 0 deletions lib/explorer/polars_backend/expression.ex
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,15 @@ defmodule Explorer.PolarsBackend.Expression do
def to_expr(%Explorer.Duration{} = duration), do: Native.expr_duration(duration)
def to_expr(%PolarsSeries{} = polars_series), do: Native.expr_series(polars_series)

# Builds a struct expression from a map of field name => value, where each
# value is converted with `to_expr/1` and aliased to its field name.
#
# Maps do not guarantee any iteration order, so the entries are sorted by
# field name to make the field order of the resulting struct deterministic.
# NOTE(review): Explorer.DataFrame declares the struct dtype with its fields
# sorted by name; sorting here keeps both sides in the same order.
def to_expr(map) when is_map(map) do
  expr_list =
    map
    |> Enum.sort_by(fn {name, _value} -> name end)
    |> Enum.map(fn {name, value} ->
      value |> to_expr() |> alias_expr(name)
    end)

  Native.expr_struct(expr_list)
end

# Used by Explorer.PolarsBackend.DataFrame
def alias_expr(%__MODULE__{} = expr, alias_name) when is_binary(alias_name) do
Native.expr_alias(expr, alias_name)
Expand Down
1 change: 1 addition & 0 deletions lib/explorer/polars_backend/native.ex
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,7 @@ defmodule Explorer.PolarsBackend.Native do
def expr_integer(_number), do: err()
def expr_series(_series), do: err()
def expr_string(_string), do: err()
# Takes the list of field expressions to combine into a struct expression.
def expr_struct(_exprs), do: err()

# LazyFrame
def lf_collect(_df), do: err()
Expand Down
19 changes: 13 additions & 6 deletions native/explorer/src/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
// or an expression and returns an expression that is
// wrapped in an Elixir struct.

use polars::prelude::{
col, concat_str, cov, pearson_corr, spearman_rank_corr, when, IntoLazy, LiteralValue,
SortOptions,
};
use polars::prelude::{DataType, EWMOptions, Expr, Literal, StrptimeOptions, TimeUnit};

use crate::datatypes::{
ExCorrelationMethod, ExDate, ExDateTime, ExDuration, ExRankMethod, ExSeriesDtype, ExValidValue,
};
use crate::series::{cast_str_to_f64, ewm_opts, rolling_opts};
use crate::{ExDataFrame, ExExpr, ExSeries};
use polars::lazy::dsl;
use polars::prelude::{
col, concat_str, cov, pearson_corr, spearman_rank_corr, when, IntoLazy, LiteralValue,
SortOptions,
};
use polars::prelude::{DataType, EWMOptions, Expr, Literal, StrptimeOptions, TimeUnit};

// Useful to get an ExExpr vec into a vec of expressions.
pub fn ex_expr_to_exprs(ex_exprs: Vec<ExExpr>) -> Vec<Expr> {
Expand Down Expand Up @@ -1079,3 +1079,10 @@ pub fn expr_json_decode(expr: ExExpr, ex_dtype: ExSeriesDtype) -> ExExpr {
let expr = expr.clone_inner().str().json_decode(Some(dtype), None);
ExExpr::new(expr)
}

#[rustler::nif]
pub fn expr_struct(ex_exprs: Vec<ExExpr>) -> ExExpr {
let exprs = ex_exprs.iter().map(|e| e.clone_inner()).collect();
let expr = dsl::as_struct(exprs);
ExExpr::new(expr)
}
1 change: 1 addition & 0 deletions native/explorer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ rustler::init!(
// struct expressions
expr_field,
expr_json_decode,
expr_struct,
// lazyframe
lf_collect,
lf_describe_plan,
Expand Down
24 changes: 24 additions & 0 deletions test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,30 @@ defmodule Explorer.DataFrameTest do
}
end

test "with map " do
  source = DF.new(%{a: [1, nil, 3], b: ["a", "b", nil]})

  # Mix column references, a literal and a computed expression in one map.
  result = DF.mutate(source, c: %{a: a, b: b, lit: 1, null: is_nil(a)})

  assert result.names == ["a", "b", "c"]

  # The struct fields are expected in name order, with atom keys as strings.
  struct_dtype =
    {:struct, [{"a", {:s, 64}}, {"b", :string}, {"lit", {:s, 64}}, {"null", :boolean}]}

  assert result.dtypes == %{"a" => {:s, 64}, "b" => :string, "c" => struct_dtype}

  struct_rows = [
    %{"a" => 1, "b" => "a", "lit" => 1, "null" => false},
    %{"a" => nil, "b" => "b", "lit" => 1, "null" => true},
    %{"a" => 3, "b" => nil, "lit" => 1, "null" => false}
  ]

  assert DF.to_columns(result, atom_keys: true) == %{
           a: [1, nil, 3],
           b: ["a", "b", nil],
           c: struct_rows
         }
end

test "adds new columns" do
df = DF.new(a: [1, 2, 3], b: ["a", "b", "c"])

Expand Down

0 comments on commit 1a8fc05

Please sign in to comment.