diff --git a/lib/explorer/backend/lazy_series.ex b/lib/explorer/backend/lazy_series.ex index 5a17d1ae9..556844616 100644 --- a/lib/explorer/backend/lazy_series.ex +++ b/lib/explorer/backend/lazy_series.ex @@ -135,7 +135,7 @@ defmodule Explorer.Backend.LazySeries do @comparison_operations [:equal, :not_equal, :greater, :greater_equal, :less, :less_equal] - @arithmetic_operations [:add, :subtract, :multiply, :pow, :quotient, :remainder] + @arithmetic_operations [:pow, :quotient, :remainder] @aggregation_operations [ :sum, @@ -184,18 +184,44 @@ defmodule Explorer.Backend.LazySeries do def dtype(%Series{} = s), do: s.dtype @impl true - def cast(%Series{} = s, dtype) when is_atom(dtype) do + @valid_dtypes Explorer.Shared.dtypes() + def cast(%Series{} = s, dtype) when dtype in @valid_dtypes do args = [lazy_series!(s), dtype] data = new(:cast, args, aggregations?(args)) Backend.Series.new(data, dtype) end + @impl true + def add(left, right) do + args = [data!(left), data!(right)] + data = new(:add, args, aggregations?(args)) + dtype = resolve_numeric_temporal_dtype(:add, left, right) + Backend.Series.new(data, dtype) + end + + @impl true + def subtract(left, right) do + args = [data!(left), data!(right)] + data = new(:subtract, args, aggregations?(args)) + dtype = resolve_numeric_temporal_dtype(:subtract, left, right) + Backend.Series.new(data, dtype) + end + + @impl true + def multiply(left, right) do + args = [data!(left), data!(right)] + data = new(:multiply, args, aggregations?(args)) + dtype = resolve_numeric_temporal_dtype(:multiply, left, right) + Backend.Series.new(data, dtype) + end + @impl true def divide(left, right) do args = [data!(left), data!(right)] data = new(:divide, args, aggregations?(args)) - Backend.Series.new(data, :float) + dtype = resolve_numeric_temporal_dtype(:divide, left, right) + Backend.Series.new(data, dtype) end @impl true @@ -635,6 +661,41 @@ defmodule Explorer.Backend.LazySeries do defp resolve_numeric_dtype(:window_mean, _items), do: 
:float defp resolve_numeric_dtype(_op, items), do: resolve_numeric_dtype(items) + defp resolve_numeric_temporal_dtype(op, %Series{dtype: ldt} = left, %Series{dtype: rdt} = right) do + case {op, ldt, rdt} do + {:add, {:datetime, ltu}, {:duration, rtu}} -> {:datetime, highest_precision(ltu, rtu)} + {:add, {:duration, ltu}, {:datetime, rtu}} -> {:datetime, highest_precision(ltu, rtu)} + {:add, {:duration, ltu}, {:duration, rtu}} -> {:duration, highest_precision(ltu, rtu)} + {:subtract, {:datetime, ltu}, {:datetime, rtu}} -> {:duration, highest_precision(ltu, rtu)} + {:subtract, {:datetime, ltu}, {:duration, rtu}} -> {:datetime, highest_precision(ltu, rtu)} + {:subtract, {:duration, ltu}, {:duration, rtu}} -> {:duration, highest_precision(ltu, rtu)} + {:multiply, :integer, {:duration, tu}} -> {:duration, tu} + {:multiply, {:duration, tu}, :integer} -> {:duration, tu} + {:divide, {:duration, tu}, :integer} -> {:duration, tu} + {:divide, _, {:duration, _}} -> raise("cannot divide by duration") + {:divide, _, _} -> :float + _ -> resolve_numeric_dtype([left, right]) + end + end + + defp resolve_numeric_temporal_dtype(op, left, right) do + case op do + :divide -> :float + _ -> resolve_numeric_dtype([left, right]) + end + end + + defp highest_precision(left_timeunit, right_timeunit) do + # Higher precision wins, otherwise information is lost. + case {left_timeunit, right_timeunit} do + {equal, equal} -> equal + {:nanosecond, _} -> :nanosecond + {_, :nanosecond} -> :nanosecond + {:microsecond, _} -> :microsecond + {_, :microsecond} -> :microsecond + end + end + # Returns the inner `data` if it's a lazy series. Otherwise raises an error. defp lazy_series!(series) do case series do diff --git a/lib/explorer/duration.ex b/lib/explorer/duration.ex new file mode 100644 index 000000000..ba220760a --- /dev/null +++ b/lib/explorer/duration.ex @@ -0,0 +1,64 @@ +defmodule Explorer.Duration do + # Internal representation of a duration. 
+ @moduledoc false + alias Explorer.Duration + + @enforce_keys [:value, :precision] + defstruct [:value, :precision] + + # Nanosecond constants + @us_ns 1_000 + @ms_ns 1_000 * @us_ns + @sec_ns 1_000 * @ms_ns + @min_ns 60 * @sec_ns + @hour_ns 60 * @min_ns + @day_ns 24 * @hour_ns + + def to_string(%Explorer.Duration{value: value, precision: precision}) do + case precision do + :millisecond -> format_nanoseconds(value * @ms_ns) + :microsecond -> format_nanoseconds(value * @us_ns) + :nanosecond -> format_nanoseconds(value) + end + end + + defp format_nanoseconds(nanoseconds) when is_integer(nanoseconds) do + result = nanoseconds |> abs |> format_pos_nanoseconds() + + if nanoseconds < 0 do + "-" <> result + else + result + end + end + + defp format_pos_nanoseconds(nanoseconds) when is_integer(nanoseconds) and nanoseconds >= 0 do + [d: @day_ns, h: @hour_ns, m: @min_ns, s: @sec_ns, ms: @ms_ns, us: @us_ns, ns: 1] + |> Enum.reduce({[], nanoseconds}, fn {unit, ns_per_unit}, {parts, ns} -> + {num_units, remaining_ns} = + if ns >= ns_per_unit do + {div(ns, ns_per_unit), rem(ns, ns_per_unit)} + else + {0, ns} + end + + {[{unit, num_units} | parts], remaining_ns} + end) + |> then(fn {parts_reversed, _} -> parts_reversed end) + |> Enum.reverse() + |> Enum.reject(fn {_unit, value} -> value == 0 end) + |> Enum.map_join(" ", fn {unit, value} -> "#{value}#{unit}" end) + |> case do + "" -> "0" + result -> result + end + end + + defimpl String.Chars do + def to_string(%Duration{} = duration), do: Duration.to_string(duration) + end + + defimpl Inspect do + def inspect(%Duration{} = duration, _), do: "Duration[" <> Duration.to_string(duration) <> "]" + end +end diff --git a/lib/explorer/polars_backend/expression.ex b/lib/explorer/polars_backend/expression.ex index d9e238b6f..3eecf5901 100644 --- a/lib/explorer/polars_backend/expression.ex +++ b/lib/explorer/polars_backend/expression.ex @@ -154,8 +154,9 @@ defmodule Explorer.PolarsBackend.Expression do end def to_expr(%LazySeries{op: 
:cast, args: [lazy_series, dtype]}) do - expr = to_expr(lazy_series) - Native.expr_cast(expr, Atom.to_string(dtype)) + lazy_series_expr = to_expr(lazy_series) + dtype_expr = Explorer.Shared.dtype_to_string(dtype) + Native.expr_cast(lazy_series_expr, dtype_expr) end def to_expr(%LazySeries{op: :fill_missing_with_strategy, args: [lazy_series, strategy]}) do diff --git a/lib/explorer/polars_backend/native.ex b/lib/explorer/polars_backend/native.ex index f3808039b..f7cae3b69 100644 --- a/lib/explorer/polars_backend/native.ex +++ b/lib/explorer/polars_backend/native.ex @@ -281,6 +281,7 @@ defmodule Explorer.PolarsBackend.Native do def s_from_list_date(_name, _val), do: err() def s_from_list_time(_name, _val), do: err() def s_from_list_datetime(_name, _val, _precision), do: err() + def s_from_list_duration(_name, _val, _precision), do: err() def s_from_list_f64(_name, _val), do: err() def s_from_list_i64(_name, _val), do: err() def s_from_list_u32(_name, _val), do: err() diff --git a/lib/explorer/polars_backend/series.ex b/lib/explorer/polars_backend/series.ex index 8a8aa2bea..d7d656788 100644 --- a/lib/explorer/polars_backend/series.ex +++ b/lib/explorer/polars_backend/series.ex @@ -44,6 +44,15 @@ defmodule Explorer.PolarsBackend.Series do def cast(series, {:datetime, :nanosecond}), do: Shared.apply_series(series, :s_cast, ["datetime[ns]"]) + def cast(series, {:duration, :millisecond}), + do: Shared.apply_series(series, :s_cast, ["duration[ms]"]) + + def cast(series, {:duration, :microsecond}), + do: Shared.apply_series(series, :s_cast, ["duration[μs]"]) + + def cast(series, {:duration, :nanosecond}), + do: Shared.apply_series(series, :s_cast, ["duration[ns]"]) + def cast(series, dtype), do: Shared.apply_series(series, :s_cast, [Atom.to_string(dtype)]) @impl true @@ -78,6 +87,9 @@ defmodule Explorer.PolarsBackend.Series do "datetime[ms]" -> {:s, 64} "datetime[μs]" -> {:s, 64} "datetime[ns]" -> {:s, 64} + "duration[ms]" -> {:s, 64} + "duration[μs]" -> {:s, 64} + 
"duration[ns]" -> {:s, 64} "cat" -> {:u, 32} dtype -> raise "cannot convert dtype #{inspect(dtype)} to iotype" end @@ -681,6 +693,10 @@ defmodule Explorer.PolarsBackend.Series do defp to_mod_series(value, %{dtype: :integer}, mod) when is_float(value) or is_non_finite(value), do: mod.from_list([value], :float) + defp to_mod_series(%NaiveDateTime{} = value, %{dtype: {dtype_base, _}}, mod) + when dtype_base in [:datetime, :duration], + do: mod.from_list([value], {:datetime, :microsecond}) + defp to_mod_series(value, %{dtype: :category}, mod), do: mod.from_list([value], :string) diff --git a/lib/explorer/polars_backend/shared.ex b/lib/explorer/polars_backend/shared.ex index 1fc5b0ec2..fb86dfb0c 100644 --- a/lib/explorer/polars_backend/shared.ex +++ b/lib/explorer/polars_backend/shared.ex @@ -110,6 +110,7 @@ defmodule Explorer.PolarsBackend.Shared do :date -> Native.s_from_list_date(name, list) :time -> Native.s_from_list_time(name, list) {:datetime, precision} -> Native.s_from_list_datetime(name, list, Atom.to_string(precision)) + {:duration, precision} -> Native.s_from_list_duration(name, list, Atom.to_string(precision)) :binary -> Native.s_from_list_binary(name, list) end end @@ -134,6 +135,15 @@ defmodule Explorer.PolarsBackend.Shared do {:datetime, :nanosecond} -> Native.s_from_binary_i64(name, binary) |> Native.s_cast("datetime[ns]") |> ok() + {:duration, :millisecond} -> + Native.s_from_binary_i64(name, binary) |> Native.s_cast("duration[ms]") |> ok() + + {:duration, :microsecond} -> + Native.s_from_binary_i64(name, binary) |> Native.s_cast("duration[μs]") |> ok() + + {:duration, :nanosecond} -> + Native.s_from_binary_i64(name, binary) |> Native.s_cast("duration[ns]") |> ok() + :integer -> Native.s_from_binary_i64(name, binary) @@ -152,6 +162,9 @@ defmodule Explorer.PolarsBackend.Shared do def normalise_dtype("datetime[ms]"), do: {:datetime, :millisecond} def normalise_dtype("datetime[ns]"), do: {:datetime, :nanosecond} def normalise_dtype("datetime[μs]"), do: 
{:datetime, :microsecond} + def normalise_dtype("duration[ms]"), do: {:duration, :millisecond} + def normalise_dtype("duration[ns]"), do: {:duration, :nanosecond} + def normalise_dtype("duration[μs]"), do: {:duration, :microsecond} def normalise_dtype("f64"), do: :float def normalise_dtype("i64"), do: :integer def normalise_dtype("list[u32]"), do: :integer @@ -165,6 +178,9 @@ defmodule Explorer.PolarsBackend.Shared do def internal_from_dtype({:datetime, :millisecond}), do: "datetime[ms]" def internal_from_dtype({:datetime, :nanosecond}), do: "datetime[ns]" def internal_from_dtype({:datetime, :microsecond}), do: "datetime[μs]" + def internal_from_dtype({:duration, :millisecond}), do: "duration[ms]" + def internal_from_dtype({:duration, :nanosecond}), do: "duration[ns]" + def internal_from_dtype({:duration, :microsecond}), do: "duration[μs]" def internal_from_dtype(:float), do: "f64" def internal_from_dtype(:integer), do: "i64" def internal_from_dtype(:string), do: "str" diff --git a/lib/explorer/series.ex b/lib/explorer/series.ex index 2679dd824..c7b2196f7 100644 --- a/lib/explorer/series.ex +++ b/lib/explorer/series.ex @@ -11,6 +11,9 @@ defmodule Explorer.Series do * `{:datetime, :millisecond}` - DateTime type with milli-second precision that unwraps to `Elixir.NaiveDateTime` * `{:datetime, :microsecond}` - DateTime type with micro-second precision that unwraps to `Elixir.NaiveDateTime` * `{:datetime, :nanosecond}` - DateTime type with nano-second precision that unwraps to `Elixir.NaiveDateTime` + * `{:duration, :millisecond}` - Duration type with milli-second precision that unwraps to `integer` + * `{:duration, :microsecond}` - Duration type with micro-second precision that unwraps to `integer` + * `{:duration, :nanosecond}` - Duration type with nano-second precision that unwraps to `integer` * `:float` - 64-bit floating point number * `:integer` - 64-bit signed integer * `:string` - UTF-8 encoded binary @@ -64,7 +67,11 @@ defmodule Explorer.Series do 
@valid_dtypes Explorer.Shared.dtypes() @datetime_dtypes Explorer.Shared.datetime_types() - @date_or_datetime_dtypes [:date | Explorer.Shared.datetime_types()] + @duration_dtypes Explorer.Shared.duration_types() + @date_or_datetime_dtypes [:date | @datetime_dtypes] + @temporal_dtypes [:time | @date_or_datetime_dtypes ++ @duration_dtypes] + @numeric_dtypes [:integer, :float] + @numeric_or_temporal_dtypes @numeric_dtypes ++ @temporal_dtypes @type dtype :: :binary @@ -73,14 +80,14 @@ defmodule Explorer.Series do | :date | :time | datetime_dtype + | duration_dtype | :float | :integer | :string - @type datetime_dtype :: - {:datetime, :nanosecond} - | {:datetime, :microsecond} - | {:datetime, :millisecond} + @type time_unit :: :nanosecond | :microsecond | :millisecond + @type datetime_dtype :: {:datetime, time_unit} + @type duration_dtype :: {:duration, time_unit} @type t :: %Series{data: Explorer.Backend.Series.t(), dtype: dtype()} @type lazy_t :: %Series{data: Explorer.Backend.LazySeries.t(), dtype: dtype()} @@ -107,7 +114,7 @@ defmodule Explorer.Series do defguardp is_numeric_dtype(dtype) when K.in(dtype, [:float, :integer]) defguardp is_numeric_or_bool_dtype(dtype) when K.in(dtype, [:float, :integer, :boolean]) - defguardp is_numeric_or_date_dtype(dtype) + defguardp is_numeric_or_temporal_dtype(dtype) when K.in(dtype, [ :float, :integer, @@ -115,7 +122,10 @@ defmodule Explorer.Series do :time, {:datetime, :nanosecond}, {:datetime, :microsecond}, - {:datetime, :millisecond} + {:datetime, :millisecond}, + {:duration, :nanosecond}, + {:duration, :microsecond}, + {:duration, :millisecond} ]) @impl true @@ -380,7 +390,7 @@ defmodule Explorer.Series do @doc type: :conversion @spec from_binary( binary, - :float | :integer | :boolean | :date | :time | datetime_dtype, + :float | :integer | :boolean | :date | :time | datetime_dtype | duration_dtype, keyword ) :: Series.t() @@ -932,6 +942,7 @@ defmodule Explorer.Series do * `:date` - Date type that unwraps to `Elixir.Date` * 
`:time` - Time type that unwraps to `Elixir.Time` * `:datetime` - DateTime type that unwraps to `Elixir.NaiveDateTime` + * `:duration` - Duration type that unwraps to `Explorer.Duration` ## Examples @@ -1799,6 +1810,7 @@ defmodule Explorer.Series do * `:date` * `:time` * `:datetime` + * `:duration` ## Examples @@ -1824,16 +1836,15 @@ defmodule Explorer.Series do iex> s = Explorer.Series.from_list(["a", "b", "c"]) iex> Explorer.Series.min(s) - ** (ArgumentError) Explorer.Series.min/1 not implemented for dtype :string. Valid dtypes are [:integer, :float, :date, :time, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}] + ** (ArgumentError) Explorer.Series.min/1 not implemented for dtype :string. Valid dtypes are [:integer, :float, :time, :date, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}, {:duration, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}] """ @doc type: :aggregation @spec min(series :: Series.t()) :: number() | non_finite() | Date.t() | Time.t() | NaiveDateTime.t() | nil - def min(%Series{dtype: dtype} = series) when is_numeric_or_date_dtype(dtype), + def min(%Series{dtype: dtype} = series) when is_numeric_or_temporal_dtype(dtype), do: apply_series(series, :min) - def min(%Series{dtype: dtype}), - do: dtype_error("min/1", dtype, [:integer, :float, :date, :time] ++ @datetime_dtypes) + def min(%Series{dtype: dtype}), do: dtype_error("min/1", dtype, @numeric_or_temporal_dtypes) @doc """ Gets the maximum value of the series. @@ -1845,6 +1856,7 @@ defmodule Explorer.Series do * `:date` * `:time` * `:datetime` + * `:duration` ## Examples @@ -1870,16 +1882,15 @@ defmodule Explorer.Series do iex> s = Explorer.Series.from_list(["a", "b", "c"]) iex> Explorer.Series.max(s) - ** (ArgumentError) Explorer.Series.max/1 not implemented for dtype :string. 
Valid dtypes are [:integer, :float, :date, :time, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}] + ** (ArgumentError) Explorer.Series.max/1 not implemented for dtype :string. Valid dtypes are [:integer, :float, :time, :date, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}, {:duration, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}] """ @doc type: :aggregation @spec max(series :: Series.t()) :: number() | non_finite() | Date.t() | Time.t() | NaiveDateTime.t() | nil - def max(%Series{dtype: dtype} = series) when is_numeric_or_date_dtype(dtype), + def max(%Series{dtype: dtype} = series) when is_numeric_or_temporal_dtype(dtype), do: apply_series(series, :max) - def max(%Series{dtype: dtype}), - do: dtype_error("max/1", dtype, [:integer, :float, :date, :time] ++ @datetime_dtypes) + def max(%Series{dtype: dtype}), do: dtype_error("max/1", dtype, @numeric_or_temporal_dtypes) @doc """ Gets the index of the maximum value of the series. @@ -1891,6 +1902,7 @@ defmodule Explorer.Series do * `:date` * `:time` * `:datetime` + * `:duration` ## Examples @@ -1916,15 +1928,15 @@ defmodule Explorer.Series do iex> s = Explorer.Series.from_list(["a", "b", "c"]) iex> Explorer.Series.argmax(s) - ** (ArgumentError) Explorer.Series.argmax/1 not implemented for dtype :string. Valid dtypes are [:integer, :float, :date, :time, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}] + ** (ArgumentError) Explorer.Series.argmax/1 not implemented for dtype :string. 
Valid dtypes are [:integer, :float, :time, :date, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}, {:duration, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}] """ @doc type: :aggregation @spec argmax(series :: Series.t()) :: number() | non_finite() | nil - def argmax(%Series{dtype: dtype} = series) when is_numeric_or_date_dtype(dtype), + def argmax(%Series{dtype: dtype} = series) when is_numeric_or_temporal_dtype(dtype), do: apply_series(series, :argmax) def argmax(%Series{dtype: dtype}), - do: dtype_error("argmax/1", dtype, [:integer, :float, :date, :time] ++ @datetime_dtypes) + do: dtype_error("argmax/1", dtype, @numeric_or_temporal_dtypes) @doc """ Gets the index of the minimum value of the series. @@ -1938,6 +1950,7 @@ defmodule Explorer.Series do * `:date` * `:time` * `:datetime` + * `:duration` ## Examples @@ -1963,15 +1976,15 @@ defmodule Explorer.Series do iex> s = Explorer.Series.from_list(["a", "b", "c"]) iex> Explorer.Series.argmin(s) - ** (ArgumentError) Explorer.Series.argmin/1 not implemented for dtype :string. Valid dtypes are [:integer, :float, :date, :time, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}] + ** (ArgumentError) Explorer.Series.argmin/1 not implemented for dtype :string. 
Valid dtypes are [:integer, :float, :time, :date, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}, {:duration, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}] """ @doc type: :aggregation @spec argmin(series :: Series.t()) :: number() | non_finite() | nil - def argmin(%Series{dtype: dtype} = series) when is_numeric_or_date_dtype(dtype), + def argmin(%Series{dtype: dtype} = series) when is_numeric_or_temporal_dtype(dtype), do: apply_series(series, :argmin) def argmin(%Series{dtype: dtype}), - do: dtype_error("argmin/1", dtype, [:integer, :float, :date, :time] ++ @datetime_dtypes) + do: dtype_error("argmin/1", dtype, @numeric_or_temporal_dtypes) @doc """ Gets the mean value of the series. @@ -2125,6 +2138,7 @@ defmodule Explorer.Series do * `:date` * `:time` * `:datetime` + * `:duration` ## Examples @@ -2150,16 +2164,16 @@ defmodule Explorer.Series do iex> s = Explorer.Series.from_list([true, false, true]) iex> Explorer.Series.quantile(s, 0.5) - ** (ArgumentError) Explorer.Series.quantile/2 not implemented for dtype :boolean. Valid dtypes are [:integer, :float, :date, :time, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}] + ** (ArgumentError) Explorer.Series.quantile/2 not implemented for dtype :boolean. 
Valid dtypes are [:integer, :float, :time, :date, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}, {:duration, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}] """ @doc type: :aggregation @spec quantile(series :: Series.t(), quantile :: float()) :: any() def quantile(%Series{dtype: dtype} = series, quantile) - when is_numeric_or_date_dtype(dtype), + when is_numeric_or_temporal_dtype(dtype), do: apply_series(series, :quantile, [quantile]) def quantile(%Series{dtype: dtype}, _), - do: dtype_error("quantile/2", dtype, [:integer, :float, :date, :time] ++ @datetime_dtypes) + do: dtype_error("quantile/2", dtype, @numeric_or_temporal_dtypes) @doc """ Compute the sample skewness of a series. @@ -2201,14 +2215,12 @@ defmodule Explorer.Series do @spec skew(series :: Series.t(), opts :: Keyword.t()) :: float() | non_finite() | nil def skew(series, opts \\ []) - def skew(%Series{dtype: dtype} = series, opts) - when is_numeric_or_date_dtype(dtype) do + def skew(%Series{dtype: dtype} = series, opts) when is_numeric_dtype(dtype) do opts = Keyword.validate!(opts, bias: true) apply_series(series, :skew, [opts[:bias]]) end - def skew(%Series{dtype: dtype}, _), - do: dtype_error("skew/2", dtype, [:integer, :float]) + def skew(%Series{dtype: dtype}, _), do: dtype_error("skew/2", dtype, [:integer, :float]) @doc """ Compute the Pearson's correlation between two series. 
@@ -2290,6 +2302,7 @@ defmodule Explorer.Series do * `:date` * `:time` * `:datetime` + * `:duration` ## Examples @@ -2319,14 +2332,13 @@ defmodule Explorer.Series do def cumulative_max(series, opts \\ []) def cumulative_max(%Series{dtype: dtype} = series, opts) - when is_numeric_or_date_dtype(dtype) do + when is_numeric_or_temporal_dtype(dtype) do opts = Keyword.validate!(opts, reverse: false) apply_series(series, :cumulative_max, [opts[:reverse]]) end def cumulative_max(%Series{dtype: dtype}, _), - do: - dtype_error("cumulative_max/2", dtype, [:integer, :float, :date, :time] ++ @datetime_dtypes) + do: dtype_error("cumulative_max/2", dtype, @numeric_or_temporal_dtypes) @doc """ Calculates the cumulative minimum of the series. @@ -2342,6 +2354,7 @@ defmodule Explorer.Series do * `:date` * `:time` * `:datetime` + * `:duration` ## Examples @@ -2371,14 +2384,13 @@ defmodule Explorer.Series do def cumulative_min(series, opts \\ []) def cumulative_min(%Series{dtype: dtype} = series, opts) - when is_numeric_or_date_dtype(dtype) do + when is_numeric_or_temporal_dtype(dtype) do opts = Keyword.validate!(opts, reverse: false) apply_series(series, :cumulative_min, [opts[:reverse]]) end def cumulative_min(%Series{dtype: dtype}, _), - do: - dtype_error("cumulative_min/2", dtype, [:integer, :float, :date, :time] ++ @datetime_dtypes) + do: dtype_error("cumulative_min/2", dtype, @numeric_or_temporal_dtypes) @doc """ Calculates the cumulative sum of the series. 
@@ -2475,6 +2487,7 @@ defmodule Explorer.Series do * `:date` * `:time` * `:datetime` + * `:duration` ## Examples @@ -2497,14 +2510,28 @@ defmodule Explorer.Series do def peaks(series, max_or_min \\ :max) def peaks(%Series{dtype: dtype} = series, max_or_min) - when is_numeric_or_date_dtype(dtype), + when is_numeric_or_temporal_dtype(dtype), do: apply_series(series, :peaks, [max_or_min]) def peaks(%Series{dtype: dtype}, _), - do: dtype_error("peaks/2", dtype, [:integer, :float, :date, :time] ++ @datetime_dtypes) + do: dtype_error("peaks/2", dtype, @numeric_or_temporal_dtypes) # Arithmetic + defp enforce_highest_precision( + %Series{dtype: {left_base, left_timeunit}} = left, + %Series{dtype: {right_base, right_timeunit}} = right + ) do + # Higher precision wins, otherwise information is lost. + case {left_timeunit, right_timeunit} do + {equal, equal} -> [left, right] + {:nanosecond, _} -> [left, cast(right, {right_base, :nanosecond})] + {_, :nanosecond} -> [cast(left, {left_base, :nanosecond}), right] + {:microsecond, _} -> [left, cast(right, {right_base, :microsecond})] + {_, :microsecond} -> [cast(left, {left_base, :microsecond}), right] + end + end + @doc """ Adds right to left, element-wise. 
@@ -2546,7 +2573,25 @@ defmodule Explorer.Series do > """ @doc type: :element_wise - @spec add(left :: Series.t() | number(), right :: Series.t() | number()) :: Series.t() + @spec add( + left :: Series.t() | number() | NaiveDateTime.t(), + right :: Series.t() | number() | NaiveDateTime.t() + ) :: Series.t() + def add(%NaiveDateTime{} = left, %Series{dtype: {:duration, timeunit}} = right), + do: apply_series_list(:add, [from_list([left], dtype: {:datetime, timeunit}), right]) + + def add(%Series{dtype: {:duration, timeunit}} = left, %NaiveDateTime{} = right), + do: apply_series_list(:add, [left, from_list([right], dtype: {:datetime, timeunit})]) + + def add(%Series{dtype: {:datetime, _}} = left, %Series{dtype: {:duration, _}} = right), + do: apply_series_list(:add, enforce_highest_precision(left, right)) + + def add(%Series{dtype: {:duration, _}} = left, %Series{dtype: {:datetime, _}} = right), + do: apply_series_list(:add, enforce_highest_precision(left, right)) + + def add(%Series{dtype: {:duration, _}} = left, %Series{dtype: {:duration, _}} = right), + do: apply_series_list(:add, enforce_highest_precision(left, right)) + def add(left, right), do: basic_numeric_operation(:add, left, right) @doc """ @@ -2590,7 +2635,28 @@ defmodule Explorer.Series do > """ @doc type: :element_wise - @spec subtract(left :: Series.t() | number(), right :: Series.t() | number()) :: Series.t() + @spec subtract( + left :: Series.t() | number() | NaiveDateTime.t(), + right :: Series.t() | number() | NaiveDateTime.t() + ) :: Series.t() + def subtract(%NaiveDateTime{} = left, %Series{dtype: {:datetime, timeunit}} = right), + do: apply_series_list(:subtract, [from_list([left], dtype: {:datetime, timeunit}), right]) + + def subtract(%Series{dtype: {:datetime, timeunit}} = left, %NaiveDateTime{} = right), + do: apply_series_list(:subtract, [left, from_list([right], dtype: {:datetime, timeunit})]) + + def subtract(%NaiveDateTime{} = left, %Series{dtype: {:duration, timeunit}} = right), + do: 
apply_series_list(:subtract, [from_list([left], dtype: {:datetime, timeunit}), right]) + + def subtract(%Series{dtype: {:datetime, _}} = left, %Series{dtype: {:datetime, _}} = right), + do: apply_series_list(:subtract, enforce_highest_precision(left, right)) + + def subtract(%Series{dtype: {:datetime, _}} = left, %Series{dtype: {:duration, _}} = right), + do: apply_series_list(:subtract, enforce_highest_precision(left, right)) + + def subtract(%Series{dtype: {:duration, _}} = left, %Series{dtype: {:duration, _}} = right), + do: apply_series_list(:subtract, enforce_highest_precision(left, right)) + def subtract(left, right), do: basic_numeric_operation(:subtract, left, right) @doc """ @@ -2626,6 +2692,12 @@ defmodule Explorer.Series do """ @doc type: :element_wise @spec multiply(left :: Series.t() | number(), right :: Series.t() | number()) :: Series.t() + def multiply(%Series{dtype: {:duration, _} = dtype} = left, %Series{dtype: :integer} = right), + do: apply_series_list(:multiply, [left, right]) |> cast(dtype) + + def multiply(%Series{dtype: :integer} = left, %Series{dtype: {:duration, _} = dtype} = right), + do: apply_series_list(:multiply, [left, right]) |> cast(dtype) + def multiply(left, right), do: basic_numeric_operation(:multiply, left, right) @doc """ @@ -2676,6 +2748,12 @@ defmodule Explorer.Series do """ @doc type: :element_wise @spec divide(left :: Series.t() | number(), right :: Series.t() | number()) :: Series.t() + def divide(%Series{dtype: {:duration, _} = dtype} = left, %Series{dtype: :integer} = right), + do: apply_series_list(:divide, [left, right]) |> cast(dtype) + + def divide(_, %Series{dtype: {:duration, _}}), + do: raise(ArgumentError, "cannot divide by duration") + def divide(left, right), do: basic_numeric_operation(:divide, left, right) @doc """ @@ -3243,6 +3321,7 @@ defmodule Explorer.Series do * `:date` * `:time` * `:datetime` + * `:duration` ## Examples @@ -3263,8 +3342,7 @@ defmodule Explorer.Series do if 
valid_for_bool_mask_operation?(left, right) do apply_series_list(:greater, [left, right]) else - dtypes = [:integer, :float, :date, :time] ++ @datetime_dtypes - dtype_mismatch_error("greater/2", left, right, dtypes) + dtype_mismatch_error("greater/2", left, right, @numeric_or_temporal_dtypes) end end @@ -3282,6 +3360,7 @@ defmodule Explorer.Series do * `:date` * `:time` * `:datetime` + * `:duration` ## Examples @@ -3302,8 +3381,7 @@ defmodule Explorer.Series do if valid_for_bool_mask_operation?(left, right) do apply_series_list(:greater_equal, [left, right]) else - types = [:integer, :float, :date, :time] ++ @datetime_dtypes - dtype_mismatch_error("greater_equal/2", left, right, types) + dtype_mismatch_error("greater_equal/2", left, right, @numeric_or_temporal_dtypes) end end @@ -3321,6 +3399,7 @@ defmodule Explorer.Series do * `:date` * `:time` * `:datetime` + * `:duration` ## Examples @@ -3341,8 +3420,7 @@ defmodule Explorer.Series do if valid_for_bool_mask_operation?(left, right) do apply_series_list(:less, [left, right]) else - dtypes = [:integer, :float, :date, :time] ++ @datetime_dtypes - dtype_mismatch_error("less/2", left, right, dtypes) + dtype_mismatch_error("less/2", left, right, @numeric_or_temporal_dtypes) end end @@ -3360,6 +3438,7 @@ defmodule Explorer.Series do * `:date` * `:time` * `:datetime` + * `:duration` ## Examples @@ -3380,8 +3459,7 @@ defmodule Explorer.Series do if valid_for_bool_mask_operation?(left, right) do apply_series_list(:less_equal, [left, right]) else - types = [:integer, :float, :date, :time] ++ @datetime_dtypes - dtype_mismatch_error("less_equal/2", left, right, types) + dtype_mismatch_error("less_equal/2", left, right, @numeric_or_temporal_dtypes) end end @@ -3440,6 +3518,10 @@ defmodule Explorer.Series do defp valid_for_bool_mask_operation?(%Series{dtype: {:datetime, _}}, %NaiveDateTime{}), do: true + defp valid_for_bool_mask_operation?(%Series{dtype: {:duration, _}}, right) + when is_integer(right), + do: true + defp 
valid_for_bool_mask_operation?(left, %Series{dtype: dtype}) when K.and(is_numeric_dtype(dtype), is_numerical(left)), do: true @@ -3448,6 +3530,10 @@ defmodule Explorer.Series do defp valid_for_bool_mask_operation?(%NaiveDateTime{}, %Series{dtype: {:datetime, _}}), do: true + defp valid_for_bool_mask_operation?(left, %Series{dtype: {:duration, _}}) + when is_integer(left), + do: true + defp valid_for_bool_mask_operation?(_, _), do: false @doc """ diff --git a/lib/explorer/shared.ex b/lib/explorer/shared.ex index 76b8eff57..c250a5502 100644 --- a/lib/explorer/shared.ex +++ b/lib/explorer/shared.ex @@ -15,6 +15,9 @@ defmodule Explorer.Shared do {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}, + {:duration, :nanosecond}, + {:duration, :microsecond}, + {:duration, :millisecond}, :float, :integer, :string @@ -26,6 +29,12 @@ defmodule Explorer.Shared do def datetime_types, do: [{:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}] + @doc """ + Supported duration dtypes. + """ + def duration_types, + do: [{:duration, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}] + @doc """ Gets the backend from a `Keyword.t()` or `nil`. 
""" @@ -183,6 +192,11 @@ defmodule Explorer.Shared do type || preferable_type || :float end + defp type(%Date{} = _item, _type), do: :date + defp type(%Time{} = _item, _type), do: :time + defp type(%NaiveDateTime{} = _item, _type), do: {:datetime, :microsecond} + defp type(%Explorer.Duration{precision: precision} = _item, _type), do: {:duration, precision} + defp type(item, type) when is_integer(item) and type == :float, do: :numeric defp type(item, type) when is_float(item) and type == :integer, do: :numeric defp type(item, type) when is_number(item) and type == :numeric, do: :numeric @@ -200,9 +214,6 @@ defmodule Explorer.Shared do defp type(item, :category) when is_binary(item), do: :category defp type(item, _type) when is_binary(item), do: :string - defp type(%Date{} = _item, _type), do: :date - defp type(%Time{} = _item, _type), do: :time - defp type(%NaiveDateTime{} = _item, _type), do: {:datetime, :microsecond} defp type(item, _type) when is_nil(item), do: nil defp type(item, _type), do: raise(ArgumentError, "unsupported datatype: #{inspect(item)}") @@ -242,6 +253,7 @@ defmodule Explorer.Shared do :date -> {:s, 32} :time -> {:s, 64} {:datetime, _} -> {:s, 64} + {:duration, _} -> {:s, 64} _ -> raise ArgumentError, "cannot convert dtype #{dtype} into a binary/tensor type" end end @@ -265,6 +277,9 @@ defmodule Explorer.Shared do def dtype_to_string({:datetime, :millisecond}), do: "datetime[ms]" def dtype_to_string({:datetime, :microsecond}), do: "datetime[μs]" def dtype_to_string({:datetime, :nanosecond}), do: "datetime[ns]" + def dtype_to_string({:duration, :millisecond}), do: "duration[ms]" + def dtype_to_string({:duration, :microsecond}), do: "duration[μs]" + def dtype_to_string({:duration, :nanosecond}), do: "duration[ns]" def dtype_to_string(other), do: Atom.to_string(other) @threshold 0.77 diff --git a/native/explorer/cargo-toolchain.toml b/native/explorer/cargo-toolchain.toml new file mode 100644 index 000000000..bc8b52682 --- /dev/null +++ 
b/native/explorer/cargo-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel = "nightly-2023-06-23" diff --git a/native/explorer/src/dataframe/io.rs b/native/explorer/src/dataframe/io.rs index 8977154cc..1a1ce327a 100644 --- a/native/explorer/src/dataframe/io.rs +++ b/native/explorer/src/dataframe/io.rs @@ -96,6 +96,9 @@ fn dtype_from_str(dtype: &str) -> Result { "datetime[ms]" => Ok(DataType::Datetime(TimeUnit::Milliseconds, None)), "datetime[ns]" => Ok(DataType::Datetime(TimeUnit::Nanoseconds, None)), "datetime[μs]" => Ok(DataType::Datetime(TimeUnit::Microseconds, None)), + "duration[ms]" => Ok(DataType::Duration(TimeUnit::Milliseconds)), + "duration[ns]" => Ok(DataType::Duration(TimeUnit::Nanoseconds)), + "duration[μs]" => Ok(DataType::Duration(TimeUnit::Microseconds)), "f64" => Ok(DataType::Float64), "i64" => Ok(DataType::Int64), "str" => Ok(DataType::Utf8), diff --git a/native/explorer/src/datatypes.rs b/native/explorer/src/datatypes.rs index 23a46af4a..e668aea5e 100644 --- a/native/explorer/src/datatypes.rs +++ b/native/explorer/src/datatypes.rs @@ -209,6 +209,19 @@ impl From for ExDate { } } +#[derive(NifStruct, Copy, Clone, Debug)] +#[module = "Explorer.Duration"] +pub struct ExDuration { + pub value: i64, + pub precision: Atom, +} + +impl From for i64 { + fn from(d: ExDuration) -> i64 { + d.value + } +} + #[derive(NifStruct, Copy, Clone, Debug)] #[module = "NaiveDateTime"] pub struct ExDateTime { diff --git a/native/explorer/src/encoding.rs b/native/explorer/src/encoding.rs index 144966b0f..83b5cd11a 100644 --- a/native/explorer/src/encoding.rs +++ b/native/explorer/src/encoding.rs @@ -5,8 +5,8 @@ use rustler::{Encoder, Env, NewBinary, OwnedBinary, ResourceArc, Term}; use std::{mem, slice}; use crate::atoms::{ - self, calendar, day, hour, infinity, microsecond, minute, month, nan, neg_infinity, second, - year, + self, calendar, day, hour, infinity, microsecond, millisecond, minute, month, nan, nanosecond, + neg_infinity, precision, second, value, year, }; 
use crate::datatypes::{ days_to_date, time64ns_to_time, timestamp_to_datetime, ExSeries, ExSeriesRef, @@ -210,6 +210,81 @@ fn datetime_series_to_list<'b>( )) } +fn time_unit_to_atom(time_unit: TimeUnit) -> atom::Atom { + match time_unit { + TimeUnit::Milliseconds => millisecond(), + TimeUnit::Microseconds => microsecond(), + TimeUnit::Nanoseconds => nanosecond(), + } +} + +macro_rules! unsafe_encode_duration { + ($v: expr, $time_unit: expr, $duration_struct_keys: ident, $duration_module: ident, $env: ident) => {{ + let value = $v; + let precision = time_unit_to_atom($time_unit); + + unsafe { + Term::new( + $env, + map::make_map_from_arrays( + $env.as_c_arg(), + $duration_struct_keys, + &[ + $duration_module, + value.encode($env).as_c_arg(), + precision.encode($env).as_c_arg(), + ], + ) + .unwrap(), + ) + } + }}; +} + +// Here we build the Explorer.Duration struct manually, as it's much faster than using NifStruct +// This is because we already have the keys (we know this at compile time), and the types, +// so we can build the struct directly. 
+fn duration_struct_keys(env: Env) -> [NIF_TERM; 3] { + return [ + atom::__struct__().encode(env).as_c_arg(), + value().encode(env).as_c_arg(), + precision().encode(env).as_c_arg(), + ]; +} + +#[inline] +pub fn encode_duration(v: i64, time_unit: TimeUnit, env: Env) -> Result { + let duration_struct_keys = &duration_struct_keys(env); + let duration_module = atoms::duration_module().encode(env).as_c_arg(); + + Ok(unsafe_encode_duration!( + v, + time_unit, + duration_struct_keys, + duration_module, + env + )) +} + +#[inline] +fn duration_series_to_list<'b>( + s: &Series, + time_unit: TimeUnit, + env: Env<'b>, +) -> Result, ExplorerError> { + let duration_struct_keys = &duration_struct_keys(env); + let duration_module = atoms::duration_module().encode(env).as_c_arg(); + + Ok(unsafe_iterator_series_to_list!( + env, + s.duration()?.into_iter().map(|option| option + .map(|v| { + unsafe_encode_duration!(v, time_unit, duration_struct_keys, duration_module, env) + }) + .encode(env)) + )) +} + macro_rules! 
unsafe_encode_time { ($v: expr, $naive_time_struct_keys: ident, $calendar_iso_module: ident, $time_module: ident, $env: ident) => {{ let t = time64ns_to_time($v); @@ -467,6 +542,7 @@ pub fn term_from_value<'b>(v: AnyValue, env: Env<'b>) -> Result, Explor AnyValue::Date(v) => encode_date(v, env), AnyValue::Time(v) => encode_time(v, env), AnyValue::Datetime(v, time_unit, None) => encode_datetime(v, time_unit, env), + AnyValue::Duration(v, time_unit) => encode_duration(v, time_unit, env), AnyValue::Categorical(idx, mapping, _) => Ok(mapping.get(idx).encode(env)), dt => panic!("cannot encode value {dt:?} to term"), } @@ -493,6 +569,7 @@ pub fn list_from_series(s: ExSeries, env: Env) -> Result { DataType::Date => date_series_to_list(&s, env), DataType::Time => time_series_to_list(&s, env), DataType::Datetime(time_unit, None) => datetime_series_to_list(&s, *time_unit, env), + DataType::Duration(time_unit) => duration_series_to_list(&s, *time_unit, env), DataType::Utf8 => { generic_binary_series_to_list(&s.resource, s.utf8()?.downcast_iter(), env) } @@ -524,6 +601,9 @@ pub fn iovec_from_series(s: ExSeries, env: Env) -> Result { DataType::Datetime(_, None) => { series_to_iovec!(resource, s, env, datetime, i64) } + DataType::Duration(_) => { + series_to_iovec!(resource, s, env, duration, i64) + } DataType::Categorical(Some(_)) => { let cat_series = s.cast(&DataType::UInt32)?; diff --git a/native/explorer/src/lib.rs b/native/explorer/src/lib.rs index b53596b69..184228fda 100644 --- a/native/explorer/src/lib.rs +++ b/native/explorer/src/lib.rs @@ -50,6 +50,7 @@ mod atoms { rustler::atoms! 
{ calendar_iso_module = "Elixir.Calendar.ISO", date_module = "Elixir.Date", + duration_module = "Elixir.Explorer.Duration", naive_datetime_module = "Elixir.NaiveDateTime", time_module = "Elixir.Time", hour, @@ -58,7 +59,11 @@ mod atoms { day, month, year, + value, + precision, + millisecond, microsecond, + nanosecond, calendar, nan, infinity, @@ -364,6 +369,7 @@ rustler::init!( s_from_list_date, s_from_list_time, s_from_list_datetime, + s_from_list_duration, s_from_list_f64, s_from_list_i64, s_from_list_u32, diff --git a/native/explorer/src/series.rs b/native/explorer/src/series.rs index 5c6b5dc74..a19c8dd7c 100644 --- a/native/explorer/src/series.rs +++ b/native/explorer/src/series.rs @@ -1,6 +1,6 @@ use crate::{ atoms, - datatypes::{ExDate, ExDateTime, ExTime}, + datatypes::{ExDate, ExDateTime, ExDuration, ExTime}, encoding, ExDataFrame, ExSeries, ExplorerError, }; @@ -76,14 +76,19 @@ pub fn s_from_list_date(name: &str, val: Vec>) -> ExSeries { ) } -#[rustler::nif(schedule = "DirtyCpu")] -pub fn s_from_list_datetime(name: &str, val: Vec>, precision: &str) -> ExSeries { - let timeunit = match precision { +fn precision_to_timeunit(precision: &str) -> TimeUnit { + match precision { "millisecond" => TimeUnit::Milliseconds, "microsecond" => TimeUnit::Microseconds, "nanosecond" => TimeUnit::Nanoseconds, _ => panic!("Unknown datetime precision"), - }; + } +} + +#[rustler::nif(schedule = "DirtyCpu")] +pub fn s_from_list_datetime(name: &str, val: Vec>, precision: &str) -> ExSeries { + let timeunit = precision_to_timeunit(precision); + ExSeries::new( Series::new( name, @@ -96,6 +101,22 @@ pub fn s_from_list_datetime(name: &str, val: Vec>, precision: ) } +#[rustler::nif(schedule = "DirtyCpu")] +pub fn s_from_list_duration(name: &str, val: Vec>, precision: &str) -> ExSeries { + let timeunit = precision_to_timeunit(precision); + + ExSeries::new( + Series::new( + name, + val.iter() + .map(|d| d.map(|d| d.into())) + .collect::>>(), + ) + .cast(&DataType::Duration(timeunit)) + 
.unwrap(), + ) +} + #[rustler::nif(schedule = "DirtyCpu")] pub fn s_from_list_time(name: &str, val: Vec>) -> ExSeries { ExSeries::new( @@ -1082,6 +1103,9 @@ pub fn cast_str_to_dtype(str_type: &str) -> Result { "datetime[ms]" => Ok(DataType::Datetime(TimeUnit::Milliseconds, None)), "datetime[μs]" => Ok(DataType::Datetime(TimeUnit::Microseconds, None)), "datetime[ns]" => Ok(DataType::Datetime(TimeUnit::Nanoseconds, None)), + "duration[ms]" => Ok(DataType::Duration(TimeUnit::Milliseconds)), + "duration[μs]" => Ok(DataType::Duration(TimeUnit::Microseconds)), + "duration[ns]" => Ok(DataType::Duration(TimeUnit::Nanoseconds)), "boolean" => Ok(DataType::Boolean), "string" => Ok(DataType::Utf8), "binary" => Ok(DataType::Binary), diff --git a/test/explorer/series/duration_test.exs b/test/explorer/series/duration_test.exs new file mode 100644 index 000000000..dcf8a2591 --- /dev/null +++ b/test/explorer/series/duration_test.exs @@ -0,0 +1,368 @@ +defmodule Explorer.Series.DurationTest do + use ExUnit.Case, async: true + + alias Explorer.Duration + alias Explorer.Series + + @one_hour_us 3600 * 1_000_000 + @one_hour_duration_us %Duration{value: @one_hour_us, precision: :microsecond} + + describe "list" do + test "from a list of integers" do + ms = Series.from_list([1], dtype: {:duration, :millisecond}) + us = Series.from_list([1_000], dtype: {:duration, :microsecond}) + ns = Series.from_list([1_000_000], dtype: {:duration, :nanosecond}) + + # The series have the correct dtypes. + assert ms.dtype == {:duration, :millisecond} + assert us.dtype == {:duration, :microsecond} + assert ns.dtype == {:duration, :nanosecond} + + # The original integer is preserved when converting back to a list. 
+ [%Duration{value: 1}] = Series.to_list(ms) + [%Duration{value: 1_000}] = Series.to_list(us) + [%Duration{value: 1_000_000}] = Series.to_list(ns) + end + + test "from a list of durations" do + ms = Series.from_list([%Duration{value: 1, precision: :millisecond}]) + us = Series.from_list([%Duration{value: 1_000, precision: :microsecond}]) + ns = Series.from_list([%Duration{value: 1_000_000, precision: :nanosecond}]) + + # The series have the correct dtypes. + assert ms.dtype == {:duration, :millisecond} + assert us.dtype == {:duration, :microsecond} + assert ns.dtype == {:duration, :nanosecond} + + # The original integer is preserved when converting back to a list. + [%Duration{value: 1}] = Series.to_list(ms) + [%Duration{value: 1_000}] = Series.to_list(us) + [%Duration{value: 1_000_000}] = Series.to_list(ns) + end + + test "can cast any precision to any other precision" do + ms = Series.from_list([1], dtype: {:duration, :millisecond}) + us = Series.from_list([1_000], dtype: {:duration, :microsecond}) + ns = Series.from_list([1_000_000], dtype: {:duration, :nanosecond}) + + assert ms |> Series.cast({:duration, :microsecond}) |> Series.all_equal(us) + assert ms |> Series.cast({:duration, :nanosecond}) |> Series.all_equal(ns) + assert us |> Series.cast({:duration, :millisecond}) |> Series.all_equal(ms) + assert us |> Series.cast({:duration, :nanosecond}) |> Series.all_equal(ns) + assert ns |> Series.cast({:duration, :millisecond}) |> Series.all_equal(ms) + assert ns |> Series.cast({:duration, :microsecond}) |> Series.all_equal(us) + end + + test "can convert to a list and back without needing the `dtype` option" do + ms = Series.from_list([1], dtype: {:duration, :millisecond}) + us = Series.from_list([1_000], dtype: {:duration, :microsecond}) + ns = Series.from_list([1_000_000], dtype: {:duration, :nanosecond}) + + assert ms |> Series.to_list() |> Series.from_list() |> Series.all_equal(ms) + assert us |> Series.to_list() |> Series.from_list() |> Series.all_equal(us) + 
assert ns |> Series.to_list() |> Series.from_list() |> Series.all_equal(ns) + end + end + + describe "io" do + test "series to and from binary" do + for precision <- [:millisecond, :microsecond, :nanosecond] do + dtype = {:duration, precision} + durations = Series.from_list([100, 101], dtype: dtype) + + [binary] = Series.to_iovec(durations) + from_binary = Series.from_binary(binary, dtype) + + assert durations.dtype == from_binary.dtype + assert Series.to_list(durations) == Series.to_list(from_binary) + end + end + + test "duration structs to_string similarly to polars" do + strings = [ + "1ms", + "10ms", + "100ms", + "1s", + "10s", + "1m 40s", + "16m 40s", + "2h 46m 40s", + "1d 3h 46m 40s", + "11d 13h 46m 40s", + "115d 17h 46m 40s", + # Like polars, the maximum unit is days so we don't show years. + "1157d 9h 46m 40s" + ] + + for {string, power} <- Enum.with_index(strings) do + assert to_string(%Duration{value: 10 ** power, precision: :millisecond}) == string + end + end + + test "duration structs inspect as \"Duration[*]\"" do + assert inspect(%Duration{value: 1, precision: :millisecond}) == "Duration[1ms]" + end + + test "in a series, equal values are displayed the same regardless of precision" do + ms = Series.from_list([1], dtype: {:duration, :millisecond}) + us = Series.from_list([1_000], dtype: {:duration, :microsecond}) + ns = Series.from_list([1_000_000], dtype: {:duration, :nanosecond}) + + # Each series displays its values as "[1ms]" as well as the correct precision. 
+ assert inspect(ms) == """ + #Explorer.Series< + Polars[1] + duration[ms] [1ms] + >\ + """ + + assert inspect(us) == """ + #Explorer.Series< + Polars[1] + duration[μs] [1ms] + >\ + """ + + assert inspect(ns) == """ + #Explorer.Series< + Polars[1] + duration[ns] [1ms] + >\ + """ + end + end + + describe "add" do + test "datetime[μs] + duration[μs]" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) + sum_s = Series.add(eleven_s, one_hour_s) + + assert sum_s.dtype == {:datetime, :microsecond} + twelve_ndt = ~N[2023-08-20 12:00:00.0000000] + assert Series.to_list(sum_s) == [twelve_ndt] + end + + test "duration[μs] + datetime[μs]" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) + sum_s = Series.add(one_hour_s, eleven_s) + + assert sum_s.dtype == {:datetime, :microsecond} + twelve_ndt = ~N[2023-08-20 12:00:00.0000000] + assert Series.to_list(sum_s) == [twelve_ndt] + end + + test "duration[μs] + duration[μs]" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + two_hour_s = Series.from_list([2 * @one_hour_us], dtype: {:duration, :microsecond}) + sum_s = Series.add(one_hour_s, two_hour_s) + + three_hour_duration_us = %Duration{value: 3 * @one_hour_us, precision: :microsecond} + assert sum_s.dtype == {:duration, :microsecond} + assert Series.to_list(sum_s) == [three_hour_duration_us] + end + + test "NaiveDateTime + duration[μs]" do + eleven = ~N[2023-08-20 11:00:00.0000000] + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + sum_s = Series.add(eleven, one_hour_s) + + assert sum_s.dtype == {:datetime, :microsecond} + assert Series.to_list(sum_s) == [~N[2023-08-20 12:00:00.0000000]] + end + + test "duration[μs] + NaiveDateTime" do + eleven = ~N[2023-08-20 11:00:00.0000000] + one_hour_s = 
Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + sum_s = Series.add(one_hour_s, eleven) + + assert sum_s.dtype == {:datetime, :microsecond} + assert Series.to_list(sum_s) == [~N[2023-08-20 12:00:00.0000000]] + end + + test "datetime[μs] + duration[ns] (different precisions)" do + one_hour_ns = 3600 * 1_000_000_000 + one_hour_s = Series.from_list([one_hour_ns], dtype: {:duration, :nanosecond}) + eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) + sum_s = Series.add(eleven_s, one_hour_s) + + # Since we added a duration with :nanosecond precision to a datetime with :microsecond + # precision, the resulting sum has :nanosecond precision since that was the highest + # precision present in the operation. + assert sum_s.dtype == {:datetime, :nanosecond} + assert Series.to_list(sum_s) == [~N[2023-08-20 12:00:00.0000000]] + end + + test "datetime[μs] + datetime[μs] raises ArgumentError" do + eleven_s = Series.from_list([~N[2023-08-20 11:00:00]]) + twelve_s = Series.from_list([~N[2023-08-20 12:00:00]]) + + assert_raise ArgumentError, + "cannot invoke Explorer.Series.add/2 with mismatched dtypes: {:datetime, :microsecond} and {:datetime, :microsecond}", + fn -> Series.add(eleven_s, twelve_s) end + end + end + + describe "subtract" do + test "datetime[μs] - datetime[μs]" do + eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) + twelve_s = Series.from_list([~N[2023-08-20 12:00:00.0000000]]) + diff_s = Series.subtract(twelve_s, eleven_s) + + assert diff_s.dtype == {:duration, :microsecond} + assert Series.to_list(diff_s) == [@one_hour_duration_us] + end + + test "datetime[μs] - duration[μs]" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + twelve_s = Series.from_list([~N[2023-08-20 12:00:00.0000000]]) + diff_s = Series.subtract(twelve_s, one_hour_s) + + assert diff_s.dtype == {:datetime, :microsecond} + assert Series.to_list(diff_s) == [~N[2023-08-20 11:00:00.0000000]] + end + + test 
"duration[μs] - duration[μs]" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + two_hour_s = Series.from_list([2 * @one_hour_us], dtype: {:duration, :microsecond}) + diff_s = Series.subtract(two_hour_s, one_hour_s) + + assert diff_s.dtype == {:duration, :microsecond} + assert Series.to_list(diff_s) == [@one_hour_duration_us] + end + + test "NaiveDateTime - datetime[μs]" do + eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) + twelve = ~N[2023-08-20 12:00:00.0000000] + diff_s = Series.subtract(twelve, eleven_s) + + assert diff_s.dtype == {:duration, :microsecond} + assert Series.to_list(diff_s) == [@one_hour_duration_us] + end + + test "datetime[μs] - NaiveDateTime" do + eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) + twelve = ~N[2023-08-20 12:00:00.0000000] + diff_s = Series.subtract(eleven_s, twelve) + + assert diff_s.dtype == {:duration, :microsecond} + assert Series.to_list(diff_s) == [%Duration{value: -@one_hour_us, precision: :microsecond}] + end + + test "NaiveDateTime - duration[μs]" do + twelve = ~N[2023-08-20 12:00:00.0000000] + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + diff_s = Series.subtract(twelve, one_hour_s) + + assert diff_s.dtype == {:datetime, :microsecond} + assert Series.to_list(diff_s) == [~N[2023-08-20 11:00:00.0000000]] + end + + test "datetime[μs] - datetime[ns] (different precisions)" do + one_hour_ns = 3600 * 1_000_000_000 + one_hour_s = Series.from_list([one_hour_ns], dtype: {:duration, :nanosecond}) + twelve_s = Series.from_list([~N[2023-08-20 12:00:00.0000000]]) + diff_s = Series.subtract(twelve_s, one_hour_s) + + # Since we subtracted a duration with :nanosecond precision from a datetime with :microsecond + # precision, the resulting difference has :nanosecond precision since that was the highest + # precision present in the operation. 
+ assert diff_s.dtype == {:datetime, :nanosecond} + assert Series.to_list(diff_s) == [~N[2023-08-20 11:00:00.0000000]] + end + + test "duration[μs] - datetime[μs] raises ArgumentError" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + twelve_s = Series.from_list([~N[2023-08-20 12:00:00]]) + + assert_raise ArgumentError, + "cannot invoke Explorer.Series.subtract/2 with mismatched dtypes: {:duration, :microsecond} and {:datetime, :microsecond}", + fn -> Series.subtract(one_hour_s, twelve_s) end + end + end + + describe "multiply" do + test "duration[μs] * integer" do + ten_hour_duration_us = %Duration{value: @one_hour_us * 10, precision: :microsecond} + + ten_s = Series.from_list([10]) + one_hour_s = Series.from_list([@one_hour_duration_us]) + prod_s = Series.multiply(one_hour_s, ten_s) + + assert prod_s.dtype == {:duration, :microsecond} + assert Series.to_list(prod_s) == [ten_hour_duration_us] + end + + test "integer * duration[μs]" do + ten_hour_duration_us = %Duration{value: @one_hour_us * 10, precision: :microsecond} + + ten_s = Series.from_list([10]) + one_hour_s = Series.from_list([@one_hour_duration_us]) + prod_s = Series.multiply(ten_s, one_hour_s) + + assert prod_s.dtype == {:duration, :microsecond} + assert Series.to_list(prod_s) == [ten_hour_duration_us] + end + + test "duration[μs] * duration[μs] raises ArgumentError" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + + assert_raise ArgumentError, + "cannot invoke Explorer.Series.multiply/2 with mismatched dtypes: {:duration, :microsecond} and {:duration, :microsecond}", + fn -> Series.multiply(one_hour_s, one_hour_s) end + end + end + + describe "divide" do + test "duration[μs] / integer" do + six_min_duration_us = %Duration{value: @one_hour_us / 10, precision: :microsecond} + + one_hour_s = Series.from_list([@one_hour_duration_us]) + ten_s = Series.from_list([10]) + quot_s = Series.divide(one_hour_s, ten_s) + + assert quot_s.dtype 
== {:duration, :microsecond} + assert Series.to_list(quot_s) == [six_min_duration_us] + end + + test "integer / duration[μs] raises ArgumentError" do + ten_s = Series.from_list([10]) + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + + assert_raise ArgumentError, + "cannot divide by duration", + fn -> Series.divide(ten_s, one_hour_s) end + end + + test "duration[μs] / duration[μs] raises ArgumentError" do + one_hour_s = Series.from_list([@one_hour_us], dtype: {:duration, :microsecond}) + + assert_raise ArgumentError, + "cannot divide by duration", + fn -> Series.divide(one_hour_s, one_hour_s) end + end + end + + describe "DataFrame (this block belongs elsewhere, but let's keep the tests in one file for now)" do + test "mutate/2" do + require Explorer.DataFrame + alias Explorer.DataFrame, as: DF + + eleven_s = Series.from_list([~N[2023-08-20 11:00:00.0000000]]) + twelve_s = Series.from_list([~N[2023-08-20 12:00:00.0000000]]) + df = DF.new(eleven: eleven_s, twelve: twelve_s) + df_with_diff = DF.mutate(df, diff: twelve - eleven) + + assert inspect(df_with_diff) == """ + #Explorer.DataFrame< + Polars[1 x 3] + eleven datetime[μs] [2023-08-20 11:00:00.000000] + twelve datetime[μs] [2023-08-20 12:00:00.000000] + diff duration[μs] [1h] + >\ + """ + end + end +end diff --git a/test/explorer/series_test.exs b/test/explorer/series_test.exs index cc82a937c..cfcafc88b 100644 --- a/test/explorer/series_test.exs +++ b/test/explorer/series_test.exs @@ -3123,7 +3123,7 @@ defmodule Explorer.SeriesTest do test "error when casting with unknown dtype" do error_message = "Explorer.Series.cast/2 not implemented for dtype :money. 
" <> - "Valid dtypes are [:binary, :boolean, :category, :date, :time, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}, :float, :integer, :string]" + "Valid dtypes are [:binary, :boolean, :category, :date, :time, {:datetime, :nanosecond}, {:datetime, :microsecond}, {:datetime, :millisecond}, {:duration, :nanosecond}, {:duration, :microsecond}, {:duration, :millisecond}, :float, :integer, :string]" assert_raise ArgumentError, error_message, fn -> Series.from_list([1, 2, 3]) |> Series.cast(:money)