Skip to content

Commit

Permalink
fix(rust!, python): Reject literal input in sort_by_exprs() (#17606)
Browse files Browse the repository at this point in the history
  • Loading branch information
eitsupi authored Jul 13, 2024
1 parent 99677a7 commit f55cc5b
Show file tree
Hide file tree
Showing 10 changed files with 72 additions and 50 deletions.
34 changes: 22 additions & 12 deletions crates/polars-lazy/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,20 +342,28 @@ impl LazyFrame {
/// fn example(df: DataFrame) -> LazyFrame {
/// df.lazy()
/// .sort_by_exprs(vec![col("sepal_width")], Default::default())
/// .unwrap()
/// }
/// ```
pub fn sort_by_exprs<E: AsRef<[Expr]>>(
self,
by_exprs: E,
sort_options: SortMultipleOptions,
) -> Self {
) -> PolarsResult<Self> {
let by_exprs = by_exprs.as_ref().to_vec();
if by_exprs.is_empty() {
self
Ok(self)
} else {
let is_include_literal = by_exprs.iter().any(|expr| matches!(expr, Expr::Literal(_)));

polars_ensure!(
!is_include_literal,
ComputeError: "literal expressions are not allowed for sorting"
);

let opt_state = self.get_opt_state();
let lp = self.get_plan_builder().sort(by_exprs, sort_options).build();
Self::from_logical_plan(lp, opt_state)
Ok(Self::from_logical_plan(lp, opt_state))
}
}

Expand All @@ -364,24 +372,26 @@ impl LazyFrame {
k: IdxSize,
by_exprs: E,
sort_options: SortMultipleOptions,
) -> Self {
) -> PolarsResult<Self> {
// this will optimize to top-k
self.sort_by_exprs(
by_exprs,
sort_options.with_order_reversed().with_nulls_last(true),
)
.slice(0, k)
Ok(self
.sort_by_exprs(
by_exprs,
sort_options.with_order_reversed().with_nulls_last(true),
)?
.slice(0, k))
}

pub fn bottom_k<E: AsRef<[Expr]>>(
self,
k: IdxSize,
by_exprs: E,
sort_options: SortMultipleOptions,
) -> Self {
) -> PolarsResult<Self> {
// this will optimize to bottom-k
self.sort_by_exprs(by_exprs, sort_options.with_nulls_last(true))
.slice(0, k)
Ok(self
.sort_by_exprs(by_exprs, sort_options.with_nulls_last(true))?
.slice(0, k))
}

/// Reverse the `DataFrame` from top to bottom.
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/tests/queries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1911,7 +1911,7 @@ fn test_sort_maintain_order_true() -> PolarsResult<()> {
SortMultipleOptions::default()
.with_maintain_order(true)
.with_nulls_last(true),
)
)?
.slice(0, 3)
.collect()?;
println!("{:?}", res);
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-lazy/src/tests/streaming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ fn test_streaming_multiple_keys_aggregate() -> PolarsResult<()> {
.sort_by_exprs(
[col("sugars_g"), col("calories")],
SortMultipleOptions::default().with_order_descending_multi([false, false]),
);
)?;

assert_streaming_with_default(q, true, false);
Ok(())
Expand Down Expand Up @@ -138,7 +138,7 @@ fn test_streaming_unique() -> PolarsResult<()> {
.sort_by_exprs(
[cols(["sugars_g", "calories"])],
SortMultipleOptions::default(),
);
)?;

assert_streaming_with_default(q, true, false);
Ok(())
Expand Down Expand Up @@ -392,7 +392,7 @@ fn test_sort_maintain_order_streaming() -> PolarsResult<()> {
SortMultipleOptions::default()
.with_nulls_last(true)
.with_maintain_order(true),
)
)?
.slice(0, 3)
.with_streaming(true)
.collect()?;
Expand All @@ -419,7 +419,7 @@ fn test_streaming_full_outer_join() -> PolarsResult<()> {

let q = lf_left
.full_join(lf_right, col("a"), col("a"))
.sort_by_exprs([all()], SortMultipleOptions::default());
.sort_by_exprs([all()], SortMultipleOptions::default())?;

// Toggle so that the join order is swapped.
for toggle in [true, true] {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-lazy/src/tests/tpch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ fn test_q2() -> PolarsResult<()> {
SortMultipleOptions::default()
.with_order_descending_multi([true, false, false, false])
.with_maintain_order(true),
)
)?
.limit(100)
.with_comm_subplan_elim(true);

Expand Down
4 changes: 2 additions & 2 deletions crates/polars-sql/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1098,13 +1098,13 @@ impl SQLContext {
)?)
}
}
Ok(lf.sort_by_exprs(
lf.sort_by_exprs(
&by,
SortMultipleOptions::default()
.with_order_descending_multi(descending)
.with_nulls_last_multi(nulls_last)
.with_maintain_order(true),
))
)
}

fn process_group_by(
Expand Down
8 changes: 5 additions & 3 deletions crates/polars-sql/tests/ops_distinct_on.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use polars_core::chunked_array::ops::SortMultipleOptions;
use polars_core::df;
use polars_error::PolarsResult;
use polars_lazy::prelude::*;
use polars_sql::*;

#[test]
fn test_distinct_on() {
fn test_distinct_on() -> PolarsResult<()> {
let df = df! {
"Name" => ["Bob", "Pete", "Pete", "Pete", "Martha", "Martha"],
"Record Date" => [1, 1, 2, 4, 1, 3],
Expand Down Expand Up @@ -33,9 +34,10 @@ fn test_distinct_on() {
SortMultipleOptions::default()
.with_order_descending_multi([false, true])
.with_maintain_order(true),
)
)?
.group_by_stable(vec![col("Name")])
.agg(vec![col("*").first()]);
let expected = expected.collect().unwrap();
assert!(actual.equals(&expected))
assert!(actual.equals(&expected));
Ok(())
}
2 changes: 1 addition & 1 deletion crates/polars-sql/tests/simple_exprs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ fn test_group_by_2() -> PolarsResult<()> {
.sort_by_exprs(
vec![col("count"), col("category")],
SortMultipleOptions::default().with_order_descending_multi([false, true]),
)
)?
.limit(2);

let expected = expected.collect()?;
Expand Down
2 changes: 1 addition & 1 deletion crates/polars/src/docs/lazy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
//! let descending = vec![true, false];
//!
//! let sorted = df.lazy()
//! .sort_by_exprs(vec![col("b"), col("a")], descending, false, false)
//! .sort_by_exprs(vec![col("b"), col("a")], descending, false, false)?
//! .collect()?;
//!
//! // sorted:
Expand Down
56 changes: 31 additions & 25 deletions py-polars/src/lazyframe/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -511,41 +511,47 @@ impl PyLazyFrame {
nulls_last: Vec<bool>,
maintain_order: bool,
multithreaded: bool,
) -> Self {
) -> PyResult<Self> {
let ldf = self.ldf.clone();
let exprs = by.to_exprs();
ldf.sort_by_exprs(
exprs,
SortMultipleOptions {
descending,
nulls_last,
maintain_order,
multithreaded,
},
)
.into()
let out = ldf
.sort_by_exprs(
exprs,
SortMultipleOptions {
descending,
nulls_last,
maintain_order,
multithreaded,
},
)
.map_err(PyPolarsErr::from)?;
Ok(out.into())
}

fn top_k(&self, k: IdxSize, by: Vec<PyExpr>, reverse: Vec<bool>) -> Self {
fn top_k(&self, k: IdxSize, by: Vec<PyExpr>, reverse: Vec<bool>) -> PyResult<Self> {
let ldf = self.ldf.clone();
let exprs = by.to_exprs();
ldf.top_k(
k,
exprs,
SortMultipleOptions::new().with_order_descending_multi(reverse),
)
.into()
let out = ldf
.top_k(
k,
exprs,
SortMultipleOptions::new().with_order_descending_multi(reverse),
)
.map_err(PyPolarsErr::from)?;
Ok(out.into())
}

fn bottom_k(&self, k: IdxSize, by: Vec<PyExpr>, reverse: Vec<bool>) -> Self {
fn bottom_k(&self, k: IdxSize, by: Vec<PyExpr>, reverse: Vec<bool>) -> PyResult<Self> {
let ldf = self.ldf.clone();
let exprs = by.to_exprs();
ldf.bottom_k(
k,
exprs,
SortMultipleOptions::new().with_order_descending_multi(reverse),
)
.into()
let out = ldf
.bottom_k(
k,
exprs,
SortMultipleOptions::new().with_order_descending_multi(reverse),
)
.map_err(PyPolarsErr::from)?;
Ok(out.into())
}

fn cache(&self) -> Self {
Expand Down
4 changes: 4 additions & 0 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,10 @@ def test_sort() -> None:
assert_frame_equal(
df.sort(["a", "b"]), pl.DataFrame({"a": [1, 2, 3], "b": [2, 1, 3]})
)
with pytest.raises(ComputeError):
df.sort([1, 2])
with pytest.raises(ComputeError):
df.sort(1)


def test_sort_maintain_order() -> None:
Expand Down

0 comments on commit f55cc5b

Please sign in to comment.