diff --git a/native/explorer/src/dataframe.rs b/native/explorer/src/dataframe.rs index 4c9e47b84..04989a2ea 100644 --- a/native/explorer/src/dataframe.rs +++ b/native/explorer/src/dataframe.rs @@ -461,25 +461,35 @@ pub fn df_relocate( columns: Vec<&str>, position: u64, ) -> Result { - let mut first_series = df.get_columns().to_owned(); - let second_series = first_series.split_off(position as usize); - - let (first_series, first_to_relocate): (Vec, Vec) = first_series + let column_indexes: HashMap<&str, usize> = columns .into_iter() - .partition(|series| !columns.contains(&series.name())); + .enumerate() + .map(|(index, col)| (col, index)) + .collect(); - let (second_series, second_to_relocate): (Vec, Vec) = second_series + let mut columns = df.get_columns().to_owned(); + let right_columns = columns.split_off(position as usize); + + let (mut columns, mut to_relocate): (Vec, Vec) = columns .into_iter() - .partition(|series| !columns.contains(&series.name())); + .partition(|series| !column_indexes.contains_key(&series.name())); - let series = first_series + let (mut right_columns, mut rest_relocate): (Vec, Vec) = right_columns .into_iter() - .chain(first_to_relocate.into_iter()) - .chain(second_to_relocate.into_iter()) - .chain(second_series.into_iter()) - .collect(); + .partition(|series| !column_indexes.contains_key(&series.name())); - let df = DataFrame::new(series)?; + // Ensure that the columns we want to relocate are sorted by the order the caller specifies + to_relocate.append(&mut rest_relocate); + to_relocate.sort_by_key(|series| { + column_indexes + .get(series.name()) + .expect("column should exist") + }); + + columns.append(&mut to_relocate); + columns.append(&mut right_columns); + + let df = DataFrame::new(columns)?; Ok(ExDataFrame::new(df)) } diff --git a/test/explorer/data_frame_test.exs b/test/explorer/data_frame_test.exs index eb1be7b70..62772b173 100644 --- a/test/explorer/data_frame_test.exs +++ b/test/explorer/data_frame_test.exs @@ -1952,7 +1952,7 @@ defmodule Explorer.DataFrameTest do end describe "relocate/2" do - test "single column relative" do + test "with single column and relative" do df = DF.new( first: ["a", "b", "a"], @@ -1976,7 +1976,7 @@ defmodule Explorer.DataFrameTest do assert df3.names == ["second", "third", "last", "first"] end - test "multiple columns relative" do + test "with multiple columns and relative" do df = DF.new( first: ["a", "b", "a"], @@ -1998,7 +1998,7 @@ defmodule Explorer.DataFrameTest do assert df4.names == ["first", "third", "second", "last"] end - test "using atom for last" do + test "with the :last atom" do df = DF.new( a: ["a value", "some other value", "a third value!"], @@ -2016,7 +2016,7 @@ defmodule Explorer.DataFrameTest do assert df3.names == ["b", "c", "a"] end - test "using atom for first" do + test "with the :first atom" do df = DF.new( a: ["a value", "some other value", "a third value!"], @@ -2033,6 +2033,23 @@ defmodule Explorer.DataFrameTest do df3 = DF.relocate(df, ["b", "a"], after: :first) assert df3.names == ["b", "a", "c"] end + + test "ordered DataFrame output after relocation" do + df1 = + Explorer.DataFrame.new( + a: [1, 2], + b: [5.1, 5.2], + c: [4, 5], + d: ["yes", "no"], + e: [4, 1] + ) + + df2 = DF.relocate(df1, [4, 0], before: 2) + assert df2.names == ["b", "e", "a", "c", "d"] + + assert DF.dump_csv(df2) == + {:ok, "b,e,a,c,d\n5.1,4,1,4,yes\n5.2,1,2,5,no\n"} + end end describe "rename/2" do