Skip to content

Commit

Permalink
Ensure stable column order
Browse files Browse the repository at this point in the history
  • Loading branch information
thehabbos007 committed Jun 15, 2023
1 parent acd85e3 commit 234c61c
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 17 deletions.
36 changes: 23 additions & 13 deletions native/explorer/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -461,25 +461,35 @@ pub fn df_relocate(
columns: Vec<&str>,
position: u64,
) -> Result<ExDataFrame, ExplorerError> {
let mut first_series = df.get_columns().to_owned();
let second_series = first_series.split_off(position as usize);

let (first_series, first_to_relocate): (Vec<Series>, Vec<Series>) = first_series
let column_indexes: HashMap<&str, usize> = columns
.into_iter()
.partition(|series| !columns.contains(&series.name()));
.enumerate()
.map(|(index, col)| (col, index))
.collect();

let (second_series, second_to_relocate): (Vec<Series>, Vec<Series>) = second_series
let mut columns = df.get_columns().to_owned();
let right_columns = columns.split_off(position as usize);

let (mut columns, mut to_relocate): (Vec<Series>, Vec<Series>) = columns
.into_iter()
.partition(|series| !columns.contains(&series.name()));
.partition(|series| !column_indexes.contains_key(&series.name()));

let series = first_series
let (mut right_columns, mut rest_relocate): (Vec<Series>, Vec<Series>) = right_columns
.into_iter()
.chain(first_to_relocate.into_iter())
.chain(second_to_relocate.into_iter())
.chain(second_series.into_iter())
.collect();
.partition(|series| !column_indexes.contains_key(&series.name()));

let df = DataFrame::new(series)?;
// Ensure that the columns we want to relocate are sorted by the order the caller specifies
to_relocate.append(&mut rest_relocate);
to_relocate.sort_by_key(|series| {
column_indexes
.get(series.name())
.expect("column should exist")
});

columns.append(&mut to_relocate);
columns.append(&mut right_columns);

let df = DataFrame::new(columns)?;

Ok(ExDataFrame::new(df))
}
Expand Down
25 changes: 21 additions & 4 deletions test/explorer/data_frame_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -1952,7 +1952,7 @@ defmodule Explorer.DataFrameTest do
end

describe "relocate/2" do
test "single column relative" do
test "with single column and relative" do
df =
DF.new(
first: ["a", "b", "a"],
Expand All @@ -1976,7 +1976,7 @@ defmodule Explorer.DataFrameTest do
assert df3.names == ["second", "third", "last", "first"]
end

test "multiple columns relative" do
test "with multiple columns and relative" do
df =
DF.new(
first: ["a", "b", "a"],
Expand All @@ -1998,7 +1998,7 @@ defmodule Explorer.DataFrameTest do
assert df4.names == ["first", "third", "second", "last"]
end

test "using atom for last" do
test "with the :last atom" do
df =
DF.new(
a: ["a value", "some other value", "a third value!"],
Expand All @@ -2016,7 +2016,7 @@ defmodule Explorer.DataFrameTest do
assert df3.names == ["b", "c", "a"]
end

test "using atom for first" do
test "with the :first atom" do
df =
DF.new(
a: ["a value", "some other value", "a third value!"],
Expand All @@ -2033,6 +2033,23 @@ defmodule Explorer.DataFrameTest do
df3 = DF.relocate(df, ["b", "a"], after: :first)
assert df3.names == ["b", "a", "c"]
end

# Regression test for stable column ordering: relocated columns must appear
# in the order the caller listed them, not in their original frame order.
test "ordered DataFrame output after relocation" do
  df1 =
    DF.new(
      a: [1, 2],
      b: [5.1, 5.2],
      c: [4, 5],
      d: ["yes", "no"],
      e: [4, 1]
    )

  # Columns are addressed by position here: 4 is "e" and 0 is "a". Both are
  # moved, in that order, to sit just before position 2 ("c").
  df2 = DF.relocate(df1, [4, 0], before: 2)
  assert df2.names == ["b", "e", "a", "c", "d"]

  # Check the dumped data as well as the names, so a mismatch between the
  # reported column names and the underlying column order is caught.
  assert DF.dump_csv(df2) ==
           {:ok, "b,e,a,c,d\n5.1,4,1,4,yes\n5.2,1,2,5,no\n"}
end
end

describe "rename/2" do
Expand Down

0 comments on commit 234c61c

Please sign in to comment.