Skip to content

Commit

Permalink
Support column swaps in RedefinableTable (#3120)
Browse files Browse the repository at this point in the history
  • Loading branch information
nbauernfeind authored Dec 1, 2022
1 parent e799d64 commit 6f587a4
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import java.util.*;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* An uncoalesced table that may be redefined without triggering a {@link #coalesce()}.
Expand All @@ -25,17 +26,32 @@ protected RedefinableTable(@NotNull final TableDefinition definition, @NotNull f

@Override
public Table view(Collection<? extends Selectable> selectables) {
return viewInternal(selectables, false);
}

@Override
public Table updateView(Collection<? extends Selectable> selectables) {
return viewInternal(selectables, true);
}

private Table viewInternal(Collection<? extends Selectable> selectables, boolean isUpdate) {
if (selectables == null || selectables.isEmpty()) {
return this;
}
final SelectColumn[] columns = SelectColumn.from(selectables);
Set<ColumnDefinition<?>> resultColumnsInternal = new HashSet<>();
Map<String, ColumnDefinition<?>> resultColumnsExternal = new LinkedHashMap<>();
Map<String, ColumnDefinition<?>> allColumns = new HashMap<>(definition.getColumnNameMap());
Map<String, Set<String>> columnDependency = new HashMap<>();

final SelectColumn[] columns = Stream.concat(
isUpdate ? definition.getColumnStream().map(cd -> new SourceColumn(cd.getName())) : Stream.empty(),
selectables.stream().map(SelectColumn::of))
.toArray(SelectColumn[]::new);

final Set<ColumnDefinition<?>> resultColumnsInternal = new HashSet<>();
final Map<String, ColumnDefinition<?>> resultColumnsExternal = new LinkedHashMap<>();
final Map<String, ColumnDefinition<?>> allColumns = new HashMap<>(definition.getColumnNameMap());
final Map<String, Set<String>> columnDependency = new HashMap<>();
boolean simpleRetain = true;
for (SelectColumn selectColumn : columns) {
for (final SelectColumn selectColumn : columns) {
List<String> usedColumnNames = selectColumn.initDef(allColumns);
usedColumnNames.addAll(selectColumn.getColumnArrays());
columnDependency.put(selectColumn.getName(), new HashSet<>(usedColumnNames));
resultColumnsInternal.addAll(usedColumnNames.stream()
.filter(usedColumnName -> !resultColumnsExternal.containsKey(usedColumnName))
Expand All @@ -60,23 +76,8 @@ public Table view(Collection<? extends Selectable> selectables) {
TableDefinition newDefInternal =
TableDefinition.of(
resultColumnsInternal.toArray(ColumnDefinition.ZERO_LENGTH_COLUMN_DEFINITION_ARRAY));
return redefine(newDefExternal, newDefInternal, columns, columnDependency);
}

@Override
public Table updateView(Collection<? extends Selectable> selectables) {
if (selectables == null || selectables.isEmpty()) {
return this;
}
final SelectColumn[] columns = SelectColumn.from(selectables);
LinkedHashMap<String, SelectColumn> viewColumns = new LinkedHashMap<>();
for (ColumnDefinition<?> cDef : definition.getColumns()) {
viewColumns.put(cDef.getName(), new SourceColumn(cDef.getName()));
}
for (SelectColumn updateColumn : columns) {
viewColumns.put(updateColumn.getName(), updateColumn);
}
return view(viewColumns.values().toArray(SelectColumn.ZERO_LENGTH_SELECT_COLUMN_ARRAY));
return redefine(newDefExternal, newDefInternal, columns, columnDependency);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.function.Function;
import java.util.stream.IntStream;
import java.util.stream.LongStream;

Expand Down Expand Up @@ -391,6 +392,25 @@ public void testBigArrays() {
assertTableEquals(stuff, readBack);
}

@Test
public void testColumnSwapping() {
testWriteRead(emptyTable(10).update("X = ii * 2", "Y = ii * 2 + 1"),
t -> t.updateView("T = X", "X = Y", "Y = T"));
}

@Test
public void testColumnRedefineArrayDep() {
testWriteRead(emptyTable(10).update("X = ii * 2", "Y = ii * 2 + 1"),
t -> t.view("T = X_[i-1]"));
}

private void testWriteRead(Table source, Function<Table, Table> transform) {
final File f2w = new File(testRoot, "testWriteRead.parquet");
ParquetTools.writeTable(source, f2w);
final Table readBack = ParquetTools.readTable(f2w);
assertTableEquals(transform.apply(source), transform.apply(readBack));
}

public static DoubleVector generateDoubles(int howMany) {
final double[] yarr = new double[howMany];
for (int ii = 0; ii < howMany; ii++) {
Expand Down

0 comments on commit 6f587a4

Please sign in to comment.