Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Allow a Filter to be used as the Expression in a SelectColumn. #6365

Merged
merged 22 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,21 @@ public SafeCloseable beginOperation(@NotNull final QueryTable parent) {
void initializeFilters(@NotNull QueryTable parent) {
final QueryCompilerRequestProcessor.BatchProcessor compilationProcessor = QueryCompilerRequestProcessor.batch();
Arrays.stream(whereFilters).forEach(filter -> filter.init(parent.getDefinition(), compilationProcessor));

final List<WhereFilter> disallowedRowVariables =
Arrays.stream(whereFilters).filter(WhereFilter::hasVirtualRowVariables).collect(Collectors.toList());
if (!disallowedRowVariables.isEmpty()) {
throw new UncheckedTableException(
"wouldMatch filters cannot use virtual row variables (i, ii, and k): " + disallowedRowVariables);
}

final List<WhereFilter> disallowedColumnVectors =
Arrays.stream(whereFilters).filter(wf -> !wf.getColumnArrays().isEmpty()).collect(Collectors.toList());
if (!disallowedColumnVectors.isEmpty()) {
throw new UncheckedTableException(
"wouldMatch filters cannot use column Vectors (_ syntax): " + disallowedColumnVectors);
}

compilationProcessor.compile();
}

Expand Down Expand Up @@ -517,13 +532,14 @@ private RowSet update(RowSet added, RowSet removed, RowSet modified,

try (final SafeCloseableList toClose = new SafeCloseableList()) {
// Filter and add addeds
final WritableRowSet filteredAdded = toClose.add(filter.filter(added, source, table, false));
final WritableRowSet filteredAdded = toClose.add(filter.filter(added, table.getRowSet(), table, false));
cpwright marked this conversation as resolved.
Show resolved Hide resolved
RowSet keysToRemove = EMPTY_INDEX;

// If we were affected, recompute mods and re-add the ones that pass.
if (affected) {
downstreamModified.setAll(name);
final RowSet filteredModified = toClose.add(filter.filter(modified, source, table, false));
final RowSet filteredModified =
toClose.add(filter.filter(modified, table.getRowSet(), table, false));

// Now apply the additions and remove any non-matching modifieds
filteredAdded.insert(filteredModified);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,11 @@ public boolean hasConstantArrayAccess() {
return getFormulaShiftColPair() != null;
}

@Override
public boolean hasVirtualRowVariables() {
return usesI || usesII || usesK;
}

/**
* @return a Pair object, consisting of formula string and shift to column MatchPairs, if the filter formula or
* expression has Array Access that conforms to "i +/- &lt;constant&gt;" or "ii +/- &lt;constant&gt;". If
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
//
// Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
//
package io.deephaven.engine.table.impl.select;

import io.deephaven.chunk.ChunkType;
import io.deephaven.chunk.WritableChunk;
import io.deephaven.chunk.WritableObjectChunk;
import io.deephaven.chunk.attributes.Values;
import io.deephaven.engine.exceptions.UncheckedTableException;
import io.deephaven.engine.rowset.*;
import io.deephaven.engine.table.*;
import io.deephaven.engine.table.impl.BaseTable;
import io.deephaven.engine.table.impl.MatchPair;
import io.deephaven.engine.table.impl.QueryCompilerRequestProcessor;
import io.deephaven.engine.table.impl.QueryTable;
import io.deephaven.engine.table.impl.sources.InMemoryColumnSource;
import io.deephaven.engine.table.impl.sources.SparseArrayColumnSource;
import io.deephaven.engine.table.impl.sources.ViewColumnSource;
import org.jetbrains.annotations.NotNull;

import java.util.Collection;
import java.util.List;
import java.util.Map;

/**
* The FilterSelectColumn wraps a {@link io.deephaven.api.filter.Filter} and producing a column of true or false Boolean
* values described by the filter.
*
* <p>
* This column is appropriate as an argument to a {@link Table#view(Collection)} or {@link Table#updateView(String...)},
* lazily evaluating the equivalent of a {@link Table#wouldMatch(String...)} operation. Although select and update can
* also use Filters, the wouldMatch provides a more efficient path for realized results as it stores and updates only a
cpwright marked this conversation as resolved.
Show resolved Hide resolved
* {@link RowSet}. The FilterSelectColumn can only evaluate the filter one chunk at a time, and must write to an
* in-memory Boolean column source.
* </p>
cpwright marked this conversation as resolved.
Show resolved Hide resolved
*/
class FilterSelectColumn implements SelectColumn {

// We don't actually care to do anything, but cannot return null
private static final Formula.FillContext FILL_CONTEXT_INSTANCE = new Formula.FillContext() {};

@NotNull
private final String destName;
@NotNull
private final WhereFilter filter;

private RowSet rowSet;
private Table tableToFilter;

/**
* Create a FilterSelectColumn with the given name and {@link WhereFilter}.
*
* @param destName the name of the result column
* @param filter the filter that is evaluated to true or false for each row of the table
* @return a new FilterSelectColumn representing the provided filter.
*/
static FilterSelectColumn of(@NotNull final String destName, @NotNull final WhereFilter filter) {
return new FilterSelectColumn(destName, filter);
}
cpwright marked this conversation as resolved.
Show resolved Hide resolved

private FilterSelectColumn(@NotNull final String destName, @NotNull final WhereFilter filter) {
this.destName = destName;
this.filter = filter;
}

@Override
public String toString() {
return "filter(" + filter + ')';
}

@Override
public List<String> initInputs(TrackingRowSet rowSet, Map<String, ? extends ColumnSource<?>> columnsOfInterest) {
this.rowSet = rowSet;
this.tableToFilter = new QueryTable(rowSet, columnsOfInterest);
cpwright marked this conversation as resolved.
Show resolved Hide resolved
return filter.getColumns();
cpwright marked this conversation as resolved.
Show resolved Hide resolved
}

@Override
public List<String> initDef(@NotNull Map<String, ColumnDefinition<?>> columnDefinitionMap) {
filter.init(TableDefinition.of(columnDefinitionMap.values()));
return checkForInvalidFilters();
}

@Override
public List<String> initDef(@NotNull final Map<String, ColumnDefinition<?>> columnDefinitionMap,
@NotNull final QueryCompilerRequestProcessor compilationRequestProcessor) {
filter.init(TableDefinition.of(columnDefinitionMap.values()), compilationRequestProcessor);
return checkForInvalidFilters();
}

/**
* Validates the filter to ensure it does not contain invalid filters such as column vectors or virtual row
* variables. Throws an {@link UncheckedTableException} if any invalid filters are found.
*
* @return the list of columns required by the filter.
*/
private List<String> checkForInvalidFilters() {
cpwright marked this conversation as resolved.
Show resolved Hide resolved
if (!filter.getColumnArrays().isEmpty()) {
throw new UncheckedTableException(
"Cannot use a filter with column Vectors (_ syntax) in select, view, update, or updateView: "
+ filter);
}
if (filter.hasVirtualRowVariables()) {
throw new UncheckedTableException(
"Cannot use a filter with virtual row variables (i, ii, or k) in select, view, update, or updateView: "
+ filter);
}

return filter.getColumns();
}

@Override
public Class<?> getReturnedType() {
return Boolean.class;
}

@Override
public Class<?> getReturnedComponentType() {
return null;
}

@Override
public List<String> getColumns() {
return filter.getColumns();
}

@Override
public List<String> getColumnArrays() {
return List.of();
}

@Override
public boolean hasVirtualRowVariables() {
/* This should always be false, because initDef throws when arrays or ii and friends are used. */
return filter.hasVirtualRowVariables();
cpwright marked this conversation as resolved.
Show resolved Hide resolved
cpwright marked this conversation as resolved.
Show resolved Hide resolved
}

@NotNull
@Override
public ColumnSource<Boolean> getDataView() {
return new ViewColumnSource<>(Boolean.class, new FilterFormula(), false);
cpwright marked this conversation as resolved.
Show resolved Hide resolved
}

@NotNull
@Override
public ColumnSource<?> getLazyView() {
return getDataView();
}

@Override
public String getName() {
return destName;
}

@Override
public MatchPair getMatchPair() {
throw new UnsupportedOperationException();
}

@Override
public final WritableColumnSource<?> newDestInstance(final long size) {
return SparseArrayColumnSource.getSparseMemoryColumnSource(size, Boolean.class);
}

@Override
public final WritableColumnSource<?> newFlatDestInstance(final long size) {
return InMemoryColumnSource.getImmutableMemoryColumnSource(size, Boolean.class, null);
}

@Override
public boolean isRetain() {
return false;
}

@Override
public boolean isStateless() {
return filter.permitParallelization();
}

@Override
public FilterSelectColumn copy() {
return new FilterSelectColumn(destName, filter.copy());
}

@Override
public void validateSafeForRefresh(BaseTable<?> sourceTable) {
filter.validateSafeForRefresh(sourceTable);
}

private class FilterFormula extends Formula {
public FilterFormula() {
super(null);
}

@Override
public Boolean getBoolean(long rowKey) {
WritableRowSet filteredIndex = filter.filter(RowSetFactory.fromKeys(rowKey), rowSet, tableToFilter, false);
cpwright marked this conversation as resolved.
Show resolved Hide resolved
cpwright marked this conversation as resolved.
Show resolved Hide resolved
return filteredIndex.isNonempty();
}

@Override
public Boolean getPrevBoolean(long rowKey) {
cpwright marked this conversation as resolved.
Show resolved Hide resolved
WritableRowSet filteredIndex = filter.filter(RowSetFactory.fromKeys(rowKey), rowSet, tableToFilter, true);
cpwright marked this conversation as resolved.
Show resolved Hide resolved
return filteredIndex.isNonempty();
}

@Override
public Object get(final long rowKey) {
cpwright marked this conversation as resolved.
Show resolved Hide resolved
return getBoolean(rowKey);
}

@Override
public Object getPrev(final long rowKey) {
return getPrevBoolean(rowKey);
}

@Override
public ChunkType getChunkType() {
return ChunkType.Object;
}

@Override
public FillContext makeFillContext(final int chunkCapacity) {
return FILL_CONTEXT_INSTANCE;
}

@Override
public void fillChunk(
@NotNull final FillContext fillContext,
@NotNull final WritableChunk<? super Values> destination,
@NotNull final RowSequence rowSequence) {
doFill(rowSequence, destination, false);
}

@Override
public void fillPrevChunk(
@NotNull final FillContext fillContext,
@NotNull final WritableChunk<? super Values> destination,
@NotNull final RowSequence rowSequence) {
doFill(rowSequence, destination, true);
cpwright marked this conversation as resolved.
Show resolved Hide resolved
}

private void doFill(@NotNull RowSequence rowSequence, WritableChunk<? super Values> destination,
boolean usePrev) {
final WritableObjectChunk<Boolean, ?> booleanDestination = destination.asWritableObjectChunk();
booleanDestination.setSize(rowSequence.intSize());
try (final RowSet inputRowSet = rowSequence.asRowSet();
final RowSet filtered = filter.filter(inputRowSet, rowSet, tableToFilter, usePrev);
cpwright marked this conversation as resolved.
Show resolved Hide resolved
final RowSet.Iterator inputIt = inputRowSet.iterator();
final RowSet.Iterator trueIt = filtered.iterator()) {
long nextTrue = trueIt.hasNext() ? trueIt.nextLong() : -1;
int offset = 0;
while (nextTrue >= 0) {
// the input iterator is a superset of the true iterator, so we can always find out what
// the next value is without needing to check hasNext
final long nextInput = inputIt.nextLong();
final boolean found = nextInput == nextTrue;
booleanDestination.set(offset++, found);
if (found) {
nextTrue = trueIt.hasNext() ? trueIt.nextLong() : -1;
}
}
// fill everything else up with false, because nothing else can match
booleanDestination.fillWithBoxedValue(offset, booleanDestination.size() - offset, false);
cpwright marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prefer bulk advance?

Suggested change
final RowSet.Iterator inputIt = inputRowSet.iterator();
final RowSet.Iterator trueIt = filtered.iterator()) {
long nextTrue = trueIt.hasNext() ? trueIt.nextLong() : -1;
int offset = 0;
while (nextTrue >= 0) {
// the input iterator is a superset of the true iterator, so we can always find out what
// the next value is without needing to check hasNext
final long nextInput = inputIt.nextLong();
final boolean found = nextInput == nextTrue;
booleanDestination.set(offset++, found);
if (found) {
nextTrue = trueIt.hasNext() ? trueIt.nextLong() : -1;
}
}
// fill everything else up with false, because nothing else can match
booleanDestination.fillWithBoxedValue(offset, booleanDestination.size() - offset, false);
final RowSequence.Iterator inputRows = inputRowSet.getRowSequenceIterator();
final RowSet.Iterator trueRows = filtered.iterator()) {
int offset = 0;
while (trueRows.hasNext()) {
final long nextTrue = trueRows.nextLong();
// Find all the false rows between the last consumed input row and the next true row
final int falsesSkipped = (int) inputRows.advanceAndGetPositionDistance(nextTrue + 1) - 1;
if (falsesSkipped > 0) {
booleanDestination.fillWithValue(offset, falsesSkipped, false);
offset += falsesSkipped;
}
booleanDestination.set(offset++, true);
}
final int remainingFalses = booleanDestination.size() - offset;
// Fill everything else up with false, because we've exhausted the trues
if (remainingFalses > 0) {
booleanDestination.fillWithValue(offset, remainingFalses, false);
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Original code (density is the fraction of true values, uniformly distributed):

Density: 1.0000, Nanos: 11560028500, per cell=11.56
Density: 0.9999, Nanos: 13277620834, per cell=13.28
Density: 0.9990, Nanos: 14978527958, per cell=14.98
Density: 0.8725, Nanos: 21772909750, per cell=21.77
Density: 0.7500, Nanos: 28369796167, per cell=28.37
Density: 0.5000, Nanos: 36947907333, per cell=36.95
Density: 0.2500, Nanos: 24845209958, per cell=24.85
Density: 0.1250, Nanos: 17964264291, per cell=17.96
Density: 0.0010, Nanos: 11357967500, per cell=11.36
Density: 0.0001, Nanos: 10771053167, per cell=10.77

New code:

Density: 1.0000, Nanos: 11589953167, per cell=11.59
Density: 0.9999, Nanos: 13430658834, per cell=13.43
Density: 0.9990, Nanos: 15374023083, per cell=15.37
Density: 0.8725, Nanos: 22405263417, per cell=22.41
Density: 0.7500, Nanos: 28906296084, per cell=28.91
Density: 0.5000, Nanos: 36402417875, per cell=36.40
Density: 0.2500, Nanos: 24535559166, per cell=24.54
Density: 0.1250, Nanos: 17518296583, per cell=17.52
Density: 0.0010, Nanos: 10686682416, per cell=10.69
Density: 0.0001, Nanos: 10635898708, per cell=10.64

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can swap in which ever implementation you prefer at this point.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I like the new code better, but there's no strong case for either.

}
}
}
}
cpwright marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ public SelectColumn visit(Literal rhs) {

@Override
public SelectColumn visit(Filter rhs) {
return makeSelectColumn(Strings.of(rhs));
return FilterSelectColumn.of(lhs.name(), WhereFilter.of(rhs));
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,13 @@ default boolean canMemoize() {
return false;
}

/**
* Returns true if this filter uses row virtual offset columns of {@code i}, {@code ii} or {@code k}.
*/
default boolean hasVirtualRowVariables() {
return false;
}
cpwright marked this conversation as resolved.
Show resolved Hide resolved

/**
* Create a copy of this WhereFilter.
*
Expand Down Expand Up @@ -331,7 +338,7 @@ default Filter invert() {

@Override
default <T> T walk(Expression.Visitor<T> visitor) {
throw new UnsupportedOperationException("WhereFilters do not implement walk");
return visitor.visit(this);
}

@Override
Expand Down
Loading