-
Notifications
You must be signed in to change notification settings - Fork 5.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Optimize single-column FilterFunctions on DictionaryBlocks
Applies the filter at most once on any of the distinct values in a DictionaryBlock over the same dictionary. This optimizes filtering of low cardinality string columns.
- Loading branch information
1 parent
67efdcf
commit 29bb4c6
Showing
2 changed files
with
210 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
141 changes: 141 additions & 0 deletions
141
presto-orc/src/test/java/com/facebook/presto/orc/TestFilterFunction.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
/* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package com.facebook.presto.orc; | ||
|
||
import com.facebook.presto.spi.ConnectorSession; | ||
import com.facebook.presto.spi.Page; | ||
import com.facebook.presto.spi.block.Block; | ||
import com.facebook.presto.spi.block.DictionaryBlock; | ||
import com.facebook.presto.spi.block.LongArrayBlock; | ||
import com.facebook.presto.spi.relation.Predicate; | ||
import com.facebook.presto.testing.TestingConnectorSession; | ||
import com.google.common.collect.ImmutableList; | ||
import org.testng.annotations.Test; | ||
|
||
import java.util.Arrays; | ||
import java.util.Optional; | ||
|
||
import static org.testng.Assert.assertEquals; | ||
import static org.testng.Assert.assertTrue; | ||
|
||
public class TestFilterFunction | ||
{ | ||
private static final long UNLUCKY = 13; | ||
|
||
@Test | ||
public void testFilter() | ||
{ | ||
ConnectorSession session = new TestingConnectorSession(ImmutableList.of()); | ||
FilterFunction filter = new FilterFunction(session, true, new IsOddPredicate()); | ||
|
||
Block numbers = makeNumbers(0, 1000); | ||
int[] allPositions = makePositions(0, 1000, 1); | ||
assertFilter(filter, numbers, allPositions, allPositions.length); | ||
|
||
Block dictionaryNumbers = new DictionaryBlock(numbers, allPositions); | ||
// Sparse coverage of the dictionary values | ||
int[] sparsePositions = makePositions(1, 300, 3); | ||
assertFilter(filter, dictionaryNumbers, sparsePositions, sparsePositions.length); | ||
|
||
// Full coverage of the dictionary values | ||
assertFilter(filter, dictionaryNumbers, allPositions, allPositions.length); | ||
|
||
// Test with a different DictionaryBlock over the same numbers. Results are reused. The DictionaryBlock covers the | ||
// values sparsely. TheDictionaryBlock itself is accessed sparsely. | ||
DictionaryBlock otherDictionary = new DictionaryBlock(numbers, makePositions(1, 332, 3)); | ||
int[] otherDictionaryPositions = makePositions(0, 150, 2); | ||
assertFilter(filter, otherDictionary, otherDictionaryPositions, otherDictionaryPositions.length); | ||
|
||
// Repeat test on a DictionaryBlock over different content to make sure that cached results are not reused. | ||
assertFilter(filter, new DictionaryBlock(makeNumbers(1, 1001), allPositions), allPositions, allPositions.length); | ||
} | ||
|
||
private static void assertFilter(FilterFunction filter, Block input, int[] inputPositions, int positionCount) | ||
{ | ||
// Copy the positions array because filter mutates it. | ||
int[] positions = Arrays.copyOf(inputPositions, positionCount); | ||
RuntimeException[] errors = new RuntimeException[inputPositions[positionCount - 1] + 1]; | ||
// Put a pre-existing error in the 1st half of the input. | ||
int numPreviousErrors = positionCount / 2; | ||
for (int i = 0; i < numPreviousErrors; i++) { | ||
errors[i] = new RuntimeException("Pre-existent error at " + positions[i]); | ||
} | ||
int lastErrorPosition = numPreviousErrors > 0 ? positions[numPreviousErrors - 1] : -1; | ||
int numHits = filter.filter(new Page(positionCount, input), positions, positionCount, errors); | ||
int hitCounter = 0; | ||
for (int position : inputPositions) { | ||
long number = input.getLong(position); | ||
if (number == UNLUCKY) { | ||
assertEquals(positions[hitCounter], position); | ||
assertTrue(errors[hitCounter] instanceof UnluckyError); | ||
hitCounter++; | ||
} | ||
else if ((number & 1) == 1) { | ||
assertEquals(positions[hitCounter], position); | ||
if (position <= lastErrorPosition) { | ||
assertTrue(errors[hitCounter] instanceof RuntimeException); | ||
} | ||
else { | ||
assertEquals(errors[hitCounter], null); | ||
} | ||
hitCounter++; | ||
} | ||
} | ||
assertEquals(numHits, hitCounter); | ||
} | ||
|
||
private static class UnluckyError | ||
extends RuntimeException | ||
{ | ||
} | ||
|
||
private static int[] makePositions(int from, int count, int step) | ||
{ | ||
int[] array = new int[count]; | ||
for (int i = 0; i < count; i++) { | ||
array[i] = from + step * i; | ||
} | ||
return array; | ||
} | ||
|
||
private static Block makeNumbers(int from, int to) | ||
{ | ||
int count = to - from; | ||
long[] array = new long[count]; | ||
for (int i = 0; i < count; i++) { | ||
array[i] = from + i; | ||
} | ||
return new LongArrayBlock(count, Optional.empty(), array); | ||
} | ||
|
||
private static class IsOddPredicate | ||
implements Predicate | ||
{ | ||
@Override | ||
public int[] getInputChannels() | ||
{ | ||
return new int[] {0}; | ||
} | ||
|
||
@Override | ||
public boolean evaluate(ConnectorSession session, Page page, int position) | ||
{ | ||
long number = page.getBlock(0).getLong(position); | ||
if (number == UNLUCKY) { | ||
throw new UnluckyError(); | ||
} | ||
return (number & 1) == 1; | ||
} | ||
} | ||
} |