Fix failure when matching empty dictionary
martint committed Aug 19, 2022
1 parent 0998cf8 commit 8494caa
Showing 4 changed files with 146 additions and 0 deletions.
@@ -485,6 +485,11 @@ private static Domain getDomain(Type type, DictionaryDescriptor dictionaryDescri
        }

        int dictionarySize = dictionaryPage.get().getDictionarySize();

        if (dictionarySize == 0) {
            return Domain.onlyNull(type);
        }

        DictionaryValueConverter converter = new DictionaryValueConverter(dictionary);
        Function<Integer, Object> convertFunction = converter.getConverter(columnDescriptor.getPrimitiveType());
        List<Object> values = new ArrayList<>();
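A note on the fix above: returning Domain.onlyNull(type) for a zero-entry dictionary encodes the fact that the column chunk cannot contain any non-null value, so a predicate that requires non-nulls can now prune the chunk instead of falling through to the value-conversion code, which is where the failure occurred. A minimal sketch of that domain arithmetic against the Trino SPI (hypothetical class name; onlyNull, notNull, and overlaps are the real Domain factories and methods):

import io.trino.spi.predicate.Domain;

import static io.trino.spi.type.IntegerType.INTEGER;

public class EmptyDictionaryDomainSketch
{
    public static void main(String[] args)
    {
        // Zero dictionary entries means every value in the chunk is null,
        // so the chunk's effective domain is the null-only domain.
        Domain chunkDomain = Domain.onlyNull(INTEGER);

        // A predicate that requires non-null values can never match the chunk,
        // which lets the reader skip it entirely.
        System.out.println(chunkDomain.overlaps(Domain.notNull(INTEGER))); // false

        // A predicate that admits nulls still matches.
        System.out.println(chunkDomain.overlaps(Domain.onlyNull(INTEGER))); // true
    }
}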
@@ -69,6 +69,7 @@
import static io.trino.spi.predicate.Domain.all;
import static io.trino.spi.predicate.Domain.create;
import static io.trino.spi.predicate.Domain.notNull;
import static io.trino.spi.predicate.Domain.onlyNull;
import static io.trino.spi.predicate.Domain.singleValue;
import static io.trino.spi.predicate.Range.range;
import static io.trino.spi.predicate.TupleDomain.withColumnDomains;
@@ -612,6 +613,39 @@ public void testVarcharMatchesWithDictionaryDescriptor()
        assertTrue(parquetPredicate.matches(new DictionaryDescriptor(column, Optional.of(page))));
    }

    @Test
    public void testEmptyDictionary()
    {
        ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"path"}, Types.optional(BINARY).named("Test column"), 0, 0);
        RichColumnDescriptor column = new RichColumnDescriptor(columnDescriptor, new PrimitiveType(OPTIONAL, BINARY, "Test column"));
        ColumnDescriptor descriptor = new ColumnDescriptor(column.getPath(), column.getPrimitiveType(), 0, 0);
        VarcharType type = createVarcharType(255);

        DictionaryPage dictionary = new DictionaryPage(EMPTY_SLICE, 0, PLAIN_DICTIONARY);
        TupleDomainParquetPredicate predicate;

        // only non-nulls allowed
        predicate = new TupleDomainParquetPredicate(
                withColumnDomains(singletonMap(descriptor, notNull(type))),
                singletonList(column),
                UTC);
        assertFalse(predicate.matches(new DictionaryDescriptor(column, Optional.of(dictionary))));

        // only nulls allowed
        predicate = new TupleDomainParquetPredicate(
                withColumnDomains(singletonMap(descriptor, onlyNull(type))),
                singletonList(column),
                UTC);
        assertTrue(predicate.matches(new DictionaryDescriptor(column, Optional.of(dictionary))));

        // mixed non-nulls and nulls allowed
        predicate = new TupleDomainParquetPredicate(
                withColumnDomains(singletonMap(descriptor, singleValue(type, EMPTY_SLICE, true))),
                singletonList(column),
                UTC);
        assertTrue(predicate.matches(new DictionaryDescriptor(column, Optional.of(dictionary))));
    }

    @Test
    public void testColumnIndexWithNullPages()
            throws Exception
@@ -0,0 +1,107 @@
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.hive.parquet;

import com.google.common.io.Resources;
import io.trino.plugin.hive.HiveColumnHandle;
import io.trino.plugin.hive.HiveConfig;
import io.trino.plugin.hive.HivePageSourceFactory;
import io.trino.plugin.hive.HiveStorageFormat;
import io.trino.plugin.hive.HiveType;
import io.trino.plugin.hive.acid.AcidTransaction;
import io.trino.plugin.hive.benchmark.StandardFileFormats;
import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.predicate.Domain;
import io.trino.spi.predicate.TupleDomain;
import io.trino.spi.type.Type;
import io.trino.testing.MaterializedResult;
import io.trino.testing.MaterializedRow;
import org.apache.hadoop.fs.Path;
import org.testng.annotations.Test;

import java.io.File;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.Properties;

import static io.trino.hadoop.ConfigurationInstantiator.newEmptyConfiguration;
import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR;
import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn;
import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT;
import static io.trino.plugin.hive.HiveTestUtils.getHiveSession;
import static io.trino.spi.type.IntegerType.INTEGER;
import static io.trino.testing.MaterializedResult.materializeSourceDataStream;
import static java.util.Collections.singletonList;
import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB;
import static org.assertj.core.api.Assertions.assertThat;

public class TestOnlyNulls
{
    @Test
    public void testOnlyNulls()
            throws Exception
    {
        // The file contains only nulls and a dictionary page with 0 entries.
        File parquetFile = new File(Resources.getResource("issue-10873.parquet").toURI());
        String columnName = "x";
        Type columnType = INTEGER;

        HiveColumnHandle column = createBaseColumn(columnName, 0, HiveType.toHiveType(columnType), columnType, REGULAR, Optional.empty());

        // match not null
        try (ConnectorPageSource pageSource = createPageSource(parquetFile, column, TupleDomain.withColumnDomains(Map.of(column, Domain.notNull(columnType))))) {
            MaterializedResult result = materializeSourceDataStream(getHiveSession(new HiveConfig()), pageSource, List.of(columnType)).toTestTypes();
            assertThat(result.getMaterializedRows()).isEmpty();
        }

        // match null
        try (ConnectorPageSource pageSource = createPageSource(parquetFile, column, TupleDomain.withColumnDomains(Map.of(column, Domain.onlyNull(columnType))))) {
            MaterializedResult result = materializeSourceDataStream(getHiveSession(new HiveConfig()), pageSource, List.of(columnType)).toTestTypes();

            assertThat(result.getMaterializedRows())
                    .isEqualTo(List.of(
                            new MaterializedRow(singletonList(null)),
                            new MaterializedRow(singletonList(null)),
                            new MaterializedRow(singletonList(null)),
                            new MaterializedRow(singletonList(null))));
        }
    }

    private static ConnectorPageSource createPageSource(File parquetFile, HiveColumnHandle column, TupleDomain<HiveColumnHandle> domain)
    {
        HivePageSourceFactory pageSourceFactory = StandardFileFormats.TRINO_PARQUET.getHivePageSourceFactory(HDFS_ENVIRONMENT).orElseThrow();

        Properties schema = new Properties();
        schema.setProperty(SERIALIZATION_LIB, HiveStorageFormat.PARQUET.getSerde());

        return pageSourceFactory.createPageSource(
                newEmptyConfiguration(),
                getHiveSession(new HiveConfig()),
                new Path(parquetFile.toURI()),
                0,
                parquetFile.length(),
                parquetFile.length(),
                schema,
                List.of(column),
                domain,
                Optional.empty(),
                OptionalInt.empty(),
                false,
                AcidTransaction.NO_ACID_TRANSACTION)
                .orElseThrow()
                .get();
    }
}
Binary file not shown (the issue-10873.parquet test resource).
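That binary file is the fixture read by TestOnlyNulls above: four rows of a single integer column, all null. A hedged sketch of how a comparable file could be produced with parquet-mr's example writer (hypothetical class and output file name; whether the writer actually emits a zero-entry dictionary page depends on the writer version and its dictionary settings):

import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.SimpleGroupFactory;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class WriteOnlyNullsParquet
{
    public static void main(String[] args)
            throws Exception
    {
        // Single optional int32 column, matching the "x" column the test reads.
        MessageType schema = MessageTypeParser.parseMessageType("message test { optional int32 x; }");
        SimpleGroupFactory groups = new SimpleGroupFactory(schema);

        try (ParquetWriter<Group> writer = ExampleParquetWriter.builder(new Path("only-nulls.parquet"))
                .withType(schema)
                .build()) {
            // Rows with no value set for the optional field are written as nulls.
            for (int i = 0; i < 4; i++) {
                writer.write(groups.newGroup());
            }
        }
    }
}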
