From 8494caa5ffc67a1281a31dfd7426e0a71fe8bf0d Mon Sep 17 00:00:00 2001
From: Martin Traverso
Date: Mon, 31 Jan 2022 15:16:17 -0800
Subject: [PATCH] Fix failure when matching empty dictionary

---
 .../TupleDomainParquetPredicate.java       |   5 +
 .../TestTupleDomainParquetPredicate.java   |  34 ++++++
 .../plugin/hive/parquet/TestOnlyNulls.java | 107 ++++++++++++++++++
 .../src/test/resources/issue-10873.parquet | Bin 0 -> 408 bytes
 4 files changed, 146 insertions(+)
 create mode 100644 plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestOnlyNulls.java
 create mode 100644 plugin/trino-hive/src/test/resources/issue-10873.parquet

diff --git a/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java b/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java
index bfc2c24c7127..2d4ca7228f94 100644
--- a/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java
+++ b/lib/trino-parquet/src/main/java/io/trino/parquet/predicate/TupleDomainParquetPredicate.java
@@ -485,6 +485,11 @@ private static Domain getDomain(Type type, DictionaryDescriptor dictionaryDescri
         }
 
         int dictionarySize = dictionaryPage.get().getDictionarySize();
+
+        if (dictionarySize == 0) {
+            return Domain.onlyNull(type);
+        }
+
         DictionaryValueConverter converter = new DictionaryValueConverter(dictionary);
         Function<Integer, Object> convertFunction = converter.getConverter(columnDescriptor.getPrimitiveType());
         List<Object> values = new ArrayList<>();
diff --git a/lib/trino-parquet/src/test/java/io/trino/parquet/TestTupleDomainParquetPredicate.java b/lib/trino-parquet/src/test/java/io/trino/parquet/TestTupleDomainParquetPredicate.java
index c6a3342befe2..d38f94b9203a 100644
--- a/lib/trino-parquet/src/test/java/io/trino/parquet/TestTupleDomainParquetPredicate.java
+++ b/lib/trino-parquet/src/test/java/io/trino/parquet/TestTupleDomainParquetPredicate.java
@@ -69,6 +69,7 @@
 import static io.trino.spi.predicate.Domain.all;
 import static io.trino.spi.predicate.Domain.create;
 import static io.trino.spi.predicate.Domain.notNull;
+import static io.trino.spi.predicate.Domain.onlyNull;
 import static io.trino.spi.predicate.Domain.singleValue;
 import static io.trino.spi.predicate.Range.range;
 import static io.trino.spi.predicate.TupleDomain.withColumnDomains;
@@ -612,6 +613,39 @@ public void testVarcharMatchesWithDictionaryDescriptor()
         assertTrue(parquetPredicate.matches(new DictionaryDescriptor(column, Optional.of(page))));
     }
 
+    @Test
+    public void testEmptyDictionary()
+    {
+        ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"path"}, Types.optional(BINARY).named("Test column"), 0, 0);
+        RichColumnDescriptor column = new RichColumnDescriptor(columnDescriptor, new PrimitiveType(OPTIONAL, BINARY, "Test column"));
+        ColumnDescriptor descriptor = new ColumnDescriptor(column.getPath(), column.getPrimitiveType(), 0, 0);
+        VarcharType type = createVarcharType(255);
+
+        DictionaryPage dictionary = new DictionaryPage(EMPTY_SLICE, 0, PLAIN_DICTIONARY);
+        TupleDomainParquetPredicate predicate;
+
+        // only non-nulls allowed
+        predicate = new TupleDomainParquetPredicate(
+                withColumnDomains(singletonMap(descriptor, notNull(type))),
+                singletonList(column),
+                UTC);
+        assertFalse(predicate.matches(new DictionaryDescriptor(column, Optional.of(dictionary))));
+
+        // only nulls allowed
+        predicate = new TupleDomainParquetPredicate(
+                withColumnDomains(singletonMap(descriptor, onlyNull(type))),
+                singletonList(column),
+                UTC);
+        assertTrue(predicate.matches(new DictionaryDescriptor(column, Optional.of(dictionary))));
+
+        // mixed non-nulls and nulls allowed
+        predicate = new TupleDomainParquetPredicate(
+                withColumnDomains(singletonMap(descriptor, singleValue(type, EMPTY_SLICE, true))),
+                singletonList(column),
+                UTC);
+        assertTrue(predicate.matches(new DictionaryDescriptor(column, Optional.of(dictionary))));
+    }
+
     @Test
     public void testColumnIndexWithNullPages()
             throws Exception
diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestOnlyNulls.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestOnlyNulls.java
new file mode 100644
index 000000000000..e5c3a19cbe67
--- /dev/null
+++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/parquet/TestOnlyNulls.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.trino.plugin.hive.parquet;
+
+import com.google.common.io.Resources;
+import io.trino.plugin.hive.HiveColumnHandle;
+import io.trino.plugin.hive.HiveConfig;
+import io.trino.plugin.hive.HivePageSourceFactory;
+import io.trino.plugin.hive.HiveStorageFormat;
+import io.trino.plugin.hive.HiveType;
+import io.trino.plugin.hive.acid.AcidTransaction;
+import io.trino.plugin.hive.benchmark.StandardFileFormats;
+import io.trino.spi.connector.ConnectorPageSource;
+import io.trino.spi.predicate.Domain;
+import io.trino.spi.predicate.TupleDomain;
+import io.trino.spi.type.Type;
+import io.trino.testing.MaterializedResult;
+import io.trino.testing.MaterializedRow;
+import org.apache.hadoop.fs.Path;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.OptionalInt;
+import java.util.Properties;
+
+import static io.trino.hadoop.ConfigurationInstantiator.newEmptyConfiguration;
+import static io.trino.plugin.hive.HiveColumnHandle.ColumnType.REGULAR;
+import static io.trino.plugin.hive.HiveColumnHandle.createBaseColumn;
+import static io.trino.plugin.hive.HiveTestUtils.HDFS_ENVIRONMENT;
+import static io.trino.plugin.hive.HiveTestUtils.getHiveSession;
+import static io.trino.spi.type.IntegerType.INTEGER;
+import static io.trino.testing.MaterializedResult.materializeSourceDataStream;
+import static java.util.Collections.singletonList;
+import static org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_LIB;
+import static org.assertj.core.api.Assertions.assertThat;
+
+public class TestOnlyNulls
+{
+    @Test
+    public void testOnlyNulls()
+            throws Exception
+    {
+        // The file contains only nulls and a dictionary page with 0 entries.
+        File parquetFile = new File(Resources.getResource("issue-10873.parquet").toURI());
+        String columnName = "x";
+        Type columnType = INTEGER;
+
+        HiveColumnHandle column = createBaseColumn(columnName, 0, HiveType.toHiveType(columnType), columnType, REGULAR, Optional.empty());
+
+        // match not null
+        try (ConnectorPageSource pageSource = createPageSource(parquetFile, column, TupleDomain.withColumnDomains(Map.of(column, Domain.notNull(columnType))))) {
+            MaterializedResult result = materializeSourceDataStream(getHiveSession(new HiveConfig()), pageSource, List.of(columnType)).toTestTypes();
+            assertThat(result.getMaterializedRows()).isEmpty();
+        }
+
+        // match null
+        try (ConnectorPageSource pageSource = createPageSource(parquetFile, column, TupleDomain.withColumnDomains(Map.of(column, Domain.onlyNull(columnType))))) {
+            MaterializedResult result = materializeSourceDataStream(getHiveSession(new HiveConfig()), pageSource, List.of(columnType)).toTestTypes();
+
+            assertThat(result.getMaterializedRows())
+                    .isEqualTo(List.of(
+                            new MaterializedRow(singletonList(null)),
+                            new MaterializedRow(singletonList(null)),
+                            new MaterializedRow(singletonList(null)),
+                            new MaterializedRow(singletonList(null))));
+        }
+    }
+
+    private static ConnectorPageSource createPageSource(File parquetFile, HiveColumnHandle column, TupleDomain<HiveColumnHandle> domain)
+    {
+        HivePageSourceFactory pageSourceFactory = StandardFileFormats.TRINO_PARQUET.getHivePageSourceFactory(HDFS_ENVIRONMENT).orElseThrow();
+
+        Properties schema = new Properties();
+        schema.setProperty(SERIALIZATION_LIB, HiveStorageFormat.PARQUET.getSerde());
+
+        return pageSourceFactory.createPageSource(
+                newEmptyConfiguration(),
+                getHiveSession(new HiveConfig()),
+                new Path(parquetFile.toURI()),
+                0,
+                parquetFile.length(),
+                parquetFile.length(),
+                schema,
+                List.of(column),
+                domain,
+                Optional.empty(),
+                OptionalInt.empty(),
+                false,
+                AcidTransaction.NO_ACID_TRANSACTION)
+                .orElseThrow()
+                .get();
+    }
+}
diff --git a/plugin/trino-hive/src/test/resources/issue-10873.parquet b/plugin/trino-hive/src/test/resources/issue-10873.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..39ff32025909368445a612b62798b2b4e8cc32d5
GIT binary patch
literal 408
zcmb7B!AiqG5S=U~?m;9JC+xz4y%-SETG1TzHYpWF#WY~O38922sEw&wKgSR8Y54q0L-|9{TMX{=%epg+A@k+14IuFl}!Tht?|B4g*wc_
zc@#zZAnw-4cz|NZo{bYQ{%Y}9z7#B>+E+aF{KL&qTW8&W*6~3)(&2m;z&f){d479;
z*6lXm-l5=_1hbZju87u@R&-8O(WGkCG$p!`Y(jJ?k?T$5YAVv55N8}KnZJ-djG34_
zOw-oMO-7sk=^WA%$=S;K<|;&B_UfW}`zYUsi@F{bO|x7NKFenHw0s?$j>e<0b@I}Q
J2axff`~telPQ?HK

literal 0
HcmV?d00001