From 67ce2ed97d18dca19e920fdf362ebf3183ab8751 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Grzegorz=20Kokosi=C5=84ski?= Date: Mon, 2 Jul 2018 14:19:23 +0200 Subject: [PATCH] Fix reading table with headers in kerberized environments Previously, when a table had a non-zero value set for the header or footer line count, Presto was unable to generate splits for it, raising: Unable to query tables due to Can't get Master Kerberos principal for use as renewer. The error mentioned above was raised from the internals of FileInputFormat. This change avoids using FileInputFormat.getSplits. --- .travis.yml | 2 +- .../hive/BackgroundHiveSplitLoader.java | 23 ++++--------------- .../hive/util/InternalHiveSplitFactory.java | 11 +++++---- .../tables_with_header_and_footer.sql | 2 +- 4 files changed, 13 insertions(+), 25 deletions(-) diff --git a/.travis.yml b/.travis.yml index d417cc3caa9d0..88c35defb99e2 100644 --- a/.travis.yml +++ b/.travis.yml @@ -104,7 +104,7 @@ script: - | if [[ -v PRODUCT_TESTS_SPECIFIC_ENVIRONMENT ]]; then presto-product-tests/bin/run_on_docker.sh \ - singlenode-kerberos-hdfs-impersonation -g storage_formats,cli,hdfs_impersonation,authorization + singlenode-kerberos-hdfs-impersonation -g storage_formats,cli,hdfs_impersonation,authorization,hive_file_header fi - | if [[ -v PRODUCT_TESTS_SPECIFIC_ENVIRONMENT_2 ]]; then diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/BackgroundHiveSplitLoader.java b/presto-hive/src/main/java/com/facebook/presto/hive/BackgroundHiveSplitLoader.java index b1ec3c6b3fb0c..fda9a9646d55a 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/BackgroundHiveSplitLoader.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/BackgroundHiveSplitLoader.java @@ -287,7 +287,6 @@ private ListenableFuture loadPartition(HivePartitionMetadata partition) // get the configuration for the target path -- it may be a different hdfs instance FileSystem targetFilesystem = hdfsEnvironment.getFileSystem(hdfsContext, targetPath); JobConf 
targetJob = toJobConf(targetFilesystem.getConf()); - handleFileHeader(schema, targetJob); targetJob.setInputFormat(TextInputFormat.class); targetInputFormat.configure(targetJob); FileInputFormat.setInputPaths(targetJob, targetPath); @@ -332,12 +331,11 @@ private ListenableFuture loadPartition(HivePartitionMetadata partition) // To support custom input formats, we want to call getSplits() // on the input format to obtain file splits. - if (shouldUseFileSplitsFromInputFormat(inputFormat) || getHeaderCount(schema) > 0 || getFooterCount(schema) > 0) { + if (shouldUseFileSplitsFromInputFormat(inputFormat)) { if (tableBucketInfo.isPresent()) { throw new PrestoException(NOT_SUPPORTED, "Presto cannot read bucketed partition in an input format with UseFileSplitsFromInputFormat annotation: " + inputFormat.getClass().getSimpleName()); } JobConf jobConf = toJobConf(configuration); - handleFileHeader(schema, jobConf); FileInputFormat.setInputPaths(jobConf, path); InputSplit[] splits = inputFormat.getSplits(jobConf, 0); @@ -349,22 +347,11 @@ private ListenableFuture loadPartition(HivePartitionMetadata partition) return hiveSplitSource.addToQueue(getBucketedSplits(path, fs, splitFactory, tableBucketInfo.get(), bucketConversion)); } - fileIterators.addLast(createInternalHiveSplitIterator(path, fs, splitFactory)); + boolean splittable = getHeaderCount(schema) == 0 && getFooterCount(schema) == 0; + fileIterators.addLast(createInternalHiveSplitIterator(path, fs, splitFactory, splittable)); return COMPLETED_FUTURE; } - private void handleFileHeader(Properties schema, JobConf jobConf) - { - int headerCount = getHeaderCount(schema); - int footerCount = getFooterCount(schema); - if (headerCount > 0 || footerCount > 0) { - // do not split file when skip.header.line.count or skip.footer.line.count is used - jobConf.setLong("mapreduce.input.fileinputformat.split.minsize", Long.MAX_VALUE); - // TODO remove this when Hadoop 1.x is not supported - jobConf.setLong("mapred.min.split.size", 
Long.MAX_VALUE); - } - } - private ListenableFuture addSplitsToSource(InputSplit[] targetSplits, InternalHiveSplitFactory splitFactory) throws IOException { @@ -389,10 +376,10 @@ private static boolean shouldUseFileSplitsFromInputFormat(InputFormat inpu .anyMatch(name -> name.equals("UseFileSplitsFromInputFormat")); } - private Iterator createInternalHiveSplitIterator(Path path, FileSystem fileSystem, InternalHiveSplitFactory splitFactory) + private Iterator createInternalHiveSplitIterator(Path path, FileSystem fileSystem, InternalHiveSplitFactory splitFactory, boolean splittable) { return Streams.stream(new HiveFileIterator(path, fileSystem, directoryLister, namenodeStats, recursiveDirWalkerEnabled ? RECURSE : IGNORED)) - .map(splitFactory::createInternalHiveSplit) + .map(status -> splitFactory.createInternalHiveSplit(status, splittable)) .filter(Optional::isPresent) .map(Optional::get) .iterator(); diff --git a/presto-hive/src/main/java/com/facebook/presto/hive/util/InternalHiveSplitFactory.java b/presto-hive/src/main/java/com/facebook/presto/hive/util/InternalHiveSplitFactory.java index ae9c1ee49fadf..dd19146b5800f 100644 --- a/presto-hive/src/main/java/com/facebook/presto/hive/util/InternalHiveSplitFactory.java +++ b/presto-hive/src/main/java/com/facebook/presto/hive/util/InternalHiveSplitFactory.java @@ -86,18 +86,19 @@ public String getPartitionName() return partitionName; } - public Optional createInternalHiveSplit(LocatedFileStatus status) + public Optional createInternalHiveSplit(LocatedFileStatus status, boolean splittable) { - return createInternalHiveSplit(status, OptionalInt.empty()); + return createInternalHiveSplit(status, OptionalInt.empty(), splittable); } public Optional createInternalHiveSplit(LocatedFileStatus status, int bucketNumber) { - return createInternalHiveSplit(status, OptionalInt.of(bucketNumber)); + return createInternalHiveSplit(status, OptionalInt.of(bucketNumber), false); } - private Optional 
createInternalHiveSplit(LocatedFileStatus status, OptionalInt bucketNumber) + private Optional createInternalHiveSplit(LocatedFileStatus status, OptionalInt bucketNumber, boolean splittable) { + splittable = splittable && isSplittable(inputFormat, fileSystem, status.getPath()); return createInternalHiveSplit( status.getPath(), status.getBlockLocations(), @@ -105,7 +106,7 @@ private Optional createInternalHiveSplit(LocatedFileStatus st status.getLen(), status.getLen(), bucketNumber, - isSplittable(inputFormat, fileSystem, status.getPath())); + splittable); } public Optional createInternalHiveSplit(FileSplit split) diff --git a/presto-product-tests/src/main/resources/sql-tests/testcases/tables_with_header_and_footer.sql b/presto-product-tests/src/main/resources/sql-tests/testcases/tables_with_header_and_footer.sql index 91241aca3b8e0..4773395798f3b 100644 --- a/presto-product-tests/src/main/resources/sql-tests/testcases/tables_with_header_and_footer.sql +++ b/presto-product-tests/src/main/resources/sql-tests/testcases/tables_with_header_and_footer.sql @@ -1,4 +1,4 @@ --- database: presto; tables: table_with_header, table_with_footer, table_with_header_and_footer; groups: hive; +-- database: presto; tables: table_with_header, table_with_footer, table_with_header_and_footer; groups: hive, hive_file_header; --! name: simple_scan with header SELECT count(*) FROM table_with_header --!