From 47be1ab3b68b987ed8ab349fc351f438c00d9871 Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Mon, 20 May 2024 11:05:25 -0500 Subject: [PATCH 001/113] HADOOP-18679. Add API for bulk/paged delete of files (#6726) Applications can create a BulkDelete instance from a BulkDeleteSource; the BulkDelete interface provides the pageSize(): the maximum number of entries which can be deleted, and a bulkDelete(Collection paths) method which can take a collection up to pageSize() long. This is optimized for object stores with bulk delete APIs; the S3A connector will offer the page size of fs.s3a.bulk.delete.page.size unless bulk delete has been disabled. Even with a page size of 1, the S3A implementation is more efficient than delete(path) as there are no safety checks for the path being a directory or probes for the need to recreate directories. The interface BulkDeleteSource is implemented by all FileSystem implementations, with a page size of 1 and mapped to delete(pathToDelete, false). This means that callers do not need to have special case handling for object stores versus classic filesystems. To aid use through reflection APIs, the class org.apache.hadoop.io.wrappedio.WrappedIO has been created with "reflection friendly" methods. Contributed by Mukund Thakur and Steve Loughran --- .../java/org/apache/hadoop/fs/BulkDelete.java | 90 ++++ .../apache/hadoop/fs/BulkDeleteSource.java | 53 +++ .../org/apache/hadoop/fs/BulkDeleteUtils.java | 66 +++ .../hadoop/fs/CommonPathCapabilities.java | 6 + .../java/org/apache/hadoop/fs/FileSystem.java | 34 +- .../fs/impl/DefaultBulkDeleteOperation.java | 97 +++++ .../fs/statistics/StoreStatisticNames.java | 6 + .../apache/hadoop/io/wrappedio/WrappedIO.java | 93 ++++ .../apache/hadoop/util/functional/Tuples.java | 87 ++++ .../site/markdown/filesystem/bulkdelete.md | 139 ++++++ .../src/site/markdown/filesystem/index.md | 3 +- .../AbstractContractBulkDeleteTest.java | 336 +++++++++++++++ .../TestLocalFSContractBulkDelete.java | 34 ++ .../TestRawLocalContractBulkDelete.java | 35 ++ .../hdfs/TestHDFSContractBulkDelete.java | 49 +++ .../org/apache/hadoop/fs/s3a/Constants.java | 12 + .../apache/hadoop/fs/s3a/S3AFileSystem.java | 183 ++++---- .../apache/hadoop/fs/s3a/S3AInternals.java | 12 +- .../org/apache/hadoop/fs/s3a/S3AStore.java | 129 ++++++ .../org/apache/hadoop/fs/s3a/Statistic.java | 8 + .../fs/s3a/impl/BulkDeleteOperation.java | 128 ++++++ .../BulkDeleteOperationCallbacksImpl.java | 125 ++++++ .../s3a/impl/MultiObjectDeleteException.java | 20 +- .../hadoop/fs/s3a/impl/S3AStoreBuilder.java | 113 +++++ .../hadoop/fs/s3a/impl/S3AStoreImpl.java | 400 ++++++++++++++++++ .../fs/s3a/impl/StoreContextFactory.java | 35 ++ .../tools/hadoop-aws/aws_sdk_upgrade.md | 1 + .../markdown/tools/hadoop-aws/performance.md | 82 +++- .../s3a/ITestS3AContractBulkDelete.java | 230 ++++++++++ .../hadoop/fs/s3a/AbstractS3AMockTest.java | 3 +- .../hadoop/fs/s3a/TestS3ADeleteOnExit.java | 3 +- .../hadoop/fs/s3a/auth/ITestAssumeRole.java | 133 +++++- .../s3a/scale/AbstractSTestS3AHugeFiles.java | 2 + .../contract/ITestAbfsContractBulkDelete.java | 50 +++ .../src/test/resources/log4j.properties | 1 + 35 files changed, 2679 insertions(+), 119 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BulkDelete.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BulkDeleteSource.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BulkDeleteUtils.java create mode 
100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/DefaultBulkDeleteOperation.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Tuples.java create mode 100644 hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractBulkDeleteTest.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractBulkDelete.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/rawlocal/TestRawLocalContractBulkDelete.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestHDFSContractBulkDelete.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AStore.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteOperation.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteOperationCallbacksImpl.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreBuilder.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreImpl.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContextFactory.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractBulkDelete.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsContractBulkDelete.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BulkDelete.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BulkDelete.java new file mode 100644 index 0000000000000..ab5f73b5624ff --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BulkDelete.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; + +import static java.util.Objects.requireNonNull; + +/** + * API for bulk deletion of objects/files, + * but not directories. + * After use, call {@code close()} to release any resources and + * to guarantee store IOStatistics are updated. + *

+ * Callers MUST have no expectation that parent directories will exist after the + * operation completes; if an object store needs to explicitly look for and create + * directory markers, that step will be omitted. + *

+ * Be aware that on some stores (AWS S3) each object listed in a bulk delete counts + * against the write IOPS limit; large page sizes are counterproductive here, as + * are attempts at parallel submissions across multiple threads. + * @see HADOOP-16823. + * Large DeleteObject requests are their own Thundering Herd + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public interface BulkDelete extends IOStatisticsSource, Closeable { + + /** + * The maximum number of objects/files to delete in a single request. + * @return a number greater than zero. + */ + int pageSize(); + + /** + * Base path of a bulk delete operation. + * All paths submitted in {@link #bulkDelete(Collection)} must be under this path. + * @return base path of a bulk delete operation. + */ + Path basePath(); + + /** + * Delete a list of files/objects. + *
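+   * <p>
+   * A minimal usage sketch; {@code bulkDeleteSource}, {@code base} and
+   * {@code paths} are illustrative variable names only, not part of this API:
+   * <pre>{@code
+   * try (BulkDelete bulkDelete = bulkDeleteSource.createBulkDelete(base)) {
+   *   // at most pageSize() absolute paths, all under the base path.
+   *   List<Map.Entry<Path, String>> failures = bulkDelete.bulkDelete(paths);
+   * }
+   * }</pre>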

+ * @param paths list of paths which must be absolute and under the base path. + * provided in {@link #basePath()}. + * @return a list of paths which failed to delete, with the exception message. + * @throws IOException IO problems including networking, authentication and more. + * @throws IllegalArgumentException if a path argument is invalid. + */ + List> bulkDelete(Collection paths) + throws IOException, IllegalArgumentException; + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BulkDeleteSource.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BulkDeleteSource.java new file mode 100644 index 0000000000000..cad24babb344a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BulkDeleteSource.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Interface for bulk deletion. + * Filesystems which support bulk deletion should implement this interface + * and MUST also declare their support in the path capability + * {@link CommonPathCapabilities#BULK_DELETE}. + * Exporting the interface does not guarantee that the operation is supported; + * returning a {@link BulkDelete} object from the call {@link #createBulkDelete(Path)} + * is. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public interface BulkDeleteSource { + + /** + * Create a bulk delete operation. + * There is no network IO at this point, simply the creation of + * a bulk delete object. + * A path must be supplied to assist in link resolution. + * @param path path to delete under. + * @return the bulk delete. + * @throws UnsupportedOperationException bulk delete under that path is not supported. + * @throws IllegalArgumentException path not valid. + * @throws IOException problems resolving paths + */ + BulkDelete createBulkDelete(Path path) + throws UnsupportedOperationException, IllegalArgumentException, IOException; + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BulkDeleteUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BulkDeleteUtils.java new file mode 100644 index 0000000000000..d9916429425e9 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BulkDeleteUtils.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import java.util.Collection; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.util.Preconditions.checkArgument; + +/** + * Utility class for bulk delete operations. + */ +public final class BulkDeleteUtils { + + private BulkDeleteUtils() { + } + + /** + * Preconditions for bulk delete paths. + * @param paths paths to delete. + * @param pageSize maximum number of paths to delete in a single operation. + * @param basePath base path for the delete operation. + */ + public static void validateBulkDeletePaths(Collection paths, int pageSize, Path basePath) { + requireNonNull(paths); + checkArgument(paths.size() <= pageSize, + "Number of paths (%d) is larger than the page size (%d)", paths.size(), pageSize); + paths.forEach(p -> { + checkArgument(p.isAbsolute(), "Path %s is not absolute", p); + checkArgument(validatePathIsUnderParent(p, basePath), + "Path %s is not under the base path %s", p, basePath); + }); + } + + /** + * Check if a path is under a base path. + * @param p path to check. + * @param basePath base path. + * @return true if the path is under the base path. + */ + public static boolean validatePathIsUnderParent(Path p, Path basePath) { + while (p.getParent() != null) { + if (p.getParent().equals(basePath)) { + return true; + } + p = p.getParent(); + } + return false; + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java index 9ec07cbe966e9..2005f0ae3be31 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java @@ -181,4 +181,10 @@ private CommonPathCapabilities() { */ public static final String DIRECTORY_LISTING_INCONSISTENT = "fs.capability.directory.listing.inconsistent"; + + /** + * Capability string to probe for bulk delete: {@value}. 
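+   * <p>
+   * A sketch of a capability probe, using the literal capability string with
+   * the existing {@code PathCapabilities} API:
+   * <pre>{@code
+   * boolean bulkDeleteSupported =
+   *     fs.hasPathCapability(path, "fs.capability.bulk.delete");
+   * }</pre>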
+ */ + public static final String BULK_DELETE = "fs.capability.bulk.delete"; + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index 768fd5b5e1caa..2155e17328a66 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -56,6 +56,7 @@ import org.apache.hadoop.fs.Options.HandleOpt; import org.apache.hadoop.fs.Options.Rename; import org.apache.hadoop.fs.impl.AbstractFSBuilderImpl; +import org.apache.hadoop.fs.impl.DefaultBulkDeleteOperation; import org.apache.hadoop.fs.impl.FutureDataInputStreamBuilderImpl; import org.apache.hadoop.fs.impl.OpenFileParameters; import org.apache.hadoop.fs.permission.AclEntry; @@ -169,7 +170,8 @@ @InterfaceAudience.Public @InterfaceStability.Stable public abstract class FileSystem extends Configured - implements Closeable, DelegationTokenIssuer, PathCapabilities { + implements Closeable, DelegationTokenIssuer, + PathCapabilities, BulkDeleteSource { public static final String FS_DEFAULT_NAME_KEY = CommonConfigurationKeys.FS_DEFAULT_NAME_KEY; public static final String DEFAULT_FS = @@ -3485,12 +3487,16 @@ public Collection getTrashRoots(boolean allUsers) { public boolean hasPathCapability(final Path path, final String capability) throws IOException { switch (validatePathCapabilityArgs(makeQualified(path), capability)) { - case CommonPathCapabilities.FS_SYMLINKS: - // delegate to the existing supportsSymlinks() call. - return supportsSymlinks() && areSymlinksEnabled(); - default: - // the feature is not implemented. - return false; + case CommonPathCapabilities.BULK_DELETE: + // bulk delete has default implementation which + // can called on any FileSystem. + return true; + case CommonPathCapabilities.FS_SYMLINKS: + // delegate to the existing supportsSymlinks() call. + return supportsSymlinks() && areSymlinksEnabled(); + default: + // the feature is not implemented. + return false; } } @@ -4976,4 +4982,18 @@ public MultipartUploaderBuilder createMultipartUploader(Path basePath) methodNotSupported(); return null; } + + /** + * Create a bulk delete operation. + * The default implementation returns an instance of {@link DefaultBulkDeleteOperation}. + * @param path base path for the operation. + * @return an instance of the bulk delete. + * @throws IllegalArgumentException any argument is invalid. + * @throws IOException if there is an IO problem. + */ + @Override + public BulkDelete createBulkDelete(Path path) + throws IllegalArgumentException, IOException { + return new DefaultBulkDeleteOperation(path, this); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/DefaultBulkDeleteOperation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/DefaultBulkDeleteOperation.java new file mode 100644 index 0000000000000..56f6a4622f877 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/DefaultBulkDeleteOperation.java @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.impl; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.BulkDelete; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.functional.Tuples; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.fs.BulkDeleteUtils.validateBulkDeletePaths; + +/** + * Default implementation of the {@link BulkDelete} interface. + */ +public class DefaultBulkDeleteOperation implements BulkDelete { + + private static Logger LOG = LoggerFactory.getLogger(DefaultBulkDeleteOperation.class); + + /** Default page size for bulk delete. */ + private static final int DEFAULT_PAGE_SIZE = 1; + + /** Base path for the bulk delete operation. */ + private final Path basePath; + + /** Delegate File system make actual delete calls. */ + private final FileSystem fs; + + public DefaultBulkDeleteOperation(Path basePath, + FileSystem fs) { + this.basePath = requireNonNull(basePath); + this.fs = fs; + } + + @Override + public int pageSize() { + return DEFAULT_PAGE_SIZE; + } + + @Override + public Path basePath() { + return basePath; + } + + /** + * {@inheritDoc}. + * The default impl just calls {@code FileSystem.delete(path, false)} + * on the single path in the list. + */ + @Override + public List> bulkDelete(Collection paths) + throws IOException, IllegalArgumentException { + validateBulkDeletePaths(paths, DEFAULT_PAGE_SIZE, basePath); + List> result = new ArrayList<>(); + if (!paths.isEmpty()) { + // As the page size is always 1, this should be the only one + // path in the collection. + Path pathToDelete = paths.iterator().next(); + try { + fs.delete(pathToDelete, false); + } catch (IOException ex) { + LOG.debug("Couldn't delete {} - exception occurred: {}", pathToDelete, ex); + result.add(Tuples.pair(pathToDelete, ex.toString())); + } + } + return result; + } + + @Override + public void close() throws IOException { + + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java index 19ee9d1414ecf..a513cffd849b6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java @@ -46,6 +46,9 @@ public final class StoreStatisticNames { /** {@value}. */ public static final String OP_APPEND = "op_append"; + /** {@value}. */ + public static final String OP_BULK_DELETE = "op_bulk-delete"; + /** {@value}. 
*/ public static final String OP_COPY_FROM_LOCAL_FILE = "op_copy_from_local_file"; @@ -194,6 +197,9 @@ public final class StoreStatisticNames { public static final String STORE_IO_RETRY = "store_io_retry"; + public static final String STORE_IO_RATE_LIMITED_DURATION + = "store_io_rate_limited_duration"; + /** * A store's equivalent of a paged LIST request was initiated: {@value}. */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java new file mode 100644 index 0000000000000..696055895a19b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.wrappedio; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.BulkDelete; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * Reflection-friendly access to APIs which are not available in + * some of the older Hadoop versions which libraries still + * compile against. + *

+ * The intent is to avoid the need for complex reflection operations + * including wrapping of parameter classes, direct instatiation of + * new classes etc. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class WrappedIO { + + private WrappedIO() { + } + + /** + * Get the maximum number of objects/files to delete in a single request. + * @param fs filesystem + * @param path path to delete under. + * @return a number greater than or equal to zero. + * @throws UnsupportedOperationException bulk delete under that path is not supported. + * @throws IllegalArgumentException path not valid. + * @throws IOException problems resolving paths + */ + public static int bulkDelete_PageSize(FileSystem fs, Path path) throws IOException { + try (BulkDelete bulk = fs.createBulkDelete(path)) { + return bulk.pageSize(); + } + } + + /** + * Delete a list of files/objects. + *

+   * <ul>
+   *   <li>Files must be under the path provided in {@code base}.</li>
+   *   <li>The size of the list must be equal to or less than the page size.</li>
+   *   <li>Directories are not supported; the outcome of attempting to delete
+   *   directories is undefined (ignored; undetected, listed as failures...).</li>
+   *   <li>The operation is not atomic.</li>
+   *   <li>The operation is treated as idempotent: network failures may
+   *   trigger resubmission of the request -any new objects created under a
+   *   path in the list may then be deleted.</li>
+   *   <li>There is no guarantee that any parent directories exist after this call.</li>
+   * </ul>
+ * @param fs filesystem + * @param base path to delete under. + * @param paths list of paths which must be absolute and under the base path. + * @return a list of all the paths which couldn't be deleted for a reason other than "not found" and any associated error message. + * @throws UnsupportedOperationException bulk delete under that path is not supported. + * @throws IOException IO problems including networking, authentication and more. + * @throws IllegalArgumentException if a path argument is invalid. + */ + public static List> bulkDelete_delete(FileSystem fs, + Path base, + Collection paths) + throws IOException { + try (BulkDelete bulk = fs.createBulkDelete(base)) { + return bulk.bulkDelete(paths); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Tuples.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Tuples.java new file mode 100644 index 0000000000000..ed80c1daca726 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Tuples.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Tuple support. + * This allows for tuples to be passed around as part of the public API without + * committing to a third-party library tuple implementation. + */ +@InterfaceStability.Unstable +public final class Tuples { + + private Tuples() { + } + + /** + * Create a 2-tuple. + * @param key element 1 + * @param value element 2 + * @return a tuple. + * @param element 1 type + * @param element 2 type + */ + public static Map.Entry pair(final K key, final V value) { + return new Tuple<>(key, value); + } + + /** + * Simple tuple class: uses the Map.Entry interface as other + * implementations have done, so the API is available across + * all java versions. 
+ * @param key + * @param value + */ + private static final class Tuple implements Map.Entry { + + private final K key; + + private final V value; + + private Tuple(final K key, final V value) { + this.key = key; + this.value = value; + } + + @Override + public K getKey() { + return key; + } + + @Override + public V getValue() { + return value; + } + + @Override + public V setValue(final V value) { + throw new UnsupportedOperationException("Tuple is immutable"); + } + + @Override + public String toString() { + return "(" + key + ", " + value + ')'; + } + + } +} diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md new file mode 100644 index 0000000000000..de0e4e893ba2e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/bulkdelete.md @@ -0,0 +1,139 @@ + + +# interface `BulkDelete` + + + +The `BulkDelete` interface provides an API to perform bulk delete of files/objects +in an object store or filesystem. + +## Key Features + +* An API for submitting a list of paths to delete. +* This list must be no larger than the "page size" supported by the client; This size is also exposed as a method. +* Triggers a request to delete files at the specific paths. +* Returns a list of which paths were reported as delete failures by the store. +* Does not consider a nonexistent file to be a failure. +* Does not offer any atomicity guarantees. +* Idempotency guarantees are weak: retries may delete files newly created by other clients. +* Provides no guarantees as to the outcome if a path references a directory. +* Provides no guarantees that parent directories will exist after the call. + + +The API is designed to match the semantics of the AWS S3 [Bulk Delete](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html) REST API call, but it is not +exclusively restricted to this store. This is why the "provides no guarantees" +restrictions do not state what the outcome will be when executed on other stores. + +### Interface `org.apache.hadoop.fs.BulkDeleteSource` + +The interface `BulkDeleteSource` is offered by a FileSystem/FileContext class if +it supports the API. The default implementation is implemented in base FileSystem +class that returns an instance of `org.apache.hadoop.fs.impl.DefaultBulkDeleteOperation`. +The default implementation details are provided in below sections. + + +```java +@InterfaceAudience.Public +@InterfaceStability.Unstable +public interface BulkDeleteSource { + BulkDelete createBulkDelete(Path path) + throws UnsupportedOperationException, IllegalArgumentException, IOException; + +} + +``` + +### Interface `org.apache.hadoop.fs.BulkDelete` + +This is the bulk delete implementation returned by the `createBulkDelete()` call. + +```java +@InterfaceAudience.Public +@InterfaceStability.Unstable +public interface BulkDelete extends IOStatisticsSource, Closeable { + int pageSize(); + Path basePath(); + List> bulkDelete(List paths) + throws IOException, IllegalArgumentException; + +} + +``` + +### `bulkDelete(paths)` + +#### Preconditions + +```python +if length(paths) > pageSize: throw IllegalArgumentException +``` + +#### Postconditions + +All paths which refer to files are removed from the set of files. +```python +FS'Files = FS.Files - [paths] +``` + +No other restrictions are placed upon the outcome. 
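
#### Example usage

The following is a sketch of how an application might drive the API, deleting a
collection of files in pages no larger than the store's page size. The class and
method names (`BulkDeleteExample`, `deleteFiles`, `submitPage`) are illustrative
only; all paths passed in are assumed to be absolute and under the base path.

```java
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.BulkDelete;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Illustrative helper which deletes a collection of files in pages of
 * the store's page size.
 */
public final class BulkDeleteExample {

  private BulkDeleteExample() {
  }

  public static void deleteFiles(FileSystem fs, Path base, Collection<Path> files)
      throws IOException {
    try (BulkDelete bulkDelete = fs.createBulkDelete(base)) {
      final int pageSize = bulkDelete.pageSize();
      final List<Path> page = new ArrayList<>(pageSize);
      for (Path file : files) {
        page.add(file);
        if (page.size() == pageSize) {
          submitPage(bulkDelete, page);
        }
      }
      // submit any remaining paths.
      submitPage(bulkDelete, page);
    }
  }

  private static void submitPage(BulkDelete bulkDelete, List<Path> page)
      throws IOException {
    if (!page.isEmpty()) {
      // paths which could not be deleted are returned with an error string.
      List<Map.Entry<Path, String>> failures = bulkDelete.bulkDelete(page);
      for (Map.Entry<Path, String> failure : failures) {
        System.err.println("Failed to delete " + failure.getKey()
            + ": " + failure.getValue());
      }
      page.clear();
    }
  }
}
```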
+ + +### Availability + +The `BulkDeleteSource` interface is exported by `FileSystem` and `FileContext` storage clients +which is available for all FS via `org.apache.hadoop.fs.impl.DefaultBulkDeleteSource`. For +integration in applications like Apache Iceberg to work seamlessly, all implementations +of this interface MUST NOT reject the request but instead return a BulkDelete instance +of size >= 1. + +Use the `PathCapabilities` probe `fs.capability.bulk.delete`. + +```java +store.hasPathCapability(path, "fs.capability.bulk.delete") +``` + +### Invocation through Reflection. + +The need for many libraries to compile against very old versions of Hadoop +means that most of the cloud-first Filesystem API calls cannot be used except +through reflection -And the more complicated The API and its data types are, +The harder that reflection is to implement. + +To assist this, the class `org.apache.hadoop.io.wrappedio.WrappedIO` has few methods +which are intended to provide simple access to the API, especially +through reflection. + +```java + + public static int bulkDeletePageSize(FileSystem fs, Path path) throws IOException; + + public static int bulkDeletePageSize(FileSystem fs, Path path) throws IOException; + + public static List> bulkDelete(FileSystem fs, Path base, Collection paths); +``` + +### Implementations + +#### Default Implementation + +The default implementation which will be used by all implementation of `FileSystem` of the +`BulkDelete` interface is `org.apache.hadoop.fs.impl.DefaultBulkDeleteOperation` which fixes the page +size to be 1 and calls `FileSystem.delete(path, false)` on the single path in the list. + + +#### S3A Implementation +The S3A implementation is `org.apache.hadoop.fs.s3a.impl.BulkDeleteOperation` which implements the +multi object delete semantics of the AWS S3 API [Bulk Delete](https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html) +For more details please refer to the S3A Performance documentation. \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md index df39839e831c8..be72f35789aad 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/index.md @@ -43,4 +43,5 @@ HDFS as these are commonly expected by Hadoop client applications. 1. [IOStatistics](iostatistics.html) 1. [openFile()](openfile.html) 1. [SafeMode](safemode.html) -1. [LeaseRecoverable](leaserecoverable.html) \ No newline at end of file +1. [LeaseRecoverable](leaserecoverable.html) +1. [BulkDelete](bulkdelete.html) \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractBulkDeleteTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractBulkDeleteTest.java new file mode 100644 index 0000000000000..9ebf9923f39c2 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractBulkDeleteTest.java @@ -0,0 +1,336 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.CommonPathCapabilities; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.wrappedio.WrappedIO; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; +import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; +import static org.apache.hadoop.io.wrappedio.WrappedIO.bulkDelete_delete; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Contract tests for bulk delete operation. + */ +public abstract class AbstractContractBulkDeleteTest extends AbstractFSContractTestBase { + + private static final Logger LOG = + LoggerFactory.getLogger(AbstractContractBulkDeleteTest.class); + + /** + * Page size for bulk delete. This is calculated based + * on the store implementation. + */ + protected int pageSize; + + /** + * Base path for the bulk delete tests. + * All the paths to be deleted should be under this base path. + */ + protected Path basePath; + + /** + * Test file system. + */ + protected FileSystem fs; + + @Before + public void setUp() throws Exception { + fs = getFileSystem(); + basePath = path(getClass().getName()); + pageSize = WrappedIO.bulkDelete_PageSize(getFileSystem(), basePath); + fs.mkdirs(basePath); + } + + public Path getBasePath() { + return basePath; + } + + protected int getExpectedPageSize() { + return 1; + } + + /** + * Validate the page size for bulk delete operation. Different stores can have different + * implementations for bulk delete operation thus different page size. + */ + @Test + public void validatePageSize() throws Exception { + Assertions.assertThat(pageSize) + .describedAs("Page size should be 1 by default for all stores") + .isEqualTo(getExpectedPageSize()); + } + + @Test + public void testPathsSizeEqualsPageSizePrecondition() throws Exception { + List listOfPaths = createListOfPaths(pageSize, basePath); + // Bulk delete call should pass with no exception. + bulkDelete_delete(getFileSystem(), basePath, listOfPaths); + } + + @Test + public void testPathsSizeGreaterThanPageSizePrecondition() throws Exception { + List listOfPaths = createListOfPaths(pageSize + 1, basePath); + intercept(IllegalArgumentException.class, + () -> bulkDelete_delete(getFileSystem(), basePath, listOfPaths)); + } + + @Test + public void testPathsSizeLessThanPageSizePrecondition() throws Exception { + List listOfPaths = createListOfPaths(pageSize - 1, basePath); + // Bulk delete call should pass with no exception. 
+ bulkDelete_delete(getFileSystem(), basePath, listOfPaths); + } + + @Test + public void testBulkDeleteSuccessful() throws Exception { + runBulkDelete(false); + } + + @Test + public void testBulkDeleteSuccessfulUsingDirectFS() throws Exception { + runBulkDelete(true); + } + + private void runBulkDelete(boolean useDirectFS) throws IOException { + List listOfPaths = createListOfPaths(pageSize, basePath); + for (Path path : listOfPaths) { + touch(fs, path); + } + FileStatus[] fileStatuses = fs.listStatus(basePath); + Assertions.assertThat(fileStatuses) + .describedAs("File count after create") + .hasSize(pageSize); + if (useDirectFS) { + assertSuccessfulBulkDelete( + fs.createBulkDelete(basePath).bulkDelete(listOfPaths)); + } else { + // Using WrappedIO to call bulk delete. + assertSuccessfulBulkDelete( + bulkDelete_delete(getFileSystem(), basePath, listOfPaths)); + } + + FileStatus[] fileStatusesAfterDelete = fs.listStatus(basePath); + Assertions.assertThat(fileStatusesAfterDelete) + .describedAs("File statuses should be empty after delete") + .isEmpty(); + } + + + @Test + public void validatePathCapabilityDeclared() throws Exception { + Assertions.assertThat(fs.hasPathCapability(basePath, CommonPathCapabilities.BULK_DELETE)) + .describedAs("Path capability BULK_DELETE should be declared") + .isTrue(); + } + + /** + * This test should fail as path is not under the base path. + */ + @Test + public void testDeletePathsNotUnderBase() throws Exception { + List paths = new ArrayList<>(); + Path pathNotUnderBase = path("not-under-base"); + paths.add(pathNotUnderBase); + intercept(IllegalArgumentException.class, + () -> bulkDelete_delete(getFileSystem(), basePath, paths)); + } + + /** + * This test should fail as path is not absolute. + */ + @Test + public void testDeletePathsNotAbsolute() throws Exception { + List paths = new ArrayList<>(); + Path pathNotAbsolute = new Path("not-absolute"); + paths.add(pathNotAbsolute); + intercept(IllegalArgumentException.class, + () -> bulkDelete_delete(getFileSystem(), basePath, paths)); + } + + @Test + public void testDeletePathsNotExists() throws Exception { + List paths = new ArrayList<>(); + Path pathNotExists = new Path(basePath, "not-exists"); + paths.add(pathNotExists); + // bulk delete call doesn't verify if a path exist or not before deleting. + assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths)); + } + + @Test + public void testDeletePathsDirectory() throws Exception { + List paths = new ArrayList<>(); + Path dirPath = new Path(basePath, "dir"); + paths.add(dirPath); + Path filePath = new Path(dirPath, "file"); + paths.add(filePath); + pageSizePreconditionForTest(paths.size()); + fs.mkdirs(dirPath); + touch(fs, filePath); + // Outcome is undefined. But call shouldn't fail. + assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths)); + } + + @Test + public void testBulkDeleteParentDirectoryWithDirectories() throws Exception { + List paths = new ArrayList<>(); + Path dirPath = new Path(basePath, "dir"); + fs.mkdirs(dirPath); + Path subDir = new Path(dirPath, "subdir"); + fs.mkdirs(subDir); + // adding parent directory to the list of paths. + paths.add(dirPath); + List> entries = bulkDelete_delete(getFileSystem(), basePath, paths); + Assertions.assertThat(entries) + .describedAs("Parent non empty directory should not be deleted") + .hasSize(1); + // During the bulk delete operation, the non-empty directories are not deleted in default implementation. 
+ assertIsDirectory(dirPath); + } + + @Test + public void testBulkDeleteParentDirectoryWithFiles() throws Exception { + List paths = new ArrayList<>(); + Path dirPath = new Path(basePath, "dir"); + fs.mkdirs(dirPath); + Path file = new Path(dirPath, "file"); + touch(fs, file); + // adding parent directory to the list of paths. + paths.add(dirPath); + List> entries = bulkDelete_delete(getFileSystem(), basePath, paths); + Assertions.assertThat(entries) + .describedAs("Parent non empty directory should not be deleted") + .hasSize(1); + // During the bulk delete operation, the non-empty directories are not deleted in default implementation. + assertIsDirectory(dirPath); + } + + + @Test + public void testDeleteEmptyDirectory() throws Exception { + List paths = new ArrayList<>(); + Path emptyDirPath = new Path(basePath, "empty-dir"); + fs.mkdirs(emptyDirPath); + paths.add(emptyDirPath); + // Should pass as empty directory. + assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths)); + } + + @Test + public void testDeleteEmptyList() throws Exception { + List paths = new ArrayList<>(); + // Empty list should pass. + assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths)); + } + + @Test + public void testDeleteSamePathsMoreThanOnce() throws Exception { + List paths = new ArrayList<>(); + Path path = new Path(basePath, "file"); + paths.add(path); + paths.add(path); + Path another = new Path(basePath, "another-file"); + paths.add(another); + pageSizePreconditionForTest(paths.size()); + touch(fs, path); + touch(fs, another); + assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths)); + } + + /** + * Skip test if paths size is greater than page size. + */ + protected void pageSizePreconditionForTest(int size) { + if (size > pageSize) { + skip("Test requires paths size less than or equal to page size: " + pageSize); + } + } + + /** + * This test validates that files to be deleted don't have + * to be direct children of the base path. + */ + @Test + public void testDeepDirectoryFilesDelete() throws Exception { + List paths = new ArrayList<>(); + Path dir1 = new Path(basePath, "dir1"); + Path dir2 = new Path(dir1, "dir2"); + Path dir3 = new Path(dir2, "dir3"); + fs.mkdirs(dir3); + Path file1 = new Path(dir3, "file1"); + touch(fs, file1); + paths.add(file1); + assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths)); + } + + + @Test + public void testChildPaths() throws Exception { + List paths = new ArrayList<>(); + Path dirPath = new Path(basePath, "dir"); + fs.mkdirs(dirPath); + paths.add(dirPath); + Path filePath = new Path(dirPath, "file"); + touch(fs, filePath); + paths.add(filePath); + pageSizePreconditionForTest(paths.size()); + // Should pass as both paths are under the base path. + assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths)); + } + + + /** + * Assert on returned entries after bulk delete operation. + * Entries should be empty after successful delete. + */ + public static void assertSuccessfulBulkDelete(List> entries) { + Assertions.assertThat(entries) + .describedAs("Bulk delete failed, " + + "return entries should be empty after successful delete") + .isEmpty(); + } + + /** + * Create a list of paths with the given count + * under the given base path. 
+ */ + private List createListOfPaths(int count, Path basePath) { + List paths = new ArrayList<>(); + for (int i = 0; i < count; i++) { + Path path = new Path(basePath, "file-" + i); + paths.add(path); + } + return paths; + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractBulkDelete.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractBulkDelete.java new file mode 100644 index 0000000000000..f1bd641806f42 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/localfs/TestLocalFSContractBulkDelete.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.localfs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractBulkDeleteTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Bulk delete contract tests for the local filesystem. + */ +public class TestLocalFSContractBulkDelete extends AbstractContractBulkDeleteTest { + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new LocalFSContract(conf); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/rawlocal/TestRawLocalContractBulkDelete.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/rawlocal/TestRawLocalContractBulkDelete.java new file mode 100644 index 0000000000000..46d98249ab327 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/rawlocal/TestRawLocalContractBulkDelete.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.contract.rawlocal; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractBulkDeleteTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Bulk delete contract tests for the raw local filesystem. + */ +public class TestRawLocalContractBulkDelete extends AbstractContractBulkDeleteTest { + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new RawlocalFSContract(conf); + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestHDFSContractBulkDelete.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestHDFSContractBulkDelete.java new file mode 100644 index 0000000000000..3a851b6ff1c37 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestHDFSContractBulkDelete.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.hdfs; + +import java.io.IOException; + +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractBulkDeleteTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +/** + * Bulk delete contract tests for the HDFS filesystem. + */ +public class TestHDFSContractBulkDelete extends AbstractContractBulkDeleteTest { + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new HDFSContract(conf); + } + + @BeforeClass + public static void createCluster() throws IOException { + HDFSContract.createCluster(); + } + + @AfterClass + public static void teardownCluster() throws IOException { + HDFSContract.destroyCluster(); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 67df37e5ebfcd..185389739cbad 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1641,4 +1641,16 @@ private Constants() { */ public static final String AWS_S3_ACCESS_GRANTS_FALLBACK_TO_IAM_ENABLED = "fs.s3a.access.grants.fallback.to.iam"; + /** + * Default value for {@link #S3A_IO_RATE_LIMIT}. + * Value: {@value}. + * 0 means no rate limiting. + */ + public static final int DEFAULT_S3A_IO_RATE_LIMIT = 0; + + /** + * Config to set the rate limit for S3A IO operations. + * Value: {@value}. 
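+   * <p>
+   * An illustrative configuration snippet; the capacity value here is an
+   * arbitrary example, and a value of 0 (the default) disables rate limiting:
+   * <pre>{@code
+   * conf.setInt("fs.s3a.io.rate.limit", 1000);
+   * }</pre>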
+ */ + public static final String S3A_IO_RATE_LIMIT = "fs.s3a.io.rate.limit"; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 0e2ae0f74dd0a..d04ca70a68ded 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -81,7 +81,6 @@ import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import software.amazon.awssdk.services.s3.model.PutObjectResponse; -import software.amazon.awssdk.services.s3.model.S3Error; import software.amazon.awssdk.services.s3.model.S3Object; import software.amazon.awssdk.services.s3.model.StorageClass; import software.amazon.awssdk.services.s3.model.UploadPartRequest; @@ -103,6 +102,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BulkDelete; import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; @@ -120,7 +120,8 @@ import org.apache.hadoop.fs.s3a.commit.magic.InMemoryMagicCommitTracker; import org.apache.hadoop.fs.s3a.impl.AWSCannedACL; import org.apache.hadoop.fs.s3a.impl.AWSHeaders; -import org.apache.hadoop.fs.s3a.impl.BulkDeleteRetryHandler; +import org.apache.hadoop.fs.s3a.impl.BulkDeleteOperation; +import org.apache.hadoop.fs.s3a.impl.BulkDeleteOperationCallbacksImpl; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; import org.apache.hadoop.fs.s3a.impl.ConfigurationHelper; import org.apache.hadoop.fs.s3a.impl.ContextAccessors; @@ -141,9 +142,11 @@ import org.apache.hadoop.fs.s3a.impl.RenameOperation; import org.apache.hadoop.fs.s3a.impl.RequestFactoryImpl; import org.apache.hadoop.fs.s3a.impl.S3AMultipartUploaderBuilder; +import org.apache.hadoop.fs.s3a.impl.S3AStoreBuilder; import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum; import org.apache.hadoop.fs.s3a.impl.StoreContext; import org.apache.hadoop.fs.s3a.impl.StoreContextBuilder; +import org.apache.hadoop.fs.s3a.impl.StoreContextFactory; import org.apache.hadoop.fs.s3a.prefetch.S3APrefetchingInputStream; import org.apache.hadoop.fs.s3a.tools.MarkerToolOperations; import org.apache.hadoop.fs.s3a.tools.MarkerToolOperationsImpl; @@ -162,10 +165,6 @@ import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.security.token.DelegationTokenIssuer; import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.util.DurationInfo; -import org.apache.hadoop.util.LambdaUtils; -import org.apache.hadoop.util.Lists; -import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -203,10 +202,15 @@ import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.fs.store.EtagChecksum; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.BlockingThreadPoolExecutorService; import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.BlockingThreadPoolExecutorService; +import org.apache.hadoop.util.DurationInfo; +import org.apache.hadoop.util.LambdaUtils; +import 
org.apache.hadoop.util.Lists; +import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.RateLimitingFactory; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.SemaphoredDelegatingExecutor; import org.apache.hadoop.util.concurrent.HadoopExecutors; @@ -244,7 +248,6 @@ import static org.apache.hadoop.fs.s3a.impl.InternalConstants.ARN_BUCKET_OPTION; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.CSE_PADDING_LENGTH; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DEFAULT_UPLOAD_PART_COUNT_LIMIT; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DELETE_CONSIDERED_IDEMPOTENT; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_403_FORBIDDEN; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.UPLOAD_PART_COUNT_LIMIT; @@ -258,11 +261,11 @@ import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_LIST_REQUEST; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.pairedTrackerFactory; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration; -import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfInvocation; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfOperation; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfSupplier; import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; import static org.apache.hadoop.util.Preconditions.checkArgument; +import static org.apache.hadoop.util.RateLimitingFactory.unlimitedRate; import static org.apache.hadoop.util.functional.RemoteIterators.foreach; import static org.apache.hadoop.util.functional.RemoteIterators.typeCastingRemoteIterator; @@ -283,7 +286,8 @@ @InterfaceStability.Evolving public class S3AFileSystem extends FileSystem implements StreamCapabilities, AWSPolicyProvider, DelegationTokenProvider, IOStatisticsSource, - AuditSpanSource, ActiveThreadSpanSource { + AuditSpanSource, ActiveThreadSpanSource, + StoreContextFactory { /** * Default blocksize as used in blocksize and FS status queries. @@ -296,6 +300,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private String username; + /** + * Store back end. + */ + private S3AStore store; + private S3Client s3Client; /** Async client is used for transfer manager. */ @@ -680,9 +689,6 @@ public void initialize(URI name, Configuration originalConf) // the encryption algorithms) bindAWSClient(name, delegationTokensEnabled); - // This initiates a probe against S3 for the bucket existing. 
- doBucketProbing(); - inputPolicy = S3AInputPolicy.getPolicy( conf.getTrimmed(INPUT_FADVISE, Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_DEFAULT), @@ -729,9 +735,6 @@ public void initialize(URI name, Configuration originalConf) directoryPolicy = DirectoryPolicyImpl.getDirectoryPolicy(conf, this::allowAuthoritative); LOG.debug("Directory marker retention policy is {}", directoryPolicy); - - initMultipartUploads(conf); - pageSize = intOption(getConf(), BULK_DELETE_PAGE_SIZE, BULK_DELETE_PAGE_SIZE_DEFAULT, 0); checkArgument(pageSize <= InternalConstants.MAX_ENTRIES_TO_DELETE, @@ -756,6 +759,26 @@ public void initialize(URI name, Configuration originalConf) OPTIMIZED_COPY_FROM_LOCAL_DEFAULT); LOG.debug("Using optimized copyFromLocal implementation: {}", optimizedCopyFromLocal); s3AccessGrantsEnabled = conf.getBoolean(AWS_S3_ACCESS_GRANTS_ENABLED, false); + + int rateLimitCapacity = intOption(conf, S3A_IO_RATE_LIMIT, DEFAULT_S3A_IO_RATE_LIMIT, 0); + // now create the store + store = new S3AStoreBuilder() + .withS3Client(s3Client) + .withDurationTrackerFactory(getDurationTrackerFactory()) + .withStoreContextFactory(this) + .withAuditSpanSource(getAuditManager()) + .withInstrumentation(getInstrumentation()) + .withStatisticsContext(statisticsContext) + .withStorageStatistics(getStorageStatistics()) + .withReadRateLimiter(unlimitedRate()) + .withWriteRateLimiter(RateLimitingFactory.create(rateLimitCapacity)) + .build(); + + // The filesystem is now ready to perform operations against + // S3 + // This initiates a probe against S3 for the bucket existing. + doBucketProbing(); + initMultipartUploads(conf); } catch (SdkException e) { // amazon client exception: stop all services then throw the translation cleanupWithLogger(LOG, span); @@ -1417,6 +1440,11 @@ public S3Client getAmazonS3Client(String reason) { return s3Client; } + @Override + public S3AStore getStore() { + return store; + } + /** * S3AInternals method. * {@inheritDoc}. @@ -3064,29 +3092,10 @@ public void incrementWriteOperations() { @Retries.RetryRaw protected void deleteObject(String key) throws SdkException, IOException { - blockRootDelete(key); incrementWriteOperations(); - try (DurationInfo ignored = - new DurationInfo(LOG, false, - "deleting %s", key)) { - invoker.retryUntranslated(String.format("Delete %s:/%s", bucket, key), - DELETE_CONSIDERED_IDEMPOTENT, - () -> { - incrementStatistic(OBJECT_DELETE_OBJECTS); - trackDurationOfInvocation(getDurationTrackerFactory(), - OBJECT_DELETE_REQUEST.getSymbol(), - () -> s3Client.deleteObject(getRequestFactory() - .newDeleteObjectRequestBuilder(key) - .build())); - return null; - }); - } catch (AwsServiceException ase) { - // 404 errors get swallowed; this can be raised by - // third party stores (GCS). - if (!isObjectNotFound(ase)) { - throw ase; - } - } + store.deleteObject(getRequestFactory() + .newDeleteObjectRequestBuilder(key) + .build()); } /** @@ -3112,19 +3121,6 @@ void deleteObjectAtPath(Path f, deleteObject(key); } - /** - * Reject any request to delete an object where the key is root. - * @param key key to validate - * @throws InvalidRequestException if the request was rejected due to - * a mistaken attempt to delete the root directory. - */ - private void blockRootDelete(String key) throws InvalidRequestException { - if (key.isEmpty() || "/".equals(key)) { - throw new InvalidRequestException("Bucket "+ bucket - +" cannot be deleted"); - } - } - /** * Perform a bulk object delete operation against S3. 
* Increments the {@code OBJECT_DELETE_REQUESTS} and write @@ -3151,38 +3147,11 @@ private void blockRootDelete(String key) throws InvalidRequestException { private DeleteObjectsResponse deleteObjects(DeleteObjectsRequest deleteRequest) throws MultiObjectDeleteException, SdkException, IOException { incrementWriteOperations(); - BulkDeleteRetryHandler retryHandler = - new BulkDeleteRetryHandler(createStoreContext()); - int keyCount = deleteRequest.delete().objects().size(); - try (DurationInfo ignored = - new DurationInfo(LOG, false, "DELETE %d keys", - keyCount)) { - DeleteObjectsResponse response = - invoker.retryUntranslated("delete", DELETE_CONSIDERED_IDEMPOTENT, - (text, e, r, i) -> { - // handle the failure - retryHandler.bulkDeleteRetried(deleteRequest, e); - }, - // duration is tracked in the bulk delete counters - trackDurationOfOperation(getDurationTrackerFactory(), - OBJECT_BULK_DELETE_REQUEST.getSymbol(), () -> { - incrementStatistic(OBJECT_DELETE_OBJECTS, keyCount); - return s3Client.deleteObjects(deleteRequest); - })); - - if (!response.errors().isEmpty()) { - // one or more of the keys could not be deleted. - // log and then throw - List errors = response.errors(); - LOG.debug("Partial failure of delete, {} errors", errors.size()); - for (S3Error error : errors) { - LOG.debug("{}: \"{}\" - {}", error.key(), error.code(), error.message()); - } - throw new MultiObjectDeleteException(errors); - } - - return response; + DeleteObjectsResponse response = store.deleteObjects(deleteRequest).getValue(); + if (!response.errors().isEmpty()) { + throw new MultiObjectDeleteException(response.errors()); } + return response; } /** @@ -3391,20 +3360,16 @@ private void removeKeysS3( List keysToDelete, boolean deleteFakeDir) throws MultiObjectDeleteException, AwsServiceException, IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("Initiating delete operation for {} objects", - keysToDelete.size()); - for (ObjectIdentifier objectIdentifier : keysToDelete) { - LOG.debug(" \"{}\" {}", objectIdentifier.key(), - objectIdentifier.versionId() != null ? objectIdentifier.versionId() : ""); - } - } if (keysToDelete.isEmpty()) { // exit fast if there are no keys to delete return; } - for (ObjectIdentifier objectIdentifier : keysToDelete) { - blockRootDelete(objectIdentifier.key()); + if (keysToDelete.size() == 1) { + // single object is a single delete call. + // this is more informative in server logs and may be more efficient.. + deleteObject(keysToDelete.get(0).key()); + noteDeleted(1, deleteFakeDir); + return; } try { if (enableMultiObjectsDelete) { @@ -5481,7 +5446,6 @@ public boolean hasPathCapability(final Path path, final String capability) case STORE_CAPABILITY_DIRECTORY_MARKER_AWARE: return true; - // multi object delete flag case ENABLE_MULTI_DELETE: return enableMultiObjectsDelete; @@ -5667,6 +5631,7 @@ public S3AMultipartUploaderBuilder createMultipartUploader( * new store context instances should be created as appropriate. * @return the store context of this FS. */ + @Override @InterfaceAudience.Private public StoreContext createStoreContext() { return new StoreContextBuilder().setFsURI(getUri()) @@ -5768,4 +5733,36 @@ public boolean isMultipartUploadEnabled() { return isMultipartUploadEnabled; } + /** + * S3A implementation to create a bulk delete operation using + * which actual bulk delete calls can be made. + * @return an implementation of the bulk delete. 
+ */ + @Override + public BulkDelete createBulkDelete(final Path path) + throws IllegalArgumentException, IOException { + + final Path p = makeQualified(path); + final AuditSpanS3A span = createSpan("bulkdelete", p.toString(), null); + final int size = enableMultiObjectsDelete ? pageSize : 1; + return new BulkDeleteOperation( + createStoreContext(), + createBulkDeleteCallbacks(p, size, span), + p, + size, + span); + } + + /** + * Create the callbacks for the bulk delete operation. + * @param path path to delete. + * @param pageSize page size. + * @param span span for operations. + * @return an instance of the Bulk Delete callbacks. + */ + protected BulkDeleteOperation.BulkDeleteOperationCallbacks createBulkDeleteCallbacks( + Path path, int pageSize, AuditSpanS3A span) { + return new BulkDeleteOperationCallbacksImpl(store, pathToKey(path), pageSize, span); + } + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInternals.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInternals.java index b4116068565c2..3f3178c7e6e28 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInternals.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInternals.java @@ -33,6 +33,9 @@ /** * This is an unstable interface for access to S3A Internal state, S3 operations * and the S3 client connector itself. + *

+ * Note for maintainers: this is documented in {@code aws_sdk_upgrade.md}; update + * on changes. */ @InterfaceStability.Unstable @InterfaceAudience.LimitedPrivate("testing/diagnostics") @@ -52,13 +55,19 @@ public interface S3AInternals { * set to false. *

* Mocking note: this is the same S3Client as is used by the owning - * filesystem; changes to this client will be reflected by changes + * filesystem and S3AStore; changes to this client will be reflected by changes * in the behavior of that filesystem. * @param reason a justification for requesting access. * @return S3Client */ S3Client getAmazonS3Client(String reason); + /** + * Get the store for low-level operations. + * @return the store the S3A FS is working through. + */ + S3AStore getStore(); + /** * Get the region of a bucket. * Invoked from StoreContext; consider an entry point. @@ -131,4 +140,5 @@ public interface S3AInternals { @AuditEntryPoint @Retries.RetryTranslated long abortMultipartUploads(Path path) throws IOException; + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AStore.java new file mode 100644 index 0000000000000..68eacc35b1887 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AStore.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.io.IOException; +import java.time.Duration; +import java.util.Map; +import java.util.Optional; + +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectResponse; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectsResponse; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.s3a.api.RequestFactory; +import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; +import org.apache.hadoop.fs.s3a.impl.StoreContext; +import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext; +import org.apache.hadoop.fs.statistics.DurationTrackerFactory; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; + +/** + * Interface for the S3A Store; + * S3 client interactions should be via this; mocking + * is possible for unit tests. + */ +@InterfaceAudience.LimitedPrivate("Extensions") +@InterfaceStability.Unstable +public interface S3AStore extends IOStatisticsSource { + + /** + * Acquire write capacity for operations. + * This should be done within retry loops. + * @param capacity capacity to acquire. + * @return time spent waiting for output. + */ + Duration acquireWriteCapacity(int capacity); + + /** + * Acquire read capacity for operations. + * This should be done within retry loops. + * @param capacity capacity to acquire. 
+ * @return time spent waiting for output. + */ + Duration acquireReadCapacity(int capacity); + + StoreContext getStoreContext(); + + DurationTrackerFactory getDurationTrackerFactory(); + + S3AStatisticsContext getStatisticsContext(); + + RequestFactory getRequestFactory(); + + /** + * Perform a bulk object delete operation against S3. + * Increments the {@code OBJECT_DELETE_REQUESTS} and write + * operation statistics + *

+ * {@code OBJECT_DELETE_OBJECTS} is updated with the actual number + * of objects deleted in the request. + *

+   * Retry policy: retry untranslated; delete considered idempotent.
+   * If the request is throttled, this is logged in the throttle statistics,
+   * with the counter set to the number of keys, rather than the number
+   * of invocations of the delete operation.
+   * This is because S3 considers each key as one mutating operation on
+   * the store when updating its load counters on a specific partition
+   * of an S3 bucket.
+   * If only the request was measured, this operation would under-report.
+   * A write capacity will be requested proportional to the number of keys
+   * present in the request and will be re-requested during retries such that
+   * retries throttle better. If the request is throttled, the time spent is
+   * recorded in a duration IOStat named {@code STORE_IO_RATE_LIMITED_DURATION}.
+   * @param deleteRequest keys to delete on the s3-backend
+   * @return the AWS response
+   * @throws MultiObjectDeleteException one or more of the keys could not
+   * be deleted.
+   * @throws SdkException amazon-layer failure.
+   * @throws IOException IO problems.
+   */
+  @Retries.RetryRaw
+  Map.Entry<Duration, DeleteObjectsResponse> deleteObjects(DeleteObjectsRequest deleteRequest)
+      throws MultiObjectDeleteException, SdkException, IOException;
+
+  /**
+   * Delete an object.
+   * Increments the {@code OBJECT_DELETE_REQUESTS} statistics.
+   *

+ * Retry policy: retry untranslated; delete considered idempotent. + * 404 errors other than bucket not found are swallowed; + * this can be raised by third party stores (GCS). + *

+   * A write capacity of 1 (as it is a single object delete) will be requested before
+   * the delete call and will be re-requested during retries such that
+   * retries throttle better. If the request is throttled, the time spent is
+   * recorded in a duration IOStat named {@code STORE_IO_RATE_LIMITED_DURATION}.
+   * If an exception is caught and swallowed, the response will be empty;
+   * otherwise it will be the response from the delete operation.
+   * @param request request to make
+   * @return the total duration and response.
+   * @throws SdkException problems working with S3
+   * @throws IllegalArgumentException if the request was rejected due to
+   * a mistaken attempt to delete the root directory.
+   */
+  @Retries.RetryRaw
+  Map.Entry<Duration, Optional<DeleteObjectResponse>> deleteObject(
+      DeleteObjectRequest request) throws SdkException;
+
+}
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
index ce3af3de803a4..7c4883c3d9967 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java
@@ -103,6 +103,10 @@ public enum Statistic {
       StoreStatisticNames.OP_ACCESS,
       "Calls of access()",
       TYPE_DURATION),
+  INVOCATION_BULK_DELETE(
+      StoreStatisticNames.OP_BULK_DELETE,
+      "Calls of bulk delete()",
+      TYPE_COUNTER),
   INVOCATION_COPY_FROM_LOCAL_FILE(
       StoreStatisticNames.OP_COPY_FROM_LOCAL_FILE,
       "Calls of copyFromLocalFile()",
@@ -539,6 +543,10 @@ public enum Statistic {
       "retried requests made of the remote store",
       TYPE_COUNTER),
+  STORE_IO_RATE_LIMITED(StoreStatisticNames.STORE_IO_RATE_LIMITED_DURATION,
+      "Duration of rate limited operations",
+      TYPE_DURATION),
+
   STORE_IO_THROTTLED(
       StoreStatisticNames.STORE_IO_THROTTLED,
       "Requests throttled and retried",
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteOperation.java
new file mode 100644
index 0000000000000..64bebd880cd6c
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteOperation.java
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; + +import org.apache.hadoop.fs.BulkDelete; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.Retries; +import org.apache.hadoop.fs.store.audit.AuditSpan; +import org.apache.hadoop.util.functional.Tuples; + +import static java.util.Collections.emptyList; +import static java.util.Objects.requireNonNull; +import static java.util.stream.Collectors.toList; +import static org.apache.hadoop.fs.BulkDeleteUtils.validatePathIsUnderParent; +import static org.apache.hadoop.util.Preconditions.checkArgument; + +/** + * S3A Implementation of the {@link BulkDelete} interface. + */ +public class BulkDeleteOperation extends AbstractStoreOperation implements BulkDelete { + + private final BulkDeleteOperationCallbacks callbacks; + + private final Path basePath; + + private final int pageSize; + + public BulkDeleteOperation( + final StoreContext storeContext, + final BulkDeleteOperationCallbacks callbacks, + final Path basePath, + final int pageSize, + final AuditSpan span) { + super(storeContext, span); + this.callbacks = requireNonNull(callbacks); + this.basePath = requireNonNull(basePath); + checkArgument(pageSize > 0, "Page size must be greater than 0"); + this.pageSize = pageSize; + } + + @Override + public int pageSize() { + return pageSize; + } + + @Override + public Path basePath() { + return basePath; + } + + /** + * {@inheritDoc} + */ + @Override + public List> bulkDelete(final Collection paths) + throws IOException, IllegalArgumentException { + requireNonNull(paths); + checkArgument(paths.size() <= pageSize, + "Number of paths (%d) is larger than the page size (%d)", paths.size(), pageSize); + final StoreContext context = getStoreContext(); + final List objects = paths.stream().map(p -> { + checkArgument(p.isAbsolute(), "Path %s is not absolute", p); + checkArgument(validatePathIsUnderParent(p, basePath), + "Path %s is not under the base path %s", p, basePath); + final String k = context.pathToKey(p); + return ObjectIdentifier.builder().key(k).build(); + }).collect(toList()); + + final List> errors = callbacks.bulkDelete(objects); + if (!errors.isEmpty()) { + + final List> outcomeElements = errors + .stream() + .map(error -> Tuples.pair( + context.keyToPath(error.getKey()), + error.getValue() + )) + .collect(toList()); + return outcomeElements; + } + return emptyList(); + } + + @Override + public void close() throws IOException { + + } + + /** + * Callbacks for the bulk delete operation. + */ + public interface BulkDeleteOperationCallbacks { + + /** + * Perform a bulk delete operation. + * @param keys key list + * @return paths which failed to delete (if any). + * @throws IOException IO Exception. 
+ * @throws IllegalArgumentException illegal arguments + */ + @Retries.RetryTranslated + List> bulkDelete(final List keys) + throws IOException, IllegalArgumentException; + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteOperationCallbacksImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteOperationCallbacksImpl.java new file mode 100644 index 0000000000000..2edcc3c7bbd3a --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/BulkDeleteOperationCallbacksImpl.java @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.AccessDeniedException; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import software.amazon.awssdk.services.s3.model.DeleteObjectsResponse; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.S3Error; + +import org.apache.hadoop.fs.s3a.Retries; +import org.apache.hadoop.fs.s3a.S3AStore; +import org.apache.hadoop.fs.store.audit.AuditSpan; +import org.apache.hadoop.util.functional.Tuples; + +import static java.util.Collections.emptyList; +import static java.util.Collections.singletonList; +import static org.apache.hadoop.fs.s3a.Invoker.once; +import static org.apache.hadoop.util.Preconditions.checkArgument; +import static org.apache.hadoop.util.functional.Tuples.pair; + +/** + * Callbacks for the bulk delete operation. + */ +public class BulkDeleteOperationCallbacksImpl implements + BulkDeleteOperation.BulkDeleteOperationCallbacks { + + /** + * Path for logging. + */ + private final String path; + + /** Page size for bulk delete. */ + private final int pageSize; + + /** span for operations. */ + private final AuditSpan span; + + /** + * Store. 
+ */ + private final S3AStore store; + + + public BulkDeleteOperationCallbacksImpl(final S3AStore store, + String path, int pageSize, AuditSpan span) { + this.span = span; + this.pageSize = pageSize; + this.path = path; + this.store = store; + } + + @Override + @Retries.RetryTranslated + public List> bulkDelete(final List keysToDelete) + throws IOException, IllegalArgumentException { + span.activate(); + final int size = keysToDelete.size(); + checkArgument(size <= pageSize, + "Too many paths to delete in one operation: %s", size); + if (size == 0) { + return emptyList(); + } + + if (size == 1) { + return deleteSingleObject(keysToDelete.get(0).key()); + } + + final DeleteObjectsResponse response = once("bulkDelete", path, () -> + store.deleteObjects(store.getRequestFactory() + .newBulkDeleteRequestBuilder(keysToDelete) + .build())).getValue(); + final List errors = response.errors(); + if (errors.isEmpty()) { + // all good. + return emptyList(); + } else { + return errors.stream() + .map(e -> pair(e.key(), e.toString())) + .collect(Collectors.toList()); + } + } + + /** + * Delete a single object. + * @param key key to delete + * @return list of keys which failed to delete: length 0 or 1. + * @throws IOException IO problem other than AccessDeniedException + */ + @Retries.RetryTranslated + private List> deleteSingleObject(final String key) throws IOException { + try { + once("bulkDelete", path, () -> + store.deleteObject(store.getRequestFactory() + .newDeleteObjectRequestBuilder(key) + .build())); + } catch (AccessDeniedException e) { + return singletonList(pair(key, e.toString())); + } + return emptyList(); + + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteException.java index 72ead1fb151fc..14ad559ead293 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MultiObjectDeleteException.java @@ -118,11 +118,7 @@ public IOException translateException(final String message) { String exitCode = ""; for (S3Error error : errors()) { String code = error.code(); - String item = String.format("%s: %s%s: %s%n", code, error.key(), - (error.versionId() != null - ? (" (" + error.versionId() + ")") - : ""), - error.message()); + String item = errorToString(error); LOG.info(item); result.append(item); if (exitCode == null || exitCode.isEmpty() || ACCESS_DENIED.equals(code)) { @@ -136,4 +132,18 @@ public IOException translateException(final String message) { return new AWSS3IOException(result.toString(), this); } } + + /** + * Convert an error to a string. + * @param error error from a delete request + * @return string value + */ + public static String errorToString(final S3Error error) { + String code = error.code(); + return String.format("%s: %s%s: %s%n", code, error.key(), + (error.versionId() != null + ? 
(" (" + error.versionId() + ")") + : ""), + error.message()); + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreBuilder.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreBuilder.java new file mode 100644 index 0000000000000..c1a6fcffab487 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreBuilder.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import software.amazon.awssdk.services.s3.S3Client; + +import org.apache.hadoop.fs.s3a.S3AInstrumentation; +import org.apache.hadoop.fs.s3a.S3AStorageStatistics; +import org.apache.hadoop.fs.s3a.S3AStore; +import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; +import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext; +import org.apache.hadoop.fs.statistics.DurationTrackerFactory; +import org.apache.hadoop.fs.store.audit.AuditSpanSource; +import org.apache.hadoop.util.RateLimiting; + +/** + * Builder for the S3AStore. 
+ */ +public class S3AStoreBuilder { + + private StoreContextFactory storeContextFactory; + + private S3Client s3Client; + + private DurationTrackerFactory durationTrackerFactory; + + private S3AInstrumentation instrumentation; + + private S3AStatisticsContext statisticsContext; + + private S3AStorageStatistics storageStatistics; + + private RateLimiting readRateLimiter; + + private RateLimiting writeRateLimiter; + + private AuditSpanSource auditSpanSource; + + public S3AStoreBuilder withStoreContextFactory( + final StoreContextFactory storeContextFactoryValue) { + this.storeContextFactory = storeContextFactoryValue; + return this; + } + + public S3AStoreBuilder withS3Client( + final S3Client s3ClientValue) { + this.s3Client = s3ClientValue; + return this; + } + + public S3AStoreBuilder withDurationTrackerFactory( + final DurationTrackerFactory durationTrackerFactoryValue) { + this.durationTrackerFactory = durationTrackerFactoryValue; + return this; + } + + public S3AStoreBuilder withInstrumentation( + final S3AInstrumentation instrumentationValue) { + this.instrumentation = instrumentationValue; + return this; + } + + public S3AStoreBuilder withStatisticsContext( + final S3AStatisticsContext statisticsContextValue) { + this.statisticsContext = statisticsContextValue; + return this; + } + + public S3AStoreBuilder withStorageStatistics( + final S3AStorageStatistics storageStatisticsValue) { + this.storageStatistics = storageStatisticsValue; + return this; + } + + public S3AStoreBuilder withReadRateLimiter( + final RateLimiting readRateLimiterValue) { + this.readRateLimiter = readRateLimiterValue; + return this; + } + + public S3AStoreBuilder withWriteRateLimiter( + final RateLimiting writeRateLimiterValue) { + this.writeRateLimiter = writeRateLimiterValue; + return this; + } + + public S3AStoreBuilder withAuditSpanSource( + final AuditSpanSource auditSpanSourceValue) { + this.auditSpanSource = auditSpanSourceValue; + return this; + } + + public S3AStore build() { + return new S3AStoreImpl(storeContextFactory, s3Client, durationTrackerFactory, instrumentation, + statisticsContext, storageStatistics, readRateLimiter, writeRateLimiter, auditSpanSource); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreImpl.java new file mode 100644 index 0000000000000..6bfe42767d8b1 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreImpl.java @@ -0,0 +1,400 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.time.Duration; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import javax.annotation.Nullable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.awscore.exception.AwsServiceException; +import software.amazon.awssdk.core.exception.SdkException; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.DeleteObjectRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectResponse; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.DeleteObjectsResponse; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.S3Error; + +import org.apache.hadoop.fs.s3a.Invoker; +import org.apache.hadoop.fs.s3a.Retries; +import org.apache.hadoop.fs.s3a.S3AInstrumentation; +import org.apache.hadoop.fs.s3a.S3AStorageStatistics; +import org.apache.hadoop.fs.s3a.S3AStore; +import org.apache.hadoop.fs.s3a.Statistic; +import org.apache.hadoop.fs.s3a.api.RequestFactory; +import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; +import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext; +import org.apache.hadoop.fs.statistics.DurationTrackerFactory; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.store.audit.AuditSpanSource; +import org.apache.hadoop.util.DurationInfo; +import org.apache.hadoop.util.RateLimiting; +import org.apache.hadoop.util.functional.Tuples; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.fs.s3a.S3AUtils.isThrottleException; +import static org.apache.hadoop.fs.s3a.Statistic.*; +import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isObjectNotFound; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DELETE_CONSIDERED_IDEMPOTENT; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.STORE_IO_RATE_LIMITED_DURATION; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfOperation; +import static org.apache.hadoop.util.Preconditions.checkArgument; + +/** + * Store Layer. + * This is where lower level storage operations are intended + * to move. + */ +public class S3AStoreImpl implements S3AStore { + + private static final Logger LOG = LoggerFactory.getLogger(S3AStoreImpl.class); + + /** Factory to create store contexts. */ + private final StoreContextFactory storeContextFactory; + + /** The S3 client used to communicate with S3 bucket. */ + private final S3Client s3Client; + + /** The S3 bucket to communicate with. */ + private final String bucket; + + /** Request factory for creating requests. */ + private final RequestFactory requestFactory; + + /** Async client is used for transfer manager. */ + private S3AsyncClient s3AsyncClient; + + /** Duration tracker factory. */ + private final DurationTrackerFactory durationTrackerFactory; + + /** The core instrumentation. */ + private final S3AInstrumentation instrumentation; + + /** Accessors to statistics for this FS. */ + private final S3AStatisticsContext statisticsContext; + + /** Storage Statistics Bonded to the instrumentation. */ + private final S3AStorageStatistics storageStatistics; + + /** Rate limiter for read operations. 
*/ + private final RateLimiting readRateLimiter; + + /** Rate limiter for write operations. */ + private final RateLimiting writeRateLimiter; + + /** Store context. */ + private final StoreContext storeContext; + + /** Invoker for retry operations. */ + private final Invoker invoker; + + /** Audit span source. */ + private final AuditSpanSource auditSpanSource; + + /** Constructor to create S3A store. */ + S3AStoreImpl(StoreContextFactory storeContextFactory, + S3Client s3Client, + DurationTrackerFactory durationTrackerFactory, + S3AInstrumentation instrumentation, + S3AStatisticsContext statisticsContext, + S3AStorageStatistics storageStatistics, + RateLimiting readRateLimiter, + RateLimiting writeRateLimiter, + AuditSpanSource auditSpanSource) { + this.storeContextFactory = requireNonNull(storeContextFactory); + this.s3Client = requireNonNull(s3Client); + this.durationTrackerFactory = requireNonNull(durationTrackerFactory); + this.instrumentation = requireNonNull(instrumentation); + this.statisticsContext = requireNonNull(statisticsContext); + this.storageStatistics = requireNonNull(storageStatistics); + this.readRateLimiter = requireNonNull(readRateLimiter); + this.writeRateLimiter = requireNonNull(writeRateLimiter); + this.auditSpanSource = requireNonNull(auditSpanSource); + this.storeContext = requireNonNull(storeContextFactory.createStoreContext()); + this.invoker = storeContext.getInvoker(); + this.bucket = storeContext.getBucket(); + this.requestFactory = storeContext.getRequestFactory(); + } + + /** Acquire write capacity for rate limiting {@inheritDoc}. */ + @Override + public Duration acquireWriteCapacity(final int capacity) { + return writeRateLimiter.acquire(capacity); + } + + /** Acquire read capacity for rate limiting {@inheritDoc}. */ + @Override + public Duration acquireReadCapacity(final int capacity) { + return readRateLimiter.acquire(capacity); + + } + + /** + * Create a new store context. + * @return a new store context. + */ + private StoreContext createStoreContext() { + return storeContextFactory.createStoreContext(); + } + + @Override + public StoreContext getStoreContext() { + return storeContext; + } + + private S3Client getS3Client() { + return s3Client; + } + + @Override + public DurationTrackerFactory getDurationTrackerFactory() { + return durationTrackerFactory; + } + + private S3AInstrumentation getInstrumentation() { + return instrumentation; + } + + @Override + public S3AStatisticsContext getStatisticsContext() { + return statisticsContext; + } + + private S3AStorageStatistics getStorageStatistics() { + return storageStatistics; + } + + @Override + public RequestFactory getRequestFactory() { + return requestFactory; + } + + /** + * Increment a statistic by 1. + * This increments both the instrumentation and storage statistics. + * @param statistic The operation to increment + */ + protected void incrementStatistic(Statistic statistic) { + incrementStatistic(statistic, 1); + } + + /** + * Increment a statistic by a specific value. + * This increments both the instrumentation and storage statistics. + * @param statistic The operation to increment + * @param count the count to increment + */ + protected void incrementStatistic(Statistic statistic, long count) { + statisticsContext.incrementCounter(statistic, count); + } + + /** + * Decrement a gauge by a specific value. 
+ * @param statistic The operation to decrement + * @param count the count to decrement + */ + protected void decrementGauge(Statistic statistic, long count) { + statisticsContext.decrementGauge(statistic, count); + } + + /** + * Increment a gauge by a specific value. + * @param statistic The operation to increment + * @param count the count to increment + */ + protected void incrementGauge(Statistic statistic, long count) { + statisticsContext.incrementGauge(statistic, count); + } + + /** + * Callback when an operation was retried. + * Increments the statistics of ignored errors or throttled requests, + * depending up on the exception class. + * @param ex exception. + */ + public void operationRetried(Exception ex) { + if (isThrottleException(ex)) { + LOG.debug("Request throttled"); + incrementStatistic(STORE_IO_THROTTLED); + statisticsContext.addValueToQuantiles(STORE_IO_THROTTLE_RATE, 1); + } else { + incrementStatistic(STORE_IO_RETRY); + incrementStatistic(IGNORED_ERRORS); + } + } + + /** + * Callback from {@link Invoker} when an operation is retried. + * @param text text of the operation + * @param ex exception + * @param retries number of retries + * @param idempotent is the method idempotent + */ + public void operationRetried(String text, Exception ex, int retries, boolean idempotent) { + operationRetried(ex); + } + + /** + * Get the instrumentation's IOStatistics. + * @return statistics + */ + @Override + public IOStatistics getIOStatistics() { + return instrumentation.getIOStatistics(); + } + + /** + * Start an operation; this informs the audit service of the event + * and then sets it as the active span. + * @param operation operation name. + * @param path1 first path of operation + * @param path2 second path of operation + * @return a span for the audit + * @throws IOException failure + */ + public AuditSpanS3A createSpan(String operation, @Nullable String path1, @Nullable String path2) + throws IOException { + + return auditSpanSource.createSpan(operation, path1, path2); + } + + /** + * Reject any request to delete an object where the key is root. + * @param key key to validate + * @throws IllegalArgumentException if the request was rejected due to + * a mistaken attempt to delete the root directory. + */ + private void blockRootDelete(String key) throws IllegalArgumentException { + checkArgument(!key.isEmpty() && !"/".equals(key), "Bucket %s cannot be deleted", bucket); + } + + /** + * {@inheritDoc}. + */ + @Override + @Retries.RetryRaw + public Map.Entry deleteObjects( + final DeleteObjectsRequest deleteRequest) + throws SdkException { + + DeleteObjectsResponse response; + BulkDeleteRetryHandler retryHandler = new BulkDeleteRetryHandler(createStoreContext()); + + final List keysToDelete = deleteRequest.delete().objects(); + int keyCount = keysToDelete.size(); + if (LOG.isDebugEnabled()) { + LOG.debug("Initiating delete operation for {} objects", keysToDelete.size()); + keysToDelete.stream().forEach(objectIdentifier -> { + LOG.debug(" \"{}\" {}", objectIdentifier.key(), + objectIdentifier.versionId() != null ? 
objectIdentifier.versionId() : ""); + }); + } + // block root calls + keysToDelete.stream().map(ObjectIdentifier::key).forEach(this::blockRootDelete); + + try (DurationInfo d = new DurationInfo(LOG, false, "DELETE %d keys", keyCount)) { + response = + invoker.retryUntranslated("delete", + DELETE_CONSIDERED_IDEMPOTENT, (text, e, r, i) -> { + // handle the failure + retryHandler.bulkDeleteRetried(deleteRequest, e); + }, + // duration is tracked in the bulk delete counters + trackDurationOfOperation(getDurationTrackerFactory(), + OBJECT_BULK_DELETE_REQUEST.getSymbol(), () -> { + // acquire the write capacity for the number of keys to delete and record the duration. + Duration durationToAcquireWriteCapacity = acquireWriteCapacity(keyCount); + instrumentation.recordDuration(STORE_IO_RATE_LIMITED, + true, + durationToAcquireWriteCapacity); + incrementStatistic(OBJECT_DELETE_OBJECTS, keyCount); + return s3Client.deleteObjects(deleteRequest); + })); + if (!response.errors().isEmpty()) { + // one or more of the keys could not be deleted. + // log and then throw + List errors = response.errors(); + if (LOG.isDebugEnabled()) { + LOG.debug("Partial failure of delete, {} errors", errors.size()); + for (S3Error error : errors) { + LOG.debug("{}: \"{}\" - {}", error.key(), error.code(), error.message()); + } + } + } + d.close(); + return Tuples.pair(d.asDuration(), response); + + } catch (IOException e) { + // this is part of the retry signature, nothing else. + // convert to unchecked. + throw new UncheckedIOException(e); + } + } + + /** + * {@inheritDoc}. + */ + @Override + @Retries.RetryRaw + public Map.Entry> deleteObject( + final DeleteObjectRequest request) + throws SdkException { + + String key = request.key(); + blockRootDelete(key); + DurationInfo d = new DurationInfo(LOG, false, "deleting %s", key); + try { + DeleteObjectResponse response = + invoker.retryUntranslated(String.format("Delete %s:/%s", bucket, key), + DELETE_CONSIDERED_IDEMPOTENT, + trackDurationOfOperation(getDurationTrackerFactory(), + OBJECT_DELETE_REQUEST.getSymbol(), () -> { + incrementStatistic(OBJECT_DELETE_OBJECTS); + // We try to acquire write capacity just before delete call. + Duration durationToAcquireWriteCapacity = acquireWriteCapacity(1); + instrumentation.recordDuration(STORE_IO_RATE_LIMITED, + true, durationToAcquireWriteCapacity); + return s3Client.deleteObject(request); + })); + d.close(); + return Tuples.pair(d.asDuration(), Optional.of(response)); + } catch (AwsServiceException ase) { + // 404 errors get swallowed; this can be raised by + // third party stores (GCS). + if (!isObjectNotFound(ase)) { + throw ase; + } + d.close(); + return Tuples.pair(d.asDuration(), Optional.empty()); + } catch (IOException e) { + // this is part of the retry signature, nothing else. + // convert to unchecked. + throw new UncheckedIOException(e); + } + } + +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContextFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContextFactory.java new file mode 100644 index 0000000000000..9d8d708b2bcc7 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContextFactory.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Factory for creating store contexts. + */ +@InterfaceAudience.Private +public interface StoreContextFactory { + + /** + * Build an immutable store context, including picking + * up the current audit span. + * @return the store context. + */ + StoreContext createStoreContext(); +} diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md index e2c095e5317a4..abd58bffc6201 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md @@ -324,6 +324,7 @@ They have also been updated to return V2 SDK classes. public interface S3AInternals { S3Client getAmazonS3V2Client(String reason); + S3AStore getStore(); @Retries.RetryTranslated @AuditEntryPoint String getBucketLocation() throws IOException; diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md index 4bb824356e9d4..954823f2172ea 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md @@ -59,7 +59,7 @@ To make most efficient use of S3, care is needed. The S3A FileSystem supports implementation of vectored read api using which a client can provide a list of file ranges to read returning a future read object associated with each range. For full api specification please see -[FSDataInputStream](../../hadoop-common-project/hadoop-common/filesystem/fsdatainputstream.html). +[FSDataInputStream](../../../../../../hadoop-common-project/hadoop-common/target/site/filesystem/fsdatainputstream.html). The following properties can be configured to optimise vectored reads based on the client requirements. @@ -94,6 +94,86 @@ on the client requirements. ``` +## Improving delete performance through bulkdelete API. + +For bulk delete API spec refer to File System specification. [BulkDelete](../../../../../../hadoop-common-project/hadoop-common/target/site/filesystem/bulkdelete.html) + +The S3A client exports this API. + +### S3A Implementation of Bulk Delete. +If multi-object delete is enabled (`fs.s3a.multiobjectdelete.enable` = true), as +it is by default, then the page size is limited to that defined in +`fs.s3a.bulk.delete.page.size`, which MUST be less than or equal to 1000. +* The entire list of paths to delete is aggregated into a single bulk delete request, + issued to the store. +* Provided the caller has the correct permissions, every entry in the list + will, if the path references an object, cause that object to be deleted. 
+* If the path does not reference an object, the path will not be deleted:
+  "This is for deleting objects, not directories"
+* No probes for the existence of parent directories will take place; no
+  parent directory markers will be created.
+  "If you need parent directories, call mkdir() yourself"
+* The list of failed keys in the `DeleteObjectsResponse` response
+  is converted into paths and returned along with the error messages.
+* Network and other IO errors are raised as exceptions.
+
+If multi-object delete is disabled (or the list is of size 1):
+* A single `DELETE` call is issued.
+* Any `AccessDeniedException` raised is converted to a result in the error list.
+* Any 404 response from a (non-AWS) store will be ignored.
+* Network and other IO errors are raised as exceptions.
+
+Because there are no probes to ensure the call does not overwrite a directory,
+or to see if a parent directory marker needs to be created,
+this API is still faster than issuing a normal `FileSystem.delete(path)` call.
+
+That is: all the overhead normally incurred to preserve the POSIX filesystem model is omitted.
+
+
+### S3 Scalability and Performance
+
+Every entry in a bulk delete request counts as one write operation
+against AWS S3 storage.
+With the default write rate under a prefix on AWS S3 Standard storage
+restricted to 3,500 writes/second, it is very easy to overload
+the store by issuing a few bulk delete requests simultaneously.
+
+* If throttling is triggered then all clients interacting with
+  the store may observe performance issues.
+* The write quota applies even for paths which do not exist.
+* The S3A client *may* perform rate throttling as well as page size limiting.
+
+What does that mean? It means that attempting to issue multiple
+bulk delete calls in parallel can be counterproductive.
+
+When overloaded, the S3 store returns a 503 throttle response.
+This triggers back-off and a retry of the request.
+However, the repeated request will still include the same number of objects and
+*so generate the same load*.
+
+This can lead to a pathological situation where the repeated requests will
+never be satisfied because the request itself is sufficient to overload the store.
+See [HADOOP-16823. Large DeleteObject requests are their own Thundering Herd](https://issues.apache.org/jira/browse/HADOOP-16823)
+for an example of where this did actually surface in production.
+
+This is why the default page size of S3A clients is 250 paths, not the store limit of 1000 entries.
+It is also why the S3A delete/rename operations do not attempt massive parallel deletions;
+instead, bulk delete requests are queued for a single blocking thread to issue.
+Consider a similar design.
+
+
+When working with versioned S3 buckets, every path deleted will add a tombstone marker
+to the store at that location, even if there was no object at that path.
+While this has no negative performance impact on the bulk delete call,
+it will slow down list requests subsequently made against that path.
+That is: bulk delete requests of paths which do not exist will hurt future queries.
+
+Avoid this. Note also that TPC-DS benchmarks do not create the right load to make the
+performance problems observable, but they can surface in production.
+* Configure buckets to preserve tombstone markers for only a limited number of days.
+* Do not delete paths which you know reference nonexistent files or directories.
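+To make the page-size guidance concrete, here is a minimal, illustrative sketch of
+driving the API from application code: create the `BulkDelete` once, query its page
+size, and submit the paths in pages no larger than that size. It is not part of the
+patch itself; the helper class and method names are invented for the example, and all
+supplied paths are assumed to be absolute and under the base path.
+
+```java
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.fs.BulkDelete;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/** Illustrative only: delete files under a base path, one page at a time. */
+public final class BulkDeleteExample {
+
+  private BulkDeleteExample() {
+  }
+
+  /**
+   * Delete the given files, all of which must be under {@code base},
+   * never submitting more than one page of paths per request.
+   * @return the failures reported by the store as (path, error message) pairs.
+   */
+  public static List<Map.Entry<Path, String>> deleteInPages(
+      FileSystem fs, Path base, Collection<Path> files) throws IOException {
+    List<Map.Entry<Path, String>> failures = new ArrayList<>();
+    // createBulkDelete() comes from the BulkDeleteSource interface on FileSystem.
+    try (BulkDelete bulkDelete = fs.createBulkDelete(base)) {
+      // 1 if multi-object delete is disabled; otherwise the configured page size.
+      final int pageSize = bulkDelete.pageSize();
+      final List<Path> page = new ArrayList<>(pageSize);
+      for (Path path : files) {
+        page.add(path);
+        if (page.size() == pageSize) {
+          failures.addAll(bulkDelete.bulkDelete(page));
+          page.clear();
+        }
+      }
+      if (!page.isEmpty()) {
+        failures.addAll(bulkDelete.bulkDelete(page));
+      }
+    }
+    return failures;
+  }
+}
+```
+
+Because page-level failures are returned rather than thrown, the caller decides whether
+a non-empty result is an error; network and other IO failures are still raised as
+exceptions.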
+ ## Improving data input performance through fadvise The S3A Filesystem client supports the notion of input policies, similar diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractBulkDelete.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractBulkDelete.java new file mode 100644 index 0000000000000..71c3a30359e10 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractBulkDelete.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.s3a; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BulkDelete; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractContractBulkDeleteTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.S3ATestUtils; +import org.apache.hadoop.fs.statistics.MeanStatistic; + +import static java.util.stream.Collectors.toList; +import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; +import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; +import static org.apache.hadoop.fs.s3a.S3AUtils.propagateBucketOptions; +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupMeanStatistic; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.STORE_IO_RATE_LIMITED_DURATION; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.SUFFIX_MEAN; +import static org.apache.hadoop.io.wrappedio.WrappedIO.bulkDelete_delete; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Contract tests for bulk delete operation for S3A Implementation. + */ +@RunWith(Parameterized.class) +public class ITestS3AContractBulkDelete extends AbstractContractBulkDeleteTest { + + private static final Logger LOG = LoggerFactory.getLogger(ITestS3AContractBulkDelete.class); + + /** + * Delete Page size: {@value}. + * This is the default page size for bulk delete operation for this contract test. + * All the tests in this class should pass number of paths equal to or less than + * this page size during the bulk delete operation. 
+ */ + private static final int DELETE_PAGE_SIZE = 20; + + private final boolean enableMultiObjectDelete; + + @Parameterized.Parameters(name = "enableMultiObjectDelete = {0}") + public static Iterable enableMultiObjectDelete() { + return Arrays.asList(new Object[][]{ + {true}, + {false} + }); + } + + public ITestS3AContractBulkDelete(boolean enableMultiObjectDelete) { + this.enableMultiObjectDelete = enableMultiObjectDelete; + } + + @Override + protected Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + S3ATestUtils.disableFilesystemCaching(conf); + conf = propagateBucketOptions(conf, getTestBucketName(conf)); + if (enableMultiObjectDelete) { + // if multi-object delete is disabled, skip the test. + skipIfNotEnabled(conf, Constants.ENABLE_MULTI_DELETE, + "Bulk delete is explicitly disabled for this bucket"); + } + S3ATestUtils.removeBaseAndBucketOverrides(conf, + Constants.BULK_DELETE_PAGE_SIZE); + conf.setInt(Constants.BULK_DELETE_PAGE_SIZE, DELETE_PAGE_SIZE); + conf.setBoolean(Constants.ENABLE_MULTI_DELETE, enableMultiObjectDelete); + return conf; + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new S3AContract(createConfiguration()); + } + + @Override + protected int getExpectedPageSize() { + if (!enableMultiObjectDelete) { + // if multi-object delete is disabled, page size should be 1. + return 1; + } + return DELETE_PAGE_SIZE; + } + + @Override + public void validatePageSize() throws Exception { + Assertions.assertThat(pageSize) + .describedAs("Page size should match the configured page size") + .isEqualTo(getExpectedPageSize()); + } + + @Test + public void testBulkDeleteZeroPageSizePrecondition() throws Exception { + if (!enableMultiObjectDelete) { + // if multi-object delete is disabled, skip this test as + // page size is always 1. + skip("Multi-object delete is disabled"); + } + Configuration conf = getContract().getConf(); + conf.setInt(Constants.BULK_DELETE_PAGE_SIZE, 0); + Path testPath = path(getMethodName()); + try (S3AFileSystem fs = S3ATestUtils.createTestFileSystem(conf)) { + intercept(IllegalArgumentException.class, + () -> fs.createBulkDelete(testPath)); + } + } + + @Test + public void testPageSizeWhenMultiObjectsDisabled() throws Exception { + Configuration conf = getContract().getConf(); + conf.setBoolean(Constants.ENABLE_MULTI_DELETE, false); + Path testPath = path(getMethodName()); + try (S3AFileSystem fs = S3ATestUtils.createTestFileSystem(conf)) { + BulkDelete bulkDelete = fs.createBulkDelete(testPath); + Assertions.assertThat(bulkDelete.pageSize()) + .describedAs("Page size should be 1 when multi-object delete is disabled") + .isEqualTo(1); + } + } + + @Override + public void testDeletePathsDirectory() throws Exception { + List paths = new ArrayList<>(); + Path dirPath = new Path(basePath, "dir"); + fs.mkdirs(dirPath); + paths.add(dirPath); + Path filePath = new Path(dirPath, "file"); + touch(fs, filePath); + if (enableMultiObjectDelete) { + // Adding more paths only if multi-object delete is enabled. + paths.add(filePath); + } + assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths)); + // During the bulk delete operation, the directories are not deleted in S3A. 
+ assertIsDirectory(dirPath); + } + + @Test + public void testBulkDeleteParentDirectoryWithDirectories() throws Exception { + List paths = new ArrayList<>(); + Path dirPath = new Path(basePath, "dir"); + fs.mkdirs(dirPath); + Path subDir = new Path(dirPath, "subdir"); + fs.mkdirs(subDir); + // adding parent directory to the list of paths. + paths.add(dirPath); + assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths)); + // During the bulk delete operation, the directories are not deleted in S3A. + assertIsDirectory(dirPath); + assertIsDirectory(subDir); + } + + public void testBulkDeleteParentDirectoryWithFiles() throws Exception { + List paths = new ArrayList<>(); + Path dirPath = new Path(basePath, "dir"); + fs.mkdirs(dirPath); + Path file = new Path(dirPath, "file"); + touch(fs, file); + // adding parent directory to the list of paths. + paths.add(dirPath); + assertSuccessfulBulkDelete(bulkDelete_delete(getFileSystem(), basePath, paths)); + // During the bulk delete operation, + // the directories are not deleted in S3A. + assertIsDirectory(dirPath); + } + + + @Test + public void testRateLimiting() throws Exception { + if (!enableMultiObjectDelete) { + skip("Multi-object delete is disabled so hard to trigger rate limiting"); + } + Configuration conf = getContract().getConf(); + conf.setInt(Constants.S3A_IO_RATE_LIMIT, 5); + Path basePath = path(getMethodName()); + try (S3AFileSystem fs = S3ATestUtils.createTestFileSystem(conf)) { + createFiles(fs, basePath, 1, 20, 0); + FileStatus[] fileStatuses = fs.listStatus(basePath); + List paths = Arrays.stream(fileStatuses) + .map(FileStatus::getPath) + .collect(toList()); + pageSizePreconditionForTest(paths.size()); + BulkDelete bulkDelete = fs.createBulkDelete(basePath); + bulkDelete.bulkDelete(paths); + MeanStatistic meanStatisticBefore = lookupMeanStatistic(fs.getIOStatistics(), + STORE_IO_RATE_LIMITED_DURATION + SUFFIX_MEAN); + Assertions.assertThat(meanStatisticBefore.mean()) + .describedAs("Rate limiting should not have happened during first delete call") + .isEqualTo(0.0); + bulkDelete.bulkDelete(paths); + bulkDelete.bulkDelete(paths); + bulkDelete.bulkDelete(paths); + MeanStatistic meanStatisticAfter = lookupMeanStatistic(fs.getIOStatistics(), + STORE_IO_RATE_LIMITED_DURATION + SUFFIX_MEAN); + Assertions.assertThat(meanStatisticAfter.mean()) + .describedAs("Rate limiting should have happened during multiple delete calls") + .isGreaterThan(0.0); + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java index 734bcfd9c5d30..f43710cf25eb0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java @@ -35,8 +35,7 @@ /** - * Abstract base class for S3A unit tests using a mock S3 client and a null - * metadata store. + * Abstract base class for S3A unit tests using a mock S3 client. 
*/ public abstract class AbstractS3AMockTest { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java index a4162f212179b..28a443f04cda9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java @@ -61,9 +61,8 @@ public boolean deleteOnExit(Path f) throws IOException { // processDeleteOnExit. @Override protected boolean deleteWithoutCloseCheck(Path f, boolean recursive) throws IOException { - boolean result = super.deleteWithoutCloseCheck(f, recursive); deleteOnDnExitCount--; - return result; + return true; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java index a7ccc92e133c8..0676dd5b16ed8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java @@ -23,7 +23,11 @@ import java.io.IOException; import java.net.URI; import java.nio.file.AccessDeniedException; +import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import java.util.stream.IntStream; import software.amazon.awssdk.auth.credentials.AwsCredentials; @@ -35,9 +39,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BulkDelete; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; @@ -55,7 +60,10 @@ import org.apache.hadoop.fs.s3a.impl.InstantiationIOException; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool; import org.apache.hadoop.fs.s3a.statistics.CommitterStatistics; +import org.apache.hadoop.io.wrappedio.WrappedIO; +import static org.apache.hadoop.fs.contract.AbstractContractBulkDeleteTest.assertSuccessfulBulkDelete; +import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.Constants.S3EXPRESS_CREATE_SESSION; @@ -702,6 +710,122 @@ public void testPartialDeleteSingleDelete() throws Throwable { executePartialDelete(createAssumedRoleConfig(), true); } + @Test + public void testBulkDeleteOnReadOnlyAccess() throws Throwable { + describe("Bulk delete with part of the child tree read only"); + executeBulkDeleteOnReadOnlyFiles(createAssumedRoleConfig()); + } + + @Test + public void testBulkDeleteWithReadWriteAccess() throws Throwable { + describe("Bulk delete with read write access"); + executeBulkDeleteOnSomeReadOnlyFiles(createAssumedRoleConfig()); + } + + /** + * Execute bulk delete on read only files and some read write files. 
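+ * The assumed-role client should receive an access-denied entry for every path under
+ * the read-only directory, while the unrestricted filesystem can delete them all.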
+ */ + private void executeBulkDeleteOnReadOnlyFiles(Configuration assumedRoleConfig) throws Exception { + Path destDir = methodPath(); + Path readOnlyDir = new Path(destDir, "readonlyDir"); + + // the full FS + S3AFileSystem fs = getFileSystem(); + WrappedIO.bulkDelete_delete(fs, destDir, new ArrayList<>()); + + bindReadOnlyRolePolicy(assumedRoleConfig, readOnlyDir); + roleFS = (S3AFileSystem) destDir.getFileSystem(assumedRoleConfig); + int bulkDeletePageSize = WrappedIO.bulkDelete_PageSize(roleFS, destDir); + int range = bulkDeletePageSize == 1 ? bulkDeletePageSize : 10; + touchFiles(fs, readOnlyDir, range); + touchFiles(roleFS, destDir, range); + FileStatus[] fileStatuses = roleFS.listStatus(readOnlyDir); + List pathsToDelete = Arrays.stream(fileStatuses) + .map(FileStatus::getPath) + .collect(Collectors.toList()); + // bulk delete in the read only FS should fail. + BulkDelete bulkDelete = roleFS.createBulkDelete(readOnlyDir); + assertAccessDeniedForEachPath(bulkDelete.bulkDelete(pathsToDelete)); + BulkDelete bulkDelete2 = roleFS.createBulkDelete(destDir); + assertAccessDeniedForEachPath(bulkDelete2.bulkDelete(pathsToDelete)); + // delete the files in the original FS should succeed. + BulkDelete bulkDelete3 = fs.createBulkDelete(readOnlyDir); + assertSuccessfulBulkDelete(bulkDelete3.bulkDelete(pathsToDelete)); + FileStatus[] fileStatusesUnderDestDir = roleFS.listStatus(destDir); + List pathsToDeleteUnderDestDir = Arrays.stream(fileStatusesUnderDestDir) + .map(FileStatus::getPath) + .collect(Collectors.toList()); + BulkDelete bulkDelete4 = fs.createBulkDelete(destDir); + assertSuccessfulBulkDelete(bulkDelete4.bulkDelete(pathsToDeleteUnderDestDir)); + } + + /** + * Execute bulk delete on some read only files and some read write files. + */ + private void executeBulkDeleteOnSomeReadOnlyFiles(Configuration assumedRoleConfig) + throws IOException { + Path destDir = methodPath(); + Path readOnlyDir = new Path(destDir, "readonlyDir"); + bindReadOnlyRolePolicy(assumedRoleConfig, readOnlyDir); + roleFS = (S3AFileSystem) destDir.getFileSystem(assumedRoleConfig); + S3AFileSystem fs = getFileSystem(); + if (WrappedIO.bulkDelete_PageSize(fs, destDir) == 1) { + String msg = "Skipping as this test requires more than one path to be deleted in bulk"; + LOG.debug(msg); + skip(msg); + } + WrappedIO.bulkDelete_delete(fs, destDir, new ArrayList<>()); + // creating 5 files in the read only dir. + int readOnlyRange = 5; + int readWriteRange = 3; + touchFiles(fs, readOnlyDir, readOnlyRange); + // creating 3 files in the base destination dir. + touchFiles(roleFS, destDir, readWriteRange); + RemoteIterator locatedFileStatusRemoteIterator = roleFS.listFiles(destDir, true); + List pathsToDelete2 = new ArrayList<>(); + while (locatedFileStatusRemoteIterator.hasNext()) { + pathsToDelete2.add(locatedFileStatusRemoteIterator.next().getPath()); + } + Assertions.assertThat(pathsToDelete2.size()) + .describedAs("Number of paths to delete in base destination dir") + .isEqualTo(readOnlyRange + readWriteRange); + BulkDelete bulkDelete5 = roleFS.createBulkDelete(destDir); + List> entries = bulkDelete5.bulkDelete(pathsToDelete2); + Assertions.assertThat(entries.size()) + .describedAs("Number of error entries in bulk delete result") + .isEqualTo(readOnlyRange); + assertAccessDeniedForEachPath(entries); + // delete the files in the original FS should succeed. 
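+ // Note: bulk delete reports partial failures as per-path entries in the returned list
+ // rather than throwing, which is why the access-denied results above are asserted on directly.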
+ BulkDelete bulkDelete6 = fs.createBulkDelete(destDir); + assertSuccessfulBulkDelete(bulkDelete6.bulkDelete(pathsToDelete2)); + } + + /** + * Bind a read only role policy to a directory to the FS conf. + */ + private static void bindReadOnlyRolePolicy(Configuration assumedRoleConfig, + Path readOnlyDir) + throws JsonProcessingException { + bindRolePolicyStatements(assumedRoleConfig, STATEMENT_ALLOW_KMS_RW, + statement(true, S3_ALL_BUCKETS, S3_ALL_OPERATIONS), + new Statement(Effects.Deny) + .addActions(S3_PATH_WRITE_OPERATIONS) + .addResources(directory(readOnlyDir)) + ); + } + + /** + * Validate delete results for each path in the list + * has access denied error. + */ + private void assertAccessDeniedForEachPath(List> entries) { + for (Map.Entry entry : entries) { + Assertions.assertThat(entry.getValue()) + .describedAs("Error message for path %s is %s", entry.getKey(), entry.getValue()) + .contains("AccessDenied"); + } + } + /** * Have a directory with full R/W permissions, but then remove * write access underneath, and try to delete it. @@ -719,12 +843,7 @@ public void executePartialDelete(final Configuration conf, S3AFileSystem fs = getFileSystem(); fs.delete(destDir, true); - bindRolePolicyStatements(conf, STATEMENT_ALLOW_KMS_RW, - statement(true, S3_ALL_BUCKETS, S3_ALL_OPERATIONS), - new Statement(Effects.Deny) - .addActions(S3_PATH_WRITE_OPERATIONS) - .addResources(directory(readOnlyDir)) - ); + bindReadOnlyRolePolicy(conf, readOnlyDir); roleFS = (S3AFileSystem) destDir.getFileSystem(conf); int range = 10; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java index 79e5a93371a8d..dc81077257bcc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java @@ -831,4 +831,6 @@ protected void delete(Path path, boolean recursive) throws IOException { timer.end("time to delete %s", path); } + + } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsContractBulkDelete.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsContractBulkDelete.java new file mode 100644 index 0000000000000..7ec11abe733b7 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsContractBulkDelete.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractBulkDeleteTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +public class ITestAbfsContractBulkDelete extends AbstractContractBulkDeleteTest { + + private final boolean isSecure; + private final ABFSContractTestBinding binding; + + public ITestAbfsContractBulkDelete() throws Exception { + binding = new ABFSContractTestBinding(); + this.isSecure = binding.isSecureMode(); + } + + @Override + public void setup() throws Exception { + binding.setup(); + super.setup(); + } + + @Override + protected Configuration createConfiguration() { + return binding.getRawConfiguration(); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new AbfsFileSystemContract(conf, isSecure); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/resources/log4j.properties b/hadoop-tools/hadoop-azure/src/test/resources/log4j.properties index 9f72d03653306..64562ecdcf047 100644 --- a/hadoop-tools/hadoop-azure/src/test/resources/log4j.properties +++ b/hadoop-tools/hadoop-azure/src/test/resources/log4j.properties @@ -26,6 +26,7 @@ log4j.logger.org.apache.hadoop.fs.azure.AzureFileSystemThreadPoolExecutor=DEBUG log4j.logger.org.apache.hadoop.fs.azure.BlockBlobAppendStream=DEBUG log4j.logger.org.apache.hadoop.fs.azurebfs.contracts.services.TracingService=TRACE log4j.logger.org.apache.hadoop.fs.azurebfs.services.AbfsClient=DEBUG +log4j.logger.org.apache.hadoop.fs.impl.DefaultBulkDeleteOperation=DEBUG # after here: turn off log messages from other parts of the system # which only clutter test reports. From f11a8cfa6ee4ab733fd5a93f114361973b0e33f7 Mon Sep 17 00:00:00 2001 From: Sebb Date: Mon, 20 May 2024 19:38:08 +0100 Subject: [PATCH 002/113] HADOOP-13147. Constructors must not call overrideable methods in PureJavaCrc32C (#6408). Contributed by Sebb. --- .../apache/hadoop/util/PureJavaCrc32C.java | 8 +++- .../hadoop/util/TestPureJavaCrc32C.java | 37 +++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestPureJavaCrc32C.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32C.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32C.java index bdf929709ab8a..11388f0f1cba9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32C.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/PureJavaCrc32C.java @@ -39,7 +39,7 @@ public class PureJavaCrc32C implements Checksum { /** Create a new PureJavaCrc32 object. 
*/ public PureJavaCrc32C() { - reset(); + resetFinal(); // safe to call as it cannot be overridden } @Override @@ -50,6 +50,12 @@ public long getValue() { @Override public void reset() { + resetFinal(); + } + + // This must be final as it is called by the ctor + // (can't also be private, as checkstyle then complains) + final void resetFinal() { crc = 0xffffffff; } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestPureJavaCrc32C.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestPureJavaCrc32C.java new file mode 100644 index 0000000000000..b085bbf2ac928 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestPureJavaCrc32C.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import java.util.zip.Checksum; + +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + + +public class TestPureJavaCrc32C { + + @Test + public void testChecksumInit() { + Checksum csum = new PureJavaCrc32C(); + long crc1 = csum.getValue(); + csum.reset(); + long crc2 = csum.getValue(); + assertEquals("reset should give same as initial value", crc1, crc2); + } +} From be28467374bfc97b731eda59cccefcaac0fa79b8 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Tue, 21 May 2024 08:46:14 +0800 Subject: [PATCH 003/113] Revert "Bump org.apache.derby:derby in /hadoop-project (#6816)" (#6841) This reverts commit b5a90d9500f11aabd1f01c7e905b994962ea0424. --- hadoop-project/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 58576b4287992..c795b41340f6f 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -135,7 +135,7 @@ 3.8.2 1.1.1 4.0.3 - 10.17.1.0 + 10.14.2.0 6.2.1.jre7 4.11.0 1.6.20 From fb156e8f054d3c8cf2e4984cee5e162ed3abc391 Mon Sep 17 00:00:00 2001 From: hfutatzhanghb Date: Tue, 21 May 2024 09:46:21 +0800 Subject: [PATCH 004/113] HDFS-17464. 
Improve some logs output in class FsDatasetImpl (#6724) --- .../datanode/fsdataset/impl/FsDatasetImpl.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java index 0ca222c083c9b..eeec1bb728825 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsDatasetImpl.java @@ -1826,7 +1826,7 @@ public ReplicaInPipeline convertTemporaryToRbw( if(rbw.getState() != ReplicaState.RBW) { throw new IOException("Expected replica state: " + ReplicaState.RBW + " obtained " + rbw.getState() + " for converting block " - + b.getBlockId()); + + b); } // overwrite the RBW in the volume map volumeMap.add(b.getBlockPoolId(), rbw.getReplicaInfo()); @@ -1977,7 +1977,7 @@ public void finalizeBlock(ExtendedBlock b, boolean fsyncDir) b.getBlockPoolId(), getStorageUuidForLock(b))) { if (Thread.interrupted()) { // Don't allow data modifications from interrupted threads - throw new IOException("Cannot finalize block from Interrupted Thread"); + throw new IOException("Cannot finalize block: " + b + " from Interrupted Thread"); } replicaInfo = getReplicaInfo(b); if (replicaInfo.getState() == ReplicaState.FINALIZED) { @@ -2016,7 +2016,7 @@ private ReplicaInfo finalizeReplica(String bpid, ReplicaInfo replicaInfo) if (volumeMap.get(bpid, replicaInfo.getBlockId()).getGenerationStamp() > replicaInfo.getGenerationStamp()) { throw new IOException("Generation Stamp should be monotonically " - + "increased."); + + "increased bpid: " + bpid + ", block: " + replicaInfo); } ReplicaInfo newReplicaInfo = null; @@ -2028,7 +2028,7 @@ private ReplicaInfo finalizeReplica(String bpid, ReplicaInfo replicaInfo) } else { FsVolumeImpl v = (FsVolumeImpl)replicaInfo.getVolume(); if (v == null) { - throw new IOException("No volume for block " + replicaInfo); + throw new IOException("No volume for bpid: " + bpid + ", block: " + replicaInfo); } newReplicaInfo = v.addFinalizedBlock( @@ -2070,7 +2070,7 @@ public void unfinalizeBlock(ExtendedBlock b) throws IOException { volumeMap.remove(b.getBlockPoolId(), b.getLocalBlock()); // delete the on-disk temp file - if (delBlockFromDisk(replicaInfo)) { + if (delBlockFromDisk(replicaInfo, b.getBlockPoolId())) { LOG.warn("Block " + b + " unfinalized and removed. 
"); } if (replicaInfo.getVolume().isTransientStorage()) { @@ -2091,14 +2091,14 @@ public void unfinalizeBlock(ExtendedBlock b) throws IOException { * @param info the replica that needs to be deleted * @return true if data for the replica are deleted; false otherwise */ - private boolean delBlockFromDisk(ReplicaInfo info) { + private boolean delBlockFromDisk(ReplicaInfo info, String bpid) { if (!info.deleteBlockData()) { - LOG.warn("Not able to delete the block data for replica " + info); + LOG.warn("Not able to delete the block data for replica {}, bpid: {}", info, bpid); return false; } else { // remove the meta file if (!info.deleteMetadata()) { - LOG.warn("Not able to delete the meta data for replica " + info); + LOG.warn("Not able to delete the meta data for replica {}, bpid: {}", info, bpid); return false; } } From d876505b67cc1c1f9903386237874beb37137dc1 Mon Sep 17 00:00:00 2001 From: Benjamin Teke Date: Tue, 21 May 2024 17:41:32 +0200 Subject: [PATCH 005/113] YARN-11681. Update the cgroup documentation with v2 support (#6834) Co-authored-by: Benjamin Teke Co-authored-by: K0K0V0K <109747532+K0K0V0K@users.noreply.github.com> --- .../src/site/markdown/DockerContainers.md | 2 +- .../src/site/markdown/NodeManagerCgroups.md | 56 ++++++++++--------- .../site/markdown/PluggableDeviceFramework.md | 2 +- .../src/site/markdown/RuncContainers.md | 2 +- .../src/site/markdown/UsingFPGA.md | 2 +- 5 files changed, 33 insertions(+), 31 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md index e512363d022b3..ea12ecf056f42 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/DockerContainers.md @@ -514,7 +514,7 @@ uid:gid pair will be used to launch the container's process. As an example of what is meant by uid:gid pair, consider the following. By default, in non-secure mode, YARN will launch processes as the user `nobody` (see the table at the bottom of -[Using CGroups with YARN](./NodeManagerCgroups.html) for how the run as user is +[Using Cgroups with YARN](./NodeManagerCgroups.html) for how the run as user is determined in non-secure mode). On CentOS based systems, the `nobody` user's uid is `99` and the `nobody` group is `99`. As a result, YARN will call `docker run` with `--user 99:99`. If the `nobody` user does not have the uid `99` in the diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCgroups.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCgroups.md index 7a48f6db1125e..84cee43f20601 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCgroups.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/NodeManagerCgroups.md @@ -12,51 +12,53 @@ limitations under the License. See accompanying LICENSE file. --> -Using CGroups with YARN +Using Cgroups with YARN ======================= -CGroups is a mechanism for aggregating/partitioning sets of tasks, and all their future children, into hierarchical groups with specialized behaviour. CGroups is a Linux kernel feature and was merged into kernel version 2.6.24. From a YARN perspective, this allows containers to be limited in their resource usage. A good example of this is CPU usage. Without CGroups, it becomes hard to limit container CPU usage. 
+Cgroups is a mechanism for aggregating/partitioning sets of tasks, and all their future children, into hierarchical groups with specialized behaviour. Cgroups (v1) is a Linux kernel feature and was merged into kernel version 2.6.24, while Control Group v2 is available since the kernel version 4.5. From a YARN perspective, this allows containers to be limited in their resource usage. A good example of this is CPU usage. Without cgroups, it becomes hard to limit container CPU usage. -CGroups Configuration +Cgroups Configuration --------------------- -This section describes the configuration variables for using CGroups. +This section describes the configuration variables for using cgroups. -The following settings are related to setting up CGroups. These need to be set in *yarn-site.xml*. +The following settings are related to setting up cgroups. These need to be set in *yarn-site.xml*. -|Configuration Name | Description | -|:---- |:---- | -| `yarn.nodemanager.container-executor.class` | This should be set to "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor". CGroups is a Linux kernel feature and is exposed via the LinuxContainerExecutor. | -| `yarn.nodemanager.linux-container-executor.resources-handler.class` | This should be set to "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler". Using the LinuxContainerExecutor doesn't force you to use CGroups. If you wish to use CGroups, the resource-handler-class must be set to CGroupsLCEResourceHandler. DefaultLCEResourcesHandler won't work. | -| `yarn.nodemanager.linux-container-executor.cgroups.hierarchy` | The cgroups hierarchy under which to place YARN proccesses(cannot contain commas). If yarn.nodemanager.linux-container-executor.cgroups.mount is false (that is, if cgroups have been pre-configured) and the YARN user has write access to the parent directory, then the directory will be created. If the directory already exists, the administrator has to give YARN write permissions to it recursively. | -| `yarn.nodemanager.linux-container-executor.cgroups.mount` | Whether the LCE should attempt to mount cgroups if not found - can be true or false. | -| `yarn.nodemanager.linux-container-executor.cgroups.mount-path` | Optional. Where CGroups are located. LCE will try to mount them here, if `yarn.nodemanager.linux-container-executor.cgroups.mount` is true. LCE will try to use CGroups from this location, if `yarn.nodemanager.linux-container-executor.cgroups.mount` is false. If specified, this path and its subdirectories (CGroup hierarchies) must exist and they should be readable and writable by YARN before the NodeManager is launched. See CGroups mount options below for details. | -| `yarn.nodemanager.linux-container-executor.group` | The Unix group of the NodeManager. It should match the setting in "container-executor.cfg". This configuration is required for validating the secure access of the container-executor binary. 
| +|Configuration Name | Description | +|:---- |:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `yarn.nodemanager.container-executor.class` | This should be set to "org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor". Cgroups is a Linux kernel feature and is exposed via the LinuxContainerExecutor. | +| `yarn.nodemanager.linux-container-executor.resources-handler.class` | This should be set to "org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler". Using the LinuxContainerExecutor doesn't force you to use cgroups. If you wish to use cgroups, the resource-handler-class must be set to CGroupsLCEResourceHandler. DefaultLCEResourcesHandler won't work. | +| `yarn.nodemanager.linux-container-executor.cgroups.v2.enabled` | A property to enable cgroup v2 support. Setting this to true YARN will try to use the cgroup v2 structure and controllers. If this setting is true, but no unified (v2) hierarchy is mounted it will automatically fall back to v1. Defaults to false. | +| `yarn.nodemanager.linux-container-executor.cgroups.hierarchy` | The cgroups hierarchy under which to place YARN proccesses (cannot contain commas). If `yarn.nodemanager.linux-container-executor.cgroups.mount` is false (that is, if cgroups have been pre-configured) and the YARN user has write access to the parent directory, then the directory will be created. If the directory already exists, the administrator has to give YARN write permissions to it recursively. | +| `yarn.nodemanager.linux-container-executor.cgroups.mount` | Whether the LCE should attempt to mount cgroups if not found - can be true or false. Mounting is not supported with cgroup v2. | +| `yarn.nodemanager.linux-container-executor.cgroups.mount-path` | Optional. Where cgroup is located. LCE will try to mount them here, if `yarn.nodemanager.linux-container-executor.cgroups.mount` is true (and cgroup v1 is used). LCE will try to use cgroups from this location, if `yarn.nodemanager.linux-container-executor.cgroups.mount` is false. If specified, this path and its subdirectories (cgroup hierarchies) must exist and they should be readable and writable by YARN before the NodeManager is launched. See Cgroups mount options below for details. | +| `yarn.nodemanager.linux-container-executor.cgroups.v2.mount-path` | Optional. Where cgroup v2 is located. This property needs to be specified only if both cgroup v1 and v2 is used. For example in mixed mode cgroup v1 controllers can be mounted under /sys/fs/cgroup/ (i.e. /sys/fs/cgroup/cpu,cpuacct), while v2 can be mounted in /sys/fs/cgroup/unified folder. If specified, this path (cgroup v2 hierarchy) must exist and it should be readable and writable by YARN before the NodeManager is launched. | +| `yarn.nodemanager.linux-container-executor.group` | The Unix group of the NodeManager. It should match the setting in "container-executor.cfg". This configuration is required for validating the secure access of the container-executor binary. 
| -Once CGroups enabled, the following settings related to limiting resource usage of YARN containers can works: +Once cgroup is enabled, the following settings related to limiting resource usage of YARN containers can works: -|Configuration Name | Description | -|:---- |:---- | -| `yarn.nodemanager.resource.percentage-physical-cpu-limit` | This setting lets you limit the cpu usage of all YARN containers. It sets a hard upper limit on the cumulative CPU usage of the containers. For example, if set to 60, the combined CPU usage of all YARN containers will not exceed 60%. | -| `yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage` | CGroups allows cpu usage limits to be hard or soft. When this setting is true, containers cannot use more CPU usage than allocated even if spare CPU is available. This ensures that containers can only use CPU that they were allocated. When set to false, containers can use spare CPU if available. It should be noted that irrespective of whether set to true or false, at no time can the combined CPU usage of all containers exceed the value specified in "yarn.nodemanager.resource.percentage-physical-cpu-limit". | +|Configuration Name | Description | +|:---- |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `yarn.nodemanager.resource.percentage-physical-cpu-limit` | This setting lets you limit the cpu usage of all YARN containers. It sets a hard upper limit on the cumulative CPU usage of the containers. For example, if set to 60, the combined CPU usage of all YARN containers will not exceed 60%. | +| `yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage` | Cgroups allows cpu usage limits to be hard or soft. When this setting is true, containers cannot use more CPU usage than allocated even if spare CPU is available. This ensures that containers can only use CPU that they were allocated. When set to false, containers can use spare CPU if available. It should be noted that irrespective of whether set to true or false, at no time can the combined CPU usage of all containers exceed the value specified in "yarn.nodemanager.resource.percentage-physical-cpu-limit". | -CGroups mount options +Cgroups mount options --------------------- -YARN uses CGroups through a directory structure mounted into the file system by the kernel. There are three options to attach to CGroups. +YARN uses cgroups through a directory structure mounted into the file system by the kernel. There are three options to attach to cgroups. -| Option | Description | -|:---- |:---- | -| Discover CGroups mounted already | This should be used on newer systems like RHEL7 or Ubuntu16 or if the administrator mounts CGroups before YARN starts. Set `yarn.nodemanager.linux-container-executor.cgroups.mount` to false and leave other settings set to their defaults. YARN will locate the mount points in `/proc/mounts`. Common locations include `/sys/fs/cgroup` and `/cgroup`. 
The default location can vary depending on the Linux distribution in use.| -| CGroups mounted by YARN | IMPORTANT: This option is deprecated due to security reasons with the `container-executor.cfg` option `feature.mount-cgroup.enabled=0` by default. Please mount cgroups before launching YARN.| -| CGroups mounted already or linked but not in `/proc/mounts` | If cgroups is accessible through lxcfs or simulated by another filesystem, then point `yarn.nodemanager.linux-container-executor.cgroups.mount-path` to your CGroups root directory. Set `yarn.nodemanager.linux-container-executor.cgroups.mount` to false. YARN tries to use this path first, before any CGroup mount point discovery. The path should have a subdirectory for each CGroup hierarchy named by the comma separated CGroup subsystems supported like `/cpu,cpuacct`. Valid subsystem names are `cpu, cpuacct, cpuset, memory, net_cls, blkio, freezer, devices`.| +| Option | Description | +|:------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Discover cgroups mounted already | This should be used on newer systems like RHEL7 or Ubuntu16 or if the administrator mounts cgroups before YARN starts. Set `yarn.nodemanager.linux-container-executor.cgroups.mount` to false and leave other settings set to their defaults. YARN will locate the mount points in `/proc/mounts`. Common locations include `/sys/fs/cgroup` and `/cgroup`. The default location can vary depending on the Linux distribution in use. | +| Cgroups mounted by YARN | IMPORTANT: This option is deprecated due to security reasons with the `container-executor.cfg` option `feature.mount-cgroup.enabled=0` by default. Please mount cgroups before launching YARN. | +| Cgroups mounted already or linked but not in `/proc/mounts` | If cgroups is accessible through lxcfs or simulated by another filesystem, then point `yarn.nodemanager.linux-container-executor.cgroups.mount-path` to your cgroups root directory. Set `yarn.nodemanager.linux-container-executor.cgroups.mount` to false. YARN tries to use this path first, before any cgroup mount point discovery. The path should have a subdirectory in cgroup v1 for each cgroup hierarchy named by the comma separated cgroup subsystems supported like `/cpu,cpuacct`. Valid subsystem names are `cpu, cpuacct, cpuset, memory, net_cls, blkio, freezer, devices`. | -CGroups and security +Cgroups and security -------------------- -CGroups itself has no requirements related to security. However, the LinuxContainerExecutor does have some requirements. If running in non-secure mode, by default, the LCE runs all jobs as user "nobody". This user can be changed by setting "yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user" to the desired user. However, it can also be configured to run jobs as the user submitting the job. In that case "yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users" should be set to false. 
+Cgroups itself has no requirements related to security. However, the LinuxContainerExecutor does have some requirements. If running in non-secure mode, by default, the LCE runs all jobs as user "nobody". This user can be changed by setting "yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user" to the desired user. However, it can also be configured to run jobs as the user submitting the job. In that case "yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users" should be set to false. | yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user | yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users | User running jobs | |:---- |:---- |:---- | diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/PluggableDeviceFramework.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/PluggableDeviceFramework.md index c835df858f47d..8e5dc348c77cd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/PluggableDeviceFramework.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/PluggableDeviceFramework.md @@ -62,7 +62,7 @@ containers run with both YARN cgroups and Nvidia Docker runtime v2. 1. The pluggable device framework depends on LinuxContainerExecutor to handle resource isolation and Docker stuff. So LCE and Docker enabled on YARN is a must. -See [Using CGroups with YARN](./NodeManagerCgroups.html) and [Docker on YARN](./DockerContainers.html) +See [Using Cgroups with YARN](./NodeManagerCgroups.html) and [Docker on YARN](./DockerContainers.html) 2. The sample plugin `NvidiaGPUPluginForRuntimeV2` requires Nvidia GPU drivers and Nvidia Docker runtime v2 installed in the nodes. See Nvidia official diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/RuncContainers.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/RuncContainers.md index 2ad59a390bd45..88201ae18216a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/RuncContainers.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/RuncContainers.md @@ -602,7 +602,7 @@ uid:gid pair will be used to launch the container's process. As an example of what is meant by uid:gid pair, consider the following. By default, in non-secure mode, YARN will launch processes as the user `nobody` (see the table at the bottom of -[Using CGroups with YARN](./NodeManagerCgroups.html) for how the run as user is +[Using Cgroups with YARN](./NodeManagerCgroups.html) for how the run as user is determined in non-secure mode). On CentOS based systems, the `nobody` user's uid is `99` and the `nobody` group is `99`. As a result, YARN will invoke runC with uid `99` and gid `99`. 
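As an illustration of the non-secure run-as-user settings referenced above, the following minimal Java sketch (not part of the patch) sets the two properties from the cgroups documentation programmatically; in a real deployment these would normally live in `yarn-site.xml`, and the user value shown is a placeholder.

```java
import org.apache.hadoop.conf.Configuration;

public final class NonSecureRunAsUserExample {
  private NonSecureRunAsUserExample() {
  }

  /** Configuration where non-secure containers run as the submitting user. */
  public static Configuration runAsSubmittingUser(Configuration conf) {
    // With limit-users set to false the LinuxContainerExecutor runs containers
    // as the job submitter instead of the shared local user.
    conf.setBoolean(
        "yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users", false);
    return conf;
  }

  /** Configuration where all non-secure containers run as one local user. */
  public static Configuration runAsSingleLocalUser(Configuration conf, String localUser) {
    // Default behaviour: every non-secure container runs as this local user (e.g. "nobody").
    conf.set(
        "yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user", localUser);
    return conf;
  }
}
```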
If the `nobody` user does not have the uid `99` in the diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingFPGA.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingFPGA.md index 276c4b407d193..be587474b5f54 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingFPGA.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingFPGA.md @@ -102,7 +102,7 @@ FPGA isolation uses CGroup [devices controller](https://www.kernel.org/doc/Docum | --- | --- | | yarn.nodemanager.linux-container-executor.cgroups.mount | true | -For more details of YARN CGroups configurations, please refer to [Using CGroups with YARN](https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html) +For more details of YARN CGroups configurations, please refer to [Using Cgroups with YARN](https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html) ### In `container-executor.cfg` From d168d3ffeee15ea71786263d7eaa60dc92c4d3a0 Mon Sep 17 00:00:00 2001 From: Anmol Asrani Date: Thu, 23 May 2024 19:40:10 +0530 Subject: [PATCH 006/113] HADOOP-18325: ABFS: Add correlated metric support for ABFS operations (#6314) Adds support for metric collection at the filesystem instance level. Metrics are pushed to the store upon the closure of a filesystem instance, encompassing all operations that utilized that specific instance. Collected Metrics: - Number of successful requests without any retries. - Count of requests that succeeded after a specified number of retries (x retries). - Request count subjected to throttling. - Number of requests that failed despite exhausting all retry attempts. etc. Implementation Details: Incorporated logic in the AbfsClient to facilitate metric pushing through an additional request. This occurs in scenarios where no requests are sent to the backend for a defined idle period. By implementing these enhancements, we ensure comprehensive monitoring and analysis of filesystem interactions, enabling a deeper understanding of success rates, retry scenarios, throttling instances, and exhaustive failure scenarios. Additionally, the AbfsClient logic ensures that metrics are proactively pushed even during idle periods, maintaining a continuous and accurate representation of filesystem performance. 
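To make the opt-in concrete, here is a minimal sketch (not part of the patch) of how a client might enable the collection described above, assuming, as the diff suggests, that the new FS_AZURE_METRIC_* keys are declared in ConfigurationKeys; the metric-sink URI, account name and key values are placeholders, and the exact format expected for them is described in the abfs.md update included in this change.

```java
import org.apache.hadoop.conf.Configuration;

import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_KEY;
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_ACCOUNT_NAME;
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT;
import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_URI;

public final class AbfsMetricsOptIn {
  private AbfsMetricsOptIn() {
  }

  /** Enable both backoff and read-footer metric collection on a configuration. */
  public static Configuration withInternalMetrics(Configuration conf) {
    // MetricFormat.INTERNAL_METRIC_FORMAT turns on both AbfsBackoffMetrics
    // and AbfsReadFooterMetrics for the filesystem instance.
    conf.set(FS_AZURE_METRIC_FORMAT, "INTERNAL_METRIC_FORMAT");
    // Account and container used only as the sink for the metric push (placeholders).
    conf.set(FS_AZURE_METRIC_URI, "metrics-container@metricsaccount.dfs.core.windows.net");
    conf.set(FS_AZURE_METRIC_ACCOUNT_NAME, "metricsaccount.dfs.core.windows.net");
    conf.set(FS_AZURE_METRIC_ACCOUNT_KEY, "<metrics-account-key>");
    return conf;
  }
}
```

As the commit message notes, the collected metrics are pushed when the filesystem instance is closed, or proactively after the configured idle period.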
Contributed by Anmol Asrani --- .../fs/azurebfs/AbfsBackoffMetrics.java | 312 ++++++++++ .../hadoop/fs/azurebfs/AbfsConfiguration.java | 45 ++ .../hadoop/fs/azurebfs/AbfsCountersImpl.java | 102 +++- .../fs/azurebfs/AzureBlobFileSystem.java | 15 +- .../azurebfs/constants/ConfigurationKeys.java | 7 + .../constants/FileSystemConfigurations.java | 6 + .../constants/HttpHeaderConfigurations.java | 1 + .../services/AzureServiceErrorCode.java | 18 +- .../fs/azurebfs/services/AbfsClient.java | 195 ++++++- .../azurebfs/services/AbfsClientContext.java | 2 +- .../fs/azurebfs/services/AbfsCounters.java | 11 + .../fs/azurebfs/services/AbfsInputStream.java | 18 +- .../services/AbfsReadFooterMetrics.java | 549 ++++++++++++++++++ .../azurebfs/services/AbfsRestOperation.java | 212 ++++++- .../azurebfs/services/TimerFunctionality.java | 4 +- .../fs/azurebfs/utils/MetricFormat.java | 36 ++ .../fs/azurebfs/utils/TracingContext.java | 22 +- .../hadoop-azure/src/site/markdown/abfs.md | 43 ++ .../ITestAbfsInputStreamStatistics.java | 1 - .../azurebfs/ITestAbfsReadFooterMetrics.java | 385 ++++++++++++ .../ITestAzureBlobFileSystemListStatus.java | 9 +- .../azurebfs/services/AbfsClientTestUtil.java | 9 +- .../fs/azurebfs/services/ITestAbfsClient.java | 21 +- .../services/TestAbfsInputStream.java | 7 +- .../services/TestAbfsRestOperation.java | 81 +++ .../TestAbfsRestOperationMockFailures.java | 3 +- 26 files changed, 2042 insertions(+), 72 deletions(-) create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsBackoffMetrics.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/MetricFormat.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadFooterMetrics.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsBackoffMetrics.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsBackoffMetrics.java new file mode 100644 index 0000000000000..37dbdfffeed6f --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsBackoffMetrics.java @@ -0,0 +1,312 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; + +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.HUNDRED; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.THOUSAND; + +public class AbfsBackoffMetrics { + + private AtomicLong numberOfRequestsSucceeded; + + private AtomicLong minBackoff; + + private AtomicLong maxBackoff; + + private AtomicLong totalRequests; + + private AtomicLong totalBackoff; + + private String retryCount; + + private AtomicLong numberOfIOPSThrottledRequests; + + private AtomicLong numberOfBandwidthThrottledRequests; + + private AtomicLong numberOfOtherThrottledRequests; + + private AtomicLong numberOfNetworkFailedRequests; + + private AtomicLong maxRetryCount; + + private AtomicLong totalNumberOfRequests; + + private AtomicLong numberOfRequestsSucceededWithoutRetrying; + + private AtomicLong numberOfRequestsFailed; + + private final Map metricsMap + = new ConcurrentHashMap<>(); + + public AbfsBackoffMetrics() { + initializeMap(); + this.numberOfIOPSThrottledRequests = new AtomicLong(); + this.numberOfBandwidthThrottledRequests = new AtomicLong(); + this.numberOfOtherThrottledRequests = new AtomicLong(); + this.totalNumberOfRequests = new AtomicLong(); + this.maxRetryCount = new AtomicLong(); + this.numberOfRequestsSucceededWithoutRetrying = new AtomicLong(); + this.numberOfRequestsFailed = new AtomicLong(); + this.numberOfNetworkFailedRequests = new AtomicLong(); + } + + public AbfsBackoffMetrics(String retryCount) { + this.retryCount = retryCount; + this.numberOfRequestsSucceeded = new AtomicLong(); + this.minBackoff = new AtomicLong(Long.MAX_VALUE); + this.maxBackoff = new AtomicLong(); + this.totalRequests = new AtomicLong(); + this.totalBackoff = new AtomicLong(); + } + + private void initializeMap() { + ArrayList retryCountList = new ArrayList( + Arrays.asList("1", "2", "3", "4", "5_15", "15_25", "25AndAbove")); + for (String s : retryCountList) { + metricsMap.put(s, new AbfsBackoffMetrics(s)); + } + } + + public long getNumberOfRequestsSucceeded() { + return this.numberOfRequestsSucceeded.get(); + } + + public void setNumberOfRequestsSucceeded(long numberOfRequestsSucceeded) { + this.numberOfRequestsSucceeded.set(numberOfRequestsSucceeded); + } + + public void incrementNumberOfRequestsSucceeded() { + this.numberOfRequestsSucceeded.getAndIncrement(); + } + + public long getMinBackoff() { + return this.minBackoff.get(); + } + + public void setMinBackoff(long minBackoff) { + this.minBackoff.set(minBackoff); + } + + public long getMaxBackoff() { + return this.maxBackoff.get(); + } + + public void setMaxBackoff(long maxBackoff) { + this.maxBackoff.set(maxBackoff); + } + + public long getTotalRequests() { + return this.totalRequests.get(); + } + + public void incrementTotalRequests() { + this.totalRequests.incrementAndGet(); + } + + public void setTotalRequests(long totalRequests) { + this.totalRequests.set(totalRequests); + } + + public long getTotalBackoff() { + return this.totalBackoff.get(); + } + + public void setTotalBackoff(long totalBackoff) { + this.totalBackoff.set(totalBackoff); + } + + public String getRetryCount() { + return this.retryCount; + } + + public long getNumberOfIOPSThrottledRequests() { + return this.numberOfIOPSThrottledRequests.get(); + } + + public void setNumberOfIOPSThrottledRequests(long 
numberOfIOPSThrottledRequests) { + this.numberOfIOPSThrottledRequests.set(numberOfIOPSThrottledRequests); + } + + public void incrementNumberOfIOPSThrottledRequests() { + this.numberOfIOPSThrottledRequests.getAndIncrement(); + } + + public long getNumberOfBandwidthThrottledRequests() { + return this.numberOfBandwidthThrottledRequests.get(); + } + + public void setNumberOfBandwidthThrottledRequests(long numberOfBandwidthThrottledRequests) { + this.numberOfBandwidthThrottledRequests.set(numberOfBandwidthThrottledRequests); + } + + public void incrementNumberOfBandwidthThrottledRequests() { + this.numberOfBandwidthThrottledRequests.getAndIncrement(); + } + + public long getNumberOfOtherThrottledRequests() { + return this.numberOfOtherThrottledRequests.get(); + } + + public void setNumberOfOtherThrottledRequests(long numberOfOtherThrottledRequests) { + this.numberOfOtherThrottledRequests.set(numberOfOtherThrottledRequests); + } + + public void incrementNumberOfOtherThrottledRequests() { + this.numberOfOtherThrottledRequests.getAndIncrement(); + } + + public long getMaxRetryCount() { + return this.maxRetryCount.get(); + } + + public void setMaxRetryCount(long maxRetryCount) { + this.maxRetryCount.set(maxRetryCount); + } + + public void incrementMaxRetryCount() { + this.maxRetryCount.getAndIncrement(); + } + + public long getTotalNumberOfRequests() { + return this.totalNumberOfRequests.get(); + } + + public void setTotalNumberOfRequests(long totalNumberOfRequests) { + this.totalNumberOfRequests.set(totalNumberOfRequests); + } + + public void incrementTotalNumberOfRequests() { + this.totalNumberOfRequests.getAndIncrement(); + } + + public Map getMetricsMap() { + return metricsMap; + } + + public long getNumberOfRequestsSucceededWithoutRetrying() { + return this.numberOfRequestsSucceededWithoutRetrying.get(); + } + + public void setNumberOfRequestsSucceededWithoutRetrying(long numberOfRequestsSucceededWithoutRetrying) { + this.numberOfRequestsSucceededWithoutRetrying.set(numberOfRequestsSucceededWithoutRetrying); + } + + public void incrementNumberOfRequestsSucceededWithoutRetrying() { + this.numberOfRequestsSucceededWithoutRetrying.getAndIncrement(); + } + + public long getNumberOfRequestsFailed() { + return this.numberOfRequestsFailed.get(); + } + + public void setNumberOfRequestsFailed(long numberOfRequestsFailed) { + this.numberOfRequestsFailed.set(numberOfRequestsFailed); + } + + public void incrementNumberOfRequestsFailed() { + this.numberOfRequestsFailed.getAndIncrement(); + } + + public long getNumberOfNetworkFailedRequests() { + return this.numberOfNetworkFailedRequests.get(); + } + + public void setNumberOfNetworkFailedRequests(long numberOfNetworkFailedRequests) { + this.numberOfNetworkFailedRequests.set(numberOfNetworkFailedRequests); + } + + public void incrementNumberOfNetworkFailedRequests() { + this.numberOfNetworkFailedRequests.getAndIncrement(); + } + + /* + Acronyms :- + 1.RCTSI :- Request count that succeeded in x retries + 2.MMA :- Min Max Average (This refers to the backoff or sleep time between 2 requests) + 3.s :- seconds + 4.BWT :- Number of Bandwidth throttled requests + 5.IT :- Number of IOPS throttled requests + 6.OT :- Number of Other throttled requests + 7.NFR :- Number of requests which failed due to network errors + 8.%RT :- Percentage of requests that are throttled + 9.TRNR :- Total number of requests which succeeded without retrying + 10.TRF :- Total number of requests which failed + 11.TR :- Total number of requests which were made + 12.MRC :- Max retry count 
across all requests + */ + @Override + public String toString() { + StringBuilder metricString = new StringBuilder(); + long totalRequestsThrottled = getNumberOfBandwidthThrottledRequests() + + getNumberOfIOPSThrottledRequests() + + getNumberOfOtherThrottledRequests(); + double percentageOfRequestsThrottled = + ((double) totalRequestsThrottled / getTotalNumberOfRequests()) * HUNDRED; + for (Map.Entry entry : metricsMap.entrySet()) { + metricString.append("$RCTSI$_").append(entry.getKey()) + .append("R_").append("=") + .append(entry.getValue().getNumberOfRequestsSucceeded()); + long totalRequests = entry.getValue().getTotalRequests(); + if (totalRequests > 0) { + metricString.append("$MMA$_").append(entry.getKey()) + .append("R_").append("=") + .append(String.format("%.3f", + (double) entry.getValue().getMinBackoff() / THOUSAND)) + .append("s") + .append(String.format("%.3f", + (double) entry.getValue().getMaxBackoff() / THOUSAND)) + .append("s") + .append(String.format("%.3f", + ((double) entry.getValue().getTotalBackoff() / totalRequests) + / THOUSAND)) + .append("s"); + } else { + metricString.append("$MMA$_").append(entry.getKey()) + .append("R_").append("=0s"); + } + } + metricString.append("$BWT=") + .append(getNumberOfBandwidthThrottledRequests()) + .append("$IT=") + .append(getNumberOfIOPSThrottledRequests()) + .append("$OT=") + .append(getNumberOfOtherThrottledRequests()) + .append("$RT=") + .append(String.format("%.3f", percentageOfRequestsThrottled)) + .append("$NFR=") + .append(getNumberOfNetworkFailedRequests()) + .append("$TRNR=") + .append(getNumberOfRequestsSucceededWithoutRetrying()) + .append("$TRF=") + .append(getNumberOfRequestsFailed()) + .append("$TR=") + .append(getTotalNumberOfRequests()) + .append("$MRC=") + .append(getMaxRetryCount()); + + return metricString + ""; + } +} + diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index a1b6fc12a5ce1..6e5e772e18160 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -22,6 +22,7 @@ import java.lang.reflect.Field; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; import org.apache.hadoop.util.Preconditions; import org.apache.commons.lang3.StringUtils; @@ -291,6 +292,26 @@ public class AbfsConfiguration{ DefaultValue = DEFAULT_ENABLE_AUTOTHROTTLING) private boolean enableAutoThrottling; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRIC_IDLE_TIMEOUT, + DefaultValue = DEFAULT_METRIC_IDLE_TIMEOUT_MS) + private int metricIdleTimeout; + + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRIC_ANALYSIS_TIMEOUT, + DefaultValue = DEFAULT_METRIC_ANALYSIS_TIMEOUT_MS) + private int metricAnalysisTimeout; + + @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRIC_URI, + DefaultValue = EMPTY_STRING) + private String metricUri; + + @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRIC_ACCOUNT_NAME, + DefaultValue = EMPTY_STRING) + private String metricAccount; + + @StringConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_METRIC_ACCOUNT_KEY, + DefaultValue = EMPTY_STRING) + private String metricAccountKey; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = 
FS_AZURE_ACCOUNT_OPERATION_IDLE_TIMEOUT, DefaultValue = DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS) private int accountOperationIdleTimeout; @@ -818,6 +839,26 @@ public boolean isAutoThrottlingEnabled() { return this.enableAutoThrottling; } + public int getMetricIdleTimeout() { + return this.metricIdleTimeout; + } + + public int getMetricAnalysisTimeout() { + return this.metricAnalysisTimeout; + } + + public String getMetricUri() { + return metricUri; + } + + public String getMetricAccount() { + return metricAccount; + } + + public String getMetricAccountKey() { + return metricAccountKey; + } + public int getAccountOperationIdleTimeout() { return accountOperationIdleTimeout; } @@ -854,6 +895,10 @@ public TracingHeaderFormat getTracingHeaderFormat() { return getEnum(FS_AZURE_TRACINGHEADER_FORMAT, TracingHeaderFormat.ALL_ID_FORMAT); } + public MetricFormat getMetricFormat() { + return getEnum(FS_AZURE_METRIC_FORMAT, MetricFormat.EMPTY); + } + public AuthType getAuthType(String accountName) { return getEnum(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SharedKey); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java index 67ee8e90efb3d..c4d3e05cdb25d 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsCountersImpl.java @@ -21,10 +21,12 @@ import java.net.URI; import java.util.Map; import java.util.UUID; +import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.classification.VisibleForTesting; - import org.apache.hadoop.fs.azurebfs.services.AbfsCounters; +import org.apache.hadoop.fs.azurebfs.services.AbfsReadFooterMetrics; +import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; import org.apache.hadoop.fs.statistics.DurationTracker; import org.apache.hadoop.fs.statistics.IOStatistics; import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; @@ -34,8 +36,42 @@ import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableMetric; -import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.*; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.BYTES_RECEIVED; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.BYTES_SENT; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CALL_APPEND; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CALL_CREATE; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CALL_CREATE_NON_RECURSIVE; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CALL_DELETE; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CALL_EXIST; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CALL_GET_DELEGATION_TOKEN; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CALL_GET_FILE_STATUS; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CALL_LIST_STATUS; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CALL_MKDIRS; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CALL_OPEN; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CALL_RENAME; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.CONNECTIONS_MADE; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.DIRECTORIES_CREATED; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.DIRECTORIES_DELETED; +import static 
org.apache.hadoop.fs.azurebfs.AbfsStatistic.ERROR_IGNORED; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.FILES_CREATED; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.FILES_DELETED; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.GET_RESPONSES; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.HTTP_DELETE_REQUEST; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.HTTP_GET_REQUEST; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.HTTP_HEAD_REQUEST; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.HTTP_PATCH_REQUEST; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.HTTP_POST_REQUEST; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.HTTP_PUT_REQUEST; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.METADATA_INCOMPLETE_RENAME_FAILURES; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.READ_THROTTLES; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.RENAME_PATH_ATTEMPTS; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.RENAME_RECOVERY; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.SEND_REQUESTS; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.SERVER_UNAVAILABLE; +import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.WRITE_THROTTLES; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.iostatisticsStore; +import static org.apache.hadoop.util.Time.now; + /** * Instrumentation of Abfs counters. @@ -63,6 +99,12 @@ public class AbfsCountersImpl implements AbfsCounters { private final IOStatisticsStore ioStatisticsStore; + private AbfsBackoffMetrics abfsBackoffMetrics = null; + + private AbfsReadFooterMetrics abfsReadFooterMetrics = null; + + private AtomicLong lastExecutionTime = null; + private static final AbfsStatistic[] STATISTIC_LIST = { CALL_CREATE, CALL_OPEN, @@ -91,7 +133,6 @@ public class AbfsCountersImpl implements AbfsCounters { RENAME_RECOVERY, METADATA_INCOMPLETE_RENAME_FAILURES, RENAME_PATH_ATTEMPTS - }; private static final AbfsStatistic[] DURATION_TRACKER_LIST = { @@ -121,6 +162,25 @@ public AbfsCountersImpl(URI uri) { ioStatisticsStoreBuilder.withDurationTracking(durationStats.getStatName()); } ioStatisticsStore = ioStatisticsStoreBuilder.build(); + lastExecutionTime = new AtomicLong(now()); + } + + @Override + public void initializeMetrics(MetricFormat metricFormat) { + switch (metricFormat) { + case INTERNAL_BACKOFF_METRIC_FORMAT: + abfsBackoffMetrics = new AbfsBackoffMetrics(); + break; + case INTERNAL_FOOTER_METRIC_FORMAT: + abfsReadFooterMetrics = new AbfsReadFooterMetrics(); + break; + case INTERNAL_METRIC_FORMAT: + abfsBackoffMetrics = new AbfsBackoffMetrics(); + abfsReadFooterMetrics = new AbfsReadFooterMetrics(); + break; + default: + break; + } } /** @@ -188,6 +248,21 @@ private MetricsRegistry getRegistry() { return registry; } + @Override + public AbfsBackoffMetrics getAbfsBackoffMetrics() { + return abfsBackoffMetrics != null ? abfsBackoffMetrics : null; + } + + @Override + public AtomicLong getLastExecutionTime() { + return lastExecutionTime; + } + + @Override + public AbfsReadFooterMetrics getAbfsReadFooterMetrics() { + return abfsReadFooterMetrics != null ? 
abfsReadFooterMetrics : null; + } + /** * {@inheritDoc} * @@ -244,4 +319,25 @@ public IOStatistics getIOStatistics() { public DurationTracker trackDuration(String key) { return ioStatisticsStore.trackDuration(key); } + + @Override + public String toString() { + String metric = ""; + if (abfsBackoffMetrics != null) { + long totalNoRequests = getAbfsBackoffMetrics().getTotalNumberOfRequests(); + if (totalNoRequests > 0) { + metric += "#BO:" + getAbfsBackoffMetrics().toString(); + } + } + if (abfsReadFooterMetrics != null) { + Map metricsMap = getAbfsReadFooterMetrics().getMetricsMap(); + if (metricsMap != null && !(metricsMap.isEmpty())) { + String readFooterMetric = getAbfsReadFooterMetrics().toString(); + if (!readFooterMetric.equals("")) { + metric += "#FO:" + getAbfsReadFooterMetrics().toString(); + } + } + } + return metric; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java index 51ba90f8e0a95..7ca960d569d09 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java @@ -41,7 +41,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; - import javax.annotation.Nullable; import org.apache.hadoop.classification.VisibleForTesting; @@ -50,7 +49,6 @@ import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import org.apache.commons.lang3.ArrayUtils; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.classification.InterfaceAudience; @@ -700,6 +698,18 @@ public synchronized void close() throws IOException { if (isClosed) { return; } + if (abfsStore.getClient().isMetricCollectionEnabled()) { + TracingContext tracingMetricContext = new TracingContext( + clientCorrelationId, + fileSystemId, FSOperationType.GET_ATTR, true, + tracingHeaderFormat, + listener, abfsCounters.toString()); + try { + getAbfsClient().getMetricCall(tracingMetricContext); + } catch (IOException e) { + throw new IOException(e); + } + } // does all the delete-on-exit calls, and may be slow. super.close(); LOG.debug("AzureBlobFileSystem.close"); @@ -1680,3 +1690,4 @@ public IOStatistics getIOStatistics() { return abfsCounters != null ? 
abfsCounters.getIOStatistics() : null; } } + diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index b667934c39e4e..299cc5c9c4513 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -41,6 +41,10 @@ public final class ConfigurationKeys { */ public static final String FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED = "fs.azure.account.expect.header.enabled"; public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME = "fs.azure.account.key"; + public static final String FS_AZURE_METRIC_ACCOUNT_NAME = "fs.azure.metric.account.name"; + public static final String FS_AZURE_METRIC_ACCOUNT_KEY = "fs.azure.metric.account.key"; + public static final String FS_AZURE_METRIC_URI = "fs.azure.metric.uri"; + public static final String FS_AZURE_ACCOUNT_KEY_PROPERTY_NAME_REGX = "fs\\.azure\\.account\\.key\\.(.*)"; public static final String FS_AZURE_SECURE_MODE = "fs.azure.secure.mode"; public static final String FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED = "fs.azure.account.throttling.enabled"; @@ -150,6 +154,8 @@ public final class ConfigurationKeys { public static final String AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION = "fs.azure.createRemoteFileSystemDuringInitialization"; public static final String AZURE_SKIP_USER_GROUP_METADATA_DURING_INITIALIZATION = "fs.azure.skipUserGroupMetadataDuringInitialization"; public static final String FS_AZURE_ENABLE_AUTOTHROTTLING = "fs.azure.enable.autothrottling"; + public static final String FS_AZURE_METRIC_IDLE_TIMEOUT = "fs.azure.metric.idle.timeout"; + public static final String FS_AZURE_METRIC_ANALYSIS_TIMEOUT = "fs.azure.metric.analysis.timeout"; public static final String FS_AZURE_ACCOUNT_OPERATION_IDLE_TIMEOUT = "fs.azure.account.operation.idle.timeout"; public static final String FS_AZURE_ANALYSIS_PERIOD = "fs.azure.analysis.period"; public static final String FS_AZURE_ALWAYS_USE_HTTPS = "fs.azure.always.use.https"; @@ -190,6 +196,7 @@ public final class ConfigurationKeys { * character constraints are not satisfied. 
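The configuration surface for the new metric pipeline is spread across several hunks, so a short usage sketch may help: the snippet below shows how a client could be expected to enable collection, combining the keys added here with the fs.azure.metric.format key and the 60 000 ms default timeouts added further down. The account name, key and URI values are placeholders, and the enum name INTERNAL_METRIC_FORMAT is taken from the AbfsCountersImpl.initializeMetrics hunk above; treat this as an assumption-laden illustration rather than documented configuration.

    import org.apache.hadoop.conf.Configuration;

    public final class AbfsMetricConfigSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // The metric sink account must be fully qualified (AbfsClient rejects names without a dot).
        conf.set("fs.azure.metric.account.name", "metricsink.dfs.core.windows.net"); // placeholder
        conf.set("fs.azure.metric.account.key", "<base64-account-key>");             // placeholder
        conf.set("fs.azure.metric.uri", "https://metricsink.dfs.core.windows.net");  // placeholder
        // Any non-empty format switches metric collection on; EMPTY (the default) leaves it off.
        conf.set("fs.azure.metric.format", "INTERNAL_METRIC_FORMAT");
        // Idle and analysis windows; 60_000 ms are the defaults in FileSystemConfigurations.
        conf.setInt("fs.azure.metric.idle.timeout", 60_000);
        conf.setInt("fs.azure.metric.analysis.timeout", 60_000);
      }
    }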
**/ public static final String FS_AZURE_CLIENT_CORRELATIONID = "fs.azure.client.correlationid"; public static final String FS_AZURE_TRACINGHEADER_FORMAT = "fs.azure.tracingheader.format"; + public static final String FS_AZURE_METRIC_FORMAT = "fs.azure.metric.format"; public static final String FS_AZURE_CLUSTER_NAME = "fs.azure.cluster.name"; public static final String FS_AZURE_CLUSTER_TYPE = "fs.azure.cluster.type"; public static final String FS_AZURE_SSL_CHANNEL_MODE_KEY = "fs.azure.ssl.channel.mode"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index 0af485bbe56b1..ade0dc39cfe18 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -108,6 +108,8 @@ public final class FileSystemConfigurations { public static final boolean DEFAULT_ENABLE_FLUSH = true; public static final boolean DEFAULT_DISABLE_OUTPUTSTREAM_FLUSH = true; public static final boolean DEFAULT_ENABLE_AUTOTHROTTLING = true; + public static final int DEFAULT_METRIC_IDLE_TIMEOUT_MS = 60_000; + public static final int DEFAULT_METRIC_ANALYSIS_TIMEOUT_MS = 60_000; public static final boolean DEFAULT_FS_AZURE_ACCOUNT_LEVEL_THROTTLING_ENABLED = true; public static final int DEFAULT_ACCOUNT_OPERATION_IDLE_TIMEOUT_MS = 60_000; public static final int DEFAULT_ANALYSIS_PERIOD_MS = 10_000; @@ -161,5 +163,9 @@ public final class FileSystemConfigurations { */ public static final int RATE_LIMIT_DEFAULT = 1_000; + public static final int ZERO = 0; + public static final int HUNDRED = 100; + public static final long THOUSAND = 1000L; + private FileSystemConfigurations() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java index 84a94b994c2d3..b3c2b21d3c277 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpHeaderConfigurations.java @@ -43,6 +43,7 @@ public final class HttpHeaderConfigurations { public static final String USER_AGENT = "User-Agent"; public static final String X_HTTP_METHOD_OVERRIDE = "X-HTTP-Method-Override"; public static final String X_MS_CLIENT_REQUEST_ID = "x-ms-client-request-id"; + public static final String X_MS_FECLIENT_METRICS = "x-ms-feclient-metrics"; public static final String X_MS_EXISTING_RESOURCE_TYPE = "x-ms-existing-resource-type"; public static final String X_MS_DATE = "x-ms-date"; public static final String X_MS_REQUEST_ID = "x-ms-request-id"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java index 12e687c15bb43..439caabe2327f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/services/AzureServiceErrorCode.java @@ -21,7 +21,8 @@ import 
java.net.HttpURLConnection; import java.util.ArrayList; import java.util.List; - +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -60,6 +61,9 @@ public enum AzureServiceErrorCode { private final String errorCode; private final int httpStatusCode; private final String errorMessage; + + private static final Logger LOG1 = LoggerFactory.getLogger(AzureServiceErrorCode.class); + AzureServiceErrorCode(String errorCode, int httpStatusCodes, String errorMessage) { this.errorCode = errorCode; this.httpStatusCode = httpStatusCodes; @@ -105,7 +109,6 @@ public static AzureServiceErrorCode getAzureServiceCode(int httpStatusCode, Stri return azureServiceErrorCode; } } - return UNKNOWN; } @@ -113,16 +116,15 @@ public static AzureServiceErrorCode getAzureServiceCode(int httpStatusCode, Stri if (errorCode == null || errorCode.isEmpty() || httpStatusCode == UNKNOWN.httpStatusCode || errorMessage == null || errorMessage.isEmpty()) { return UNKNOWN; } - + String[] errorMessages = errorMessage.split(System.lineSeparator(), 2); for (AzureServiceErrorCode azureServiceErrorCode : AzureServiceErrorCode.values()) { - if (azureServiceErrorCode.httpStatusCode == httpStatusCode - && errorCode.equalsIgnoreCase(azureServiceErrorCode.errorCode) - && errorMessage.equalsIgnoreCase(azureServiceErrorCode.errorMessage) - ) { + if (azureServiceErrorCode.getStatusCode() == httpStatusCode + && azureServiceErrorCode.getErrorCode().equalsIgnoreCase(errorCode) + && azureServiceErrorCode.getErrorMessage() + .equalsIgnoreCase(errorMessages[0])) { return azureServiceErrorCode; } } - return UNKNOWN; } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 1ab1c7a0afb8f..f4ff181357960 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -22,24 +22,31 @@ import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.HttpURLConnection; +import java.net.InetAddress; import java.net.MalformedURLException; import java.net.URL; import java.net.URLEncoder; +import java.net.UnknownHostException; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Base64; import java.util.List; import java.util.Locale; +import java.util.Timer; +import java.util.TimerTask; import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsInvalidChecksumException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; +import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; import org.apache.hadoop.fs.azurebfs.utils.NamespaceUtil; import org.apache.hadoop.fs.store.LogExactlyOnce; import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.Permissions; @@ -115,6 +122,13 @@ public class AbfsClient implements 
Closeable { private AccessTokenProvider tokenProvider; private SASTokenProvider sasTokenProvider; private final AbfsCounters abfsCounters; + private final Timer timer; + private final String abfsMetricUrl; + private boolean isMetricCollectionEnabled = false; + private final MetricFormat metricFormat; + private final AtomicBoolean isMetricCollectionStopped; + private final int metricAnalysisPeriod; + private final int metricIdlePeriod; private EncryptionContextProvider encryptionContextProvider = null; private EncryptionType encryptionType = EncryptionType.NONE; private final AbfsThrottlingIntercept intercept; @@ -123,6 +137,9 @@ public class AbfsClient implements Closeable { private Boolean isNamespaceEnabled; private boolean renameResilience; + private TimerTask runningTimerTask; + private boolean isSendMetricCall; + private SharedKeyCredentials metricSharedkeyCredentials = null; /** * logging the rename failure if metadata is in an incomplete state. @@ -181,6 +198,35 @@ private AbfsClient(final URL baseUrl, new ThreadFactoryBuilder().setNameFormat("AbfsClient Lease Ops").setDaemon(true).build(); this.executorService = MoreExecutors.listeningDecorator( HadoopExecutors.newScheduledThreadPool(this.abfsConfiguration.getNumLeaseThreads(), tf)); + this.metricFormat = abfsConfiguration.getMetricFormat(); + this.isMetricCollectionStopped = new AtomicBoolean(false); + this.metricAnalysisPeriod = abfsConfiguration.getMetricAnalysisTimeout(); + this.metricIdlePeriod = abfsConfiguration.getMetricIdleTimeout(); + if (!metricFormat.toString().equals("")) { + isMetricCollectionEnabled = true; + abfsCounters.initializeMetrics(metricFormat); + String metricAccountName = abfsConfiguration.getMetricAccount(); + int dotIndex = metricAccountName.indexOf(AbfsHttpConstants.DOT); + if (dotIndex <= 0) { + throw new InvalidUriException( + metricAccountName + " - account name is not fully qualified."); + } + String metricAccountKey = abfsConfiguration.getMetricAccountKey(); + try { + metricSharedkeyCredentials = new SharedKeyCredentials(metricAccountName.substring(0, dotIndex), + metricAccountKey); + } catch (IllegalArgumentException e) { + throw new IOException("Exception while initializing metric credentials " + e); + } + } + this.timer = new Timer( + "abfs-timer-client", true); + if (isMetricCollectionEnabled) { + timer.schedule(new TimerTaskImpl(), + metricIdlePeriod, + metricIdlePeriod); + } + this.abfsMetricUrl = abfsConfiguration.getMetricUri(); } public AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCredentials, @@ -207,6 +253,10 @@ public AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCredent @Override public void close() throws IOException { + if (runningTimerTask != null) { + runningTimerTask.cancel(); + timer.purge(); + } if (tokenProvider instanceof Closeable) { IOUtils.cleanupWithLogger(LOG, (Closeable) tokenProvider); @@ -246,6 +296,10 @@ SharedKeyCredentials getSharedKeyCredentials() { return sharedKeyCredentials; } + SharedKeyCredentials getMetricSharedkeyCredentials() { + return metricSharedkeyCredentials; + } + public void setEncryptionType(EncryptionType encryptionType) { this.encryptionType = encryptionType; } @@ -1057,7 +1111,6 @@ public AbfsRestOperation getPathStatus(final String path, final ContextEncryptionAdapter contextEncryptionAdapter) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); String operation = 
SASTokenProvider.GET_PROPERTIES_OPERATION; if (!includeProperties) { @@ -1318,7 +1371,6 @@ public AbfsRestOperation getAclStatus(final String path, TracingContext tracingC public AbfsRestOperation getAclStatus(final String path, final boolean useUPN, TracingContext tracingContext) throws AzureBlobFileSystemException { final List requestHeaders = createDefaultHeaders(); - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_ACTION, AbfsHttpConstants.GET_ACCESS_CONTROL); abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_UPN, String.valueOf(useUPN)); @@ -1435,6 +1487,7 @@ private String appendSASTokenToQuery(String path, return sasToken; } + @VisibleForTesting private URL createRequestUrl(final String query) throws AzureBlobFileSystemException { return createRequestUrl(EMPTY_STRING, query); } @@ -1442,7 +1495,12 @@ private URL createRequestUrl(final String query) throws AzureBlobFileSystemExcep @VisibleForTesting protected URL createRequestUrl(final String path, final String query) throws AzureBlobFileSystemException { - final String base = baseUrl.toString(); + return createRequestUrl(baseUrl, path, query); + } + + @VisibleForTesting + protected URL createRequestUrl(final URL baseUrl, final String path, final String query) + throws AzureBlobFileSystemException { String encodedPath = path; try { encodedPath = urlEncode(path); @@ -1452,7 +1510,10 @@ protected URL createRequestUrl(final String path, final String query) } final StringBuilder sb = new StringBuilder(); - sb.append(base); + if (baseUrl == null) { + throw new InvalidUriException("URL provided is null"); + } + sb.append(baseUrl.toString()); sb.append(encodedPath); sb.append(query); @@ -1460,7 +1521,7 @@ protected URL createRequestUrl(final String path, final String query) try { url = new URL(sb.toString()); } catch (MalformedURLException ex) { - throw new InvalidUriException(sb.toString()); + throw new InvalidUriException("URL is malformed" + sb.toString()); } return url; } @@ -1693,7 +1754,7 @@ void setIsNamespaceEnabled(final Boolean isNamespaceEnabled) { * Getter for abfsCounters from AbfsClient. * @return AbfsCounters instance. */ - protected AbfsCounters getAbfsCounters() { + public AbfsCounters getAbfsCounters() { return abfsCounters; } @@ -1731,6 +1792,128 @@ protected AccessTokenProvider getTokenProvider() { return tokenProvider; } + /** + * Retrieves a TracingContext object configured for metric tracking. + * This method creates a TracingContext object with the validated client correlation ID, + * the host name of the local machine (or "UnknownHost" if unable to determine), + * the file system operation type set to GET_ATTR, and additional configuration parameters + * for metric tracking. + * The TracingContext is intended for use in tracking metrics related to Azure Blob FileSystem (ABFS) operations. + * + * @return A TracingContext object configured for metric tracking. + */ + private TracingContext getMetricTracingContext() { + String hostName; + try { + hostName = InetAddress.getLocalHost().getHostName(); + } catch (UnknownHostException e) { + hostName = "UnknownHost"; + } + return new TracingContext(TracingContext.validateClientCorrelationID( + abfsConfiguration.getClientCorrelationId()), + hostName, FSOperationType.GET_ATTR, true, + abfsConfiguration.getTracingHeaderFormat(), + null, abfsCounters.toString()); + } + + /** + * Synchronized method to suspend or resume timer. + * @param timerFunctionality resume or suspend. 
+ * @param timerTask The timertask object. + * @return true or false. + */ + boolean timerOrchestrator(TimerFunctionality timerFunctionality, TimerTask timerTask) { + switch (timerFunctionality) { + case RESUME: + if (isMetricCollectionStopped.get()) { + synchronized (this) { + if (isMetricCollectionStopped.get()) { + resumeTimer(); + } + } + } + break; + case SUSPEND: + long now = System.currentTimeMillis(); + long lastExecutionTime = abfsCounters.getLastExecutionTime().get(); + if (isMetricCollectionEnabled && (now - lastExecutionTime >= metricAnalysisPeriod)) { + synchronized (this) { + if (!isMetricCollectionStopped.get()) { + timerTask.cancel(); + timer.purge(); + isMetricCollectionStopped.set(true); + return true; + } + } + } + break; + default: + break; + } + return false; + } + + private void resumeTimer() { + isMetricCollectionStopped.set(false); + timer.schedule(new TimerTaskImpl(), + metricIdlePeriod, + metricIdlePeriod); + } + + /** + * Initiates a metric call to the Azure Blob FileSystem (ABFS) for retrieving file system properties. + * This method performs a HEAD request to the specified metric URL, using default headers and query parameters. + * + * @param tracingContext The tracing context to be used for capturing tracing information. + * @throws IOException throws IOException. + */ + public void getMetricCall(TracingContext tracingContext) throws IOException { + this.isSendMetricCall = true; + final List requestHeaders = createDefaultHeaders(); + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM); + + final URL url = createRequestUrl(new URL(abfsMetricUrl), EMPTY_STRING, abfsUriQueryBuilder.toString()); + + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.GetFileSystemProperties, + HTTP_METHOD_HEAD, + url, + requestHeaders); + try { + op.execute(tracingContext); + } finally { + this.isSendMetricCall = false; + } + } + + public boolean isSendMetricCall() { + return isSendMetricCall; + } + + public boolean isMetricCollectionEnabled() { + return isMetricCollectionEnabled; + } + + class TimerTaskImpl extends TimerTask { + TimerTaskImpl() { + runningTimerTask = this; + } + @Override + public void run() { + try { + if (timerOrchestrator(TimerFunctionality.SUSPEND, this)) { + try { + getMetricCall(getMetricTracingContext()); + } finally { + abfsCounters.initializeMetrics(metricFormat); + } + } + } catch (IOException e) { + } + } + } + /** * Creates an AbfsRestOperation with additional parameters for buffer and SAS token. 
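Because the timer logic above is spread across timerOrchestrator, resumeTimer and TimerTaskImpl, here is a self-contained sketch of the same idle-suspend/resume pattern with simplified names (an editorial restatement, not the ABFS classes): every request stamps a last-activity time and, if collection was previously suspended, reschedules the task under double-checked locking, while the periodic task suspends itself, and would emit the accumulated metrics, once the client has been idle longer than the configured window.

    import java.util.Timer;
    import java.util.TimerTask;
    import java.util.concurrent.atomic.AtomicBoolean;
    import java.util.concurrent.atomic.AtomicLong;

    public class IdleSuspendTimerSketch {
      private final Timer timer = new Timer("metrics-timer", true);   // daemon, like "abfs-timer-client"
      private final AtomicBoolean suspended = new AtomicBoolean(false);
      private final AtomicLong lastActivity = new AtomicLong(System.currentTimeMillis());
      private final long idleMillis;

      public IdleSuspendTimerSketch(long idleMillis) {
        this.idleMillis = idleMillis;
        schedule();
      }

      /** Called on every request: record activity and resume collection if it was suspended. */
      public void onRequest() {
        lastActivity.set(System.currentTimeMillis());
        if (suspended.get()) {
          synchronized (this) {              // double-checked, as in timerOrchestrator(RESUME, ...)
            if (suspended.get()) {
              suspended.set(false);
              schedule();
            }
          }
        }
      }

      private void schedule() {
        timer.schedule(new TimerTask() {
          @Override
          public void run() {
            if (System.currentTimeMillis() - lastActivity.get() < idleMillis) {
              return;                        // still active, keep ticking
            }
            synchronized (IdleSuspendTimerSketch.this) {
              if (!suspended.get()) {
                cancel();                    // stop this TimerTask
                timer.purge();
                suspended.set(true);
                // here the real client sends the accumulated metrics (getMetricCall)
              }
            }
          }
        }, idleMillis, idleMillis);
      }
    }

In the patch itself the resume path is driven from AbfsRestOperation.execute(), which updates lastExecutionTime and calls timerOrchestrator(TimerFunctionality.RESUME, null) before every operation.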
* diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContext.java index 0a5182a69914d..baf79e7dd8b0c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientContext.java @@ -53,7 +53,7 @@ public AbfsPerfTracker getAbfsPerfTracker() { return abfsPerfTracker; } - public AbfsCounters getAbfsCounters() { + AbfsCounters getAbfsCounters() { return abfsCounters; } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsCounters.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsCounters.java index d01a3598afcf8..65e5fa29a138b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsCounters.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsCounters.java @@ -19,12 +19,15 @@ package org.apache.hadoop.fs.azurebfs.services; import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.azurebfs.AbfsBackoffMetrics; import org.apache.hadoop.fs.azurebfs.AbfsStatistic; +import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; import org.apache.hadoop.fs.statistics.DurationTracker; import org.apache.hadoop.fs.statistics.DurationTrackerFactory; import org.apache.hadoop.fs.statistics.IOStatisticsSource; @@ -74,4 +77,12 @@ String formString(String prefix, String separator, String suffix, */ @Override DurationTracker trackDuration(String key); + + void initializeMetrics(MetricFormat metricFormat); + + AbfsBackoffMetrics getAbfsBackoffMetrics(); + + AbfsReadFooterMetrics getAbfsReadFooterMetrics(); + + AtomicLong getLastExecutionTime(); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStream.java index 19c67a8358867..cacd3b092eb3f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStream.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStream.java @@ -110,15 +110,15 @@ public class AbfsInputStream extends FSInputStream implements CanUnbuffer, private int bCursorBkp; private long fCursorBkp; private long fCursorAfterLastReadBkp; - + private final AbfsReadFooterMetrics abfsReadFooterMetrics; /** Stream statistics. */ private final AbfsInputStreamStatistics streamStatistics; private long bytesFromReadAhead; // bytes read from readAhead; for testing private long bytesFromRemoteRead; // bytes read remotely; for testing private Listener listener; - private final AbfsInputStreamContext context; private IOStatistics ioStatistics; + private String filePathIdentifier; /** * This is the actual position within the object, used by * lazy seek to decide whether to seek on the next read or not. 
@@ -141,9 +141,6 @@ public AbfsInputStream( this.path = path; this.contentLength = contentLength; this.bufferSize = abfsInputStreamContext.getReadBufferSize(); - /* - * FooterReadSize should not be more than bufferSize. - */ this.footerReadSize = Math.min(bufferSize, abfsInputStreamContext.getFooterReadBufferSize()); this.readAheadQueueDepth = abfsInputStreamContext.getReadAheadQueueDepth(); this.tolerateOobAppends = abfsInputStreamContext.isTolerateOobAppends(); @@ -157,12 +154,19 @@ public AbfsInputStream( this.cachedSasToken = new CachedSASToken( abfsInputStreamContext.getSasTokenRenewPeriodForStreamsInSeconds()); this.streamStatistics = abfsInputStreamContext.getStreamStatistics(); + this.abfsReadFooterMetrics = client.getAbfsCounters().getAbfsReadFooterMetrics(); this.inputStreamId = createInputStreamId(); this.tracingContext = new TracingContext(tracingContext); this.tracingContext.setOperation(FSOperationType.READ); this.tracingContext.setStreamID(inputStreamId); this.context = abfsInputStreamContext; readAheadBlockSize = abfsInputStreamContext.getReadAheadBlockSize(); + if (abfsReadFooterMetrics != null) { + this.filePathIdentifier = eTag + path; + synchronized (this) { + abfsReadFooterMetrics.updateMap(filePathIdentifier); + } + } this.fsBackRef = abfsInputStreamContext.getFsBackRef(); contextEncryptionAdapter = abfsInputStreamContext.getEncryptionAdapter(); @@ -253,6 +257,9 @@ public synchronized int read(final byte[] b, final int off, final int len) throw // go back and read from buffer is fCursor - limit. // There maybe case that we read less than requested data. long filePosAtStartOfBuffer = fCursor - limit; + if (abfsReadFooterMetrics != null) { + abfsReadFooterMetrics.checkMetricUpdate(filePathIdentifier, len, contentLength, nextReadPos); + } if (nextReadPos >= filePosAtStartOfBuffer && nextReadPos <= fCursor) { // Determining position in buffer from where data is to be read. bCursor = (int) (nextReadPos - filePosAtStartOfBuffer); @@ -339,7 +346,6 @@ private int readOneBlock(final byte[] b, final int off, final int len) throws IO if (firstRead) { firstRead = false; } - if (bytesRead == -1) { return -1; } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java new file mode 100644 index 0000000000000..5abb97cd9ce03 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsReadFooterMetrics.java @@ -0,0 +1,549 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs.azurebfs.services; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.StringJoiner; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_KB; + +public class AbfsReadFooterMetrics { + private final AtomicBoolean isParquetFile; + private final AtomicBoolean isParquetEvaluated; + private final AtomicBoolean isLenUpdated; + private String sizeReadByFirstRead; + private String offsetDiffBetweenFirstAndSecondRead; + private final AtomicLong fileLength; + private double avgFileLength; + private double avgReadLenRequested; + private final AtomicBoolean collectMetrics; + private final AtomicBoolean collectMetricsForNextRead; + private final AtomicBoolean collectLenMetrics; + private final AtomicLong dataLenRequested; + private final AtomicLong offsetOfFirstRead; + private final AtomicInteger readCount; + private final ConcurrentSkipListMap metricsMap; + private static final String FOOTER_LENGTH = "20"; + + public AbfsReadFooterMetrics() { + this.isParquetFile = new AtomicBoolean(false); + this.isParquetEvaluated = new AtomicBoolean(false); + this.isLenUpdated = new AtomicBoolean(false); + this.fileLength = new AtomicLong(); + this.readCount = new AtomicInteger(0); + this.offsetOfFirstRead = new AtomicLong(); + this.collectMetrics = new AtomicBoolean(false); + this.collectMetricsForNextRead = new AtomicBoolean(false); + this.collectLenMetrics = new AtomicBoolean(false); + this.dataLenRequested = new AtomicLong(0); + this.metricsMap = new ConcurrentSkipListMap<>(); + } + + public Map getMetricsMap() { + return metricsMap; + } + + private boolean getIsParquetFile() { + return isParquetFile.get(); + } + + public void setIsParquetFile(boolean isParquetFile) { + this.isParquetFile.set(isParquetFile); + } + + private String getSizeReadByFirstRead() { + return sizeReadByFirstRead; + } + + public void setSizeReadByFirstRead(final String sizeReadByFirstRead) { + this.sizeReadByFirstRead = sizeReadByFirstRead; + } + + private String getOffsetDiffBetweenFirstAndSecondRead() { + return offsetDiffBetweenFirstAndSecondRead; + } + + public void setOffsetDiffBetweenFirstAndSecondRead(final String offsetDiffBetweenFirstAndSecondRead) { + this.offsetDiffBetweenFirstAndSecondRead + = offsetDiffBetweenFirstAndSecondRead; + } + + private long getFileLength() { + return fileLength.get(); + } + + private void setFileLength(long fileLength) { + this.fileLength.set(fileLength); + } + + private double getAvgFileLength() { + return avgFileLength; + } + + public void setAvgFileLength(final double avgFileLength) { + this.avgFileLength = avgFileLength; + } + + private double getAvgReadLenRequested() { + return avgReadLenRequested; + } + + public void setAvgReadLenRequested(final double avgReadLenRequested) { + this.avgReadLenRequested = avgReadLenRequested; + } + + private boolean getCollectMetricsForNextRead() { + return collectMetricsForNextRead.get(); + } + + private void setCollectMetricsForNextRead(boolean collectMetricsForNextRead) { + this.collectMetricsForNextRead.set(collectMetricsForNextRead); + } + + private long getOffsetOfFirstRead() { + return offsetOfFirstRead.get(); + } + + private void setOffsetOfFirstRead(long offsetOfFirstRead) { + this.offsetOfFirstRead.set(offsetOfFirstRead); + } + + private int 
getReadCount() { + return readCount.get(); + } + + private void setReadCount(int readCount) { + this.readCount.set(readCount); + } + + private int incrementReadCount() { + this.readCount.incrementAndGet(); + return getReadCount(); + } + + private boolean getCollectLenMetrics() { + return collectLenMetrics.get(); + } + + private void setCollectLenMetrics(boolean collectLenMetrics) { + this.collectLenMetrics.set(collectLenMetrics); + + } + + private long getDataLenRequested() { + return dataLenRequested.get(); + } + + private void setDataLenRequested(long dataLenRequested) { + this.dataLenRequested.set(dataLenRequested); + } + + private void updateDataLenRequested(long dataLenRequested){ + this.dataLenRequested.addAndGet(dataLenRequested); + } + + private boolean getCollectMetrics() { + return collectMetrics.get(); + } + + private void setCollectMetrics(boolean collectMetrics) { + this.collectMetrics.set(collectMetrics); + } + + private boolean getIsParquetEvaluated() { + return isParquetEvaluated.get(); + } + + private void setIsParquetEvaluated(boolean isParquetEvaluated) { + this.isParquetEvaluated.set(isParquetEvaluated); + } + + private boolean getIsLenUpdated() { + return isLenUpdated.get(); + } + + private void setIsLenUpdated(boolean isLenUpdated) { + this.isLenUpdated.set(isLenUpdated); + } + + /** + * Updates the metrics map with an entry for the specified file if it doesn't already exist. + * + * @param filePathIdentifier The unique identifier for the file. + */ + public void updateMap(String filePathIdentifier) { + // If the file is not already in the metrics map, add it with a new AbfsReadFooterMetrics object. + metricsMap.computeIfAbsent(filePathIdentifier, key -> new AbfsReadFooterMetrics()); + } + + /** + * Checks and updates metrics for a specific file identified by filePathIdentifier. + * If the metrics do not exist for the file, they are initialized. + * + * @param filePathIdentifier The unique identifier for the file. + * @param len The length of the read operation. + * @param contentLength The total content length of the file. + * @param nextReadPos The position of the next read operation. + */ + public void checkMetricUpdate(final String filePathIdentifier, final int len, final long contentLength, + final long nextReadPos) { + AbfsReadFooterMetrics readFooterMetrics = metricsMap.computeIfAbsent( + filePathIdentifier, key -> new AbfsReadFooterMetrics()); + if (readFooterMetrics.getReadCount() == 0 + || (readFooterMetrics.getReadCount() >= 1 + && readFooterMetrics.getCollectMetrics())) { + updateMetrics(filePathIdentifier, len, contentLength, nextReadPos); + } + } + + /** + * Updates metrics for a specific file identified by filePathIdentifier. + * + * @param filePathIdentifier The unique identifier for the file. + * @param len The length of the read operation. + * @param contentLength The total content length of the file. + * @param nextReadPos The position of the next read operation. + */ + private void updateMetrics(final String filePathIdentifier, final int len, final long contentLength, + final long nextReadPos) { + AbfsReadFooterMetrics readFooterMetrics = metricsMap.get(filePathIdentifier); + + // Create a new AbfsReadFooterMetrics object if it doesn't exist in the metricsMap. 
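A small aside on the concurrency model here: the per-file entries live in a ConcurrentSkipListMap and are created with computeIfAbsent, so two streams reading the same file (same eTag + path identifier) never race to install separate metric objects. A stripped-down sketch of that get-or-create pattern, with hypothetical names rather than the ABFS types:

    import java.util.concurrent.ConcurrentMap;
    import java.util.concurrent.ConcurrentSkipListMap;
    import java.util.concurrent.atomic.AtomicInteger;

    public class PerFileReadStatsSketch {
      /** One counter per file, keyed by eTag + path as AbfsInputStream does. */
      private final ConcurrentMap<String, AtomicInteger> readCounts =
          new ConcurrentSkipListMap<>();

      /** Get-or-create the counter for a file and record one read; returns the new count. */
      public int recordRead(String filePathIdentifier) {
        // computeIfAbsent installs exactly one counter per key, even under
        // concurrent first reads from several input streams.
        return readCounts
            .computeIfAbsent(filePathIdentifier, key -> new AtomicInteger())
            .incrementAndGet();
      }
    }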
+ if (readFooterMetrics == null) { + readFooterMetrics = new AbfsReadFooterMetrics(); + metricsMap.put(filePathIdentifier, readFooterMetrics); + } + + int readCount; + synchronized (this) { + readCount = readFooterMetrics.incrementReadCount(); + } + + if (readCount == 1) { + // Update metrics for the first read. + updateMetricsOnFirstRead(readFooterMetrics, nextReadPos, len, contentLength); + } + + synchronized (this) { + if (readFooterMetrics.getCollectLenMetrics()) { + readFooterMetrics.updateDataLenRequested(len); + } + } + + if (readCount == 2) { + // Update metrics for the second read. + updateMetricsOnSecondRead(readFooterMetrics, nextReadPos, len); + } + } + + /** + * Updates metrics for the first read operation. + * + * @param readFooterMetrics The metrics object to update. + * @param nextReadPos The position of the next read operation. + * @param len The length of the read operation. + * @param contentLength The total content length of the file. + */ + private void updateMetricsOnFirstRead(AbfsReadFooterMetrics readFooterMetrics, long nextReadPos, int len, long contentLength) { + if (nextReadPos >= contentLength - (long) Integer.parseInt(FOOTER_LENGTH) * ONE_KB) { + readFooterMetrics.setCollectMetrics(true); + readFooterMetrics.setCollectMetricsForNextRead(true); + readFooterMetrics.setOffsetOfFirstRead(nextReadPos); + readFooterMetrics.setSizeReadByFirstRead(len + "_" + Math.abs(contentLength - nextReadPos)); + readFooterMetrics.setFileLength(contentLength); + } + } + + /** + * Updates metrics for the second read operation. + * + * @param readFooterMetrics The metrics object to update. + * @param nextReadPos The position of the next read operation. + * @param len The length of the read operation. + */ + private void updateMetricsOnSecondRead(AbfsReadFooterMetrics readFooterMetrics, long nextReadPos, int len) { + if (readFooterMetrics.getCollectMetricsForNextRead()) { + long offsetDiff = Math.abs(nextReadPos - readFooterMetrics.getOffsetOfFirstRead()); + readFooterMetrics.setOffsetDiffBetweenFirstAndSecondRead(len + "_" + offsetDiff); + readFooterMetrics.setCollectLenMetrics(true); + } + } + + + /** + * Check if the given file should be marked as a Parquet file. + * + * @param metrics The metrics to evaluate. + * @return True if the file meet the criteria for being marked as a Parquet file, false otherwise. + */ + private boolean shouldMarkAsParquet(AbfsReadFooterMetrics metrics) { + return metrics.getCollectMetrics() + && metrics.getReadCount() >= 2 + && !metrics.getIsParquetEvaluated() + && haveEqualValues(metrics.getSizeReadByFirstRead()) + && haveEqualValues(metrics.getOffsetDiffBetweenFirstAndSecondRead()); + } + + /** + * Check if two values are equal, considering they are in the format "value1_value2". + * + * @param value The value to check. + * @return True if the two parts of the value are equal, false otherwise. + */ + private boolean haveEqualValues(String value) { + String[] parts = value.split("_"); + return parts.length == 2 && parts[0].equals(parts[1]); + } + + /** + * Mark the given metrics as a Parquet file and update related values. + * + * @param metrics The metrics to mark as Parquet. 
+ */ + private void markAsParquet(AbfsReadFooterMetrics metrics) { + metrics.setIsParquetFile(true); + String[] parts = metrics.getSizeReadByFirstRead().split("_"); + metrics.setSizeReadByFirstRead(parts[0]); + parts = metrics.getOffsetDiffBetweenFirstAndSecondRead().split("_"); + metrics.setOffsetDiffBetweenFirstAndSecondRead(parts[0]); + metrics.setIsParquetEvaluated(true); + } + + /** + * Check each metric in the provided map and mark them as Parquet files if they meet the criteria. + * + * @param metricsMap The map containing metrics to evaluate. + */ + public void checkIsParquet(Map metricsMap) { + for (Map.Entry entry : metricsMap.entrySet()) { + AbfsReadFooterMetrics readFooterMetrics = entry.getValue(); + if (shouldMarkAsParquet(readFooterMetrics)) { + markAsParquet(readFooterMetrics); + metricsMap.replace(entry.getKey(), readFooterMetrics); + } + } + } + + /** + * Updates the average read length requested for metrics of all files in the metrics map. + * If the metrics indicate that the update is needed, it calculates the average read length and updates the metrics. + * + * @param metricsMap A map containing metrics for different files with unique identifiers. + */ + private void updateLenRequested(Map metricsMap) { + for (AbfsReadFooterMetrics readFooterMetrics : metricsMap.values()) { + if (shouldUpdateLenRequested(readFooterMetrics)) { + int readReqCount = readFooterMetrics.getReadCount() - 2; + readFooterMetrics.setAvgReadLenRequested( + (double) readFooterMetrics.getDataLenRequested() / readReqCount); + readFooterMetrics.setIsLenUpdated(true); + } + } + } + + /** + * Checks whether the average read length requested should be updated for the given metrics. + * + * The method returns true if the following conditions are met: + * - Metrics collection is enabled. + * - The number of read counts is greater than 2. + * - The average read length has not been updated previously. + * + * @param readFooterMetrics The metrics object to evaluate. + * @return True if the average read length should be updated, false otherwise. + */ + private boolean shouldUpdateLenRequested(AbfsReadFooterMetrics readFooterMetrics) { + return readFooterMetrics.getCollectMetrics() + && readFooterMetrics.getReadCount() > 2 + && !readFooterMetrics.getIsLenUpdated(); + } + + /** + * Calculates the average metrics from a list of AbfsReadFooterMetrics and sets the values in the provided 'avgParquetReadFooterMetrics' object. + * + * @param isParquetList The list of AbfsReadFooterMetrics to compute the averages from. + * @param avgParquetReadFooterMetrics The target AbfsReadFooterMetrics object to store the computed average values. + * + * This method calculates various average metrics from the provided list and sets them in the 'avgParquetReadFooterMetrics' object. 
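To make the footer heuristic concrete: the first two reads on a stream are recorded as "<length>_<distance>" strings, and a file is later treated as Parquet-like when both halves of both strings match, meaning the reader asked for exactly the bytes between its seek position and the end of the file, which is the typical footer-read pattern. A compact sketch of that check under the same 20 KB footer window used above (illustrative helper names, not the patch code):

    public final class FooterReadHeuristicSketch {
      private static final long FOOTER_WINDOW_BYTES = 20L * 1024; // FOOTER_LENGTH * ONE_KB above

      /** A read counts as a footer read when it starts inside the last 20 KB of the file. */
      static boolean isFooterRead(long nextReadPos, long contentLength) {
        return nextReadPos >= contentLength - FOOTER_WINDOW_BYTES;
      }

      /** Encode a read as "<requestedLength>_<distance>"; mirrors the strings stored above. */
      static String encode(long requestedLength, long distance) {
        return requestedLength + "_" + distance;
      }

      /** Mirrors haveEqualValues: both halves of the encoded string must be equal. */
      static boolean haveEqualValues(String value) {
        String[] parts = value.split("_");
        return parts.length == 2 && parts[0].equals(parts[1]);
      }

      public static void main(String[] args) {
        long fileLen = 4_000_000L;
        long firstReadPos = fileLen - 8;                         // e.g. the 8-byte Parquet footer tail
        String firstRead = encode(8, fileLen - firstReadPos);    // "8_8"
        System.out.println(isFooterRead(firstReadPos, fileLen)); // true
        System.out.println(haveEqualValues(firstRead));          // true, consistent with a footer read
      }
    }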
+ * The metrics include: + * - Size read by the first read + * - Offset difference between the first and second read + * - Average file length + * - Average requested read length + */ + private void getParquetReadFooterMetricsAverage(List isParquetList, + AbfsReadFooterMetrics avgParquetReadFooterMetrics){ + avgParquetReadFooterMetrics.setSizeReadByFirstRead( + String.format("%.3f", isParquetList.stream() + .map(AbfsReadFooterMetrics::getSizeReadByFirstRead).mapToDouble( + Double::parseDouble).average().orElse(0.0))); + avgParquetReadFooterMetrics.setOffsetDiffBetweenFirstAndSecondRead( + String.format("%.3f", isParquetList.stream() + .map(AbfsReadFooterMetrics::getOffsetDiffBetweenFirstAndSecondRead) + .mapToDouble(Double::parseDouble).average().orElse(0.0))); + avgParquetReadFooterMetrics.setAvgFileLength(isParquetList.stream() + .mapToDouble(AbfsReadFooterMetrics::getFileLength).average().orElse(0.0)); + avgParquetReadFooterMetrics.setAvgReadLenRequested(isParquetList.stream(). + map(AbfsReadFooterMetrics::getAvgReadLenRequested). + mapToDouble(Double::doubleValue).average().orElse(0.0)); + } + + /** + * Calculates the average metrics from a list of non-Parquet AbfsReadFooterMetrics instances. + * + * This method takes a list of AbfsReadFooterMetrics representing non-Parquet reads and calculates + * the average values for the size read by the first read and the offset difference between the first + * and second read. The averages are then set in the provided AbfsReadFooterMetrics instance. + * + * @param isNonParquetList A list of AbfsReadFooterMetrics instances representing non-Parquet reads. + * @param avgNonParquetReadFooterMetrics The AbfsReadFooterMetrics instance to store the calculated averages. + * It is assumed that the size of the list is at least 1, and the first + * element of the list is used to determine the size of arrays. + * The instance is modified in-place with the calculated averages. 
+ * + * + **/ + private void getNonParquetReadFooterMetricsAverage(List isNonParquetList, + AbfsReadFooterMetrics avgNonParquetReadFooterMetrics) { + int size = isNonParquetList.get(0).getSizeReadByFirstRead().split("_").length; + double[] store = new double[2 * size]; + // Calculating sum of individual values + isNonParquetList.forEach(abfsReadFooterMetrics -> { + String[] firstReadSize = abfsReadFooterMetrics.getSizeReadByFirstRead().split("_"); + String[] offDiffFirstSecondRead = abfsReadFooterMetrics.getOffsetDiffBetweenFirstAndSecondRead().split("_"); + + for (int i = 0; i < firstReadSize.length; i++) { + store[i] += Long.parseLong(firstReadSize[i]); + store[i + size] += Long.parseLong(offDiffFirstSecondRead[i]); + } + }); + + // Calculating averages and creating formatted strings + StringJoiner firstReadSize = new StringJoiner("_"); + StringJoiner offDiffFirstSecondRead = new StringJoiner("_"); + + for (int j = 0; j < size; j++) { + firstReadSize.add(String.format("%.3f", store[j] / isNonParquetList.size())); + offDiffFirstSecondRead.add(String.format("%.3f", store[j + size] / isNonParquetList.size())); + } + + avgNonParquetReadFooterMetrics.setSizeReadByFirstRead(firstReadSize.toString()); + avgNonParquetReadFooterMetrics.setOffsetDiffBetweenFirstAndSecondRead(offDiffFirstSecondRead.toString()); + avgNonParquetReadFooterMetrics.setAvgFileLength(isNonParquetList.stream() + .mapToDouble(AbfsReadFooterMetrics::getFileLength).average().orElse(0.0)); + avgNonParquetReadFooterMetrics.setAvgReadLenRequested(isNonParquetList.stream() + .mapToDouble(AbfsReadFooterMetrics::getAvgReadLenRequested).average().orElse(0.0)); + } + + /* + Acronyms: + 1.FR :- First Read (In case of parquet we only maintain the size requested by application for + the first read, in case of non parquet we maintain a string separated by "_" delimiter where the first + substring represents the len requested for first read and the second substring represents the seek pointer difference from the + end of the file.) + 2.SR :- Second Read (In case of parquet we only maintain the size requested by application for + the second read, in case of non parquet we maintain a string separated by "_" delimiter where the first + substring represents the len requested for second read and the second substring represents the seek pointer difference from the + offset of the first read.) + 3.FL :- Total length of the file requested for read + */ + public String getReadFooterMetrics(AbfsReadFooterMetrics avgReadFooterMetrics) { + String readFooterMetric = ""; + if (avgReadFooterMetrics.getIsParquetFile()) { + readFooterMetric += "$Parquet:"; + } else { + readFooterMetric += "$NonParquet:"; + } + readFooterMetric += "$FR=" + avgReadFooterMetrics.getSizeReadByFirstRead() + + "$SR=" + + avgReadFooterMetrics.getOffsetDiffBetweenFirstAndSecondRead() + + "$FL=" + String.format("%.3f", + avgReadFooterMetrics.getAvgFileLength()) + + "$RL=" + String.format("%.3f", + avgReadFooterMetrics.getAvgReadLenRequested()); + return readFooterMetric; + } + +/** + * Retrieves and aggregates read footer metrics for both Parquet and non-Parquet files from a list + * of AbfsReadFooterMetrics instances. The function calculates the average metrics separately for + * Parquet and non-Parquet files and returns a formatted string containing the aggregated metrics. + * + * @param readFooterMetricsList A list of AbfsReadFooterMetrics instances containing read footer metrics + * for both Parquet and non-Parquet files. 
+ * + * @return A formatted string containing the aggregated read footer metrics for both Parquet and non-Parquet files. + * + **/ +private String getFooterMetrics(List readFooterMetricsList) { + List isParquetList = new ArrayList<>(); + List isNonParquetList = new ArrayList<>(); + for (AbfsReadFooterMetrics abfsReadFooterMetrics : readFooterMetricsList) { + if (abfsReadFooterMetrics.getIsParquetFile()) { + isParquetList.add(abfsReadFooterMetrics); + } else { + if (abfsReadFooterMetrics.getReadCount() >= 2) { + isNonParquetList.add(abfsReadFooterMetrics); + } + } + } + AbfsReadFooterMetrics avgParquetReadFooterMetrics = new AbfsReadFooterMetrics(); + AbfsReadFooterMetrics avgNonparquetReadFooterMetrics = new AbfsReadFooterMetrics(); + String readFooterMetric = ""; + if (!isParquetList.isEmpty()) { + avgParquetReadFooterMetrics.setIsParquetFile(true); + getParquetReadFooterMetricsAverage(isParquetList, avgParquetReadFooterMetrics); + readFooterMetric += getReadFooterMetrics(avgParquetReadFooterMetrics); + } + if (!isNonParquetList.isEmpty()) { + avgNonparquetReadFooterMetrics.setIsParquetFile(false); + getNonParquetReadFooterMetricsAverage(isNonParquetList, avgNonparquetReadFooterMetrics); + readFooterMetric += getReadFooterMetrics(avgNonparquetReadFooterMetrics); + } + return readFooterMetric; +} + + + @Override + public String toString() { + Map metricsMap = getMetricsMap(); + List readFooterMetricsList = new ArrayList<>(); + if (metricsMap != null && !(metricsMap.isEmpty())) { + checkIsParquet(metricsMap); + updateLenRequested(metricsMap); + for (Map.Entry entry : metricsMap.entrySet()) { + AbfsReadFooterMetrics abfsReadFooterMetrics = entry.getValue(); + if (abfsReadFooterMetrics.getCollectMetrics()) { + readFooterMetricsList.add(entry.getValue()); + } + } + } + String readFooterMetrics = ""; + if (!readFooterMetricsList.isEmpty()) { + readFooterMetrics = getFooterMetrics(readFooterMetricsList); + } + return readFooterMetrics; + } +} + diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java index 4abe9a574a872..c696bd8e68639 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java @@ -37,6 +37,12 @@ import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding; +import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; +import java.util.Map; +import org.apache.hadoop.fs.azurebfs.AbfsBackoffMetrics; + +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ZERO; +import static org.apache.hadoop.util.Time.now; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_CONTINUE; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.EGRESS_LIMIT_BREACH_ABBREVIATION; @@ -68,17 +74,20 @@ public class AbfsRestOperation { private final String sasToken; private static final Logger LOG = LoggerFactory.getLogger(AbfsClient.class); - + private static final Logger LOG1 = LoggerFactory.getLogger(AbfsRestOperation.class); // For uploads, this is the request entity body. For downloads, // this will hold the response entity body. 
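Putting the aggregation together, the footer metrics end up as a single '$'-delimited string that AbfsCountersImpl.toString() prefixes with #FO: (and #BO: for the backoff metrics) before it is attached to the metric call, presumably via the new x-ms-feclient-metrics header registered in HttpHeaderConfigurations. A rough sketch of the shape of that string for a Parquet-dominated workload, with made-up numbers purely for illustration:

    import java.util.Locale;

    public final class FooterMetricStringSketch {
      public static void main(String[] args) {
        // Illustrative values only; real numbers come from the averaging code above.
        double avgFirstReadLen = 8.0;                // FR
        double avgFirstToSecondReadOffset = 65536.0; // SR
        double avgFileLength = 123456789.0;          // FL
        double avgReadLenRequested = 1048576.0;      // RL
        String footerMetric = "$Parquet:"
            + "$FR=" + String.format(Locale.ROOT, "%.3f", avgFirstReadLen)
            + "$SR=" + String.format(Locale.ROOT, "%.3f", avgFirstToSecondReadOffset)
            + "$FL=" + String.format(Locale.ROOT, "%.3f", avgFileLength)
            + "$RL=" + String.format(Locale.ROOT, "%.3f", avgReadLenRequested);
        System.out.println("#FO:" + footerMetric);
        // prints: #FO:$Parquet:$FR=8.000$SR=65536.000$FL=123456789.000$RL=1048576.000
      }
    }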
private byte[] buffer; private int bufferOffset; private int bufferLength; private int retryCount = 0; - + private boolean isThrottledRequest = false; + private long maxRetryCount = 0L; + private final int maxIoRetries; private AbfsHttpOperation result; - private AbfsCounters abfsCounters; - + private final AbfsCounters abfsCounters; + private AbfsBackoffMetrics abfsBackoffMetrics; + private Map metricsMap; /** * This variable contains the reason of last API call within the same * AbfsRestOperation object. @@ -124,6 +133,11 @@ String getSasToken() { return sasToken; } + private static final int MIN_FIRST_RANGE = 1; + private static final int MAX_FIRST_RANGE = 5; + private static final int MAX_SECOND_RANGE = 15; + private static final int MAX_THIRD_RANGE = 25; + /** * Initializes a new REST operation. * @@ -165,6 +179,13 @@ String getSasToken() { || AbfsHttpConstants.HTTP_METHOD_PATCH.equals(method)); this.sasToken = sasToken; this.abfsCounters = client.getAbfsCounters(); + if (abfsCounters != null) { + this.abfsBackoffMetrics = abfsCounters.getAbfsBackoffMetrics(); + } + if (abfsBackoffMetrics != null) { + this.metricsMap = abfsBackoffMetrics.getMetricsMap(); + } + this.maxIoRetries = client.getAbfsConfiguration().getMaxIoRetries(); this.intercept = client.getIntercept(); this.retryPolicy = client.getExponentialRetryPolicy(); } @@ -196,7 +217,6 @@ String getSasToken() { this.buffer = buffer; this.bufferOffset = bufferOffset; this.bufferLength = bufferLength; - this.abfsCounters = client.getAbfsCounters(); } /** @@ -206,11 +226,12 @@ String getSasToken() { */ public void execute(TracingContext tracingContext) throws AzureBlobFileSystemException { - // Since this might be a sub-sequential or parallel rest operation // triggered by a single file system call, using a new tracing context. lastUsedTracingContext = createNewTracingContext(tracingContext); try { + abfsCounters.getLastExecutionTime().set(now()); + client.timerOrchestrator(TimerFunctionality.RESUME, null); IOStatisticsBinding.trackDurationOfInvocation(abfsCounters, AbfsStatistic.getStatNameFromHttpCall(method), () -> completeExecute(lastUsedTracingContext)); @@ -241,6 +262,12 @@ void completeExecute(TracingContext tracingContext) retryCount = 0; retryPolicy = client.getExponentialRetryPolicy(); LOG.debug("First execution of REST operation - {}", operationType); + long sleepDuration = 0L; + if (abfsBackoffMetrics != null) { + synchronized (this) { + abfsBackoffMetrics.incrementTotalNumberOfRequests(); + } + } while (!executeHttpOperation(retryCount, tracingContext)) { try { ++retryCount; @@ -248,12 +275,17 @@ void completeExecute(TracingContext tracingContext) long retryInterval = retryPolicy.getRetryInterval(retryCount); LOG.debug("Rest operation {} failed with failureReason: {}. 
Retrying with retryCount = {}, retryPolicy: {} and sleepInterval: {}", operationType, failureReason, retryCount, retryPolicy.getAbbreviation(), retryInterval); + if (abfsBackoffMetrics != null) { + updateBackoffTimeMetrics(retryCount, sleepDuration); + } Thread.sleep(retryInterval); } catch (InterruptedException ex) { Thread.currentThread().interrupt(); } } - + if (abfsBackoffMetrics != null) { + updateBackoffMetrics(retryCount, result.getStatusCode()); + } int status = result.getStatusCode(); /* If even after exhausting all retries, the http status code has an @@ -272,6 +304,30 @@ void completeExecute(TracingContext tracingContext) LOG.trace("{} REST operation complete", operationType); } + @VisibleForTesting + void updateBackoffMetrics(int retryCount, int statusCode) { + if (abfsBackoffMetrics != null) { + if (statusCode < HttpURLConnection.HTTP_OK + || statusCode >= HttpURLConnection.HTTP_INTERNAL_ERROR) { + synchronized (this) { + if (retryCount >= maxIoRetries) { + abfsBackoffMetrics.incrementNumberOfRequestsFailed(); + } + } + } else { + synchronized (this) { + if (retryCount > ZERO && retryCount <= maxIoRetries) { + maxRetryCount = Math.max(abfsBackoffMetrics.getMaxRetryCount(), retryCount); + abfsBackoffMetrics.setMaxRetryCount(maxRetryCount); + updateCount(retryCount); + } else { + abfsBackoffMetrics.incrementNumberOfRequestsSucceededWithoutRetrying(); + } + } + } + } + } + @VisibleForTesting String getClientLatency() { return client.getAbfsPerfTracker().getClientLatency(); @@ -315,7 +371,35 @@ private boolean executeHttpOperation(final int retryCount, } httpOperation.processResponse(buffer, bufferOffset, bufferLength); - incrementCounter(AbfsStatistic.GET_RESPONSES, 1); + if (!isThrottledRequest && httpOperation.getStatusCode() + >= HttpURLConnection.HTTP_INTERNAL_ERROR) { + isThrottledRequest = true; + AzureServiceErrorCode serviceErrorCode = + AzureServiceErrorCode.getAzureServiceCode( + httpOperation.getStatusCode(), + httpOperation.getStorageErrorCode(), + httpOperation.getStorageErrorMessage()); + LOG1.trace("Service code is " + serviceErrorCode + " status code is " + + httpOperation.getStatusCode() + " error code is " + + httpOperation.getStorageErrorCode() + + " error message is " + httpOperation.getStorageErrorMessage()); + if (abfsBackoffMetrics != null) { + synchronized (this) { + if (serviceErrorCode.equals( + AzureServiceErrorCode.INGRESS_OVER_ACCOUNT_LIMIT) + || serviceErrorCode.equals( + AzureServiceErrorCode.EGRESS_OVER_ACCOUNT_LIMIT)) { + abfsBackoffMetrics.incrementNumberOfBandwidthThrottledRequests(); + } else if (serviceErrorCode.equals( + AzureServiceErrorCode.TPS_OVER_ACCOUNT_LIMIT)) { + abfsBackoffMetrics.incrementNumberOfIOPSThrottledRequests(); + } else { + abfsBackoffMetrics.incrementNumberOfOtherThrottledRequests(); + } + } + } + } + incrementCounter(AbfsStatistic.GET_RESPONSES, 1); //Only increment bytesReceived counter when the status code is 2XX. if (httpOperation.getStatusCode() >= HttpURLConnection.HTTP_OK && httpOperation.getStatusCode() <= HttpURLConnection.HTTP_PARTIAL) { @@ -351,7 +435,13 @@ private boolean executeHttpOperation(final int retryCount, retryPolicy = client.getRetryPolicy(failureReason); LOG.warn("Unknown host name: {}. 
Retrying to resolve the host name...", hostname); + if (abfsBackoffMetrics != null) { + synchronized (this) { + abfsBackoffMetrics.incrementNumberOfNetworkFailedRequests(); + } + } if (!retryPolicy.shouldRetry(retryCount, -1)) { + updateBackoffMetrics(retryCount, httpOperation.getStatusCode()); throw new InvalidAbfsRestOperationException(ex, retryCount); } return false; @@ -360,13 +450,17 @@ private boolean executeHttpOperation(final int retryCount, if (LOG.isDebugEnabled()) { LOG.debug("HttpRequestFailure: {}, {}", httpOperation, ex); } - + if (abfsBackoffMetrics != null) { + synchronized (this) { + abfsBackoffMetrics.incrementNumberOfNetworkFailedRequests(); + } + } failureReason = RetryReason.getAbbreviation(ex, -1, ""); retryPolicy = client.getRetryPolicy(failureReason); if (!retryPolicy.shouldRetry(retryCount, -1)) { + updateBackoffMetrics(retryCount, httpOperation.getStatusCode()); throw new InvalidAbfsRestOperationException(ex, retryCount); } - return false; } finally { int statusCode = httpOperation.getStatusCode(); @@ -388,26 +482,30 @@ private boolean executeHttpOperation(final int retryCount, */ @VisibleForTesting public void signRequest(final AbfsHttpOperation httpOperation, int bytesToSign) throws IOException { - switch(client.getAuthType()) { - case Custom: - case OAuth: - LOG.debug("Authenticating request with OAuth2 access token"); - httpOperation.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, - client.getAccessToken()); - break; - case SAS: - // do nothing; the SAS token should already be appended to the query string - httpOperation.setMaskForSAS(); //mask sig/oid from url for logs - break; - case SharedKey: - default: - // sign the HTTP request - LOG.debug("Signing request with shared key"); - // sign the HTTP request - client.getSharedKeyCredentials().signRequest( - httpOperation.getConnection(), - bytesToSign); - break; + if (client.isSendMetricCall()) { + client.getMetricSharedkeyCredentials().signRequest(httpOperation.getConnection(), bytesToSign); + } else { + switch (client.getAuthType()) { + case Custom: + case OAuth: + LOG.debug("Authenticating request with OAuth2 access token"); + httpOperation.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, + client.getAccessToken()); + break; + case SAS: + // do nothing; the SAS token should already be appended to the query string + httpOperation.setMaskForSAS(); //mask sig/oid from url for logs + break; + case SharedKey: + default: + // sign the HTTP request + LOG.debug("Signing request with shared key"); + // sign the HTTP request + client.getSharedKeyCredentials().signRequest( + httpOperation.getConnection(), + bytesToSign); + break; + } } } @@ -436,6 +534,60 @@ private void incrementCounter(AbfsStatistic statistic, long value) { } } + /** + * Updates the count metrics based on the provided retry count. + * @param retryCount The retry count used to determine the metrics category. + * + * This method increments the number of succeeded requests for the specified retry count. + */ + private void updateCount(int retryCount){ + String retryCounter = getKey(retryCount); + metricsMap.get(retryCounter).incrementNumberOfRequestsSucceeded(); + } + + /** + * Updates backoff time metrics based on the provided retry count and sleep duration. + * @param retryCount The retry count used to determine the metrics category. + * @param sleepDuration The duration of sleep during backoff. 
+ * + * This method calculates and updates various backoff time metrics, including minimum, maximum, + * and total backoff time, as well as the total number of requests for the specified retry count. + */ + private void updateBackoffTimeMetrics(int retryCount, long sleepDuration) { + synchronized (this) { + String retryCounter = getKey(retryCount); + AbfsBackoffMetrics abfsBackoffMetrics = metricsMap.get(retryCounter); + long minBackoffTime = Math.min(abfsBackoffMetrics.getMinBackoff(), sleepDuration); + long maxBackoffForTime = Math.max(abfsBackoffMetrics.getMaxBackoff(), sleepDuration); + long totalBackoffTime = abfsBackoffMetrics.getTotalBackoff() + sleepDuration; + abfsBackoffMetrics.incrementTotalRequests(); + abfsBackoffMetrics.setMinBackoff(minBackoffTime); + abfsBackoffMetrics.setMaxBackoff(maxBackoffForTime); + abfsBackoffMetrics.setTotalBackoff(totalBackoffTime); + metricsMap.put(retryCounter, abfsBackoffMetrics); + } + } + + /** + * Generates a key based on the provided retry count to categorize metrics. + * + * @param retryCount The retry count used to determine the key. + * @return A string key representing the metrics category for the given retry count. + * + * This method categorizes retry counts into different ranges and assigns a corresponding key. + */ + private String getKey(int retryCount) { + if (retryCount >= MIN_FIRST_RANGE && retryCount < MAX_FIRST_RANGE) { + return Integer.toString(retryCount); + } else if (retryCount >= MAX_FIRST_RANGE && retryCount < MAX_SECOND_RANGE) { + return "5_15"; + } else if (retryCount >= MAX_SECOND_RANGE && retryCount < MAX_THIRD_RANGE) { + return "15_25"; + } else { + return "25AndAbove"; + } + } + /** * Updating Client Side Throttling Metrics for relevant response status codes. * Following criteria is used to decide based on status code and failure reason. diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/TimerFunctionality.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/TimerFunctionality.java index bf7da69ec4982..ca94c7f86ba34 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/TimerFunctionality.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/TimerFunctionality.java @@ -18,9 +18,11 @@ package org.apache.hadoop.fs.azurebfs.services; +/** + * Class for Timer Functionality. + */ public enum TimerFunctionality { RESUME, - SUSPEND } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/MetricFormat.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/MetricFormat.java new file mode 100644 index 0000000000000..48c216ff6e5bb --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/MetricFormat.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.azurebfs.utils; + +public enum MetricFormat { + INTERNAL_BACKOFF_METRIC_FORMAT, // :: + // : + + INTERNAL_FOOTER_METRIC_FORMAT, // :: + // : + + INTERNAL_METRIC_FORMAT, // :: + // :: + + EMPTY; + + @Override + public String toString() { + return this == EMPTY ? "" : this.name(); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java index 3c54c204dda92..b0a9a021c5e47 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java @@ -63,6 +63,8 @@ public class TracingContext { private Listener listener = null; // null except when testing //final concatenated ID list set into x-ms-client-request-id header private String header = EMPTY_STRING; + private String metricResults = EMPTY_STRING; + private String metricHeader = EMPTY_STRING; /** * If {@link #primaryRequestId} is null, this field shall be set equal @@ -112,6 +114,15 @@ public TracingContext(String clientCorrelationID, String fileSystemID, } } + public TracingContext(String clientCorrelationID, String fileSystemID, + FSOperationType opType, boolean needsPrimaryReqId, + TracingHeaderFormat tracingHeaderFormat, Listener listener, String metricResults) { + this(clientCorrelationID, fileSystemID, opType, needsPrimaryReqId, tracingHeaderFormat, + listener); + this.metricResults = metricResults; + } + + public TracingContext(TracingContext originalTracingContext) { this.fileSystemID = originalTracingContext.fileSystemID; this.streamID = originalTracingContext.streamID; @@ -123,8 +134,8 @@ public TracingContext(TracingContext originalTracingContext) { if (originalTracingContext.listener != null) { this.listener = originalTracingContext.listener.getClone(); } + this.metricResults = originalTracingContext.metricResults; } - public static String validateClientCorrelationID(String clientCorrelationID) { if ((clientCorrelationID.length() > MAX_CLIENT_CORRELATION_ID_LENGTH) || (!clientCorrelationID.matches(CLIENT_CORRELATION_ID_PATTERN))) { @@ -181,17 +192,24 @@ public void constructHeader(AbfsHttpOperation httpOperation, String previousFail + getPrimaryRequestIdForHeader(retryCount > 0) + ":" + streamID + ":" + opType + ":" + retryCount; header = addFailureReasons(header, previousFailure, retryPolicyAbbreviation); + metricHeader += !(metricResults.trim().isEmpty()) ? metricResults : ""; break; case TWO_ID_FORMAT: header = clientCorrelationID + ":" + clientRequestId; + metricHeader += !(metricResults.trim().isEmpty()) ? metricResults : ""; break; default: - header = clientRequestId; //case SINGLE_ID_FORMAT + //case SINGLE_ID_FORMAT + header = clientRequestId; + metricHeader += !(metricResults.trim().isEmpty()) ? 
metricResults : ""; } if (listener != null) { //for testing listener.callTracingHeaderValidator(header, format); } httpOperation.setRequestProperty(HttpHeaderConfigurations.X_MS_CLIENT_REQUEST_ID, header); + if (!metricHeader.equals(EMPTY_STRING)) { + httpOperation.setRequestProperty(HttpHeaderConfigurations.X_MS_FECLIENT_METRICS, metricHeader); + } /* * In case the primaryRequestId is an empty-string and if it is the first try to * API call (previousFailure shall be null), maintain the last part of clientRequestId's diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md index 008cb143542a4..c0e20dfe16e3f 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md @@ -1052,6 +1052,49 @@ Note that these performance numbers are also sent back to the ADLS Gen 2 API end in the `x-ms-abfs-client-latency` HTTP headers in subsequent requests. Azure uses these settings to track their end-to-end latency. +### Driver Metric Options + +Config `fs.azure.metric.format` provides an option to select the format of IDs included in the `header` for metrics. +This config accepts a String value corresponding to one of the following enum options: +`INTERNAL_METRIC_FORMAT` : backoff + footer metrics +`INTERNAL_BACKOFF_METRIC_FORMAT` : backoff metrics +`INTERNAL_FOOTER_METRIC_FORMAT` : footer metrics +`EMPTY` : default + +`fs.azure.metric.account.name`: This configuration parameter is used to specify the name of the account which will be +used to push the metrics to the backend. We can configure a separate account to push metrics to the store, or use the +same account as the existing one on which other requests are made. + +```xml +<property> +  <name>fs.azure.metric.account.name</name> +  <value>METRICACCOUNTNAME.dfs.core.windows.net</value> +</property> +``` + +`fs.azure.metric.account.key`: This is the access key for the storage account used for pushing metrics to the store. + +```xml +<property> +  <name>fs.azure.metric.account.key</name> +  <value>ACCOUNTKEY</value> +</property> +``` + +`fs.azure.metric.uri`: This configuration provides the URI in the format of 'https://`<account name>`.dfs.core.windows.net/`<container name>`'. This should be a part of the config in order to prevent extra calls to create +the filesystem. We use an existing filesystem to push the metrics.
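The same four properties can also be set programmatically on a Hadoop `Configuration` object. The snippet below is an illustrative sketch only: the class and method names are made up for the example, and the account, key, and container values reuse the placeholders from the XML snippets in this section. The XML form of `fs.azure.metric.uri` follows after the sketch.

```java
import org.apache.hadoop.conf.Configuration;

/** Illustrative helper only: builds a Configuration carrying the ABFS driver metric settings. */
public final class AbfsMetricConfigExample {

  public static Configuration withDriverMetrics() {
    Configuration conf = new Configuration();
    // Select which metrics are emitted in the request header (backoff metrics only, here).
    conf.set("fs.azure.metric.format", "INTERNAL_BACKOFF_METRIC_FORMAT");
    // Account and key used purely for pushing metrics; values are placeholders.
    conf.set("fs.azure.metric.account.name", "METRICACCOUNTNAME.dfs.core.windows.net");
    conf.set("fs.azure.metric.account.key", "ACCOUNTKEY");
    // Existing filesystem used to push metrics, avoiding extra create-filesystem calls.
    conf.set("fs.azure.metric.uri", "https://METRICACCOUNTNAME.dfs.core.windows.net/CONTAINERNAME");
    return conf;
  }
}
```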
+ +```xml +<property> +  <name>fs.azure.metric.uri</name> +  <value>https://METRICACCOUNTNAME.dfs.core.windows.net/CONTAINERNAME</value> +</property> +``` + ## Troubleshooting The problems associated with the connector usually come down to, in order diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsInputStreamStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsInputStreamStatistics.java index e8cbeb1255209..afc92c111a913 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsInputStreamStatistics.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsInputStreamStatistics.java @@ -34,7 +34,6 @@ import org.apache.hadoop.fs.statistics.IOStatistics; import org.apache.hadoop.fs.statistics.StoreStatisticNames; import org.apache.hadoop.io.IOUtils; - import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.extractStatistics; import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupMeanStatistic; import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToPrettyString; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadFooterMetrics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadFooterMetrics.java new file mode 100644 index 0000000000000..0071b90771c49 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadFooterMetrics.java @@ -0,0 +1,385 @@ + +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.fs.azurebfs; + +import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_INFO; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_READ_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_WRITE_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_KB; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_READ_BUFFER_SIZE; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; +import org.junit.Test; + +import java.io.IOException; +import java.util.Random; +import org.apache.hadoop.fs.azurebfs.services.AbfsReadFooterMetrics; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.statistics.IOStatisticsLogging; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.fs.azurebfs.services.AbfsInputStream; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; + +public class ITestAbfsReadFooterMetrics extends AbstractAbfsScaleTest { + + public ITestAbfsReadFooterMetrics() throws Exception { + } + + private static final String TEST_PATH = "/testfile"; + private static final String SLEEP_PERIOD = "90000"; + + /** + * Integration test for reading footer metrics with both Parquet and non-Parquet reads. + */ + @Test + public void testReadFooterMetricsWithParquetAndNonParquet() throws Exception { + testReadWriteAndSeek(8 * ONE_MB, DEFAULT_READ_BUFFER_SIZE, ONE_KB, 4 * ONE_KB); + } + + /** + * Configures the AzureBlobFileSystem with the given buffer size. + * + * @param bufferSize Buffer size to set for write and read operations. + * @return AbfsConfiguration used for configuration. + */ + private Configuration getConfiguration(int bufferSize) { + final Configuration configuration = getRawConfiguration(); + configuration.set(FS_AZURE_METRIC_FORMAT, String.valueOf(MetricFormat.INTERNAL_FOOTER_METRIC_FORMAT)); + configuration.setInt(AZURE_READ_BUFFER_SIZE, bufferSize); + configuration.setInt(AZURE_WRITE_BUFFER_SIZE, bufferSize); + return configuration; + } + + /** + * Writes data to the specified file path in the AzureBlobFileSystem. + * + * @param fs AzureBlobFileSystem instance. + * @param testPath Path to the file. + * @param data Data to write to the file. + */ + private void writeDataToFile(AzureBlobFileSystem fs, Path testPath, byte[] data) throws IOException { + FSDataOutputStream stream = fs.create(testPath); + try { + stream.write(data); + } finally { + stream.close(); + } + IOStatisticsLogging.logIOStatisticsAtLevel(LOG, IOSTATISTICS_LOGGING_LEVEL_INFO, stream); + } + + /** + * Asserts that the actual metrics obtained from the AzureBlobFileSystem match the expected metrics string. + * + * @param fs AzureBlobFileSystem instance. + * @param expectedMetrics Expected metrics string. 
+ */ + private void assertMetricsEquality(AzureBlobFileSystem fs, String expectedMetrics) { + AbfsReadFooterMetrics actualMetrics = fs.getAbfsClient().getAbfsCounters().getAbfsReadFooterMetrics(); + assertNotNull("AbfsReadFooterMetrics is null", actualMetrics); + assertEquals("The computed metrics differs from the actual metrics", expectedMetrics, actualMetrics.toString()); + } + + /** + * Test for reading footer metrics with a non-Parquet file. + */ + @Test + public void testReadFooterMetrics() throws Exception { + // Initialize AzureBlobFileSystem and set buffer size for configuration. + int bufferSize = MIN_BUFFER_SIZE; + Configuration configuration = getConfiguration(bufferSize); + final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(configuration); + AbfsConfiguration abfsConfiguration = fs.getAbfsStore().getAbfsConfiguration(); + + // Generate random data to write to the test file. + final byte[] b = new byte[2 * bufferSize]; + new Random().nextBytes(b); + + // Set up the test file path. + Path testPath = path(TEST_PATH); + + // Write random data to the test file. + writeDataToFile(fs, testPath, b); + + // Initialize a buffer for reading data. + final byte[] readBuffer = new byte[2 * bufferSize]; + int result; + + // Initialize statistics source for logging. + IOStatisticsSource statisticsSource = null; + + try (FSDataInputStream inputStream = fs.open(testPath)) { + // Register a listener for tracing header validation. + statisticsSource = inputStream; + ((AbfsInputStream) inputStream.getWrappedStream()).registerListener( + new TracingHeaderValidator(abfsConfiguration.getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.READ, true, 0, + ((AbfsInputStream) inputStream.getWrappedStream()) + .getStreamID())); + + // Perform the first read operation with seek. + inputStream.seek(bufferSize); + result = inputStream.read(readBuffer, bufferSize, bufferSize); + assertNotEquals(-1, result); + + // To test tracingHeader for case with bypassReadAhead == true + inputStream.seek(0); + byte[] temp = new byte[5]; + int t = inputStream.read(temp, 0, 1); + + // Seek back to the beginning and perform another read operation. + inputStream.seek(0); + result = inputStream.read(readBuffer, 0, bufferSize); + } + + // Log IO statistics at the INFO level. + IOStatisticsLogging.logIOStatisticsAtLevel(LOG, + IOSTATISTICS_LOGGING_LEVEL_INFO, statisticsSource); + + // Ensure data is read successfully and matches the written data. + assertNotEquals("data read in final read()", -1, result); + assertArrayEquals(readBuffer, b); + + // Get non-Parquet metrics and assert metrics equality. + AbfsReadFooterMetrics nonParquetMetrics = getNonParquetMetrics(); + String metrics = nonParquetMetrics.getReadFooterMetrics(nonParquetMetrics); + assertMetricsEquality(fs, metrics); + + // Close the AzureBlobFileSystem. + fs.close(); + } + + /** + * Generates and returns an instance of AbfsReadFooterMetrics for non-Parquet files. 
+ */ + private AbfsReadFooterMetrics getNonParquetMetrics() { + AbfsReadFooterMetrics nonParquetMetrics = new AbfsReadFooterMetrics(); + nonParquetMetrics.setIsParquetFile(false); + nonParquetMetrics.setSizeReadByFirstRead("16384.000_16384.000"); + nonParquetMetrics.setOffsetDiffBetweenFirstAndSecondRead("1.000_16384.000"); + nonParquetMetrics.setAvgFileLength(Double.parseDouble("32768.000")); + nonParquetMetrics.setAvgReadLenRequested(Double.parseDouble("16384.000")); + return nonParquetMetrics; + } + + /** + * Generates and returns an instance of AbfsReadFooterMetrics for parquet files. + */ + private AbfsReadFooterMetrics getParquetMetrics() { + AbfsReadFooterMetrics parquetMetrics = new AbfsReadFooterMetrics(); + parquetMetrics.setIsParquetFile(true); + parquetMetrics.setSizeReadByFirstRead("1024.000"); + parquetMetrics.setOffsetDiffBetweenFirstAndSecondRead("4096.000"); + parquetMetrics.setAvgFileLength(Double.parseDouble("8388608.000")); + parquetMetrics.setAvgReadLenRequested(0.000); + return parquetMetrics; + } + + /** + * Test for reading, writing, and seeking with footer metrics. + * + * This method performs the integration test for reading, writing, and seeking operations + * with footer metrics. It creates an AzureBlobFileSystem, configures it, writes random data + * to a test file, performs read and seek operations, and checks the footer metrics for both + * Parquet and non-Parquet scenarios. + * + * @param fileSize Size of the test file. + * @param bufferSize Size of the buffer used for read and write operations. + * @param seek1 The position to seek to in the test file. + * @param seek2 Additional position to seek to in the test file (if not 0). + */ + private void testReadWriteAndSeek(int fileSize, int bufferSize, Integer seek1, Integer seek2) throws Exception { + // Create an AzureBlobFileSystem instance. + Configuration configuration = getConfiguration(bufferSize); + final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(configuration); + AbfsConfiguration abfsConfiguration = fs.getAbfsStore().getAbfsConfiguration(); + + // Generate random data to write to the test file. + final byte[] b = new byte[fileSize]; + new Random().nextBytes(b); + + // Define the path for the test file. + Path testPath = path("/testfile"); + + // Write the random data to the test file. + writeDataToFile(fs, testPath, b); + + // Initialize a buffer for reading. + final byte[] readBuffer = new byte[fileSize]; + + // Initialize a source for IO statistics. + IOStatisticsSource statisticsSource = null; + + // Open an input stream for the test file. + FSDataInputStream inputStream = fs.open(testPath); + statisticsSource = inputStream; + + // Register a listener for tracing headers. + ((AbfsInputStream) inputStream.getWrappedStream()).registerListener( + new TracingHeaderValidator(abfsConfiguration.getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.READ, true, 0, + ((AbfsInputStream) inputStream.getWrappedStream()) + .getStreamID())); + + // Seek to the specified position in the test file and read data. + inputStream.seek(fileSize - seek1); + inputStream.read(readBuffer, 0, seek1); + + // If seek2 is non-zero, perform an additional seek and read. + if (seek2 != 0) { + inputStream.seek(fileSize - seek1 - seek2); + inputStream.read(readBuffer, 0, seek2); + } + + // Close the input stream. + inputStream.close(); + + // Set a new buffer size for read and write operations. 
+ int bufferSize1 = MIN_BUFFER_SIZE; + abfsConfiguration.setWriteBufferSize(bufferSize1); + abfsConfiguration.setReadBufferSize(bufferSize1); + + // Generate new random data for a second test file. + final byte[] b1 = new byte[2 * bufferSize1]; + new Random().nextBytes(b1); + + // Define the path for the second test file. + Path testPath1 = path("/testfile1"); + + // Write the new random data to the second test file. + writeDataToFile(fs, testPath1, b1); + + // Initialize a buffer for reading from the second test file. + final byte[] readBuffer1 = new byte[2 * bufferSize1]; + + // Open an input stream for the second test file. + FSDataInputStream inputStream1 = fs.open(testPath1); + statisticsSource = inputStream1; + + // Register a listener for tracing headers. + ((AbfsInputStream) inputStream1.getWrappedStream()).registerListener( + new TracingHeaderValidator(abfsConfiguration.getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.READ, true, 0, + ((AbfsInputStream) inputStream1.getWrappedStream()) + .getStreamID())); + + // Seek to a position in the second test file and read data. + inputStream1.seek(bufferSize1); + inputStream1.read(readBuffer1, bufferSize1, bufferSize1); + + // To test tracingHeader for case with bypassReadAhead == true. + inputStream1.seek(0); + byte[] temp = new byte[5]; + int t = inputStream1.read(temp, 0, 1); + + // Seek to the beginning of the second test file and read data. + inputStream1.seek(0); + inputStream1.read(readBuffer1, 0, bufferSize1); + + // Close the input stream for the second test file. + inputStream1.close(); + + // Get footer metrics for both Parquet and non-Parquet scenarios. + AbfsReadFooterMetrics parquetMetrics = getParquetMetrics(); + AbfsReadFooterMetrics nonParquetMetrics = getNonParquetMetrics(); + + // Concatenate and assert the metrics equality. + String metrics = parquetMetrics.getReadFooterMetrics(parquetMetrics); + metrics += nonParquetMetrics.getReadFooterMetrics(nonParquetMetrics); + assertMetricsEquality(fs, metrics); + + // Close the AzureBlobFileSystem instance. + fs.close(); + } + + /** + * Test for reading footer metrics with an idle period. + * + * This method tests reading footer metrics with an idle period. It creates an AzureBlobFileSystem, + * configures it, writes random data to a test file, performs read operations, introduces an idle + * period, and checks the footer metrics for non-Parquet scenarios. + * + */ + @Test + public void testMetricWithIdlePeriod() throws Exception { + // Set the buffer size for the test. + int bufferSize = MIN_BUFFER_SIZE; + Configuration configuration = getConfiguration(bufferSize); + final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(configuration); + AbfsConfiguration abfsConfiguration = fs.getAbfsStore().getAbfsConfiguration(); + + // Generate random data to write to the test file. + final byte[] b = new byte[2 * bufferSize]; + new Random().nextBytes(b); + + // Define the path for the test file. + Path testPath = path(TEST_PATH); + + // Write the random data to the test file. + writeDataToFile(fs, testPath, b); + + // Initialize a buffer for reading. + final byte[] readBuffer = new byte[2 * bufferSize]; + + // Initialize a source for IO statistics. + IOStatisticsSource statisticsSource = null; + + // Open an input stream for the test file. + try (FSDataInputStream inputStream = fs.open(testPath)) { + // Register a listener for tracing headers. 
+ ((AbfsInputStream) inputStream.getWrappedStream()).registerListener( + new TracingHeaderValidator(abfsConfiguration.getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.READ, true, 0, + ((AbfsInputStream) inputStream.getWrappedStream()) + .getStreamID())); + + // Seek to the specified position in the test file and read data. + inputStream.seek(bufferSize); + inputStream.read(readBuffer, bufferSize, bufferSize); + + // Introduce an idle period by sleeping. + int sleepPeriod = Integer.parseInt(SLEEP_PERIOD); + Thread.sleep(sleepPeriod); + + // To test tracingHeader for case with bypassReadAhead == true. + inputStream.seek(0); + byte[] temp = new byte[5]; + int t = inputStream.read(temp, 0, 1); + + // Seek to the beginning of the test file and read data. + inputStream.seek(0); + inputStream.read(readBuffer, 0, bufferSize); + + // Get and assert the footer metrics for non-Parquet scenarios. + AbfsReadFooterMetrics nonParquetMetrics = getNonParquetMetrics(); + String metrics = nonParquetMetrics.getReadFooterMetrics(nonParquetMetrics); + assertMetricsEquality(fs, metrics); + + // Introduce an additional idle period by sleeping. + Thread.sleep(sleepPeriod); + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java index b374193e9bc91..8cdd355e00791 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemListStatus.java @@ -80,8 +80,8 @@ public ITestAzureBlobFileSystemListStatus() throws Exception { public void testListPath() throws Exception { Configuration config = new Configuration(this.getRawConfiguration()); config.set(AZURE_LIST_MAX_RESULTS, "5000"); - try (final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem - .newInstance(getFileSystem().getUri(), config)) { + final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem + .newInstance(getFileSystem().getUri(), config); final List> tasks = new ArrayList<>(); ExecutorService es = Executors.newFixedThreadPool(10); @@ -108,7 +108,10 @@ public Void call() throws Exception { fs.getFileSystemId(), FSOperationType.LISTSTATUS, true, 0)); FileStatus[] files = fs.listStatus(new Path("/")); assertEquals(TEST_FILES_NUMBER, files.length /* user directory */); - } + fs.registerListener( + new TracingHeaderValidator(getConfiguration().getClientCorrelationId(), + fs.getFileSystemId(), FSOperationType.GET_ATTR, true, 0)); + fs.close(); } /** diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java index b1b093d67063e..2b60cb57fdf39 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java @@ -20,6 +20,8 @@ import java.io.IOException; import java.net.HttpURLConnection; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.util.ArrayList; import java.util.HashSet; @@ -27,6 +29,7 @@ import java.util.concurrent.locks.ReentrantLock; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import 
org.apache.hadoop.fs.azurebfs.AbfsCountersImpl; import org.assertj.core.api.Assertions; import org.mockito.AdditionalMatchers; import org.mockito.Mockito; @@ -112,9 +115,13 @@ public static void addGeneralMockBehaviourToRestOpAndHttpOp(final AbfsRestOperat public static void addGeneralMockBehaviourToAbfsClient(final AbfsClient abfsClient, final ExponentialRetryPolicy exponentialRetryPolicy, final StaticRetryPolicy staticRetryPolicy, - final AbfsThrottlingIntercept intercept) throws IOException { + final AbfsThrottlingIntercept intercept) throws IOException, URISyntaxException { Mockito.doReturn(OAuth).when(abfsClient).getAuthType(); Mockito.doReturn("").when(abfsClient).getAccessToken(); + AbfsConfiguration abfsConfiguration = Mockito.mock(AbfsConfiguration.class); + Mockito.doReturn(abfsConfiguration).when(abfsClient).getAbfsConfiguration(); + AbfsCounters abfsCounters = Mockito.spy(new AbfsCountersImpl(new URI("abcd"))); + Mockito.doReturn(abfsCounters).when(abfsClient).getAbfsCounters(); Mockito.doReturn(intercept).when(abfsClient).getIntercept(); Mockito.doNothing() diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java index c16bbf7c536f7..ca2ea92388d97 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java @@ -22,11 +22,14 @@ import java.lang.reflect.Field; import java.net.HttpURLConnection; import java.net.ProtocolException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.util.List; import java.util.Random; import java.util.regex.Pattern; +import org.apache.hadoop.fs.azurebfs.AbfsCountersImpl; import org.assertj.core.api.Assertions; import org.junit.Test; import org.mockito.Mockito; @@ -133,8 +136,9 @@ public ITestAbfsClient() throws Exception { } private String getUserAgentString(AbfsConfiguration config, - boolean includeSSLProvider) throws IOException { - AbfsClientContext abfsClientContext = new AbfsClientContextBuilder().build(); + boolean includeSSLProvider) throws IOException, URISyntaxException { + AbfsCounters abfsCounters = Mockito.spy(new AbfsCountersImpl(new URI("abcd"))); + AbfsClientContext abfsClientContext = new AbfsClientContextBuilder().withAbfsCounters(abfsCounters).build(); AbfsClient client = new AbfsClient(new URL("https://azure.com"), null, config, (AccessTokenProvider) null, null, abfsClientContext); String sslProviderName = null; @@ -175,7 +179,7 @@ private void verifyBasicInfo(String userAgentStr) { @Test public void verifyUserAgentPrefix() - throws IOException, IllegalAccessException { + throws IOException, IllegalAccessException, URISyntaxException { final Configuration configuration = new Configuration(); configuration.addResource(TEST_CONFIGURATION_FILE_NAME); configuration.set(ConfigurationKeys.FS_AZURE_USER_AGENT_PREFIX_KEY, FS_AZURE_USER_AGENT_PREFIX); @@ -209,7 +213,7 @@ public void verifyUserAgentPrefix() */ @Test public void verifyUserAgentExpectHeader() - throws IOException, IllegalAccessException { + throws IOException, IllegalAccessException, URISyntaxException { final Configuration configuration = new Configuration(); configuration.addResource(TEST_CONFIGURATION_FILE_NAME); configuration.set(ConfigurationKeys.FS_AZURE_USER_AGENT_PREFIX_KEY, FS_AZURE_USER_AGENT_PREFIX); @@ -315,18 
+319,20 @@ public void verifyUserAgentClusterType() throws Exception { public static AbfsClient createTestClientFromCurrentContext( AbfsClient baseAbfsClientInstance, - AbfsConfiguration abfsConfig) throws IOException { + AbfsConfiguration abfsConfig) throws IOException, URISyntaxException { AuthType currentAuthType = abfsConfig.getAuthType( abfsConfig.getAccountName()); AbfsPerfTracker tracker = new AbfsPerfTracker("test", abfsConfig.getAccountName(), abfsConfig); + AbfsCounters abfsCounters = Mockito.spy(new AbfsCountersImpl(new URI("abcd"))); AbfsClientContext abfsClientContext = new AbfsClientContextBuilder().withAbfsPerfTracker(tracker) .withExponentialRetryPolicy( new ExponentialRetryPolicy(abfsConfig.getMaxIoRetries())) + .withAbfsCounters(abfsCounters) .build(); // Create test AbfsClient @@ -352,6 +358,7 @@ public static AbfsClient getMockAbfsClient(AbfsClient baseAbfsClientInstance, AbfsConfiguration abfsConfig) throws Exception { AuthType currentAuthType = abfsConfig.getAuthType( abfsConfig.getAccountName()); + AbfsCounters abfsCounters = Mockito.spy(new AbfsCountersImpl(new URI("abcd"))); org.junit.Assume.assumeTrue( (currentAuthType == AuthType.SharedKey) @@ -372,14 +379,18 @@ public static AbfsClient getMockAbfsClient(AbfsClient baseAbfsClientInstance, when(client.createDefaultUriQueryBuilder()).thenCallRealMethod(); when(client.createRequestUrl(any(), any())).thenCallRealMethod(); + when(client.createRequestUrl(any(), any(), any())).thenCallRealMethod(); when(client.getAccessToken()).thenCallRealMethod(); when(client.getSharedKeyCredentials()).thenCallRealMethod(); when(client.createDefaultHeaders()).thenCallRealMethod(); when(client.getAbfsConfiguration()).thenReturn(abfsConfig); + when(client.getIntercept()).thenReturn( AbfsThrottlingInterceptFactory.getInstance( abfsConfig.getAccountName().substring(0, abfsConfig.getAccountName().indexOf(DOT)), abfsConfig)); + when(client.getAbfsCounters()).thenReturn(abfsCounters); + // override baseurl client = ITestAbfsClient.setAbfsClientField(client, "abfsConfiguration", abfsConfig); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java index 9027e56c9cd61..e4ed9881ffa4f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsInputStream.java @@ -19,11 +19,14 @@ package org.apache.hadoop.fs.azurebfs.services; import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; import java.util.Arrays; import java.util.Optional; import java.util.Random; import java.util.concurrent.ExecutionException; +import org.apache.hadoop.fs.azurebfs.AbfsCountersImpl; import org.assertj.core.api.Assertions; import org.junit.Assert; import org.junit.Test; @@ -98,9 +101,11 @@ private AbfsRestOperation getMockRestOp() { return op; } - private AbfsClient getMockAbfsClient() { + private AbfsClient getMockAbfsClient() throws URISyntaxException { // Mock failure for client.read() AbfsClient client = mock(AbfsClient.class); + AbfsCounters abfsCounters = Mockito.spy(new AbfsCountersImpl(new URI("abcd"))); + Mockito.doReturn(abfsCounters).when(client).getAbfsCounters(); AbfsPerfTracker tracker = new AbfsPerfTracker( "test", this.getAccountName(), diff --git 
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java new file mode 100644 index 0000000000000..683528b9c54d1 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java @@ -0,0 +1,81 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; +import org.junit.Test; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_DELETE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; +import static org.apache.hadoop.fs.azurebfs.services.AbfsRestOperationType.DeletePath; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import java.util.ArrayList; +import java.util.Arrays; +import org.junit.Assert; +import java.net.HttpURLConnection; + +public class TestAbfsRestOperation extends + AbstractAbfsIntegrationTest { + + public TestAbfsRestOperation() throws Exception { + } + + /** + * Test for backoff retry metrics. + * + * This method tests backoff retry metrics by creating an AzureBlobFileSystem, initializing an + * AbfsClient, and performing mock operations on an AbfsRestOperation. The method then updates + * backoff metrics using the AbfsRestOperation. + * + */ + @Test + public void testBackoffRetryMetrics() throws Exception { + // Create an AzureBlobFileSystem instance. + final Configuration configuration = getRawConfiguration(); + configuration.set(FS_AZURE_METRIC_FORMAT, String.valueOf(MetricFormat.INTERNAL_BACKOFF_METRIC_FORMAT)); + final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(configuration); + AbfsConfiguration abfsConfiguration = fs.getAbfsStore().getAbfsConfiguration(); + + // Get an instance of AbfsClient and AbfsRestOperation. + AbfsClient testClient = super.getAbfsClient(super.getAbfsStore(fs)); + AbfsRestOperation op = ITestAbfsClient.getRestOp( + DeletePath, testClient, HTTP_METHOD_DELETE, + ITestAbfsClient.getTestUrl(testClient, "/NonExistingPath"), ITestAbfsClient.getTestRequestHeaders(testClient)); + + // Mock retry counts and status code. + ArrayList retryCounts = new ArrayList<>(Arrays.asList("35", "28", "31", "45", "10", "2", "9")); + int statusCode = HttpURLConnection.HTTP_UNAVAILABLE; + + // Update backoff metrics. 
+ for (String retryCount : retryCounts) { + op.updateBackoffMetrics(Integer.parseInt(retryCount), statusCode); + } + + // For retry count greater than the max configured value, the request should fail. + Assert.assertEquals("Number of failed requests does not match expected value.", + "3", String.valueOf(testClient.getAbfsCounters().getAbfsBackoffMetrics().getNumberOfRequestsFailed())); + + // Close the AzureBlobFileSystem. + fs.close(); + } + +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java index 078b42cf0db1a..966b34f872a8b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java @@ -23,7 +23,6 @@ import java.net.SocketTimeoutException; import java.net.UnknownHostException; import java.util.ArrayList; - import org.assertj.core.api.Assertions; import org.junit.Test; import org.mockito.Mockito; @@ -308,7 +307,7 @@ private void testClientRequestIdForStatusRetry(int status, int[] statusCount = new int[1]; statusCount[0] = 0; Mockito.doAnswer(answer -> { - if (statusCount[0] <= 5) { + if (statusCount[0] <= 10) { statusCount[0]++; return status; } From f5c5d35eb01dbaebe2f8413bd1cabc727aa7a133 Mon Sep 17 00:00:00 2001 From: Felix Nguyen <23214709+kokonguyen191@users.noreply.github.com> Date: Fri, 24 May 2024 09:41:08 +0800 Subject: [PATCH 007/113] HDFS-17529. RBF: Improve router state store cache entry deletion (#6833) --- .../federation/store/CachedRecordStore.java | 20 +++--- .../driver/StateStoreRecordOperations.java | 25 ++++++++ .../store/driver/impl/StateStoreBaseImpl.java | 36 +++++++++++ .../driver/impl/StateStoreZooKeeperImpl.java | 63 +++++++++++++++---- .../store/driver/TestStateStoreZK.java | 21 +++++-- 5 files changed, 138 insertions(+), 27 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java index 59da6145352a2..3a2995eba2a6d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java @@ -21,6 +21,7 @@ import java.util.ArrayList; import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -172,7 +173,7 @@ private boolean isUpdateTime() { */ public void overrideExpiredRecords(QueryResult query) throws IOException { List commitRecords = new ArrayList<>(); - List deleteRecords = new ArrayList<>(); + List toDeleteRecords = new ArrayList<>(); List newRecords = query.getRecords(); long currentDriverTime = query.getTimestamp(); if (newRecords == null || currentDriverTime <= 0) { @@ -182,13 +183,8 @@ public void overrideExpiredRecords(QueryResult query) throws IOException { for (R record : newRecords) { if (record.shouldBeDeleted(currentDriverTime)) { String recordName = StateStoreUtils.getRecordName(record.getClass()); - if 
(getDriver().remove(record)) { - deleteRecords.add(record); - LOG.info("Deleted State Store record {}: {}", recordName, record); - } else { - LOG.warn("Couldn't delete State Store record {}: {}", recordName, - record); - } + LOG.info("State Store record to delete {}: {}", recordName, record); + toDeleteRecords.add(record); } else if (!record.isExpired() && record.checkExpired(currentDriverTime)) { String recordName = StateStoreUtils.getRecordName(record.getClass()); LOG.info("Override State Store record {}: {}", recordName, record); @@ -198,8 +194,12 @@ public void overrideExpiredRecords(QueryResult query) throws IOException { if (commitRecords.size() > 0) { getDriver().putAll(commitRecords, true, false); } - if (deleteRecords.size() > 0) { - newRecords.removeAll(deleteRecords); + if (!toDeleteRecords.isEmpty()) { + for (Map.Entry entry : getDriver().removeMultiple(toDeleteRecords).entrySet()) { + if (entry.getValue()) { + newRecords.remove(entry.getKey()); + } + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreRecordOperations.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreRecordOperations.java index 716f41daf4dd9..97f6c680a4b5b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreRecordOperations.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreRecordOperations.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.util.List; +import java.util.Map; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -127,6 +128,17 @@ StateStoreOperationResult putAll( @AtMostOnce boolean remove(T record) throws IOException; + /** + * Remove multiple records. + * + * @param Record class of the records. + * @param records Records to be removed. + * @return Map of record to a boolean indicating whether the record has been removed successfully. + * @throws IOException Throws exception if unable to query the data store. + */ + @AtMostOnce + Map removeMultiple(List records) throws IOException; + /** * Remove all records of this class from the store. * @@ -152,4 +164,17 @@ StateStoreOperationResult putAll( int remove(Class clazz, Query query) throws IOException; + /** + * Remove all records of a specific class that match any query in a list of queries. + * Requires the getAll implementation to fetch fresh records on each call. + * + * @param clazz The class to match the records with. + * @param queries Queries (logical OR) to filter what to remove. + * @param Record class of the records. + * @return Map of query to number of records removed by that query. + * @throws IOException Throws exception if unable to query the data store.
+ */ + @AtMostOnce + Map, Integer> remove(Class clazz, List> queries) + throws IOException; } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreBaseImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreBaseImpl.java index df3ce21dee277..93ad279e187cc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreBaseImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreBaseImpl.java @@ -21,7 +21,10 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -86,4 +89,37 @@ public boolean remove(T record) throws IOException { Class recordClass = (Class)StateStoreUtils.getRecordClass(clazz); return remove(recordClass, query) == 1; } + + @Override + public Map removeMultiple(List records) throws IOException { + assert !records.isEmpty(); + // Fall back to iterative remove() calls if all records don't share 1 class + Class expectedClazz = records.get(0).getClass(); + if (!records.stream().allMatch(x -> x.getClass() == expectedClazz)) { + Map result = new HashMap<>(); + for (T record : records) { + result.put(record, remove(record)); + } + return result; + } + + final List> queries = new ArrayList<>(); + for (T record : records) { + queries.add(new Query<>(record)); + } + @SuppressWarnings("unchecked") + Class recordClass = (Class) StateStoreUtils.getRecordClass(expectedClazz); + Map, Integer> result = remove(recordClass, queries); + return result.entrySet().stream() + .collect(Collectors.toMap(e -> e.getKey().getPartial(), e -> e.getValue() == 1)); + } + + public Map, Integer> remove(Class clazz, + List> queries) throws IOException { + Map, Integer> result = new HashMap<>(); + for (Query query : queries) { + result.put(query, remove(clazz, query)); + } + return result; + } } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java index 19a23cb0225a4..0e72cf417565c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java @@ -25,7 +25,11 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; +import java.util.Map; +import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.Future; import java.util.concurrent.LinkedBlockingQueue; @@ -284,38 +288,47 @@ public StateStoreOperationResult putAll( } @Override - public int remove( - Class clazz, Query query) throws IOException { + public Map, Integer> remove(Class clazz, + List> queries) throws IOException { verifyDriverReady(); - if (query == null) { - return 0; + // 
Track how many entries are deleted by each query + Map, Integer> ret = new HashMap<>(); + final List trueRemoved = Collections.synchronizedList(new ArrayList<>()); + if (queries.isEmpty()) { + return ret; } // Read the current data long start = monotonicNow(); - List records = null; + List records; try { QueryResult result = get(clazz); records = result.getRecords(); } catch (IOException ex) { LOG.error("Cannot get existing records", ex); getMetrics().addFailure(monotonicNow() - start); - return 0; + return ret; } // Check the records to remove String znode = getZNodeForClass(clazz); - List recordsToRemove = filterMultiple(query, records); + Set recordsToRemove = new HashSet<>(); + Map, List> queryToRecords = new HashMap<>(); + for (Query query : queries) { + List filtered = filterMultiple(query, records); + queryToRecords.put(query, filtered); + recordsToRemove.addAll(filtered); + } // Remove the records - int removed = 0; - for (T existingRecord : recordsToRemove) { + List> callables = new ArrayList<>(); + recordsToRemove.forEach(existingRecord -> callables.add(() -> { LOG.info("Removing \"{}\"", existingRecord); try { String primaryKey = getPrimaryKey(existingRecord); String path = getNodePath(znode, primaryKey); if (zkManager.delete(path)) { - removed++; + trueRemoved.add(existingRecord); } else { LOG.error("Did not remove \"{}\"", existingRecord); } @@ -323,12 +336,38 @@ public int remove( LOG.error("Cannot remove \"{}\"", existingRecord, e); getMetrics().addFailure(monotonicNow() - start); } + return null; + })); + try { + if (enableConcurrent) { + executorService.invokeAll(callables); + } else { + for (Callable callable : callables) { + callable.call(); + } + } + } catch (Exception e) { + LOG.error("Record removal failed : {}", e.getMessage(), e); } long end = monotonicNow(); - if (removed > 0) { + if (!trueRemoved.isEmpty()) { getMetrics().addRemove(end - start); } - return removed; + // Generate return map + for (Map.Entry, List> entry : queryToRecords.entrySet()) { + for (T record : entry.getValue()) { + if (trueRemoved.contains(record)) { + ret.compute(entry.getKey(), (k, v) -> (v == null) ? 
1 : v + 1); + } + } + } + return ret; + } + + @Override + public int remove(Class clazz, Query query) + throws IOException { + return remove(clazz, Collections.singletonList(query)).get(query); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java index f94e415b4d514..5ddf93e05b526 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java @@ -140,17 +140,28 @@ public void testAsyncPerformance() throws Exception { insertList.add(newRecord); } // Insert Multiple on sync mode - long startSync = Time.now(); + long startSyncPut = Time.now(); stateStoreDriver.putAll(insertList, true, false); - long endSync = Time.now(); + long endSyncPut = Time.now(); + // Removing 1000 records synchronously is painfully slow so test with only 5 records + // Then remove the rest with removeAll() + long startSyncRemove = Time.now(); + for (MountTable entry : insertList.subList(0, 5)) { + stateStoreDriver.remove(entry); + } + long endSyncRemove = Time.now(); stateStoreDriver.removeAll(MembershipState.class); stateStoreDriver.setEnableConcurrent(true); // Insert Multiple on async mode - long startAsync = Time.now(); + long startAsyncPut = Time.now(); stateStoreDriver.putAll(insertList, true, false); - long endAsync = Time.now(); - assertTrue((endSync - startSync) > (endAsync - startAsync)); + long endAsyncPut = Time.now(); + long startAsyncRemove = Time.now(); + stateStoreDriver.removeMultiple(insertList.subList(0, 5)); + long endAsyncRemove = Time.now(); + assertTrue((endSyncPut - startSyncPut) > (endAsyncPut - startAsyncPut)); + assertTrue((endSyncRemove - startSyncRemove) > (endAsyncRemove - startAsyncRemove)); } @Test From 1baf0e889fec54b6560417b62cada75daf6fe312 Mon Sep 17 00:00:00 2001 From: Murali Krishna Date: Fri, 24 May 2024 22:10:37 +0530 Subject: [PATCH 008/113] HADOOP-18962. 
Upgrade kafka to 3.4.0 (#6247) Upgrade Kafka Client due to CVEs * CVE-2023-25194 * CVE-2021-38153 * CVE-2018-17196 Contributed by Murali Krishna --- LICENSE-binary | 4 ++-- hadoop-project/pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 8e2c57b1032bd..c0258e9311b1b 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -317,7 +317,7 @@ org.apache.htrace:htrace-core:3.1.0-incubating org.apache.htrace:htrace-core4:4.1.0-incubating org.apache.httpcomponents:httpclient:4.5.13 org.apache.httpcomponents:httpcore:4.4.13 -org.apache.kafka:kafka-clients:2.8.2 +org.apache.kafka:kafka-clients:3.4.0 org.apache.kerby:kerb-admin:2.0.3 org.apache.kerby:kerb-client:2.0.3 org.apache.kerby:kerb-common:2.0.3 @@ -377,7 +377,7 @@ hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/com hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/util/tree.h hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/{fstatat|openat|unlinkat}.h -com.github.luben:zstd-jni:1.4.9-1 +com.github.luben:zstd-jni:1.5.2-1 dnsjava:dnsjava:2.1.7 org.codehaus.woodstox:stax2-api:4.2.1 diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index c795b41340f6f..ba7631189a1a4 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -50,7 +50,7 @@ 2.12.2 - 2.8.2 + 3.4.0 1.0.13 From 74d30a5dce704543578baac12b0cff9684fd9d44 Mon Sep 17 00:00:00 2001 From: Felix Nguyen <23214709+kokonguyen191@users.noreply.github.com> Date: Tue, 28 May 2024 11:17:08 +0800 Subject: [PATCH 009/113] HDFS-17532. RBF: Allow router state store cache update to overwrite and delete in parallel (#6839) --- .../federation/router/RBFConfigKeys.java | 3 + .../federation/store/CachedRecordStore.java | 19 ++-- .../store/driver/StateStoreDriver.java | 89 ++++++++++++++++++- .../driver/impl/StateStoreFileBaseImpl.java | 1 + .../driver/impl/StateStoreMySQLImpl.java | 1 + .../driver/impl/StateStoreZooKeeperImpl.java | 1 + .../src/main/resources/hdfs-rbf-default.xml | 10 +++ .../src/site/markdown/HDFSRouterFederation.md | 1 + .../store/records/MockStateStoreDriver.java | 1 + 9 files changed, 113 insertions(+), 13 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java index 5189b6b134597..512b1936f4327 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java @@ -218,6 +218,9 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic { FEDERATION_STORE_PREFIX + "driver.class"; public static final Class FEDERATION_STORE_DRIVER_CLASS_DEFAULT = StateStoreZooKeeperImpl.class; + public static final String FEDERATION_STORE_DRIVER_ASYNC_OVERRIDE_MAX_THREADS = + FEDERATION_STORE_PREFIX + "driver.async.override.max.threads"; + public static final int FEDERATION_STORE_DRIVER_ASYNC_OVERRIDE_MAX_THREADS_DEFAULT = -1; public static final String FEDERATION_STORE_CONNECTION_TEST_MS = FEDERATION_STORE_PREFIX + "connection.test"; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java 
b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java index 3a2995eba2a6d..0686f6b302e4d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/CachedRecordStore.java @@ -21,7 +21,6 @@ import java.util.ArrayList; import java.util.LinkedList; import java.util.List; -import java.util.Map; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -173,7 +172,7 @@ private boolean isUpdateTime() { */ public void overrideExpiredRecords(QueryResult query) throws IOException { List commitRecords = new ArrayList<>(); - List toDeleteRecords = new ArrayList<>(); + List deleteRecords = new ArrayList<>(); List newRecords = query.getRecords(); long currentDriverTime = query.getTimestamp(); if (newRecords == null || currentDriverTime <= 0) { @@ -184,22 +183,18 @@ public void overrideExpiredRecords(QueryResult query) throws IOException { if (record.shouldBeDeleted(currentDriverTime)) { String recordName = StateStoreUtils.getRecordName(record.getClass()); LOG.info("State Store record to delete {}: {}", recordName, record); - toDeleteRecords.add(record); + deleteRecords.add(record); } else if (!record.isExpired() && record.checkExpired(currentDriverTime)) { String recordName = StateStoreUtils.getRecordName(record.getClass()); LOG.info("Override State Store record {}: {}", recordName, record); commitRecords.add(record); } } - if (commitRecords.size() > 0) { - getDriver().putAll(commitRecords, true, false); - } - if (!toDeleteRecords.isEmpty()) { - for (Map.Entry entry : getDriver().removeMultiple(toDeleteRecords).entrySet()) { - if (entry.getValue()) { - newRecords.remove(entry.getKey()); - } - } + List removedRecords = getDriver().handleOverwriteAndDelete(commitRecords, deleteRecords); + // In driver async mode, driver will return null and skip the next block. + // newRecords might be stale as a result but will sort itself out the next override cycle. 
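A minimal sketch of the calling pattern this comment describes, using the handleOverwriteAndDelete() API added to StateStoreDriver later in this patch; the record type and variable names are illustrative only:

    List<MembershipState> removed =
        getDriver().handleOverwriteAndDelete(commitRecords, deleteRecords);
    if (removed == null) {
      // Async mode: the overwrite/delete work was only queued on the driver's
      // executor, so the cached newRecords may briefly contain deleted entries
      // until the next override cycle reconciles them.
    } else {
      // Sync mode: prune exactly the records the driver confirmed as removed.
      newRecords.removeAll(removed);
    }
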
+ if (removedRecords != null && !removedRecords.isEmpty()) { + newRecords.removeAll(removedRecords); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreDriver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreDriver.java index dfd6c97ed36e7..274b14b24f244 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreDriver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/StateStoreDriver.java @@ -17,13 +17,22 @@ */ package org.apache.hadoop.hdfs.server.federation.store.driver; +import java.io.IOException; import java.net.InetAddress; +import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.server.federation.metrics.StateStoreMetrics; +import org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys; import org.apache.hadoop.hdfs.server.federation.store.StateStoreService; import org.apache.hadoop.hdfs.server.federation.store.StateStoreUnavailableException; import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils; @@ -54,6 +63,9 @@ public abstract class StateStoreDriver implements StateStoreRecordOperations { /** State Store metrics. */ private StateStoreMetrics metrics; + /** Thread pool to delegate overwrite and deletion asynchronously. */ + private ThreadPoolExecutor executor = null; + /** * Initialize the state store connection. * @@ -88,6 +100,18 @@ public boolean init(final Configuration config, final String id, return false; } } + + int nThreads = conf.getInt( + RBFConfigKeys.FEDERATION_STORE_DRIVER_ASYNC_OVERRIDE_MAX_THREADS, + RBFConfigKeys.FEDERATION_STORE_DRIVER_ASYNC_OVERRIDE_MAX_THREADS_DEFAULT); + if (nThreads > 0) { + executor = new ThreadPoolExecutor(nThreads, nThreads, 1L, TimeUnit.MINUTES, + new LinkedBlockingQueue<>()); + executor.allowCoreThreadTimeOut(true); + LOG.info("Init StateStoreDriver in async mode with {} threads.", nThreads); + } else { + LOG.info("Init StateStoreDriver in sync mode."); + } return true; } @@ -169,7 +193,12 @@ public void verifyDriverReady() throws StateStoreUnavailableException { * * @throws Exception if something goes wrong while closing the state store driver connection. */ - public abstract void close() throws Exception; + public void close() throws Exception { + if (executor != null) { + executor.shutdown(); + executor = null; + } + } /** * Returns the current time synchronization from the underlying store. @@ -206,4 +235,62 @@ private String getHostname() { } return hostname; } + + /** + * Try to overwrite records in commitRecords and remove records in deleteRecords. + * Should return null if async mode is used. Else return removed records. 
+ * @param commitRecords records to overwrite in state store + * @param deleteRecords records to remove from state store + * @param record class + * @throws IOException when there is a failure during overwriting or deletion + * @return null if async mode is used, else removed records + */ + public List handleOverwriteAndDelete(List commitRecords, + List deleteRecords) throws IOException { + List result = null; + try { + // Overwrite all expired records. + if (commitRecords != null && !commitRecords.isEmpty()) { + Runnable overwriteCallable = + () -> { + try { + putAll(commitRecords, true, false); + } catch (IOException e) { + throw new RuntimeException(e); + } + }; + if (executor != null) { + executor.execute(overwriteCallable); + } else { + overwriteCallable.run(); + } + } + + // Delete all deletable records. + if (deleteRecords != null && !deleteRecords.isEmpty()) { + Map removedRecords = new HashMap<>(); + Runnable deletionCallable = () -> { + try { + removedRecords.putAll(removeMultiple(deleteRecords)); + } catch (IOException e) { + throw new RuntimeException(e); + } + }; + if (executor != null) { + executor.execute(deletionCallable); + } else { + result = new ArrayList<>(); + deletionCallable.run(); + for (Map.Entry entry : removedRecords.entrySet()) { + if (entry.getValue()) { + result.add(entry.getKey()); + } + } + } + } + } catch (Exception e) { + throw new IOException(e); + } + return result; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java index f9f04f187bb19..07ca94649b80b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreFileBaseImpl.java @@ -201,6 +201,7 @@ public boolean initDriver() { @Override public void close() throws Exception { + super.close(); if (this.concurrentStoreAccessPool != null) { this.concurrentStoreAccessPool.shutdown(); boolean isTerminated = this.concurrentStoreAccessPool.awaitTermination(5, TimeUnit.SECONDS); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreMySQLImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreMySQLImpl.java index 43d65e4023e32..d3a8a063e0f9c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreMySQLImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreMySQLImpl.java @@ -125,6 +125,7 @@ public boolean isDriverReady() { @Override public void close() throws Exception { + super.close(); connectionFactory.shutdown(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java index 0e72cf417565c..4b45197f63e9d 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java @@ -140,6 +140,7 @@ public void setEnableConcurrent(boolean enableConcurrent) { @Override public void close() throws Exception { + super.close(); if (executorService != null) { executorService.shutdown(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml index ec4fa46ecc356..c49f576675423 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml @@ -370,6 +370,16 @@ + + dfs.federation.router.store.driver.async.override.max.threads + -1 + + Number of threads used by StateStoreDriver to overwrite and delete records asynchronously. + Only used by MembershipStore and RouterStore. Non-positive values will make StateStoreDriver + run in sync mode. + + + dfs.federation.router.store.connection.test 60000 diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md index 9d565f3c4248b..ed62aec7209c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md @@ -469,6 +469,7 @@ The connection to the State Store and the internal caching at the Router. | dfs.federation.router.store.connection.test | 60000 | How often to check for the connection to the State Store in milliseconds. | | dfs.federation.router.cache.ttl | 60000 | How often to refresh the State Store caches in milliseconds. | | dfs.federation.router.store.membership.expiration | 300000 | Expiration time in milliseconds for a membership record. | +| dfs.federation.router.store.driver.async.override.max.threads | | Number of threads to overwrite and delete records asynchronously when overriding. | | dfs.federation.router.mount-table.cache.update | false | If true, Mount table cache is updated whenever a mount table entry is added, modified or removed for all the routers. | | dfs.federation.router.mount-table.cache.update.timeout | 1m | Max time to wait for all the routers to finish their mount table cache update. | | dfs.federation.router.mount-table.cache.update.client.max.time | 5m | Max time a RouterClient connection can be cached. 
| diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/MockStateStoreDriver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/MockStateStoreDriver.java index d0821a1711b5e..24874375e24b1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/MockStateStoreDriver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/records/MockStateStoreDriver.java @@ -58,6 +58,7 @@ public boolean isDriverReady() { @Override public void close() throws Exception { + super.close(); VALUE_MAP.clear(); initialized = false; } From f4fde40524d73d855bb5ea6375834dce24cd4688 Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Tue, 28 May 2024 11:27:33 -0500 Subject: [PATCH 010/113] HADOOP-19184. S3A Fix TestStagingCommitter.testJobCommitFailure (#6843) Follow up on HADOOP-18679 Contributed by: Mukund Thakur --- .../apache/hadoop/fs/s3a/MockS3AFileSystem.java | 6 +++++- .../fs/s3a/commit/staging/StagingTestBase.java | 17 +++++++++++++---- .../commit/staging/TestStagingCommitter.java | 2 +- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java index b7e55f01a371e..bf3fd27701ab8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java @@ -353,7 +353,11 @@ void deleteObjectAtPath(Path f, String key, boolean isFile) throws SdkException, IOException { - deleteObject(key); + mock.getS3AInternals() + .getAmazonS3Client("test") + .deleteObject(getRequestFactory() + .newDeleteObjectRequestBuilder(key) + .build()); } @Override diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java index e64822d8c8802..9fba584fbdccb 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java @@ -43,6 +43,9 @@ import software.amazon.awssdk.services.s3.model.MultipartUpload; import software.amazon.awssdk.services.s3.model.UploadPartRequest; import software.amazon.awssdk.services.s3.model.UploadPartResponse; + +import org.apache.hadoop.fs.s3a.S3AInternals; +import org.apache.hadoop.fs.s3a.S3AStore; import org.apache.hadoop.util.Lists; import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.junit.AfterClass; @@ -129,9 +132,10 @@ protected StagingTestBase() { * @throws IOException IO problems. 
*/ protected static S3AFileSystem createAndBindMockFSInstance(Configuration conf, - Pair outcome) + Pair outcome, + S3Client mockS3Client) throws IOException { - S3AFileSystem mockFs = mockS3AFileSystemRobustly(); + S3AFileSystem mockFs = mockS3AFileSystemRobustly(mockS3Client); MockS3AFileSystem wrapperFS = new MockS3AFileSystem(mockFs, outcome); URI uri = RAW_BUCKET_URI; wrapperFS.initialize(uri, conf); @@ -142,8 +146,13 @@ protected static S3AFileSystem createAndBindMockFSInstance(Configuration conf, return mockFs; } - private static S3AFileSystem mockS3AFileSystemRobustly() { + private static S3AFileSystem mockS3AFileSystemRobustly(S3Client mockS3Client) { S3AFileSystem mockFS = mock(S3AFileSystem.class); + S3AInternals s3AInternals = mock(S3AInternals.class); + when(mockFS.getS3AInternals()).thenReturn(s3AInternals); + when(s3AInternals.getStore()).thenReturn(mock(S3AStore.class)); + when(s3AInternals.getAmazonS3Client(anyString())) + .thenReturn(mockS3Client); doNothing().when(mockFS).incrementReadOperations(); doNothing().when(mockFS).incrementWriteOperations(); doNothing().when(mockFS).incrementWriteOperations(); @@ -350,7 +359,7 @@ public void setupJob() throws Exception { this.errors = new StagingTestBase.ClientErrors(); this.mockClient = newMockS3Client(results, errors); this.mockFS = createAndBindMockFSInstance(jobConf, - Pair.of(results, errors)); + Pair.of(results, errors), mockClient); this.wrapperFS = lookupWrapperFS(jobConf); // and bind the FS wrapperFS.setAmazonS3Client(mockClient); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java index 71ed0b6891a58..fae7a6232d1c9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java @@ -158,7 +158,7 @@ public void setupCommitter() throws Exception { this.errors = new StagingTestBase.ClientErrors(); this.mockClient = newMockS3Client(results, errors); this.mockFS = createAndBindMockFSInstance(jobConf, - Pair.of(results, errors)); + Pair.of(results, errors), mockClient); this.wrapperFS = lookupWrapperFS(jobConf); // and bind the FS wrapperFS.setAmazonS3Client(mockClient); From 6c08e8e2aadeb5576172abac62c233c82b597105 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E6=96=8C?= <43056633+liubin101@users.noreply.github.com> Date: Wed, 29 May 2024 20:44:36 +0800 Subject: [PATCH 011/113] HADOOP-19156. ZooKeeper based state stores use different ZK address configs. (#6767). Contributed by liu bin. 
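The pattern is the same for each state store touched here: read the store-specific address key and pass it to the new ZKCuratorManager.start(String) overload, which falls back to hadoop.zk.address when the argument is null. A minimal sketch mirroring the RBF driver change below; the YARN stores use yarn.federation.state-store.zk.address and yarn.resourcemanager.zk-address in the same way:

    // Store-specific address wins; a null value falls back to hadoop.zk.address
    // inside ZKCuratorManager.start(authInfos, sslEnabled, zkHostPort).
    String zkHostPort = conf.get(RBFConfigKeys.FEDERATION_STORE_ZK_ADDRESS);
    ZKCuratorManager zkManager = new ZKCuratorManager(conf);
    zkManager.start(zkHostPort);
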
Signed-off-by: Ayush Saxena Signed-off-by: He Xiaoqiao --- .../hadoop/util/curator/ZKCuratorManager.java | 36 +++++++++++++++---- .../curator/TestSecureZKCuratorManager.java | 4 +-- .../util/curator/TestZKCuratorManager.java | 5 ++- .../federation/router/RBFConfigKeys.java | 2 ++ .../driver/impl/StateStoreZooKeeperImpl.java | 3 +- .../src/main/resources/hdfs-rbf-default.xml | 7 ++++ .../router/TestRouterHeartbeatService.java | 3 +- .../TestRouterMountTableCacheRefresh.java | 3 +- ...estRouterMountTableCacheRefreshSecure.java | 3 +- .../store/driver/TestStateStoreZK.java | 3 +- .../hadoop/yarn/conf/YarnConfiguration.java | 5 +-- .../conf/TestYarnConfigurationFields.java | 1 - .../src/main/resources/yarn-default.xml | 16 +++++++++ .../impl/ZookeeperFederationStateStore.java | 3 +- .../TestZookeeperFederationStateStore.java | 3 +- .../resourcemanager/ResourceManager.java | 3 +- .../server/resourcemanager/RMHATestBase.java | 2 +- .../recovery/TestZKRMStateStore.java | 6 ++-- ...TestZKRMStateStoreZKClientConnections.java | 2 +- .../conf/TestZKConfigurationStore.java | 3 +- .../src/site/markdown/Federation.md | 8 ++--- .../src/site/markdown/ResourceManagerHA.md | 4 +-- .../site/markdown/ResourceManagerRestart.md | 4 +-- 23 files changed, 86 insertions(+), 43 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java index a0cbf86fb9f9a..4f279fbfaf277 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/curator/ZKCuratorManager.java @@ -126,7 +126,7 @@ public static List getZKAuths(Configuration conf) * Start the connection to the ZooKeeper ensemble. * @throws IOException If the connection cannot be started. */ - public void start() throws IOException{ + public void start() throws IOException { this.start(new ArrayList<>()); } @@ -139,25 +139,47 @@ public void start(List authInfos) throws IOException { this.start(authInfos, false); } + /** + * Start the connection to the ZooKeeper ensemble. + * @param zkHostPort Host:Port of the ZooKeeper. + * @throws IOException If the connection cannot be started. + */ + public void start(String zkHostPort) throws IOException { + this.start(new ArrayList<>(), false, zkHostPort); + } + + /** + * Start the connection to the ZooKeeper ensemble. + * @param authInfos List of authentication keys. + * @param sslEnabled If the connection should be SSL/TLS encrypted. + * @throws IOException If the connection cannot be started. + */ + public void start(List authInfos, boolean sslEnabled) throws IOException { + this.start(authInfos, sslEnabled, null); + } + /** * Start the connection to the ZooKeeper ensemble. * * @param authInfos List of authentication keys. * @param sslEnabled If the connection should be SSL/TLS encrypted. + * @param zkHostPort Host:Port of the ZooKeeper. * @throws IOException If the connection cannot be started. 
*/ - public void start(List authInfos, boolean sslEnabled) - throws IOException{ + public void start(List authInfos, boolean sslEnabled, String zkHostPort) + throws IOException { ZKClientConfig zkClientConfig = new ZKClientConfig(); // Connect to the ZooKeeper ensemble - String zkHostPort = conf.get(CommonConfigurationKeys.ZK_ADDRESS); if (zkHostPort == null) { - throw new IOException( - CommonConfigurationKeys.ZK_ADDRESS + " is not configured."); + zkHostPort = conf.get(CommonConfigurationKeys.ZK_ADDRESS); + if (zkHostPort == null) { + throw new IOException( + CommonConfigurationKeys.ZK_ADDRESS + " is not configured."); + } + LOG.debug("Configured {} as {}", CommonConfigurationKeys.ZK_ADDRESS, zkHostPort); } - LOG.debug("Configured {} as {}", CommonConfigurationKeys.ZK_ADDRESS, zkHostPort); int numRetries = conf.getInt(CommonConfigurationKeys.ZK_NUM_RETRIES, CommonConfigurationKeys.ZK_NUM_RETRIES_DEFAULT); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestSecureZKCuratorManager.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestSecureZKCuratorManager.java index 4862c1c79838d..7720534dc7ed4 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestSecureZKCuratorManager.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestSecureZKCuratorManager.java @@ -71,9 +71,9 @@ public void setup() throws Exception { DELETE_DATA_DIRECTORY_ON_CLOSE, SERVER_ID, TICK_TIME, MAX_CLIENT_CNXNS, customConfiguration); this.server = new TestingServer(spec, true); - this.hadoopConf.set(CommonConfigurationKeys.ZK_ADDRESS, this.server.getConnectString()); + String zkHostPort = this.server.getConnectString(); this.curator = new ZKCuratorManager(this.hadoopConf); - this.curator.start(new ArrayList<>(), true); + this.curator.start(new ArrayList<>(), true, zkHostPort); } /** diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestZKCuratorManager.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestZKCuratorManager.java index 69cba8f4ad01d..354be1d94da52 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestZKCuratorManager.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/curator/TestZKCuratorManager.java @@ -60,11 +60,10 @@ public void setup() throws Exception { this.server = new TestingServer(); Configuration conf = new Configuration(); - conf.set( - CommonConfigurationKeys.ZK_ADDRESS, this.server.getConnectString()); + String zkHostPort = this.server.getConnectString(); this.curator = new ZKCuratorManager(conf); - this.curator.start(); + this.curator.start(zkHostPort); } @After diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java index 512b1936f4327..64f27bd3ba32e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RBFConfigKeys.java @@ -260,6 +260,8 @@ public class RBFConfigKeys extends CommonConfigurationKeysPublic { FEDERATION_STORE_ZK_DRIVER_PREFIX + "async.max.threads"; public static final int 
FEDERATION_STORE_ZK_ASYNC_MAX_THREADS_DEFAULT = -1; + public static final String FEDERATION_STORE_ZK_ADDRESS = + FEDERATION_STORE_ZK_DRIVER_PREFIX + "address"; // HDFS Router-based federation File based store implementation specific configs public static final String FEDERATION_STORE_FILE_ASYNC_THREADS = diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java index 4b45197f63e9d..1b9331f3b3d98 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/driver/impl/StateStoreZooKeeperImpl.java @@ -108,9 +108,10 @@ public boolean initDriver() { } else { LOG.info("Init StateStoreZookeeperImpl by sync mode."); } + String zkHostPort = conf.get(RBFConfigKeys.FEDERATION_STORE_ZK_ADDRESS); try { this.zkManager = new ZKCuratorManager(conf); - this.zkManager.start(); + this.zkManager.start(zkHostPort); this.zkAcl = ZKCuratorManager.getZKAcls(conf); } catch (IOException e) { LOG.error("Cannot initialize the ZK connection", e); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml index c49f576675423..26b89ce0313fd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/resources/hdfs-rbf-default.xml @@ -388,6 +388,13 @@ + + dfs.federation.router.store.driver.zk.address + + Host:Port of the ZooKeeper for StateStoreZooKeeperImpl. 
+ + + dfs.federation.router.store.driver.zk.parent-path /hdfs-federation diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterHeartbeatService.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterHeartbeatService.java index 80f2327037817..6260d271aa6e1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterHeartbeatService.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterHeartbeatService.java @@ -22,7 +22,6 @@ import org.apache.curator.retry.RetryNTimes; import org.apache.curator.test.TestingServer; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.hdfs.server.federation.RouterConfigBuilder; import org.apache.hadoop.hdfs.server.federation.store.RouterStore; import org.apache.hadoop.hdfs.server.federation.store.StateStoreService; @@ -74,7 +73,7 @@ public void setup() throws Exception { .retryPolicy(new RetryNTimes(100, 100)) .build(); curatorFramework.start(); - routerConfig.set(CommonConfigurationKeys.ZK_ADDRESS, connectStr); + routerConfig.set(RBFConfigKeys.FEDERATION_STORE_ZK_ADDRESS, connectStr); router.init(routerConfig); router.start(); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTableCacheRefresh.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTableCacheRefresh.java index 82bc7d905e606..139236d24e19a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTableCacheRefresh.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTableCacheRefresh.java @@ -32,7 +32,6 @@ import org.apache.curator.test.TestingServer; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.hdfs.server.federation.FederationTestUtils; import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster; import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.RouterContext; @@ -81,7 +80,7 @@ public static void setUp() throws Exception { conf.setClass(RBFConfigKeys.FEDERATION_FILE_RESOLVER_CLIENT_CLASS, RBFConfigKeys.FEDERATION_FILE_RESOLVER_CLIENT_CLASS_DEFAULT, FileSubclusterResolver.class); - conf.set(CommonConfigurationKeys.ZK_ADDRESS, connectString); + conf.set(RBFConfigKeys.FEDERATION_STORE_ZK_ADDRESS, connectString); conf.setBoolean(RBFConfigKeys.DFS_ROUTER_STORE_ENABLE, true); cluster.addRouterOverrides(conf); cluster.startCluster(); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTableCacheRefreshSecure.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTableCacheRefreshSecure.java index 0cfdaea365651..fa81ee5bdd121 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTableCacheRefreshSecure.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterMountTableCacheRefreshSecure.java @@ -30,7 +30,6 @@ import 
org.apache.curator.test.TestingServer; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.hdfs.server.federation.FederationTestUtils; import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster; import org.apache.hadoop.hdfs.server.federation.MiniRouterDFSCluster.RouterContext; @@ -86,7 +85,7 @@ public static void setUp() throws Exception { conf.setClass(RBFConfigKeys.FEDERATION_FILE_RESOLVER_CLIENT_CLASS, RBFConfigKeys.FEDERATION_FILE_RESOLVER_CLIENT_CLASS_DEFAULT, FileSubclusterResolver.class); - conf.set(CommonConfigurationKeys.ZK_ADDRESS, connectString); + conf.set(RBFConfigKeys.FEDERATION_STORE_ZK_ADDRESS, connectString); conf.setBoolean(RBFConfigKeys.DFS_ROUTER_STORE_ENABLE, true); cluster = new MiniRouterDFSCluster(false, numNameservices, conf); cluster.addRouterOverrides(conf); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java index 5ddf93e05b526..f215ab41c0188 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/store/driver/TestStateStoreZK.java @@ -32,7 +32,6 @@ import org.apache.curator.retry.RetryNTimes; import org.apache.curator.test.TestingServer; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.hdfs.server.federation.router.RBFConfigKeys; import org.apache.hadoop.hdfs.server.federation.store.StateStoreUtils; import org.apache.hadoop.hdfs.server.federation.store.driver.impl.StateStoreZooKeeperImpl; @@ -71,7 +70,7 @@ public static void setupCluster() throws Exception { // Create the ZK State Store Configuration conf = getStateStoreConfiguration(StateStoreZooKeeperImpl.class); - conf.set(CommonConfigurationKeys.ZK_ADDRESS, connectString); + conf.set(RBFConfigKeys.FEDERATION_STORE_ZK_ADDRESS, connectString); // Disable auto-repair of connection conf.setLong(RBFConfigKeys.FEDERATION_STORE_CONNECTION_TEST_MS, TimeUnit.HOURS.toMillis(1)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 2a51065404cac..7747d4cb73410 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -112,8 +112,6 @@ private static void addDeprecatedKeys() { SYSTEM_METRICS_PUBLISHER_ENABLED), new DeprecationDelta(RM_ZK_ACL, CommonConfigurationKeys.ZK_ACL), new DeprecationDelta(RM_ZK_AUTH, CommonConfigurationKeys.ZK_AUTH), - new DeprecationDelta(RM_ZK_ADDRESS, - CommonConfigurationKeys.ZK_ADDRESS), new DeprecationDelta(RM_ZK_NUM_RETRIES, CommonConfigurationKeys.ZK_NUM_RETRIES), new DeprecationDelta(RM_ZK_TIMEOUT_MS, @@ -4038,6 +4036,9 @@ public static boolean isAclEnabled(Configuration conf) { public static final String DEFAULT_FEDERATION_STATESTORE_CLIENT_CLASS = "org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore"; + public static final String 
FEDERATION_STATESTORE_ZK_ADDRESS = + FEDERATION_PREFIX + "state-store.zk.address"; + public static final String FEDERATION_CACHE_TIME_TO_LIVE_SECS = FEDERATION_PREFIX + "cache-ttl.secs"; // 5 minutes diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java index e4b48c45f33ec..b63ed40039102 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/test/java/org/apache/hadoop/yarn/conf/TestYarnConfigurationFields.java @@ -142,7 +142,6 @@ public void initializeMemberVariables() { .add(YarnConfiguration.RM_SYSTEM_METRICS_PUBLISHER_ENABLED); // skip deprecated ZooKeeper settings - configurationPropsToSkipCompare.add(YarnConfiguration.RM_ZK_ADDRESS); configurationPropsToSkipCompare.add(YarnConfiguration.RM_ZK_NUM_RETRIES); configurationPropsToSkipCompare.add(YarnConfiguration.RM_ZK_TIMEOUT_MS); configurationPropsToSkipCompare.add( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 275fc08cb2ca9..927d0c1aa41e0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -581,6 +581,15 @@ ${yarn.resourcemanager.max-completed-applications} + + Host:Port of the ZooKeeper server to be used by the RM. This + must be supplied when using the ZooKeeper based implementation of the + RM state store and/or embedded automatic failover in an HA setting. + + yarn.resourcemanager.zk-address + + + Full path of the ZooKeeper znode where RM state will be stored. 
This must be supplied when using @@ -3798,6 +3807,13 @@ org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore + + + Host:Port of the ZooKeeper server to be used by the federation state store + + yarn.federation.state-store.zk.address + + The time in seconds after which the federation state store local cache diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZookeeperFederationStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZookeeperFederationStateStore.java index 63b853e82ab6e..7a511bcffe24c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZookeeperFederationStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/impl/ZookeeperFederationStateStore.java @@ -265,9 +265,10 @@ public void init(Configuration conf) throws YarnException { baseZNode = conf.get( YarnConfiguration.FEDERATION_STATESTORE_ZK_PARENT_PATH, YarnConfiguration.DEFAULT_FEDERATION_STATESTORE_ZK_PARENT_PATH); + String zkHostPort = conf.get(YarnConfiguration.FEDERATION_STATESTORE_ZK_ADDRESS); try { this.zkManager = new ZKCuratorManager(conf); - this.zkManager.start(); + this.zkManager.start(zkHostPort); } catch (IOException e) { LOG.error("Cannot initialize the ZK connection", e); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestZookeeperFederationStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestZookeeperFederationStateStore.java index ba22a1e1894d5..3c528096f73da 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestZookeeperFederationStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/store/impl/TestZookeeperFederationStateStore.java @@ -27,7 +27,6 @@ import org.apache.curator.retry.RetryNTimes; import org.apache.curator.test.TestingServer; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.metrics2.MetricsRecord; import org.apache.hadoop.metrics2.impl.MetricsCollectorImpl; import org.apache.hadoop.metrics2.impl.MetricsRecords; @@ -94,7 +93,7 @@ public void before() throws IOException, YarnException { curatorFramework.start(); Configuration conf = new YarnConfiguration(); - conf.set(CommonConfigurationKeys.ZK_ADDRESS, connectString); + conf.set(YarnConfiguration.FEDERATION_STATESTORE_ZK_ADDRESS, connectString); conf.setInt(YarnConfiguration.FEDERATION_STATESTORE_MAX_APPLICATIONS, 10); setConf(conf); } catch (Exception e) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java index 
a3d529c1020a3..143dff30d926d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ResourceManager.java @@ -410,6 +410,7 @@ protected EmbeddedElector createEmbeddedElector() throws IOException { */ public ZKCuratorManager createAndStartZKManager(Configuration config) throws IOException { + String zkHostPort = config.get(YarnConfiguration.RM_ZK_ADDRESS); ZKCuratorManager manager = new ZKCuratorManager(config); // Get authentication @@ -432,7 +433,7 @@ public ZKCuratorManager createAndStartZKManager(Configuration config.getBoolean(CommonConfigurationKeys.ZK_CLIENT_SSL_ENABLED, config.getBoolean(YarnConfiguration.RM_ZK_CLIENT_SSL_ENABLED, YarnConfiguration.DEFAULT_RM_ZK_CLIENT_SSL_ENABLED)); - manager.start(authInfos, isSSLEnabled); + manager.start(authInfos, isSSLEnabled, zkHostPort); return manager; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/RMHATestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/RMHATestBase.java index ca76c24b87818..c4a6e9dad64f8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/RMHATestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/RMHATestBase.java @@ -64,7 +64,7 @@ public void setup() throws Exception { configuration.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true); configuration.set(YarnConfiguration.RM_STORE, ZKRMStateStore.class.getName()); - configuration.set(CommonConfigurationKeys.ZK_ADDRESS, hostPort); + configuration.set(YarnConfiguration.RM_ZK_ADDRESS, hostPort); configuration.setInt(CommonConfigurationKeys.ZK_TIMEOUT_MS, ZK_TIMEOUT_MS); configuration.setBoolean(YarnConfiguration.AUTO_FAILOVER_ENABLED, false); configuration.set(YarnConfiguration.RM_CLUSTER_ID, "test-yarn-cluster"); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java index 404fae9d853f0..bbf5e2ed351c7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStore.java @@ -213,7 +213,7 @@ private String getDelegationTokenNode(int rmDTSequenceNumber, int splitIdx) { private RMStateStore createStore(Configuration conf) throws Exception { workingZnode = "/jira/issue/3077/rmstore"; - conf.set(CommonConfigurationKeys.ZK_ADDRESS, + conf.set(YarnConfiguration.RM_ZK_ADDRESS, curatorTestingServer.getConnectString()); 
conf.set(YarnConfiguration.ZK_RM_STATE_STORE_PARENT_PATH, workingZnode); conf.setLong(YarnConfiguration.RM_EPOCH, epoch); @@ -347,7 +347,7 @@ public Version getCurrentVersion() throws Exception { public RMStateStore getRMStateStore() throws Exception { YarnConfiguration conf = new YarnConfiguration(); workingZnode = "/jira/issue/3077/rmstore"; - conf.set(CommonConfigurationKeys.ZK_ADDRESS, + conf.set(YarnConfiguration.RM_ZK_ADDRESS, curatorTestingServer.getConnectString()); conf.set(YarnConfiguration.ZK_RM_STATE_STORE_PARENT_PATH, workingZnode); this.store = new TestZKRMStateStoreInternal(conf, workingZnode) { @@ -388,7 +388,7 @@ public static Configuration createHARMConf(String rmIds, String rmId, conf.set(YarnConfiguration.RM_HA_IDS, rmIds); conf.setBoolean(YarnConfiguration.RECOVERY_ENABLED, true); conf.set(YarnConfiguration.RM_STORE, ZKRMStateStore.class.getName()); - conf.set(CommonConfigurationKeys.ZK_ADDRESS, + conf.set(YarnConfiguration.RM_ZK_ADDRESS, curatorTestServer.getConnectString()); conf.setInt(CommonConfigurationKeys.ZK_TIMEOUT_MS, ZK_TIMEOUT_MS); conf.set(YarnConfiguration.RM_HA_ID, rmId); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java index 752df13ccca9a..dce2bed5522e7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestZKRMStateStoreZKClientConnections.java @@ -88,7 +88,7 @@ public TestZKRMStateStore(Configuration conf, String workingZnode) public RMStateStore getRMStateStore(Configuration conf) throws Exception { String workingZnode = "/Test"; - conf.set(CommonConfigurationKeys.ZK_ADDRESS, + conf.set(YarnConfiguration.RM_ZK_ADDRESS, testingServer.getConnectString()); conf.set(YarnConfiguration.ZK_RM_STATE_STORE_PARENT_PATH, workingZnode); this.store = new TestZKRMStateStore(conf, workingZnode); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/TestZKConfigurationStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/TestZKConfigurationStore.java index 155996d11feb7..fa55b51a1b0a2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/TestZKConfigurationStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/conf/TestZKConfigurationStore.java @@ -23,7 +23,6 @@ import org.apache.curator.retry.RetryNTimes; import org.apache.curator.test.TestingServer; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeys; import 
org.apache.hadoop.ha.HAServiceProtocol; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.service.Service; @@ -100,7 +99,7 @@ public void setUp() throws Exception { curatorTestingServer = setupCuratorServer(); curatorFramework = setupCuratorFramework(curatorTestingServer); - conf.set(CommonConfigurationKeys.ZK_ADDRESS, + conf.set(YarnConfiguration.RM_ZK_ADDRESS, curatorTestingServer.getConnectString()); rm = new MockRM(conf); rm.start(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/Federation.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/Federation.md index 5702f8d165dbb..3886f54041e6b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/Federation.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/Federation.md @@ -177,7 +177,7 @@ ZooKeeper: one must set the ZooKeeper settings for Hadoop: | Property | Example | Description | |:------------------------------------|:------------------------------------------------------------------------------------|:----------------------------------------| | `yarn.federation.state-store.class` | `org.apache.hadoop.yarn.server.federation.store.impl.ZookeeperFederationStateStore` | The type of state-store to use. | -| `hadoop.zk.address` | `host:port` | The address for the ZooKeeper ensemble. | +| `yarn.federation.state-store.zk.address` | `host:port` | The address for the ZooKeeper ensemble. | SQL: one must setup the following parameters: @@ -1006,7 +1006,7 @@ Example of Machine-Role Mapping(Exclude HDFS): - hadoop.zk.address + yarn.federation.state-store.zk.address zkHost:zkPort @@ -1067,7 +1067,7 @@ $HADOOP_HOME/bin/yarn --daemon start resourcemanager - hadoop.zk.address + yarn.federation.state-store.zk.address zkHost:zkPort @@ -1135,7 +1135,7 @@ After we have finished configuring the `YARN-2` cluster, we can proceed with sta - hadoop.zk.address + yarn.federation.state-store.zk.address zkHost:zkPort diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceManagerHA.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceManagerHA.md index 83b2af2ef9cba..636077e00c2a4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceManagerHA.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceManagerHA.md @@ -56,7 +56,7 @@ Most of the failover functionality is tunable using various configuration proper | Configuration Properties | Description | |:---- |:---- | -| `hadoop.zk.address` | Address of the ZK-quorum. Used both for the state-store and embedded leader-election. | +| `yarn.resourcemanager.zk-address` | Address of the ZK-quorum. Used both for the state-store and embedded leader-election. | | `yarn.resourcemanager.ha.enabled` | Enable RM HA. | | `yarn.resourcemanager.ha.rm-ids` | List of logical IDs for the RMs. e.g., "rm1,rm2". | | `yarn.resourcemanager.hostname.`*rm-id* | For each *rm-id*, specify the hostname the RM corresponds to. Alternately, one could set each of the RM's service addresses. | @@ -112,7 +112,7 @@ Here is the sample of minimal setup for RM failover. 
master2:8088 - hadoop.zk.address + yarn.resourcemanager.zk-address zk1:2181,zk2:2181,zk3:2181 ``` diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceManagerRestart.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceManagerRestart.md index 68e44e98cbbee..428d48e680cab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceManagerRestart.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/ResourceManagerRestart.md @@ -93,7 +93,7 @@ This section describes the configurations involved to enable RM Restart feature. | Property | Description | |:---- |:---- | -| `hadoop.zk.address` | Comma separated list of Host:Port pairs. Each corresponds to a ZooKeeper server (e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002") to be used by the RM for storing RM state. | +| `yarn.resourcemanager.zk-address` | Comma separated list of Host:Port pairs. Each corresponds to a ZooKeeper server (e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002") to be used by the RM for storing RM state. | | `yarn.resourcemanager.zk-state-store.parent-path` | The full path of the root znode where RM state will be stored. Default value is /rmstore. | * Configure the retry policy state-store client uses to connect with the ZooKeeper server. @@ -157,7 +157,7 @@ Below is a minimum set of configurations for enabling RM work-preserving restart (e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002") to be used by the RM for storing RM state. This must be supplied when using org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore as the value for yarn.resourcemanager.store.class - hadoop.zk.address + yarn.resourcemanager.zk-address 127.0.0.1:2181 From ccb8ff4360d3bf0951d32e012ca37bae5db98db6 Mon Sep 17 00:00:00 2001 From: K0K0V0K <109747532+K0K0V0K@users.noreply.github.com> Date: Wed, 29 May 2024 17:20:23 +0200 Subject: [PATCH 012/113] YARN-11687. 
CGroupV2 resource calculator (#6835) Co-authored-by: Benjamin Teke --- .../util/ResourceCalculatorProcessTree.java | 5 - .../resources/AbstractCGroupsHandler.java | 5 + .../AbstractCGroupsResourceCalculator.java | 212 +++++++++ .../linux/resources/CGroupsHandler.java | 6 + .../resources/CGroupsResourceCalculator.java | 412 +++++------------- .../CGroupsV2ResourceCalculator.java | 136 ++++++ .../resources/CombinedResourceCalculator.java | 83 ++-- .../monitor/ContainersMonitorImpl.java | 2 +- .../TestCGroupsResourceCalculator.java | 320 ++++---------- .../TestCGroupsV2ResourceCalculator.java | 110 +++++ .../TestCompareResourceCalculators.java | 2 +- 11 files changed, 710 insertions(+), 583 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsResourceCalculator.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2ResourceCalculator.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2ResourceCalculator.java diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java index 7209f9264747e..0feed1340c640 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ResourceCalculatorProcessTree.java @@ -20,8 +20,6 @@ import java.lang.reflect.Constructor; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.conf.Configuration; @@ -37,8 +35,6 @@ @Public @Evolving public abstract class ResourceCalculatorProcessTree extends Configured { - static final Logger LOG = LoggerFactory - .getLogger(ResourceCalculatorProcessTree.class); public static final int UNAVAILABLE = -1; /** @@ -169,7 +165,6 @@ public float getCpuUsagePercent() { */ public static ResourceCalculatorProcessTree getResourceCalculatorProcessTree( String pid, Class clazz, Configuration conf) { - if (clazz != null) { try { Constructor c = clazz.getConstructor(String.class); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java index becb68e22f0ff..10df0ecfba730 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsHandler.java @@ -559,6 +559,11 @@ public String getCGroupMountPath() { return this.cGroupsMountConfig.getMountPath(); } + @Override + public String getCGroupV2MountPath() { + return this.cGroupsMountConfig.getV2MountPath(); + } + @Override public String toString() { return CGroupsHandlerImpl.class.getName() + "{" + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsResourceCalculator.java new file mode 100644 index 0000000000000..8ff851a03a77d --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/AbstractCGroupsResourceCalculator.java @@ -0,0 +1,212 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; + +import java.io.IOException; +import java.math.BigInteger; +import java.nio.charset.StandardCharsets; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.util.CpuTimeTracker; +import org.apache.hadoop.util.SysInfoLinux; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree; +import org.apache.hadoop.yarn.util.SystemClock; + +/** + * Common code base for the CGroupsResourceCalculator implementations. 
+ */ +public abstract class AbstractCGroupsResourceCalculator extends ResourceCalculatorProcessTree { + private static final Logger LOG = + LoggerFactory.getLogger(AbstractCGroupsResourceCalculator.class); + private final String pid; + private final Clock clock = SystemClock.getInstance(); + private final Map stats = new ConcurrentHashMap<>(); + + private long jiffyLengthMs = SysInfoLinux.JIFFY_LENGTH_IN_MILLIS; + private CpuTimeTracker cpuTimeTracker; + private CGroupsHandler cGroupsHandler; + private String procFs = "/proc"; + + private final List totalJiffiesKeys; + private final String rssMemoryKey; + private final String virtualMemoryKey; + + protected AbstractCGroupsResourceCalculator( + String pid, + List totalJiffiesKeys, + String rssMemoryKey, + String virtualMemoryKey + ) { + super(pid); + this.pid = pid; + this.totalJiffiesKeys = totalJiffiesKeys; + this.rssMemoryKey = rssMemoryKey; + this.virtualMemoryKey = virtualMemoryKey; + } + + @Override + public void initialize() throws YarnException { + cpuTimeTracker = new CpuTimeTracker(jiffyLengthMs); + cGroupsHandler = ResourceHandlerModule.getCGroupsHandler(); + } + + @Override + public long getCumulativeCpuTime() { + long totalJiffies = getTotalJiffies(); + return jiffyLengthMs == UNAVAILABLE || totalJiffies == UNAVAILABLE + ? UNAVAILABLE + : getTotalJiffies() * jiffyLengthMs; + } + + @Override + public long getRssMemorySize(int olderThanAge) { + return 1 < olderThanAge ? UNAVAILABLE : getStat(rssMemoryKey); + } + + @Override + public long getVirtualMemorySize(int olderThanAge) { + return 1 < olderThanAge ? UNAVAILABLE : getStat(virtualMemoryKey); + } + + @Override + public String getProcessTreeDump() { + // We do not have a process tree in cgroups return just the pid for tracking + return pid; + } + + @Override + public boolean checkPidPgrpidForMatch() { + // We do not have a process tree in cgroups returning default ok + return true; + } + + @Override + public float getCpuUsagePercent() { + return cpuTimeTracker.getCpuTrackerUsagePercent(); + } + + @Override + public void updateProcessTree() { + stats.clear(); + for (Path statFile : getCGroupFilesToLoadInStats()) { + try { + List lines = fileToLines(statFile); + if (1 == lines.size()) { + addSingleLineToStat(statFile, lines.get(0)); + } else if (1 < lines.size()) { + addMultiLineToStat(statFile, lines); + } + } catch (IOException e) { + LOG.debug(String.format("Failed to read cgroup file %s for pid %s", statFile, pid), e); + } + } + LOG.debug("After updateProcessTree the {} pid has stats {}", pid, stats); + cpuTimeTracker.updateElapsedJiffies(BigInteger.valueOf(getTotalJiffies()), clock.getTime()); + } + + private void addSingleLineToStat(Path file, String line) { + Path fileName = file.getFileName(); + if (fileName != null) { + stats.put(fileName.toString(), line.trim()); + } + } + + private void addMultiLineToStat(Path file, List lines) { + for (String line : lines) { + String[] parts = line.split(" "); + if (1 < parts.length) { + stats.put(file.getFileName() + "#" + parts[0], parts[1]); + } + } + } + + private long getTotalJiffies() { + Long reduce = totalJiffiesKeys.stream() + .map(this::getStat) + .filter(statValue -> statValue != UNAVAILABLE) + .reduce(0L, Long::sum); + return reduce == 0 ? 
UNAVAILABLE : reduce; + } + + private long getStat(String key) { + return Long.parseLong(stats.getOrDefault(key, String.valueOf(UNAVAILABLE))); + } + + protected abstract List getCGroupFilesToLoadInStats(); + + protected List readLinesFromCGroupFileFromProcDir() throws IOException { + // https://docs.kernel.org/admin-guide/cgroup-v2.html#processes + // https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v1/cgroups.html + Path cgroup = Paths.get(procFs, pid, "cgroup"); + List result = Arrays.asList(fileToString(cgroup).split(System.lineSeparator())); + LOG.debug("The {} pid has the following lines in the procfs cgroup file {}", pid, result); + return result; + } + + protected String fileToString(Path path) throws IOException { + return FileUtils.readFileToString(path.toFile(), StandardCharsets.UTF_8).trim(); + } + + protected List fileToLines(Path path) throws IOException { + return !path.toFile().exists() ? Collections.emptyList() + : Arrays.asList(FileUtils.readFileToString(path.toFile(), StandardCharsets.UTF_8) + .trim().split(System.lineSeparator())); + } + + @VisibleForTesting + void setJiffyLengthMs(long jiffyLengthMs) { + this.jiffyLengthMs = jiffyLengthMs; + } + + @VisibleForTesting + void setCpuTimeTracker(CpuTimeTracker cpuTimeTracker) { + this.cpuTimeTracker = cpuTimeTracker; + } + + @VisibleForTesting + void setcGroupsHandler(CGroupsHandler cGroupsHandler) { + this.cGroupsHandler = cGroupsHandler; + } + + @VisibleForTesting + void setProcFs(String procFs) { + this.procFs = procFs; + } + + public CGroupsHandler getcGroupsHandler() { + return cGroupsHandler; + } + + public String getPid() { + return pid; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java index e13d390e64c61..d23d378b60e3f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsHandler.java @@ -239,4 +239,10 @@ String getCGroupParam(CGroupController controller, String cGroupId, * @return parameter value as read from the parameter file */ String getCGroupMountPath(); + + /** + * Returns CGroupV2 Mount Path. 
+ * @return parameter value as read from the parameter file + */ + String getCGroupV2MountPath(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsResourceCalculator.java index f5e987deee074..1ebdc6a8517dc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsResourceCalculator.java @@ -18,338 +18,146 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; -import org.apache.hadoop.classification.VisibleForTesting; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.util.CpuTimeTracker; -import org.apache.hadoop.util.Shell; -import org.apache.hadoop.util.SysInfoLinux; -import org.apache.hadoop.yarn.exceptions.YarnException; -import org.apache.hadoop.yarn.util.Clock; -import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree; -import org.apache.hadoop.yarn.util.SystemClock; - -import java.io.BufferedReader; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; -import java.io.InputStreamReader; -import java.math.BigInteger; -import java.nio.charset.StandardCharsets; -import java.util.function.Function; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** - * A cgroups file-system based Resource calculator without the process tree - * features. + * A Cgroup version 1 file-system based Resource calculator without the process tree features. * - * CGroups has its limitations. It can only be enabled, if both CPU and memory - * cgroups are enabled with yarn.nodemanager.resource.cpu.enabled and - * yarn.nodemanager.resource.memory.enabled respectively. This means that - * memory limits are enforced by default. You can turn this off and keep - * memory reporting only with yarn.nodemanager.resource.memory.enforced. + * Warning: this implementation will not work properly + * when configured using the mapreduce.job.process-tree.class job property. + * Theoretically the ResourceCalculatorProcessTree can be configured using the + * mapreduce.job.process-tree.class job property, however it has a dependency on an + * instantiated ResourceHandlerModule, which is only initialised in the NodeManager process + * and not in the containers. * - * Another limitation is virtual memory measurement. CGroups does not have the - * ability to measure virtual memory usage. This includes memory reserved but - * not used. CGroups measures used memory as sa sum of - * physical memory and swap usage. This will be returned in the virtual - * memory counters. - * If the real virtual memory is required please use the legacy procfs based - * resource calculator or CombinedResourceCalculator. 
+ * Limitation: + * The ResourceCalculatorProcessTree class can be configured using the + * mapreduce.job.process-tree.class property within a MapReduce job. + * However, it is important to note that instances of ResourceCalculatorProcessTree operate + * within the context of a MapReduce task. This presents a limitation: + * these instances do not have access to the ResourceHandlerModule, + * which is only initialized within the NodeManager process + * and not within individual containers where MapReduce tasks execute. + * As a result, the current implementation of ResourceCalculatorProcessTree is incompatible + * with the mapreduce.job.process-tree.class property. This incompatibility arises + * because the ResourceHandlerModule is essential for managing and monitoring resource usage, + * and without it, the ResourceCalculatorProcessTree cannot function as intended + * within the confines of a MapReduce task. Therefore, any attempts to utilize this class + * through the mapreduce.job.process-tree.class property + * will not succeed under the current architecture. */ -public class CGroupsResourceCalculator extends ResourceCalculatorProcessTree { - enum Result { - Continue, - Exit - } - protected static final Logger LOG = LoggerFactory - .getLogger(CGroupsResourceCalculator.class); - private static final String PROCFS = "/proc"; - static final String CGROUP = "cgroup"; - static final String CPU_STAT = "cpuacct.stat"; - static final String MEM_STAT = "memory.usage_in_bytes"; - static final String MEMSW_STAT = "memory.memsw.usage_in_bytes"; - private static final String USER = "user "; - private static final String SYSTEM = "system "; - - private static final Pattern CGROUP_FILE_FORMAT = Pattern.compile( - "^(\\d+):([^:]+):/(.*)$"); - private final String procfsDir; - private CGroupsHandler cGroupsHandler; - - private String pid; - private File cpuStat; - private File memStat; - private File memswStat; - - private BigInteger processTotalJiffies; - private long processPhysicalMemory; - private long processVirtualMemory; - - private final long jiffyLengthMs; - private final CpuTimeTracker cpuTimeTracker; - private Clock clock; - - /** - * Create resource calculator for all Yarn containers. - */ - public CGroupsResourceCalculator() - throws YarnException { - this(null, PROCFS, ResourceHandlerModule.getCGroupsHandler(), - SystemClock.getInstance(), SysInfoLinux.JIFFY_LENGTH_IN_MILLIS); - } +public class CGroupsResourceCalculator extends AbstractCGroupsResourceCalculator { + private static final Logger LOG = LoggerFactory.getLogger(CGroupsResourceCalculator.class); /** - * Create resource calculator for the container that has the specified pid. - * @param pid A pid from the cgroup or null for all containers + * DOC + * + * ... + * cpuacct.stat file lists a few statistics which further divide the CPU time obtained + * by the cgroup into user and system times. + * Currently the following statistics are supported: + * - user: Time spent by tasks of the cgroup in user mode. + * - system: Time spent by tasks of the cgroup in kernel mode. + * user and system are in USER_HZ unit. + * ... + * + * DOC + * + * ... + * In kernels earlier than 2.6, changing the value of HZ resulted in user-space anomalies. + * This happened because values were exported to user-space in units of ticks-per-second. + * As these interfaces became permanent, applications grew to rely on a specific value of HZ. + * Consequently, changing HZ would scale various exported values + * by some constantwithout user-space knowing! 
+ * Uptime would read 20 hours when it was in fact two! + * + * To prevent such problems, the kernel needs to scale all exported jiffies values. + * It does this by defining USER_HZ, which is the HZ value that user-space expects. On x86, + * because HZ was historically 100, USER_HZ is 100. The macro jiffies_to_clock_t() + * is then used to scale a tick count in terms of HZ to a tick count in terms of USER_HZ. + * The macro used depends on whether USER_HZ and HZ are integer multiples of themselves. + * ... + * */ - public CGroupsResourceCalculator(String pid) { - this(pid, PROCFS, ResourceHandlerModule.getCGroupsHandler(), - SystemClock.getInstance(), SysInfoLinux.JIFFY_LENGTH_IN_MILLIS); - } + private static final String CPU_STAT = "cpuacct.stat"; /** - * Create resource calculator for testing. - * @param pid A pid from the cgroup or null for all containers - * @param procfsDir Path to /proc or a mock /proc directory - * @param cGroupsHandler Initialized cgroups handler object - * @param clock A clock object - * @param jiffyLengthMs0 Jiffy length in milliseconds + * DOC + * + * ... + * For efficiency, as other kernel components, memory cgroup uses some optimization + * to avoid unnecessary cacheline false sharing. + * usage_in_bytes is affected by the method + * and doesn’t show ‘exact’ value of memory (and swap) usage, + * it’s a fuzz value for efficient access. (Of course, when necessary, it’s synchronized.) + * ... + * */ - @VisibleForTesting - CGroupsResourceCalculator(String pid, String procfsDir, - CGroupsHandler cGroupsHandler, - Clock clock, - long jiffyLengthMs0) { - super(pid); - this.procfsDir = procfsDir; - this.cGroupsHandler = cGroupsHandler; - this.pid = pid != null && pid.equals("0") ? "1" : pid; - this.jiffyLengthMs = jiffyLengthMs0; - this.cpuTimeTracker = - new CpuTimeTracker(this.jiffyLengthMs); - this.clock = clock; - this.processTotalJiffies = BigInteger.ZERO; - this.processPhysicalMemory = UNAVAILABLE; - this.processVirtualMemory = UNAVAILABLE; - } - - @Override - public void initialize() throws YarnException { - if (!CGroupsResourceCalculator.isAvailable()) { - throw new YarnException("CGroupsResourceCalculator is not available"); - } - setCGroupFilePaths(); - } + private static final String MEM_STAT = "memory.usage_in_bytes"; + private static final String MEMSW_STAT = "memory.memsw.usage_in_bytes"; - @Override - public float getCpuUsagePercent() { - LOG.debug("Process {} jiffies:{}", pid, processTotalJiffies); - return cpuTimeTracker.getCpuTrackerUsagePercent(); - } - - @Override - public long getCumulativeCpuTime() { - if (jiffyLengthMs < 0) { - return UNAVAILABLE; - } - return processTotalJiffies.longValue() * jiffyLengthMs; - } - - @Override - public long getRssMemorySize(int olderThanAge) { - if (olderThanAge > 1) { - return UNAVAILABLE; - } - return processPhysicalMemory; - } - - @Override - public long getVirtualMemorySize(int olderThanAge) { - if (olderThanAge > 1) { - return UNAVAILABLE; - } - return processVirtualMemory; - } - - @Override - public void updateProcessTree() { - try { - this.processTotalJiffies = readTotalProcessJiffies(); - cpuTimeTracker.updateElapsedJiffies(processTotalJiffies, - clock.getTime()); - } catch (YarnException e) { - LOG.warn("Failed to parse " + pid, e); - } - processPhysicalMemory = getMemorySize(memStat); - if (memswStat.exists()) { - processVirtualMemory = getMemorySize(memswStat); - } else { - LOG.debug("Swap cgroups monitoring is not compiled into the kernel {}", - memswStat.getAbsolutePath()); - } - } - - @Override - 
public String getProcessTreeDump() { - // We do not have a process tree in cgroups return just the pid for tracking - return pid; + public CGroupsResourceCalculator(String pid) { + super( + pid, + Arrays.asList(CPU_STAT + "#user", CPU_STAT + "#system"), + MEM_STAT, + MEMSW_STAT + ); } @Override - public boolean checkPidPgrpidForMatch() { - // We do not have a process tree in cgroups returning default ok - return true; - } + protected List getCGroupFilesToLoadInStats() { + List result = new ArrayList<>(); - /** - * Checks if the CGroupsResourceCalculator is available on this system. - * This assumes that Linux container executor is already initialized. - * - * @return true if CGroupsResourceCalculator is available. False otherwise. - */ - public static boolean isAvailable() { try { - if (!Shell.LINUX) { - LOG.info("CGroupsResourceCalculator currently is supported only on " - + "Linux."); - return false; - } - if (ResourceHandlerModule.getCGroupsHandler() == null || - ResourceHandlerModule.getCpuResourceHandler() == null || - ResourceHandlerModule.getMemoryResourceHandler() == null) { - LOG.info("CGroupsResourceCalculator requires enabling CGroups" + - "cpu and memory"); - return false; + String cpuRelative = getCGroupRelativePath(CGroupsHandler.CGroupController.CPUACCT); + if (cpuRelative != null) { + File cpuDir = new File(getcGroupsHandler().getControllerPath( + CGroupsHandler.CGroupController.CPUACCT), cpuRelative); + result.add(Paths.get(cpuDir.getAbsolutePath(), CPU_STAT)); } - } catch (SecurityException se) { - LOG.warn("Failed to get Operating System name. " + se); - return false; + } catch (IOException e) { + LOG.debug("Exception while looking for CPUACCT controller for pid: " + getPid(), e); } - return true; - } - private long getMemorySize(File cgroupUsageFile) { - long[] mem = new long[1]; try { - processFile(cgroupUsageFile, (String line) -> { - mem[0] = Long.parseLong(line); - return Result.Exit; - }); - return mem[0]; - } catch (YarnException e) { - LOG.warn("Failed to parse cgroups " + memswStat, e); - } - return UNAVAILABLE; - } - - private BigInteger readTotalProcessJiffies() throws YarnException { - final BigInteger[] totalCPUTimeJiffies = new BigInteger[1]; - totalCPUTimeJiffies[0] = BigInteger.ZERO; - processFile(cpuStat, (String line) -> { - if (line.startsWith(USER)) { - totalCPUTimeJiffies[0] = totalCPUTimeJiffies[0].add( - new BigInteger(line.substring(USER.length()))); - } - if (line.startsWith(SYSTEM)) { - totalCPUTimeJiffies[0] = totalCPUTimeJiffies[0].add( - new BigInteger(line.substring(SYSTEM.length()))); + String memoryRelative = getCGroupRelativePath(CGroupsHandler.CGroupController.MEMORY); + if (memoryRelative != null) { + File memDir = new File(getcGroupsHandler().getControllerPath( + CGroupsHandler.CGroupController.MEMORY), memoryRelative); + result.add(Paths.get(memDir.getAbsolutePath(), MEM_STAT)); + result.add(Paths.get(memDir.getAbsolutePath(), MEMSW_STAT)); } - return Result.Continue; - }); - return totalCPUTimeJiffies[0]; - } - - private String getCGroupRelativePath( - CGroupsHandler.CGroupController controller) - throws YarnException { - if (pid == null) { - return cGroupsHandler.getRelativePathForCGroup(""); - } else { - return getCGroupRelativePathForPid(controller); + } catch (IOException e) { + LOG.debug("Exception while looking for MEMORY controller for pid: " + getPid(), e); } - } - - private String getCGroupRelativePathForPid( - CGroupsHandler.CGroupController controller) - throws YarnException { - File pidCgroupFile = new File(new 
File(procfsDir, pid), CGROUP); - String[] result = new String[1]; - processFile(pidCgroupFile, (String line)->{ - Matcher m = CGROUP_FILE_FORMAT.matcher(line); - boolean mat = m.find(); - if (mat) { - if (m.group(2).contains(controller.getName())) { - // Instead of returning the full path we compose it - // based on the last item as the container id - // This helps to avoid confusion within a privileged Docker container - // where the path is referred in /proc//cgroup as - // /docker//hadoop-yarn/ - // but it is /hadoop-yarn/ in the cgroups hierarchy - String cgroupPath = m.group(3); - if (cgroupPath != null) { - String cgroup = - new File(cgroupPath).toPath().getFileName().toString(); - result[0] = cGroupsHandler.getRelativePathForCGroup(cgroup); - } else { - LOG.warn("Invalid cgroup path for " + pidCgroupFile); - } - return Result.Exit; - } - } else { - LOG.warn( - "Unexpected: cgroup file is not in the expected format" - + " for process with pid " + pid); - } - return Result.Continue; - }); - if (result[0] == null) { - throw new YarnException(controller.getName() + " CGroup for pid " + pid + - " not found " + pidCgroupFile); - } - return result[0]; + return result; } - private void processFile(File file, Function processLine) - throws YarnException { - // Read "procfsDir//stat" file - typically /proc//stat - try (InputStreamReader fReader = new InputStreamReader( - new FileInputStream(file), StandardCharsets.UTF_8)) { - try (BufferedReader in = new BufferedReader(fReader)) { - try { - String str; - while ((str = in.readLine()) != null) { - Result result = processLine.apply(str); - if (result == Result.Exit) { - return; - } - } - } catch (IOException io) { - throw new YarnException("Error reading the stream " + io, io); + private String getCGroupRelativePath(CGroupsHandler.CGroupController controller) + throws IOException { + for (String line : readLinesFromCGroupFileFromProcDir()) { + // example line: 6:cpuacct,cpu:/yarn/container_1 + String[] parts = line.split(":"); + if (parts[1].contains(controller.getName())) { + String cgroupPath = parts[2]; + Path fileName = new File(cgroupPath).toPath().getFileName(); + if (fileName != null) { + return getcGroupsHandler().getRelativePathForCGroup(fileName.toString()); } } - } catch (IOException f) { - throw new YarnException("The process vanished in the interim " + pid, f); } + LOG.debug("No {} controller found for pid {}", controller, getPid()); + return null; } - - void setCGroupFilePaths() throws YarnException { - if (cGroupsHandler == null) { - throw new YarnException("CGroups handler is not initialized"); - } - File cpuDir = new File( - cGroupsHandler.getControllerPath( - CGroupsHandler.CGroupController.CPUACCT), - getCGroupRelativePath(CGroupsHandler.CGroupController.CPUACCT)); - File memDir = new File( - cGroupsHandler.getControllerPath( - CGroupsHandler.CGroupController.MEMORY), - getCGroupRelativePath(CGroupsHandler.CGroupController.MEMORY)); - cpuStat = new File(cpuDir, CPU_STAT); - memStat = new File(memDir, MEM_STAT); - memswStat = new File(memDir, MEMSW_STAT); - } - } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2ResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2ResourceCalculator.java new file mode 100644 index 
0000000000000..21b768d158dfa --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CGroupsV2ResourceCalculator.java @@ -0,0 +1,136 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.stream.Stream; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.lang3.StringUtils; + +/** + * A Cgroup version 2 file-system based Resource calculator without the process tree features. + * + * Warning: this implementation will not work properly when configured + * using the mapreduce.job.process-tree.class job property. + * Theoretically the ResourceCalculatorProcessTree can be configured + * using the mapreduce.job.process-tree.class job property, however it + * has a dependency on an instantiated ResourceHandlerModule, + * which is only initialised in the NodeManager process and not in the containers. + * + * Limitation: + * The ResourceCalculatorProcessTree class can be configured using the + * mapreduce.job.process-tree.class property within a MapReduce job. + * However, it is important to note that instances of ResourceCalculatorProcessTree operate + * within the context of a MapReduce task. This presents a limitation: + * these instances do not have access to the ResourceHandlerModule, + * which is only initialized within the NodeManager process + * and not within individual containers where MapReduce tasks execute. + * As a result, the current implementation of ResourceCalculatorProcessTree is incompatible + * with the mapreduce.job.process-tree.class property. This incompatibility arises + * because the ResourceHandlerModule is essential for managing and monitoring resource usage, + * and without it, the ResourceCalculatorProcessTree cannot function as intended + * within the confines of a MapReduce task. Therefore, any attempts to utilize this class + * through the mapreduce.job.process-tree.class property + * will not succeed under the current architecture. + */ +public class CGroupsV2ResourceCalculator extends AbstractCGroupsResourceCalculator { + private static final Logger LOG = LoggerFactory.getLogger(CGroupsV2ResourceCalculator.class); + + /** + * DOC + * + * ... + * cpu.stat + * A read-only flat-keyed file. This file exists whether the controller is enabled or not. 
+ * It always reports the following three stats: + * - usage_usec + * - user_usec + * - system_usec + * ... + * + */ + private static final String CPU_STAT = "cpu.stat#usage_usec"; + + /** + * DOC + * + * ... + * memory.stat + * A read-only flat-keyed file which exists on non-root cgroups. + * This breaks down the cgroup’s memory footprint into different types of memory, + * type-specific details, and other information on the state + * and past events of the memory management system. + * All memory amounts are in bytes. + * ... + * anon + * Amount of memory used in anonymous mappings such as brk(), sbrk(), and mmap(MAP_ANONYMOUS) + * ... + * + */ + private static final String MEM_STAT = "memory.stat#anon"; + + /** + * DOC + * + * ... + * memory.swap.current + * A read-only single value file which exists on non-root cgroups. + * The total amount of swap currently being used by the cgroup and its descendants. + * ... + * + */ + private static final String MEMSW_STAT = "memory.swap.current"; + + public CGroupsV2ResourceCalculator(String pid) { + super( + pid, + Collections.singletonList(CPU_STAT), + MEM_STAT, + MEMSW_STAT + ); + } + + @Override + protected List getCGroupFilesToLoadInStats() { + List result = new ArrayList<>(); + try (Stream cGroupFiles = Files.list(getCGroupPath())){ + cGroupFiles.forEach(result::add); + } catch (IOException e) { + LOG.debug("Failed to list cgroup files for pid: " + getPid(), e); + } + LOG.debug("Found cgroup files for pid {} is {}", getPid(), result); + return result; + } + + private Path getCGroupPath() throws IOException { + return Paths.get( + getcGroupsHandler().getCGroupV2MountPath(), + StringUtils.substringAfterLast(readLinesFromCGroupFileFromProcDir().get(0), ":") + ); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CombinedResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CombinedResourceCalculator.java index 5d118182a1b8b..e5595104eb7f9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CombinedResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/CombinedResourceCalculator.java @@ -18,8 +18,9 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import java.util.Arrays; +import java.util.List; + import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.util.ProcfsBasedProcessTree; import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree; @@ -29,80 +30,68 @@ * it is backward compatible with procfs in terms of virtual memory usage. 
*/ public class CombinedResourceCalculator extends ResourceCalculatorProcessTree { - protected static final Logger LOG = LoggerFactory - .getLogger(CombinedResourceCalculator.class); - private ProcfsBasedProcessTree procfs; - private CGroupsResourceCalculator cgroup; + private final List resourceCalculators; + private final ProcfsBasedProcessTree procfsBasedProcessTree; public CombinedResourceCalculator(String pid) { super(pid); - procfs = new ProcfsBasedProcessTree(pid); - cgroup = new CGroupsResourceCalculator(pid); + this.procfsBasedProcessTree = new ProcfsBasedProcessTree(pid); + this.resourceCalculators = Arrays.asList( + new CGroupsV2ResourceCalculator(pid), + new CGroupsResourceCalculator(pid), + procfsBasedProcessTree + ); } @Override public void initialize() throws YarnException { - procfs.initialize(); - cgroup.initialize(); + for (ResourceCalculatorProcessTree calculator : resourceCalculators) { + calculator.initialize(); + } } @Override public void updateProcessTree() { - procfs.updateProcessTree(); - cgroup.updateProcessTree(); + resourceCalculators.stream().parallel() + .forEach(ResourceCalculatorProcessTree::updateProcessTree); } @Override public String getProcessTreeDump() { - return procfs.getProcessTreeDump(); + return procfsBasedProcessTree.getProcessTreeDump(); } @Override - public float getCpuUsagePercent() { - float cgroupUsage = cgroup.getCpuUsagePercent(); - if (LOG.isDebugEnabled()) { - float procfsUsage = procfs.getCpuUsagePercent(); - LOG.debug("CPU Comparison:" + procfsUsage + " " + cgroupUsage); - LOG.debug("Jiffy Comparison:" + - procfs.getCumulativeCpuTime() + " " + - cgroup.getCumulativeCpuTime()); - } - - return cgroupUsage; + public boolean checkPidPgrpidForMatch() { + return procfsBasedProcessTree.checkPidPgrpidForMatch(); } @Override - public boolean checkPidPgrpidForMatch() { - return procfs.checkPidPgrpidForMatch(); + public long getVirtualMemorySize(int olderThanAge) { + return procfsBasedProcessTree.getVirtualMemorySize(olderThanAge); } @Override - public long getCumulativeCpuTime() { - if (LOG.isDebugEnabled()) { - LOG.debug("CPU Comparison:" + - procfs.getCumulativeCpuTime() + " " + - cgroup.getCumulativeCpuTime()); - } - return cgroup.getCumulativeCpuTime(); + public long getRssMemorySize(int olderThanAge) { + return resourceCalculators.stream() + .map(calculator -> calculator.getRssMemorySize(olderThanAge)) + .filter(result -> UNAVAILABLE < result) + .findAny().orElse((long) UNAVAILABLE); } @Override - public long getRssMemorySize(int olderThanAge) { - if (LOG.isDebugEnabled()) { - LOG.debug("MEM Comparison:" + - procfs.getRssMemorySize(olderThanAge) + " " + - cgroup.getRssMemorySize(olderThanAge)); - } - return cgroup.getRssMemorySize(olderThanAge); + public long getCumulativeCpuTime() { + return resourceCalculators.stream() + .map(ResourceCalculatorProcessTree::getCumulativeCpuTime) + .filter(result -> UNAVAILABLE < result) + .findAny().orElse((long) UNAVAILABLE); } @Override - public long getVirtualMemorySize(int olderThanAge) { - if (LOG.isDebugEnabled()) { - LOG.debug("VMEM Comparison:" + - procfs.getVirtualMemorySize(olderThanAge) + " " + - cgroup.getVirtualMemorySize(olderThanAge)); - } - return procfs.getVirtualMemorySize(olderThanAge); + public float getCpuUsagePercent() { + return resourceCalculators.stream() + .map(ResourceCalculatorProcessTree::getCpuUsagePercent) + .filter(result -> UNAVAILABLE < result) + .findAny().orElse((float) UNAVAILABLE); } } diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java index e82fcefb6ac06..0b4bd4a3fbd81 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/monitor/ContainersMonitorImpl.java @@ -295,7 +295,7 @@ private boolean isResourceCalculatorAvailable() { + "{} is disabled.", this.getClass().getName()); return false; } - if (getResourceCalculatorProcessTree("0") == null) { + if (getResourceCalculatorProcessTree("1") == null) { LOG.info("ResourceCalculatorProcessTree is unavailable on this system. " + "{} is disabled.", this.getClass().getName()); return false; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsResourceCalculator.java index 0158bc2503823..6b02b296a8ff4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsResourceCalculator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsResourceCalculator.java @@ -18,258 +18,124 @@ package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.yarn.exceptions.YarnException; -import org.apache.hadoop.yarn.util.ControlledClock; -import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree; -import org.junit.Assert; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.stream.Collectors; + +import org.junit.After; +import org.junit.Before; import org.junit.Test; -import java.io.File; -import java.nio.charset.StandardCharsets; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.util.CpuTimeTracker; -import static org.mockito.Mockito.*; +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; /** * Unit test for CGroupsResourceCalculator. 
*/ public class TestCGroupsResourceCalculator { - private ControlledClock clock = new ControlledClock(); - private CGroupsHandler cGroupsHandler = mock(CGroupsHandler.class); - private String basePath = "/tmp/" + this.getClass().getName(); + private Path root; - public TestCGroupsResourceCalculator() { - when(cGroupsHandler.getRelativePathForCGroup("container_1")) - .thenReturn("/yarn/container_1"); - when(cGroupsHandler.getRelativePathForCGroup("")).thenReturn("/yarn/"); + @Before + public void before() throws IOException { + root = Files.createTempDirectory("TestCGroupsResourceCalculator"); } - @Test(expected = YarnException.class) - public void testPidNotFound() throws Exception { - CGroupsResourceCalculator calculator = - new CGroupsResourceCalculator( - "1234", ".", cGroupsHandler, clock, 10); - calculator.setCGroupFilePaths(); - Assert.assertEquals("Expected exception", null, calculator); + @After + public void after() throws IOException { + FileUtils.deleteDirectory(root.toFile()); } - @Test(expected = YarnException.class) + @Test public void testNoMemoryCGgroupMount() throws Exception { - File procfs = new File(basePath + "/1234"); - Assert.assertTrue("Setup error", procfs.mkdirs()); - try { - FileUtils.writeStringToFile( - new File(procfs, CGroupsResourceCalculator.CGROUP), - "7:devices:/yarn/container_1\n" + - "6:cpuacct,cpu:/yarn/container_1\n" + - "5:pids:/yarn/container_1\n", StandardCharsets.UTF_8); - CGroupsResourceCalculator calculator = - new CGroupsResourceCalculator( - "1234", basePath, - cGroupsHandler, clock, 10); - calculator.setCGroupFilePaths(); - Assert.assertEquals("Expected exception", null, calculator); - } finally { - FileUtils.deleteDirectory(new File(basePath)); - } + writeToFile("proc/41/cgroup", + "7:devices:/yarn/container_1", + "6:cpuacct,cpu:/yarn/container_1", + "5:pids:/yarn/container_1" + ); + + CGroupsResourceCalculator calculator = createCalculator(); + calculator.updateProcessTree(); + assertEquals(-1, calculator.getVirtualMemorySize()); } @Test public void testCGgroupNotFound() throws Exception { - File procfs = new File(basePath + "/1234"); - Assert.assertTrue("Setup error", procfs.mkdirs()); - try { - FileUtils.writeStringToFile( - new File(procfs, CGroupsResourceCalculator.CGROUP), - "7:devices:/yarn/container_1\n" + - "6:cpuacct,cpu:/yarn/container_1\n" + - "5:pids:/yarn/container_1\n" + - "4:memory:/yarn/container_1\n", StandardCharsets.UTF_8); - - CGroupsResourceCalculator calculator = - new CGroupsResourceCalculator( - "1234", basePath, - cGroupsHandler, clock, 10); - calculator.setCGroupFilePaths(); - calculator.updateProcessTree(); - Assert.assertEquals("cgroups should be missing", - (long)ResourceCalculatorProcessTree.UNAVAILABLE, - calculator.getRssMemorySize(0)); - } finally { - FileUtils.deleteDirectory(new File(basePath)); - } + writeToFile("proc/41/cgroup", + "7:devices:/yarn/container_1", + "6:cpuacct,cpu:/yarn/container_1", + "5:pids:/yarn/container_1", + "4:memory:/yarn/container_1" + ); + + CGroupsResourceCalculator calculator = createCalculator(); + calculator.updateProcessTree(); + assertEquals(-1, calculator.getCumulativeCpuTime()); } @Test - public void testCPUParsing() throws Exception { - File cgcpuacctDir = - new File(basePath + "/cgcpuacct"); - File cgcpuacctContainerDir = - new File(cgcpuacctDir, "/yarn/container_1"); - File procfs = new File(basePath + "/1234"); - when(cGroupsHandler.getControllerPath( - CGroupsHandler.CGroupController.CPUACCT)). 
- thenReturn(cgcpuacctDir.getAbsolutePath()); - Assert.assertTrue("Setup error", procfs.mkdirs()); - Assert.assertTrue("Setup error", cgcpuacctContainerDir.mkdirs()); - try { - FileUtils.writeStringToFile( - new File(procfs, CGroupsResourceCalculator.CGROUP), - "7:devices:/yarn/container_1\n" + - "6:cpuacct,cpu:/yarn/container_1\n" + - "5:pids:/yarn/container_1\n" + - "4:memory:/yarn/container_1\n", StandardCharsets.UTF_8); - FileUtils.writeStringToFile( - new File(cgcpuacctContainerDir, CGroupsResourceCalculator.CPU_STAT), - "Can you handle this?\n" + - "user 5415\n" + - "system 3632", StandardCharsets.UTF_8); - CGroupsResourceCalculator calculator = - new CGroupsResourceCalculator( - "1234", basePath, - cGroupsHandler, clock, 10); - calculator.setCGroupFilePaths(); - calculator.updateProcessTree(); - Assert.assertEquals("Incorrect CPU usage", - 90470, - calculator.getCumulativeCpuTime()); - } finally { - FileUtils.deleteDirectory(new File(basePath)); - } + public void testParsing() throws Exception { + writeToFile("proc/41/cgroup", + "7:devices:/yarn/container_1", + "6:cpuacct,cpu:/yarn/container_1", + "5:pids:/yarn/container_1", + "4:memory:/yarn/container_1" + ); + + writeToFile("mount/cgroup/yarn/container_1/cpuacct.stat", + "Can you handle this?", + "user 5415", + "system 3632" + ); + + CGroupsResourceCalculator calculator = createCalculator(); + calculator.updateProcessTree(); + assertEquals(90470, calculator.getCumulativeCpuTime()); + + writeToFile("mount/cgroup/yarn/container_1/memory.usage_in_bytes", + "418496512" + ); + + calculator.updateProcessTree(); + assertEquals(418496512, calculator.getRssMemorySize()); + assertEquals(-1, calculator.getVirtualMemorySize()); + + writeToFile("mount/cgroup/yarn/container_1/memory.memsw.usage_in_bytes", + "418496513" + ); + + calculator.updateProcessTree(); + assertEquals(418496512, calculator.getRssMemorySize()); + assertEquals(418496513, calculator.getVirtualMemorySize()); } - @Test - public void testMemoryParsing() throws Exception { - File cgcpuacctDir = - new File(basePath + "/cgcpuacct"); - File cgcpuacctContainerDir = - new File(cgcpuacctDir, "/yarn/container_1"); - File cgmemoryDir = - new File(basePath + "/memory"); - File cgMemoryContainerDir = - new File(cgmemoryDir, "/yarn/container_1"); - File procfs = new File(basePath + "/1234"); - when(cGroupsHandler.getControllerPath( - CGroupsHandler.CGroupController.MEMORY)). 
- thenReturn(cgmemoryDir.getAbsolutePath()); - Assert.assertTrue("Setup error", procfs.mkdirs()); - Assert.assertTrue("Setup error", cgcpuacctContainerDir.mkdirs()); - Assert.assertTrue("Setup error", cgMemoryContainerDir.mkdirs()); - try { - FileUtils.writeStringToFile( - new File(procfs, CGroupsResourceCalculator.CGROUP), - "6:cpuacct,cpu:/yarn/container_1\n" + - "4:memory:/yarn/container_1\n", StandardCharsets.UTF_8); - FileUtils.writeStringToFile( - new File(cgMemoryContainerDir, CGroupsResourceCalculator.MEM_STAT), - "418496512\n", StandardCharsets.UTF_8); - - CGroupsResourceCalculator calculator = - new CGroupsResourceCalculator( - "1234", basePath, - cGroupsHandler, clock, 10); - calculator.setCGroupFilePaths(); - - calculator.updateProcessTree(); - // Test the case where memsw is not available (Ubuntu) - Assert.assertEquals("Incorrect memory usage", - 418496512, - calculator.getRssMemorySize()); - Assert.assertEquals("Incorrect swap usage", - (long)ResourceCalculatorProcessTree.UNAVAILABLE, - calculator.getVirtualMemorySize()); - - // Test the case where memsw is available - FileUtils.writeStringToFile( - new File(cgMemoryContainerDir, CGroupsResourceCalculator.MEMSW_STAT), - "418496513\n", StandardCharsets.UTF_8); - calculator.updateProcessTree(); - Assert.assertEquals("Incorrect swap usage", - 418496513, - calculator.getVirtualMemorySize()); - } finally { - FileUtils.deleteDirectory(new File(basePath)); - } - } - - @Test - public void testCPUParsingRoot() throws Exception { - File cgcpuacctDir = - new File(basePath + "/cgcpuacct"); - File cgcpuacctRootDir = - new File(cgcpuacctDir, "/yarn"); - when(cGroupsHandler.getControllerPath( - CGroupsHandler.CGroupController.CPUACCT)). - thenReturn(cgcpuacctDir.getAbsolutePath()); - Assert.assertTrue("Setup error", cgcpuacctRootDir.mkdirs()); - try { - FileUtils.writeStringToFile( - new File(cgcpuacctRootDir, CGroupsResourceCalculator.CPU_STAT), - "user 5415\n" + - "system 3632", StandardCharsets.UTF_8); - CGroupsResourceCalculator calculator = - new CGroupsResourceCalculator( - null, basePath, - cGroupsHandler, clock, 10); - calculator.setCGroupFilePaths(); - calculator.updateProcessTree(); - Assert.assertEquals("Incorrect CPU usage", - 90470, - calculator.getCumulativeCpuTime()); - } finally { - FileUtils.deleteDirectory(new File(basePath)); - } + private CGroupsResourceCalculator createCalculator() { + CGroupsResourceCalculator calculator = new CGroupsResourceCalculator("41"); + calculator.setCpuTimeTracker(mock(CpuTimeTracker.class)); + calculator.setcGroupsHandler(mock(CGroupsHandler.class)); + when(calculator.getcGroupsHandler().getRelativePathForCGroup("container_1")) + .thenReturn("/yarn/container_1"); + when(calculator.getcGroupsHandler().getRelativePathForCGroup("")) + .thenReturn("/yarn/"); + when(calculator.getcGroupsHandler().getControllerPath(any())) + .thenReturn(root.resolve("mount/cgroup").toString()); + calculator.setProcFs(root.toString() + "/proc/"); + calculator.setJiffyLengthMs(10); + return calculator; } - @Test - public void testMemoryParsingRoot() throws Exception { - File cgcpuacctDir = - new File(basePath + "/cgcpuacct"); - File cgcpuacctRootDir = - new File(cgcpuacctDir, "/yarn"); - File cgmemoryDir = - new File(basePath + "/memory"); - File cgMemoryRootDir = - new File(cgmemoryDir, "/yarn"); - File procfs = new File(basePath + "/1234"); - when(cGroupsHandler.getControllerPath( - CGroupsHandler.CGroupController.MEMORY)). 
- thenReturn(cgmemoryDir.getAbsolutePath()); - Assert.assertTrue("Setup error", procfs.mkdirs()); - Assert.assertTrue("Setup error", cgcpuacctRootDir.mkdirs()); - Assert.assertTrue("Setup error", cgMemoryRootDir.mkdirs()); - try { - FileUtils.writeStringToFile( - new File(cgMemoryRootDir, CGroupsResourceCalculator.MEM_STAT), - "418496512\n", StandardCharsets.UTF_8); - - CGroupsResourceCalculator calculator = - new CGroupsResourceCalculator( - null, basePath, - cGroupsHandler, clock, 10); - calculator.setCGroupFilePaths(); - - calculator.updateProcessTree(); - - // Test the case where memsw is not available (Ubuntu) - Assert.assertEquals("Incorrect memory usage", - 418496512, - calculator.getRssMemorySize()); - Assert.assertEquals("Incorrect swap usage", - (long)ResourceCalculatorProcessTree.UNAVAILABLE, - calculator.getVirtualMemorySize()); - - // Test the case where memsw is available - FileUtils.writeStringToFile( - new File(cgMemoryRootDir, CGroupsResourceCalculator.MEMSW_STAT), - "418496513\n", StandardCharsets.UTF_8); - calculator.updateProcessTree(); - Assert.assertEquals("Incorrect swap usage", - 418496513, - calculator.getVirtualMemorySize()); - } finally { - FileUtils.deleteDirectory(new File(basePath)); - } + private void writeToFile(String path, String... lines) throws IOException { + FileUtils.writeStringToFile( + root.resolve(path).toFile(), + Arrays.stream(lines).collect(Collectors.joining(System.lineSeparator())), + StandardCharsets.UTF_8); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2ResourceCalculator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2ResourceCalculator.java new file mode 100644 index 0000000000000..63a67eb0b19c5 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCGroupsV2ResourceCalculator.java @@ -0,0 +1,110 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.stream.Collectors; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.util.CpuTimeTracker; + +import static org.junit.Assert.assertEquals; +import static org.mockito.ArgumentMatchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Unit test for CGroupsV2ResourceCalculator. + */ +public class TestCGroupsV2ResourceCalculator { + + private Path root; + + @Before + public void before() throws IOException { + root = Files.createTempDirectory("TestCGroupsV2ResourceCalculator"); + } + + @After + public void after() throws IOException { + FileUtils.deleteDirectory(root.toFile()); + } + + @Test + public void testPidNotFound() { + CGroupsV2ResourceCalculator calculator = createCalculator(); + calculator.updateProcessTree(); + assertEquals(-1, calculator.getRssMemorySize(), 0L); + } + + @Test + public void readFiles() throws IOException { + Files.createDirectories(root.resolve("proc/42")); + Files.createDirectories(root.resolve("mount/cgroup2/yarn/container_1")); + + writeToFile("proc/42/cgroup", + "0::/container_1"); + writeToFile("mount/cgroup2/yarn/container_1/memory.stat", + "anon 22000", + "slab 1774128"); + writeToFile("mount/cgroup2/yarn/container_1/memory.swap.current", + "11000"); + writeToFile("mount/cgroup2/yarn/container_1/cpu.stat", + "usage_usec 333", + "meaning_of_life 42"); + + CGroupsV2ResourceCalculator calculator = createCalculator(); + when(calculator.getcGroupsHandler().getCGroupV2MountPath()) + .thenReturn(root.resolve("mount/cgroup2/yarn").toString()); + when(calculator.getcGroupsHandler().getRelativePathForCGroup(eq("/container_1"))) + .thenReturn("container_1"); + + calculator.updateProcessTree(); + + assertEquals(333000L, calculator.getCumulativeCpuTime(), 0L); + assertEquals(22000L, calculator.getRssMemorySize(), 0L); + assertEquals(11000L, calculator.getVirtualMemorySize(), 0L); + assertEquals(-1L, calculator.getRssMemorySize(2), 0L); + assertEquals(-1L, calculator.getVirtualMemorySize(2), 0L); + } + + private CGroupsV2ResourceCalculator createCalculator() { + CGroupsV2ResourceCalculator calculator = new CGroupsV2ResourceCalculator("42"); + calculator.setCpuTimeTracker(mock(CpuTimeTracker.class)); + calculator.setcGroupsHandler(mock(CGroupsHandler.class)); + calculator.setProcFs(root.toString() + "/proc/"); + calculator.setJiffyLengthMs(1_000); + return calculator; + } + + private void writeToFile(String path, String... 
lines) throws IOException { + FileUtils.writeStringToFile( + root.resolve(path).toFile(), + Arrays.stream(lines).collect(Collectors.joining(System.lineSeparator())), + StandardCharsets.UTF_8); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCompareResourceCalculators.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCompareResourceCalculators.java index 8be0590afa9fe..9c8173839c0c9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCompareResourceCalculators.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestCompareResourceCalculators.java @@ -98,7 +98,7 @@ public void testCompareResults() new ProcfsBasedProcessTree(Long.toString(getPid())); CGroupsResourceCalculator cgroupsCalculator = new CGroupsResourceCalculator(Long.toString(getPid())); - cgroupsCalculator.setCGroupFilePaths(); + cgroupsCalculator.initialize(); for (int i = 0; i < 5; ++i) { Thread.sleep(3000); From d107931fc7299a91743a38d73ec25a3d33d93abf Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Wed, 29 May 2024 11:27:09 -0500 Subject: [PATCH 013/113] HADOOP-19188. Fix TestHarFileSystem and TestFilterFileSystem failing after bulk delete API got added. (#6848) Follow up to: HADOOP-18679 Add API for bulk/paged delete of files and objects Contributed by Mukund Thakur --- .../test/java/org/apache/hadoop/fs/TestFilterFileSystem.java | 1 + .../src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java | 2 ++ 2 files changed, 3 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java index 3d8ea0e826cf2..1b42290cedc5e 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestFilterFileSystem.java @@ -148,6 +148,7 @@ public Token[] addDelegationTokens(String renewer, Credentials creds) FSDataOutputStream append(Path f, int bufferSize, Progressable progress, boolean appendToNewBlock) throws IOException; + BulkDelete createBulkDelete(Path path) throws IllegalArgumentException, IOException; } @Test diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java index 0287b7ec1fb84..26d0361d6a255 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestHarFileSystem.java @@ -257,6 +257,8 @@ FSDataOutputStream append(Path f, int bufferSize, Progressable progress, boolean appendToNewBlock) throws IOException; Path getEnclosingRoot(Path path) throws IOException; + + BulkDelete createBulkDelete(Path path) throws IllegalArgumentException, IOException; } @Test From d00b3acd5ecac9907dae2f09f42a0c2ce4f94d86 Mon Sep 17 00:00:00 2001 From: Steve 
Loughran Date: Thu, 30 May 2024 19:34:30 +0100 Subject: [PATCH 014/113] HADOOP-18679. Followup: change method name case (#6854) WrappedIO.bulkDelete_PageSize() => bulkDelete_pageSize() Makes it consistent with the HADOOP-19131 naming scheme. The name needs to be fixed before invoking it through reflection, as once that is attempted the binding won't work at run time, though compilation will be happy. Contributed by Steve Loughran --- .../main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java | 2 +- .../hadoop/fs/contract/AbstractContractBulkDeleteTest.java | 2 +- .../java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java index 696055895a19b..286557c2c378c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java @@ -54,7 +54,7 @@ private WrappedIO() { * @throws IllegalArgumentException path not valid. * @throws IOException problems resolving paths */ - public static int bulkDelete_PageSize(FileSystem fs, Path path) throws IOException { + public static int bulkDelete_pageSize(FileSystem fs, Path path) throws IOException { try (BulkDelete bulk = fs.createBulkDelete(path)) { return bulk.pageSize(); } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractBulkDeleteTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractBulkDeleteTest.java index 9ebf9923f39c2..1413e74a7e0b6 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractBulkDeleteTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractBulkDeleteTest.java @@ -69,7 +69,7 @@ public abstract class AbstractContractBulkDeleteTest extends AbstractFSContractT public void setUp() throws Exception { fs = getFileSystem(); basePath = path(getClass().getName()); - pageSize = WrappedIO.bulkDelete_PageSize(getFileSystem(), basePath); + pageSize = WrappedIO.bulkDelete_pageSize(getFileSystem(), basePath); fs.mkdirs(basePath); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java index 0676dd5b16ed8..5aa72e6949064 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java @@ -735,7 +735,7 @@ private void executeBulkDeleteOnReadOnlyFiles(Configuration assumedRoleConfig) t bindReadOnlyRolePolicy(assumedRoleConfig, readOnlyDir); roleFS = (S3AFileSystem) destDir.getFileSystem(assumedRoleConfig); - int bulkDeletePageSize = WrappedIO.bulkDelete_PageSize(roleFS, destDir); + int bulkDeletePageSize = WrappedIO.bulkDelete_pageSize(roleFS, destDir); int range = bulkDeletePageSize == 1 ? 
bulkDeletePageSize : 10; touchFiles(fs, readOnlyDir, range); touchFiles(roleFS, destDir, range); @@ -769,7 +769,7 @@ private void executeBulkDeleteOnSomeReadOnlyFiles(Configuration assumedRoleConfi bindReadOnlyRolePolicy(assumedRoleConfig, readOnlyDir); roleFS = (S3AFileSystem) destDir.getFileSystem(assumedRoleConfig); S3AFileSystem fs = getFileSystem(); - if (WrappedIO.bulkDelete_PageSize(fs, destDir) == 1) { + if (WrappedIO.bulkDelete_pageSize(fs, destDir) == 1) { String msg = "Skipping as this test requires more than one path to be deleted in bulk"; LOG.debug(msg); skip(msg); From d8b485a51229392874eb54f86bc3fdc61ce6084e Mon Sep 17 00:00:00 2001 From: Anuj Modi <128447756+anujmodi2021@users.noreply.github.com> Date: Fri, 31 May 2024 01:16:19 +0530 Subject: [PATCH 015/113] HADOOP-18516: [ABFS][Authentication] Support Fixed SAS Token for ABFS Authentication (#6552) Contributed by Anuj Modi --- .../hadoop/fs/azurebfs/AbfsConfiguration.java | 75 +++++--- .../fs/azurebfs/AzureBlobFileSystem.java | 3 +- .../fs/azurebfs/AzureBlobFileSystemStore.java | 2 +- .../azurebfs/constants/ConfigurationKeys.java | 5 +- .../fs/azurebfs/services/AbfsClient.java | 9 +- .../services/FixedSASTokenProvider.java | 65 +++++++ .../hadoop-azure/src/site/markdown/abfs.md | 149 +++++++++++--- .../azurebfs/AbstractAbfsIntegrationTest.java | 23 ++- .../ITestAzureBlobFileSystemChooseSAS.java | 182 ++++++++++++++++++ .../MockDelegationSASTokenProvider.java | 2 +- .../extensions/MockSASTokenProvider.java | 16 +- .../azurebfs/utils/AccountSASGenerator.java | 103 ++++++++++ .../fs/azurebfs/utils/SASGenerator.java | 34 +++- .../azurebfs/utils/ServiceSASGenerator.java | 15 +- 14 files changed, 611 insertions(+), 72 deletions(-) create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index 6e5e772e18160..bf9008bfe6dee 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -22,6 +22,7 @@ import java.lang.reflect.Field; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.services.FixedSASTokenProvider; import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; import org.apache.hadoop.util.Preconditions; @@ -1025,33 +1026,63 @@ public AccessTokenProvider getTokenProvider() throws TokenAccessProviderExceptio } } + /** + * Returns the SASTokenProvider implementation to be used to generate SAS token.
+ * Users can choose between a custom implementation of {@link SASTokenProvider} + * or the in-house implementation, {@link FixedSASTokenProvider}.
+ * For a custom implementation, "fs.azure.sas.token.provider.type" needs to be provided.
+ * For a fixed SAS token, "fs.azure.sas.fixed.token" needs to be provided.
+ * If both are provided, preference is given to the custom implementation.
+ * Avoid using a custom tokenProvider implementation just to read the configured + * fixed token, as this could create confusion. Also, implementing the SASTokenProvider + * interface requires relying on the raw configurations. It is more stable to depend on + * the AbfsConfiguration with which a filesystem is initialized, and this eliminates + * the chance of dynamic modifications and spurious situations.
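[Editor's illustration, not part of the patch: a minimal client-side sketch of the selection behavior described in this javadoc. It assumes only the standard Hadoop Configuration/FileSystem APIs plus the two configuration keys quoted above; the account, container and token values are placeholders.]

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FixedSasTokenSample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Select SAS authentication for the store.
    conf.set("fs.azure.account.auth.type", "SAS");
    // Only the fixed token is set; with no class configured under
    // "fs.azure.sas.token.provider.type", FixedSASTokenProvider is chosen.
    // If both keys were set, the custom provider class would take precedence.
    conf.set("fs.azure.sas.fixed.token",
        "sv=2021-06-08&ss=bf&srt=sco&sp=r&sig=PLACEHOLDER");

    try (FileSystem fs = FileSystem.newInstance(
        new URI("abfss://mycontainer@myaccount.dfs.core.windows.net/"), conf)) {
      // A read-only account/service SAS is enough for a read-style probe.
      fs.getFileStatus(new Path("/"));
    }
  }
}
```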
+ * @return sasTokenProvider object based on configurations provided + * @throws AzureBlobFileSystemException + */ public SASTokenProvider getSASTokenProvider() throws AzureBlobFileSystemException { AuthType authType = getEnum(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SharedKey); if (authType != AuthType.SAS) { throw new SASTokenProviderException(String.format( - "Invalid auth type: %s is being used, expecting SAS", authType)); + "Invalid auth type: %s is being used, expecting SAS.", authType)); } try { - String configKey = FS_AZURE_SAS_TOKEN_PROVIDER_TYPE; - Class sasTokenProviderClass = - getTokenProviderClass(authType, configKey, null, - SASTokenProvider.class); - - Preconditions.checkArgument(sasTokenProviderClass != null, - String.format("The configuration value for \"%s\" is invalid.", configKey)); - - SASTokenProvider sasTokenProvider = ReflectionUtils - .newInstance(sasTokenProviderClass, rawConfig); - Preconditions.checkArgument(sasTokenProvider != null, - String.format("Failed to initialize %s", sasTokenProviderClass)); - - LOG.trace("Initializing {}", sasTokenProviderClass.getName()); - sasTokenProvider.initialize(rawConfig, accountName); - LOG.trace("{} init complete", sasTokenProviderClass.getName()); - return sasTokenProvider; + Class customSasTokenProviderImplementation = + getTokenProviderClass(authType, FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, + null, SASTokenProvider.class); + String configuredFixedToken = this.getTrimmedPasswordString(FS_AZURE_SAS_FIXED_TOKEN, EMPTY_STRING); + + if (customSasTokenProviderImplementation == null && configuredFixedToken.isEmpty()) { + throw new SASTokenProviderException(String.format( + "At least one of the \"%s\" and \"%s\" must be set.", + FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, FS_AZURE_SAS_FIXED_TOKEN)); + } + + // Prefer Custom SASTokenProvider Implementation if configured. 
+ if (customSasTokenProviderImplementation != null) { + LOG.trace("Using Custom SASTokenProvider implementation because it is given precedence when it is set."); + SASTokenProvider sasTokenProvider = ReflectionUtils.newInstance( + customSasTokenProviderImplementation, rawConfig); + if (sasTokenProvider == null) { + throw new SASTokenProviderException(String.format( + "Failed to initialize %s", customSasTokenProviderImplementation)); + } + LOG.trace("Initializing {}", customSasTokenProviderImplementation.getName()); + sasTokenProvider.initialize(rawConfig, accountName); + LOG.trace("{} init complete", customSasTokenProviderImplementation.getName()); + return sasTokenProvider; + } else { + LOG.trace("Using FixedSASTokenProvider implementation"); + FixedSASTokenProvider fixedSASTokenProvider = new FixedSASTokenProvider(configuredFixedToken); + return fixedSASTokenProvider; + } + } catch (SASTokenProviderException e) { + throw e; } catch (Exception e) { - throw new TokenAccessProviderException("Unable to load SAS token provider class: " + e, e); + throw new SASTokenProviderException( + "Unable to load SAS token provider class: " + e, e); } } @@ -1064,14 +1095,14 @@ public EncryptionContextProvider createEncryptionContextProvider() { Class encryptionContextClass = getAccountSpecificClass(configKey, null, EncryptionContextProvider.class); - Preconditions.checkArgument(encryptionContextClass != null, String.format( + Preconditions.checkArgument(encryptionContextClass != null, "The configuration value for %s is invalid, or config key is not account-specific", - configKey)); + configKey); EncryptionContextProvider encryptionContextProvider = ReflectionUtils.newInstance(encryptionContextClass, rawConfig); Preconditions.checkArgument(encryptionContextProvider != null, - String.format("Failed to initialize %s", encryptionContextClass)); + "Failed to initialize %s", encryptionContextClass); LOG.trace("{} init complete", encryptionContextClass.getName()); return encryptionContextProvider; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java index 7ca960d569d09..5475ff30651bd 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java @@ -1302,10 +1302,9 @@ public void access(final Path path, final FsAction mode) throws IOException { /** * Incrementing exists() calls from superclass for statistic collection. - * * @param f source path. * @return true if the path exists. - * @throws IOException + * @throws IOException if some issue in checking path. 
*/ @Override public boolean exists(Path f) throws IOException { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 5c8a3acbcb023..85d9d96ac2ddb 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -1729,7 +1729,7 @@ private void initializeClient(URI uri, String fileSystemName, creds = new SharedKeyCredentials(accountName.substring(0, dotIndex), abfsConfiguration.getStorageAccountKey()); } else if (authType == AuthType.SAS) { - LOG.trace("Fetching SAS token provider"); + LOG.trace("Fetching SAS Token Provider"); sasTokenProvider = abfsConfiguration.getSASTokenProvider(); } else { LOG.trace("Fetching token provider"); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index 299cc5c9c4513..2ccc6ade876f1 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -297,7 +297,10 @@ public static String accountProperty(String property, String account) { public static final String FS_AZURE_ENABLE_DELEGATION_TOKEN = "fs.azure.enable.delegation.token"; public static final String FS_AZURE_DELEGATION_TOKEN_PROVIDER_TYPE = "fs.azure.delegation.token.provider.type"; - /** Key for SAS token provider **/ + /** Key for fixed SAS token: {@value}. **/ + public static final String FS_AZURE_SAS_FIXED_TOKEN = "fs.azure.sas.fixed.token"; + + /** Key for SAS token provider: {@value}. **/ public static final String FS_AZURE_SAS_TOKEN_PROVIDER_TYPE = "fs.azure.sas.token.provider.type"; /** For performance, AbfsInputStream/AbfsOutputStream re-use SAS tokens until the expiry is within this number of seconds. 
**/ diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index f4ff181357960..f76f0ca6e87f7 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -1065,6 +1065,7 @@ public AbfsRestOperation flush(final String path, final long position, abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, Long.toString(position)); abfsUriQueryBuilder.addQuery(QUERY_PARAM_RETAIN_UNCOMMITTED_DATA, String.valueOf(retainUncommittedData)); abfsUriQueryBuilder.addQuery(QUERY_PARAM_CLOSE, String.valueOf(isClose)); + // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.WRITE_OPERATION, abfsUriQueryBuilder, cachedSasToken); @@ -1160,6 +1161,7 @@ public AbfsRestOperation read(final String path, } final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.READ_OPERATION, abfsUriQueryBuilder, cachedSasToken); @@ -1471,16 +1473,17 @@ private String appendSASTokenToQuery(String path, sasToken = cachedSasToken; LOG.trace("Using cached SAS token."); } + // if SAS Token contains a prefix of ?, it should be removed if (sasToken.charAt(0) == '?') { sasToken = sasToken.substring(1); } + queryBuilder.setSASToken(sasToken); LOG.trace("SAS token fetch complete for {} on {}", operation, path); } catch (Exception ex) { - throw new SASTokenProviderException(String.format("Failed to acquire a SAS token for %s on %s due to %s", - operation, - path, + throw new SASTokenProviderException(String.format( + "Failed to acquire a SAS token for %s on %s due to %s", operation, path, ex.toString())); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java new file mode 100644 index 0000000000000..1a2614dcc1d2f --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/FixedSASTokenProvider.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; +import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; + +/** + * In house implementation of {@link SASTokenProvider} to use a fixed SAS token with ABFS. + * Use this to avoid implementing a Custom Token Provider just to return fixed SAS. + * Fixed SAS Token to be provided using the config "fs.azure.sas.fixed.token". + */ +public class FixedSASTokenProvider implements SASTokenProvider { + private String fixedSASToken; + + public FixedSASTokenProvider(final String fixedSASToken) throws SASTokenProviderException { + this.fixedSASToken = fixedSASToken; + if (fixedSASToken == null || fixedSASToken.isEmpty()) { + throw new SASTokenProviderException( + String.format("Configured Fixed SAS Token is Invalid: %s", fixedSASToken)); + } + } + + @Override + public void initialize(final Configuration configuration, + final String accountName) + throws IOException { + } + + /** + * Returns the fixed SAS Token configured. + * @param account the name of the storage account. + * @param fileSystem the name of the fileSystem. + * @param path the file or directory path. + * @param operation the operation to be performed on the path. + * @return Fixed SAS Token + * @throws IOException never + */ + @Override + public String getSASToken(final String account, + final String fileSystem, + final String path, + final String operation) throws IOException { + return fixedSASToken; + } +} diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md index c0e20dfe16e3f..3ab8eee3ac49d 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md @@ -12,7 +12,7 @@ limitations under the License. See accompanying LICENSE file. --> -# Hadoop Azure Support: ABFS — Azure Data Lake Storage Gen2 +# Hadoop Azure Support: ABFS - Azure Data Lake Storage Gen2 @@ -309,12 +309,13 @@ in different deployment situations. The ABFS client can be deployed in different ways, with its authentication needs driven by them. -1. With the storage account's authentication secret in the configuration: -"Shared Key". -1. Using OAuth 2.0 tokens of one form or another. -1. Deployed in-Azure with the Azure VMs providing OAuth 2.0 tokens to the application, - "Managed Instance". -1. Using Shared Access Signature (SAS) tokens provided by a custom implementation of the SASTokenProvider interface. +1. With the storage account's authentication secret in the configuration: "Shared Key". +2. Using OAuth 2.0 tokens of one form or another. +3. Deployed in-Azure with the Azure VMs providing OAuth 2.0 tokens to the application, "Managed Instance". +4. Using Shared Access Signature (SAS) tokens provided by a custom implementation of the SASTokenProvider interface. +5. By directly configuring a fixed Shared Access Signature (SAS) token in the account configuration settings files. + +Note: SAS Based Authentication should be used only with HNS Enabled accounts. What can be changed is what secrets/credentials are used to authenticate the caller. @@ -355,14 +356,14 @@ the password, "key", retrieved from the XML/JCECKs configuration files. 
```xml - fs.azure.account.auth.type.abfswales1.dfs.core.windows.net + fs.azure.account.auth.type.ACCOUNT_NAME.dfs.core.windows.net SharedKey - fs.azure.account.key.abfswales1.dfs.core.windows.net - ZGlkIHlvdSByZWFsbHkgdGhpbmsgSSB3YXMgZ29pbmcgdG8gcHV0IGEga2V5IGluIGhlcmU/IA== + fs.azure.account.key.ACCOUNT_NAME.dfs.core.windows.net + ACCOUNT_KEY The secret password. Never share these. @@ -609,21 +610,119 @@ In case delegation token is enabled, and the config `fs.azure.delegation.token ### Shared Access Signature (SAS) Token Provider -A Shared Access Signature (SAS) token provider supplies the ABFS connector with SAS -tokens by implementing the SASTokenProvider interface. - -```xml - - fs.azure.account.auth.type - SAS - - - fs.azure.sas.token.provider.type - {fully-qualified-class-name-for-implementation-of-SASTokenProvider-interface} - -``` - -The declared class must implement `org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider`. +A shared access signature (SAS) provides secure delegated access to resources in +your storage account. With a SAS, you have granular control over how a client can access your data. +To know more about how SAS Authentication works refer to +[Grant limited access to Azure Storage resources using shared access signatures (SAS)](https://learn.microsoft.com/en-us/azure/storage/common/storage-sas-overview) + +There are three types of SAS supported by Azure Storage: +- [User Delegation SAS](https://learn.microsoft.com/en-us/rest/api/storageservices/create-user-delegation-sas): Recommended for use with ABFS Driver with HNS Enabled ADLS Gen2 accounts. It is Identity based SAS that works at blob/directory level) +- [Service SAS](https://learn.microsoft.com/en-us/rest/api/storageservices/create-service-sas): Global and works at container level. +- [Account SAS](https://learn.microsoft.com/en-us/rest/api/storageservices/create-account-sas): Global and works at account level. + +#### Known Issues With SAS +- SAS Based Authentication works only with HNS Enabled ADLS Gen2 Accounts which +is a recommended account type to be used with ABFS. +- Certain root level operations are known to fail with SAS Based Authentication. + +#### Using User Delegation SAS with ABFS + +- **Description**: ABFS allows you to implement your custom SAS Token Provider +that uses your identity to create a user delegation key which then can be used to +create SAS instead of storage account key. The declared class must implement +`org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider`. + +- **Configuration**: To use this method with ABFS Driver, specify the following properties in your `core-site.xml` file: + 1. Authentication Type: + ```xml + + fs.azure.account.auth.type + SAS + + ``` + + 1. Custom SAS Token Provider Class: + ```xml + + fs.azure.sas.token.provider.type + CUSTOM_SAS_TOKEN_PROVIDER_CLASS + + ``` + + Replace `CUSTOM_SAS_TOKEN_PROVIDER_CLASS` with fully qualified class name of +your custom token provider implementation. Depending upon the implementation you +might need to specify additional configurations that are required by your custom +implementation. + +- **Example**: ABFS Hadoop Driver provides a [MockDelegationSASTokenProvider](https://github.com/apache/hadoop/blob/trunk/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java) +implementation that can be used as an example on how to implement your own custom +SASTokenProvider. 
This requires the Application credentials to be specifed using +the following configurations apart from above two: + + 1. App Service Principle Tenant Id: + ```xml + + fs.azure.test.app.service.principal.tenant.id + TENANT_ID + + ``` + 1. App Service Principle Object Id: + ```xml + + fs.azure.test.app.service.principal.object.id + OBJECT_ID + + ``` + 1. App Id: + ```xml + + fs.azure.test.app.id + APPLICATION_ID + + ``` + 1. App Secret: + ```xml + + fs.azure.test.app.secret + APPLICATION_SECRET + + ``` + +- **Security**: More secure than Shared Key and allows granting limited access +to data without exposing the access key. Recommended to be used only with HNS Enabled, +ADLS Gen 2 storage accounts. + +#### Using Account/Service SAS with ABFS + +- **Description**: ABFS allows user to use Account/Service SAS for authenticating +requests. User can specify them as fixed SAS Token to be used across all the requests. + +- **Configuration**: To use this method with ABFS Driver, specify the following properties in your `core-site.xml` file: + + 1. Authentication Type: + ```xml + + fs.azure.account.auth.type + SAS + + ``` + + 1. Fixed SAS Token: + ```xml + + fs.azure.sas.fixed.token + FIXED_SAS_TOKEN + + ``` + + Replace `FIXED_SAS_TOKEN` with fixed Account/Service SAS. You can also +generate SAS from Azure portal. Account -> Security + Networking -> Shared Access Signature + +- **Security**: Account/Service SAS requires account keys to be used which makes +them less secure. There is no scope of having delegated access to different users. + +*Note:* When `fs.azure.sas.token.provider.type` and `fs.azure.fixed.sas.token` +are both configured, precedence will be given to the custom token provider implementation. ## Technical notes diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java index 00d853175108d..2f0d52f056bd9 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java @@ -284,13 +284,30 @@ public void loadConfiguredFileSystem() throws Exception { useConfiguredFileSystem = true; } + /** + * Create a filesystem for SAS tests using the SharedKey authentication. + * We do not allow filesystem creation with SAS because certain type of SAS do not have + * required permissions, and it is not known what type of SAS is configured by user. + * @throws Exception + */ protected void createFilesystemForSASTests() throws Exception { - // The SAS tests do not have permission to create a filesystem - // so first create temporary instance of the filesystem using SharedKey - // then re-use the filesystem it creates with SAS auth instead of SharedKey. + createFilesystemWithTestFileForSASTests(null); + } + + /** + * Create a filesystem for SAS tests along with a test file using SharedKey authentication. + * We do not allow filesystem creation with SAS because certain type of SAS do not have + * required permissions, and it is not known what type of SAS is configured by user. + * @param testPath path of the test file. 
+ * @throws Exception + */ + protected void createFilesystemWithTestFileForSASTests(Path testPath) throws Exception { try (AzureBlobFileSystem tempFs = (AzureBlobFileSystem) FileSystem.newInstance(rawConfig)){ ContractTestUtils.assertPathExists(tempFs, "This path should exist", new Path("/")); + if (testPath != null) { + tempFs.create(testPath).close(); + } abfsConfig.set(FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME, AuthType.SAS.name()); usingFilesystemForSASTests = true; } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java new file mode 100644 index 0000000000000..d8db901151fe7 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemChooseSAS.java @@ -0,0 +1,182 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.azurebfs; + +import java.io.IOException; +import java.nio.file.AccessDeniedException; + +import org.assertj.core.api.Assertions; +import org.junit.Assume; +import org.junit.Test; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.SASTokenProviderException; +import org.apache.hadoop.fs.azurebfs.extensions.MockDelegationSASTokenProvider; +import org.apache.hadoop.fs.azurebfs.services.AuthType; +import org.apache.hadoop.fs.azurebfs.services.FixedSASTokenProvider; +import org.apache.hadoop.fs.azurebfs.utils.AccountSASGenerator; +import org.apache.hadoop.fs.azurebfs.utils.Base64; + +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SAS_FIXED_TOKEN; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_SAS_TOKEN_PROVIDER_TYPE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.accountProperty; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Tests to validate the choice between using a custom SASTokenProvider + * implementation and FixedSASTokenProvider. + */ +public class ITestAzureBlobFileSystemChooseSAS extends AbstractAbfsIntegrationTest{ + + private String accountSAS = null; + private static final String TEST_PATH = "testPath"; + + /** + * To differentiate which SASTokenProvider was used we will use different type of SAS Tokens. + * FixedSASTokenProvider will return an Account SAS with only read permissions. + * SASTokenProvider will return a User Delegation SAS Token with both read and write permissions. 
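[Editor's illustration, not part of the patch: the tests above exercise both the fixed-token provider and a custom provider. A minimal skeleton of the custom side is sketched below; the class name and the failure behavior are illustrative, while the two overridden methods follow the same SASTokenProvider signatures implemented by FixedSASTokenProvider earlier in this change.]

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider;

/** Illustrative sketch only; a real provider would mint a user delegation SAS. */
public class SampleUserDelegationSASTokenProvider implements SASTokenProvider {

  private Configuration configuration;
  private String accountName;

  @Override
  public void initialize(Configuration configuration, String accountName)
      throws IOException {
    // Keep whatever state is needed to mint tokens later.
    this.configuration = configuration;
    this.accountName = accountName;
  }

  @Override
  public String getSASToken(String account, String fileSystem, String path,
      String operation) throws IOException {
    // A real implementation would return a user delegation SAS scoped to the
    // given path and operation; this placeholder fails fast instead.
    throw new IOException("SAS minting is not implemented in this sketch");
  }
}
```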
+= */ + public ITestAzureBlobFileSystemChooseSAS() throws Exception { + // SAS Token configured might not have permissions for creating file system. + // Shared Key must be configured to create one. Once created, a new instance + // of same file system will be used with SAS Authentication. + Assume.assumeTrue(this.getAuthType() == AuthType.SharedKey); + } + + @Override + public void setup() throws Exception { + createFilesystemWithTestFileForSASTests(new Path(TEST_PATH)); + super.setup(); + generateAccountSAS(); + } + + /** + * Generates an Account SAS Token using the Account Shared Key to be used as a fixed SAS Token. + * Account SAS used here will have only read permissions to resources. + * This will be used by individual tests to set in the configurations. + * @throws AzureBlobFileSystemException + */ + private void generateAccountSAS() throws AzureBlobFileSystemException { + final String accountKey = getConfiguration().getStorageAccountKey(); + AccountSASGenerator configAccountSASGenerator = new AccountSASGenerator(Base64.decode(accountKey)); + // Setting only read permissions. + configAccountSASGenerator.setPermissions("r"); + accountSAS = configAccountSASGenerator.getAccountSAS(getAccountName()); + } + + /** + * Tests the scenario where both the custom SASTokenProvider and a fixed SAS token are configured. + * Custom implementation of SASTokenProvider class should be chosen and User Delegation SAS should be used. + * @throws Exception + */ + @Test + public void testBothProviderFixedTokenConfigured() throws Exception { + AbfsConfiguration testAbfsConfig = new AbfsConfiguration( + getRawConfiguration(), this.getAccountName()); + removeAnyPresetConfiguration(testAbfsConfig); + + // Configuring a SASTokenProvider class which provides a user delegation SAS. + testAbfsConfig.set(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, + MockDelegationSASTokenProvider.class.getName()); + + // configuring the Fixed SAS token which is an Account SAS. + testAbfsConfig.set(FS_AZURE_SAS_FIXED_TOKEN, accountSAS); + + // Creating a new file system with updated configs. + try (AzureBlobFileSystem newTestFs = (AzureBlobFileSystem) + FileSystem.newInstance(testAbfsConfig.getRawConfiguration())) { + + // Asserting that MockDelegationSASTokenProvider is used. + Assertions.assertThat(testAbfsConfig.getSASTokenProvider()) + .describedAs("Custom SASTokenProvider Class must be used") + .isInstanceOf(MockDelegationSASTokenProvider.class); + + // Assert that User Delegation SAS is used and both read and write operations are permitted. + Path testPath = path(getMethodName()); + newTestFs.create(testPath).close(); + newTestFs.open(testPath).close(); + } + } + + /** + * Tests the scenario where only the fixed token is configured, and no token provider class is set. + * Account SAS Token configured as fixed SAS should be used. + * Also verifies that Account Specific as well as Account Agnostic Fixed SAS Token Works. + * @throws IOException + */ + @Test + public void testOnlyFixedTokenConfigured() throws Exception { + AbfsConfiguration testAbfsConfig = new AbfsConfiguration( + getRawConfiguration(), this.getAccountName()); + + // setting an Account Specific Fixed SAS token. + removeAnyPresetConfiguration(testAbfsConfig); + testAbfsConfig.set(accountProperty(FS_AZURE_SAS_FIXED_TOKEN, this.getAccountName()), accountSAS); + testOnlyFixedTokenConfiguredInternal(testAbfsConfig); + + // setting an Account Agnostic Fixed SAS token. 
+ removeAnyPresetConfiguration(testAbfsConfig); + testAbfsConfig.set(FS_AZURE_SAS_FIXED_TOKEN, accountSAS); + testOnlyFixedTokenConfiguredInternal(testAbfsConfig); + } + + private void testOnlyFixedTokenConfiguredInternal(AbfsConfiguration testAbfsConfig) throws Exception { + // Creating a new filesystem with updated configs. + try (AzureBlobFileSystem newTestFs = (AzureBlobFileSystem) + FileSystem.newInstance(testAbfsConfig.getRawConfiguration())) { + + // Asserting that FixedSASTokenProvider is used. + Assertions.assertThat(testAbfsConfig.getSASTokenProvider()) + .describedAs("FixedSASTokenProvider Class must be used") + .isInstanceOf(FixedSASTokenProvider.class); + + // Assert that Account SAS is used and only read operations are permitted. + Path testPath = path(getMethodName()); + intercept(AccessDeniedException.class, () -> { + newTestFs.create(testPath); + }); + // Read Operation is permitted + newTestFs.getFileStatus(new Path(TEST_PATH)); + } + } + + /** + * Tests the scenario where both the token provider class and the fixed token are not configured. + * The code errors out at the initialization stage itself. + * @throws IOException + */ + @Test + public void testBothProviderFixedTokenUnset() throws Exception { + AbfsConfiguration testAbfsConfig = new AbfsConfiguration( + getRawConfiguration(), this.getAccountName()); + removeAnyPresetConfiguration(testAbfsConfig); + + intercept(SASTokenProviderException.class, () -> { + FileSystem.newInstance(testAbfsConfig.getRawConfiguration()); + }); + } + + private void removeAnyPresetConfiguration(AbfsConfiguration testAbfsConfig) { + testAbfsConfig.unset(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE); + testAbfsConfig.unset(FS_AZURE_SAS_FIXED_TOKEN); + testAbfsConfig.unset(accountProperty(FS_AZURE_SAS_TOKEN_PROVIDER_TYPE, this.getAccountName())); + testAbfsConfig.unset(accountProperty(FS_AZURE_SAS_FIXED_TOKEN, this.getAccountName())); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java index 00c681fdadde8..53185606b6c80 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java @@ -43,7 +43,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_HTTP_READ_TIMEOUT; /** - * A mock SAS token provider implementation + * A mock SAS token provider implementation. 
*/ public class MockDelegationSASTokenProvider implements SASTokenProvider { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java index 50ac20970f45f..3fda128a9c01d 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockSASTokenProvider.java @@ -20,7 +20,11 @@ import java.io.IOException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; @@ -28,17 +32,25 @@ import org.apache.hadoop.fs.azurebfs.utils.ServiceSASGenerator; /** - * A mock SAS token provider implementation + * A mock SAS token provider implementation. */ public class MockSASTokenProvider implements SASTokenProvider { private byte[] accountKey; private ServiceSASGenerator generator; private boolean skipAuthorizationForTestSetup = false; + private static final Logger LOG = LoggerFactory.getLogger(MockSASTokenProvider.class); // For testing we use a container SAS for all operations. private String generateSAS(byte[] accountKey, String accountName, String fileSystemName) { - return generator.getContainerSASWithFullControl(accountName, fileSystemName); + String containerSAS = ""; + try { + containerSAS = generator.getContainerSASWithFullControl(accountName, fileSystemName); + } catch (InvalidConfigurationValueException e) { + LOG.debug(e.getMessage()); + containerSAS = ""; + } + return containerSAS; } @Override diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java new file mode 100644 index 0000000000000..2af741b7a4c12 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AccountSASGenerator.java @@ -0,0 +1,103 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.utils; + +import java.time.Instant; + +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.services.AbfsUriQueryBuilder; + +/** + * Test Account SAS Generator. + * SAS generated by this will have only read access to storage account blob and file services. 
+ */ +public class AccountSASGenerator extends SASGenerator { + /** + * Creates Account SAS from Storage Account Key. + * https://learn.microsoft.com/en-us/rest/api/storageservices/create-account-sas. + * @param accountKey: the storage account key. + */ + public AccountSASGenerator(byte[] accountKey) { + super(accountKey); + } + + private String permissions = "racwdl"; + + public String getAccountSAS(String accountName) throws + AzureBlobFileSystemException { + // retaining only the account name + accountName = getCanonicalAccountName(accountName); + String sp = permissions; + String sv = "2021-06-08"; + String srt = "sco"; + + String st = ISO_8601_FORMATTER.format(Instant.now().minus(FIVE_MINUTES)); + String se = ISO_8601_FORMATTER.format(Instant.now().plus(ONE_DAY)); + + String ss = "bf"; + String spr = "https"; + String signature = computeSignatureForSAS(sp, ss, srt, st, se, sv, accountName); + + AbfsUriQueryBuilder qb = new AbfsUriQueryBuilder(); + qb.addQuery("sp", sp); + qb.addQuery("ss", ss); + qb.addQuery("srt", srt); + qb.addQuery("st", st); + qb.addQuery("se", se); + qb.addQuery("sv", sv); + qb.addQuery("sig", signature); + return qb.toString().substring(1); + } + + private String computeSignatureForSAS(String signedPerm, String signedService, String signedResType, + String signedStart, String signedExp, String signedVersion, String accountName) { + + StringBuilder sb = new StringBuilder(); + sb.append(accountName); + sb.append("\n"); + sb.append(signedPerm); + sb.append("\n"); + sb.append(signedService); + sb.append("\n"); + sb.append(signedResType); + sb.append("\n"); + sb.append(signedStart); + sb.append("\n"); + sb.append(signedExp); + sb.append("\n"); + sb.append("\n"); // signedIP + sb.append("\n"); // signedProtocol + sb.append(signedVersion); + sb.append("\n"); + sb.append("\n"); //signed encryption scope + + String stringToSign = sb.toString(); + LOG.debug("Account SAS stringToSign: " + stringToSign.replace("\n", ".")); + return computeHmac256(stringToSign); + } + + /** + * By default Account SAS has all the available permissions. Use this to + * override the default permissions and set as per the requirements. + * @param permissions + */ + public void setPermissions(final String permissions) { + this.permissions = permissions; + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java index 2e9289d8d44c7..a80ddac5ed36f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/SASGenerator.java @@ -29,6 +29,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; + /** * Test SAS generator. 
*/ @@ -54,10 +58,8 @@ public String toString() { protected static final Logger LOG = LoggerFactory.getLogger(SASGenerator.class); public static final Duration FIVE_MINUTES = Duration.ofMinutes(5); public static final Duration ONE_DAY = Duration.ofDays(1); - public static final DateTimeFormatter ISO_8601_FORMATTER = - DateTimeFormatter - .ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT) - .withZone(ZoneId.of("UTC")); + public static final DateTimeFormatter ISO_8601_FORMATTER = DateTimeFormatter + .ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.ROOT).withZone(ZoneId.of("UTC")); private Mac hmacSha256; private byte[] key; @@ -68,7 +70,7 @@ private SASGenerator() { /** * Called by subclasses to initialize the cryptographic SHA-256 HMAC provider. - * @param key - a 256-bit secret key + * @param key - a 256-bit secret key. */ protected SASGenerator(byte[] key) { this.key = key; @@ -85,6 +87,26 @@ private void initializeMac() { } } + protected String getCanonicalAccountName(String accountName) throws + InvalidConfigurationValueException { + // returns the account name without the endpoint + // given account names with endpoint have the format accountname.endpoint + // For example, input of xyz.dfs.core.windows.net should return "xyz" only + int dotIndex = accountName.indexOf(AbfsHttpConstants.DOT); + if (dotIndex == 0) { + // case when accountname starts with a ".": endpoint is present, accountName is null + // for example .dfs.azure.com, which is invalid + throw new InvalidConfigurationValueException("Account Name is not fully qualified"); + } + if (dotIndex > 0) { + // case when endpoint is present with accountName + return accountName.substring(0, dotIndex); + } else { + // case when accountName is already canonicalized + return accountName; + } + } + protected String computeHmac256(final String stringToSign) { byte[] utf8Bytes; try { @@ -98,4 +120,4 @@ protected String computeHmac256(final String stringToSign) { } return Base64.encode(hmac); } -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java index 24a1cea255b4a..0ae5239e8f2a5 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/ServiceSASGenerator.java @@ -20,23 +20,26 @@ import java.time.Instant; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; import org.apache.hadoop.fs.azurebfs.services.AbfsUriQueryBuilder; /** - * Test Service SAS generator. + * Test Service SAS Generator. */ public class ServiceSASGenerator extends SASGenerator { /** - * Creates a SAS Generator for Service SAS + * Creates a SAS Generator for Service SAS. * (https://docs.microsoft.com/en-us/rest/api/storageservices/create-service-sas). - * @param accountKey - the storage account key + * @param accountKey - the storage account key. 
*/ public ServiceSASGenerator(byte[] accountKey) { super(accountKey); } - public String getContainerSASWithFullControl(String accountName, String containerName) { + public String getContainerSASWithFullControl(String accountName, String containerName) throws + InvalidConfigurationValueException { + accountName = getCanonicalAccountName(accountName); String sp = "rcwdl"; String sv = AuthenticationVersion.Feb20.toString(); String sr = "c"; @@ -66,7 +69,7 @@ private String computeSignatureForSAS(String sp, String st, String se, String sv sb.append("\n"); sb.append(se); sb.append("\n"); - // canonicalized resource + // canonicalize resource sb.append("/blob/"); sb.append(accountName); sb.append("/"); @@ -93,4 +96,4 @@ private String computeSignatureForSAS(String sp, String st, String se, String sv LOG.debug("Service SAS stringToSign: " + stringToSign.replace("\n", ".")); return computeHmac256(stringToSign); } -} \ No newline at end of file +} From 9f6c997662c5212bd7b542c1e5188ad4ede3f840 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Sat, 1 Jun 2024 06:15:20 +0800 Subject: [PATCH 016/113] YARN-11471. [Federation] FederationStateStoreFacade Cache Support Caffeine. (#6795) Contributed by Shilun Fan. Reviewed-by: Inigo Goiri Signed-off-by: Shilun Fan --- LICENSE-binary | 1 + hadoop-project/pom.xml | 6 + .../hadoop-yarn-server-common/pom.xml | 14 ++ .../cache/FederationCaffeineCache.java | 131 ++++++++++++++++++ .../federation/cache/FederationJCache.java | 2 +- .../federation/cache/TestFederationCache.java | 3 +- .../pom.xml | 4 + .../src/site/markdown/Federation.md | 8 +- 8 files changed, 166 insertions(+), 3 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationCaffeineCache.java diff --git a/LICENSE-binary b/LICENSE-binary index c0258e9311b1b..32f9f06ae15da 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -226,6 +226,7 @@ com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.12.7 com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.12.7 com.fasterxml.uuid:java-uuid-generator:3.1.4 com.fasterxml.woodstox:woodstox-core:5.4.0 +com.github.ben-manes.caffeine:caffeine:2.9.3 com.github.davidmoten:rxjava-extras:0.8.0.17 com.github.stephenc.jcip:jcip-annotations:1.0-1 com.google:guice:4.0 diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index ba7631189a1a4..0345925e9994e 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -134,6 +134,7 @@ 2.0.3 3.8.2 1.1.1 + 2.9.3 4.0.3 10.14.2.0 6.2.1.jre7 @@ -1975,6 +1976,11 @@ + + com.github.ben-manes.caffeine + caffeine + ${caffeine.version} + com.zaxxer HikariCP diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml index 1f762d31800d3..e768ad5e48451 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml @@ -131,6 +131,20 @@ org.ehcache ehcache + + com.github.ben-manes.caffeine + caffeine + + + org.checkerframework + checker-qual + + + com.google.errorprone + error_prone_annotations + + + com.zaxxer HikariCP diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationCaffeineCache.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationCaffeineCache.java new file mode 100644 index 0000000000000..cbf3e9db3db3d --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationCaffeineCache.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package org.apache.hadoop.yarn.server.federation.cache; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.federation.store.FederationStateStore; +import org.apache.hadoop.yarn.server.federation.store.records.SubClusterId; +import org.apache.hadoop.yarn.server.federation.store.records.SubClusterInfo; +import org.apache.hadoop.yarn.server.federation.store.records.SubClusterPolicyConfiguration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; +import java.util.concurrent.TimeUnit; + +/** + * CaffeineCache is a high-performance caching library for Java, offering better performance compared to Ehcache and Guava Cache. + * We are integrating this cache to store information about application and homesubclusters etc. + */ +public class FederationCaffeineCache extends FederationCache { + + private static final Logger LOG = LoggerFactory.getLogger(FederationCaffeineCache.class); + + private Cache cache; + + private int cacheTimeToLive; + private long cacheEntityNums; + + private String className = this.getClass().getSimpleName(); + + private boolean isCachingEnabled = false; + + @Override + public boolean isCachingEnabled() { + return isCachingEnabled; + } + + @Override + public void initCache(Configuration pConf, FederationStateStore pStateStore) { + cacheTimeToLive = pConf.getInt(YarnConfiguration.FEDERATION_CACHE_TIME_TO_LIVE_SECS, + YarnConfiguration.DEFAULT_FEDERATION_CACHE_TIME_TO_LIVE_SECS); + cacheEntityNums = pConf.getLong(YarnConfiguration.FEDERATION_CACHE_ENTITY_NUMS, + YarnConfiguration.DEFAULT_FEDERATION_CACHE_ENTITY_NUMS); + if (cacheTimeToLive <= 0) { + isCachingEnabled = false; + LOG.warn("Federation cache is not enabled. If we want to enable federation cache, " + + "we need to set yarn.federation.cache-ttl.secs greater than 0."); + return; + } + this.setStateStore(pStateStore); + + // Initialize Cache. + LOG.info("Creating a JCache Manager with name {}. " + + "Cache TTL Time = {} secs. 
Cache Entity Nums = {}.", className, cacheTimeToLive, + cacheEntityNums); + + this.cache = Caffeine.newBuilder().maximumSize(cacheEntityNums) + .expireAfterWrite(cacheTimeToLive, TimeUnit.SECONDS).build(); + } + + @Override + public void clearCache() { + this.cache.cleanUp(); + this.cache = null; + } + + @Override + public Map getSubClusters( + boolean filterInactiveSubClusters) throws YarnException { + final String cacheKey = buildCacheKey(className, GET_SUBCLUSTERS_CACHEID, + Boolean.toString(filterInactiveSubClusters)); + CacheRequest cacheRequest = cache.getIfPresent(cacheKey); + if (cacheRequest == null) { + cacheRequest = buildGetSubClustersCacheRequest(className, filterInactiveSubClusters); + cache.put(cacheKey, cacheRequest); + } + return buildSubClusterInfoMap(cacheRequest); + } + + @Override + public Map getPoliciesConfigurations() + throws Exception { + final String cacheKey = buildCacheKey(className, GET_POLICIES_CONFIGURATIONS_CACHEID); + CacheRequest cacheRequest = cache.getIfPresent(cacheKey); + if(cacheRequest == null){ + cacheRequest = buildGetPoliciesConfigurationsCacheRequest(className); + cache.put(cacheKey, cacheRequest); + } + return buildPolicyConfigMap(cacheRequest); + } + + @Override + public SubClusterId getApplicationHomeSubCluster(ApplicationId appId) throws Exception { + final String cacheKey = buildCacheKey(className, GET_APPLICATION_HOME_SUBCLUSTER_CACHEID, + appId.toString()); + CacheRequest cacheRequest = cache.getIfPresent(cacheKey); + if (cacheRequest == null) { + cacheRequest = buildGetApplicationHomeSubClusterRequest(className, appId); + cache.put(cacheKey, cacheRequest); + } + CacheResponse response = + ApplicationHomeSubClusterCacheResponse.class.cast(cacheRequest.getValue()); + return response.getItem(); + } + + @Override + public void removeSubCluster(boolean flushCache) { + final String cacheKey = buildCacheKey(className, GET_SUBCLUSTERS_CACHEID, + Boolean.toString(flushCache)); + cache.invalidate(cacheKey); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationJCache.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationJCache.java index b4dbefe1278a8..07f300e65f6be 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationJCache.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationJCache.java @@ -91,7 +91,7 @@ public void initCache(Configuration pConf, FederationStateStore pStateStore) { @Override public void clearCache() { - + this.cache.clear(); this.cache = null; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/cache/TestFederationCache.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/cache/TestFederationCache.java index 8e0f15802bcc6..8873e60939043 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/cache/TestFederationCache.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/test/java/org/apache/hadoop/yarn/server/federation/cache/TestFederationCache.java @@ -49,7 +49,8 @@ public class TestFederationCache { @Parameterized.Parameters public static Collection getParameters() { - return Arrays.asList(new Class[][] {{FederationGuavaCache.class}, {FederationJCache.class}}); + return Arrays.asList(new Class[][]{{FederationGuavaCache.class}, {FederationJCache.class}, + {FederationCaffeineCache.class}}); } private final long clusterTs = System.currentTimeMillis(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-common/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-common/pom.xml index 7f3c711fe7935..976c21e2dd2e4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-common/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-common/pom.xml @@ -66,6 +66,10 @@ com.google.inject guice + + error_prone_annotations + com.google.errorprone + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/Federation.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/Federation.md index 3886f54041e6b..fcb36c250c2cc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/Federation.md +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/Federation.md @@ -536,7 +536,7 @@ To enable cross-origin support (CORS) for the Yarn Router, please set the follow #### How to configure Router Cache Cache is enabled by default. When we set the `yarn.federation.cache-ttl.secs` parameter and its value is greater than 0, Cache will be enabled. -We currently provide two Cache implementations: `JCache` and `GuavaCache`. +We currently provide three Cache implementations: `JCache`, `GuavaCache`, `CaffeineCache` - JCache @@ -550,6 +550,12 @@ If we want to use JCache, we can configure `yarn.federation.cache.class` to `org This is a Cache implemented based on the Guava framework. If we want to use it, we can configure `yarn.federation.cache.class` to `org.apache.hadoop.yarn.server.federation.cache.FederationGuavaCache`. +- CaffeineCache + +[CaffeineCache](https://github.com/ben-manes/caffeine) is a high-performance caching library for Java, offering better performance compared to Ehcache and Guava Cache. +If we want to use it, we can configure `yarn.federation.cache.class` to `org.apache.hadoop.yarn.server.federation.cache.FederationCaffeineCache`. + + #### How to configure Router AuditLog We can enable the AuditLog configuration for the Router and collect the AuditLog in a separate log file. We need to modify the configuration related to RouterAuditLog in the **conf/log4j.properties** file. From 167d4c8447f968c789ba2fb374644cf1d304494c Mon Sep 17 00:00:00 2001 From: Yang Jiandan Date: Sat, 1 Jun 2024 06:18:28 +0800 Subject: [PATCH 017/113] YARN-11699. Diagnostics lacks userlimit info when user capacity has reached its maximum limit (#6849) Contributed by Jiandan Yang. 
Signed-off-by: Shilun Fan --- .../scheduler/capacity/AbstractLeafQueue.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractLeafQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractLeafQueue.java index 565f89de32080..ba105710467fe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractLeafQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/AbstractLeafQueue.java @@ -1270,8 +1270,14 @@ public CSAssignment assignContainers(Resource clusterResource, } } if (!userAssignable) { + String userName = application.getUser(); + User user = getUser(userName); + Resource usedResourceByUser = + user == null ? null : user.getUsed(candidates.getPartition()); application.updateAMContainerDiagnostics(AMState.ACTIVATED, - "User capacity has reached its maximum limit."); + "User capacity has reached its maximum limit," + + " user limit is " + userLimit + ", resource used by " + + userName + " is " + usedResourceByUser + "."); ActivitiesLogger.APP.recordRejectedAppActivityFromLeafQueue( activitiesManager, node, application, application.getPriority(), ActivityDiagnosticConstant.QUEUE_HIT_USER_MAX_CAPACITY_LIMIT); From f1e2ceb823e92ce864f7f2f327c4c0af722b4d85 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Mon, 3 Jun 2024 09:10:06 -0700 Subject: [PATCH 018/113] HDFS-13603: Do not propagate ExecutionException while initializing EDEK queues for keys. (#6860) --- .../crypto/key/kms/KMSClientProvider.java | 6 +-- .../hadoop/crypto/key/kms/ValueQueue.java | 20 +++++++-- .../hadoop/crypto/key/TestValueQueue.java | 43 +++++++++++++++++++ ...eyGeneratorKeyProviderCryptoExtension.java | 9 +--- 4 files changed, 62 insertions(+), 16 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java index f0c912224f90f..6ee9068ea3458 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java @@ -947,11 +947,7 @@ public void flush() throws IOException { @Override public void warmUpEncryptedKeys(String... 
keyNames) throws IOException { - try { - encKeyVersionQueue.initializeQueuesForKeys(keyNames); - } catch (ExecutionException e) { - throw new IOException(e); - } + encKeyVersionQueue.initializeQueuesForKeys(keyNames); } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/ValueQueue.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/ValueQueue.java index 58ce443146df3..cbf419356343f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/ValueQueue.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/ValueQueue.java @@ -269,12 +269,24 @@ public ValueQueue(final int numValues, final float lowWaterMark, long expiry, * Initializes the Value Queues for the provided keys by calling the * fill Method with "numInitValues" values * @param keyNames Array of key Names - * @throws ExecutionException executionException. + * @throws IOException if initialization fails for any provided keys */ - public void initializeQueuesForKeys(String... keyNames) - throws ExecutionException { + public void initializeQueuesForKeys(String... keyNames) throws IOException { + int successfulInitializations = 0; + ExecutionException lastException = null; + for (String keyName : keyNames) { - keyQueues.get(keyName); + try { + keyQueues.get(keyName); + successfulInitializations++; + } catch (ExecutionException e) { + lastException = e; + } + } + + if (keyNames.length > 0 && successfulInitializations != keyNames.length) { + throw new IOException(String.format("Failed to initialize %s queues for the provided keys.", + keyNames.length - successfulInitializations), lastException); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestValueQueue.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestValueQueue.java index 4805fca1d49f4..6bf76b6e505f0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestValueQueue.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestValueQueue.java @@ -21,19 +21,27 @@ import java.util.Arrays; import java.util.HashSet; import java.util.Queue; +import java.util.concurrent.ExecutionException; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeoutException; import java.util.concurrent.TimeUnit; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.commons.lang3.reflect.FieldUtils; import org.apache.hadoop.crypto.key.kms.ValueQueue; import org.apache.hadoop.crypto.key.kms.ValueQueue.QueueRefiller; import org.apache.hadoop.crypto.key.kms.ValueQueue.SyncGenerationPolicy; import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.thirdparty.com.google.common.cache.LoadingCache; import org.junit.Assert; import org.junit.Test; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.spy; + public class TestValueQueue { Logger LOG = LoggerFactory.getLogger(TestValueQueue.class); @@ -111,6 +119,41 @@ public void testWarmUp() throws Exception { vq.shutdown(); } + /** + * Verifies that Queue is initialized (Warmed-up) for partial keys. 
+ */ + @Test(timeout = 30000) + public void testPartialWarmUp() throws Exception { + MockFiller filler = new MockFiller(); + ValueQueue vq = + new ValueQueue<>(10, 0.5f, 30000, 1, + SyncGenerationPolicy.ALL, filler); + + @SuppressWarnings("unchecked") + LoadingCache> kq = + (LoadingCache>) + FieldUtils.getField(ValueQueue.class, "keyQueues", true).get(vq); + + LoadingCache> + kqSpy = spy(kq); + doThrow(new ExecutionException(new Exception())).when(kqSpy).get("k2"); + FieldUtils.writeField(vq, "keyQueues", kqSpy, true); + + Assert.assertThrows(IOException.class, () -> vq.initializeQueuesForKeys("k1", "k2", "k3")); + verify(kqSpy, times(1)).get("k2"); + + FillInfo[] fillInfos = + {filler.getTop(), filler.getTop(), filler.getTop()}; + Assert.assertEquals(5, fillInfos[0].num); + Assert.assertEquals(5, fillInfos[1].num); + Assert.assertNull(fillInfos[2]); + + Assert.assertEquals(new HashSet<>(Arrays.asList("k1", "k3")), + new HashSet<>(Arrays.asList(fillInfos[0].key, + fillInfos[1].key))); + vq.shutdown(); + } + /** * Verifies that the refill task is executed after "checkInterval" if * num values below "lowWatermark" diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/EagerKeyGeneratorKeyProviderCryptoExtension.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/EagerKeyGeneratorKeyProviderCryptoExtension.java index 273c6733610d4..bc9e6d7a90981 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/EagerKeyGeneratorKeyProviderCryptoExtension.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/EagerKeyGeneratorKeyProviderCryptoExtension.java @@ -104,13 +104,8 @@ SyncGenerationPolicy.LOW_WATERMARK, new EncryptedQueueRefiller() } @Override - public void warmUpEncryptedKeys(String... keyNames) throws - IOException { - try { - encKeyVersionQueue.initializeQueuesForKeys(keyNames); - } catch (ExecutionException e) { - throw new IOException(e); - } + public void warmUpEncryptedKeys(String... keyNames) throws IOException { + encKeyVersionQueue.initializeQueuesForKeys(keyNames); } @Override From f92a8ab8ae54f11946412904973eb60404dee7ff Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Mon, 3 Jun 2024 12:00:31 -0500 Subject: [PATCH 019/113] HADOOP-19190. 
Skip ITestS3AEncryptionWithDefaultS3Settings.testEncryptionFileAttributes when bucket not encrypted with sse-kms (#6859) Follow up of HADOOP-19190 --- .../hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java index 423796bf82b87..4fc63cd4e1b18 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java @@ -102,6 +102,7 @@ protected void assertEncrypted(Path path) throws IOException { @Test public void testEncryptionFileAttributes() throws Exception { describe("Test for correct encryption file attributes for SSE-KMS with user default setting."); + skipIfBucketNotKmsEncrypted(); Path path = path(createFilename(1024)); byte[] data = dataset(1024, 'a', 'z'); S3AFileSystem fs = getFileSystem(); From d8d3d538e463e7cb651dfe013507fa6c4576b8dc Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Wed, 5 Jun 2024 22:25:48 +0800 Subject: [PATCH 020/113] HADOOP-19193. Create orphan commit for website deployment (#6864) This stop gh-pages deployments from increasing the size of the git repository on every run Contributed by Cheng Pan --- .github/workflows/website.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/website.yml b/.github/workflows/website.yml index 6d925f3dcff2c..67b2b908d273d 100644 --- a/.github/workflows/website.yml +++ b/.github/workflows/website.yml @@ -56,4 +56,5 @@ jobs: publish_dir: ./staging/hadoop-project user_name: 'github-actions[bot]' user_email: 'github-actions[bot]@users.noreply.github.com' + force_orphan: true From 2ee0bf953492b66765d3d2c902407fbf9bceddec Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Wed, 5 Jun 2024 15:31:23 +0100 Subject: [PATCH 021/113] HADOOP-19154. Upgrade bouncycastle to 1.78.1 due to CVEs (#6755) Addresses * CVE-2024-29857 - Importing an EC certificate with specially crafted F2m parameters can cause high CPU usage during parameter evaluation. * CVE-2024-30171 - Possible timing based leakage in RSA based handshakes due to exception processing eliminated. * CVE-2024-30172 - Crafted signature and public key can be used to trigger an infinite loop in the Ed25519 verification code. * CVE-2024-301XX - When endpoint identification is enabled and an SSL socket is not created with an explicit hostname (as happens with HttpsURLConnection), hostname verification could be performed against a DNS-resolved IP address. 
Contributed by PJ Fanning --- LICENSE-binary | 6 +++--- .../hadoop-cos/src/site/markdown/cloud-storage/index.md | 2 +- hadoop-project/pom.xml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 32f9f06ae15da..42e97f4875358 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -482,9 +482,9 @@ com.microsoft.azure:azure-cosmosdb-gateway:2.4.5 com.microsoft.azure:azure-data-lake-store-sdk:2.3.3 com.microsoft.azure:azure-keyvault-core:1.0.0 com.microsoft.sqlserver:mssql-jdbc:6.2.1.jre7 -org.bouncycastle:bcpkix-jdk18on:1.77 -org.bouncycastle:bcprov-jdk18on:1.77 -org.bouncycastle:bcutil-jdk18on:1.77 +org.bouncycastle:bcpkix-jdk18on:1.78.1 +org.bouncycastle:bcprov-jdk18on:1.78.1 +org.bouncycastle:bcutil-jdk18on:1.78.1 org.checkerframework:checker-qual:2.5.2 org.codehaus.mojo:animal-sniffer-annotations:1.21 org.jruby.jcodings:jcodings:1.0.13 diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/site/markdown/cloud-storage/index.md b/hadoop-cloud-storage-project/hadoop-cos/src/site/markdown/cloud-storage/index.md index 64647b03e9baf..60c9c9065946f 100644 --- a/hadoop-cloud-storage-project/hadoop-cos/src/site/markdown/cloud-storage/index.md +++ b/hadoop-cloud-storage-project/hadoop-cos/src/site/markdown/cloud-storage/index.md @@ -86,7 +86,7 @@ Linux kernel 2.6+ - joda-time (version 2.9.9 recommended) - httpClient (version 4.5.1 or later recommended) - Jackson: jackson-core, jackson-databind, jackson-annotations (version 2.9.8 or later) -- bcprov-jdk18on (version 1.77 recommended) +- bcprov-jdk18on (version 1.78.1 recommended) #### Configure Properties diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 0345925e9994e..a8ef068bf8da5 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -111,7 +111,7 @@ 27.0-jre 4.2.3 - 1.78 + 1.78.1 2.0.0.AM26 From bbb17e76a7a8a995a8b202c9b9530f39bb2a2957 Mon Sep 17 00:00:00 2001 From: Anuj Modi <128447756+anujmodi2021@users.noreply.github.com> Date: Fri, 7 Jun 2024 18:58:24 +0530 Subject: [PATCH 022/113] HADOOP-19178: [WASB Deprecation] Updating Documentation on Upcoming Plans for Hadoop-Azure (#6862) Contributed by Anuj Modi --- .../hadoop-azure/src/site/markdown/index.md | 1 + .../hadoop-azure/src/site/markdown/wasb.md | 97 +++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 hadoop-tools/hadoop-azure/src/site/markdown/wasb.md diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/index.md b/hadoop-tools/hadoop-azure/src/site/markdown/index.md index 595353896d123..177ab282c112b 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/index.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/index.md @@ -18,6 +18,7 @@ See also: +* [WASB](./wasb.html) * [ABFS](./abfs.html) * [Testing](./testing_azure.html) diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/wasb.md b/hadoop-tools/hadoop-azure/src/site/markdown/wasb.md new file mode 100644 index 0000000000000..270fd14da4c44 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/site/markdown/wasb.md @@ -0,0 +1,97 @@ + + +# Hadoop Azure Support: WASB Driver + +## Introduction +WASB Driver is a legacy Hadoop File System driver that was developed to support +[FNS(FlatNameSpace) Azure Storage accounts](https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction) +that do not honor File-Folder syntax. 
+HDFS Folder operations hence are mimicked at client side by WASB driver and +certain folder operations like Rename and Delete can lead to a lot of IOPs with +client-side enumeration and orchestration of rename/delete operation blob by blob. +It was not ideal for other APIs too as initial checks for path is a file or folder +needs to be done over multiple metadata calls. These led to a degraded performance. + +To provide better service to Analytics users, Microsoft released [ADLS Gen2](https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction) +which are HNS (Hierarchical Namespace) enabled, i.e. File-Folder aware storage accounts. +ABFS driver was designed to overcome the inherent deficiencies of WASB and users +were informed to migrate to ABFS driver. + +### Challenges and limitations of WASB Driver +Users of the legacy WASB driver face a number of challenges and limitations: +1. They cannot leverage the optimizations and benefits of the latest ABFS driver. +2. They need to deal with the compatibility issues should the files and folders were +modified with the legacy WASB driver and the ABFS driver concurrently in a phased +transition situation. +3. There are differences for supported features for FNS and HNS over ABFS Driver. +4. In certain cases, they must perform a significant amount of re-work on their +workloads to migrate to the ABFS driver, which is available only on HNS enabled +accounts in a fully tested and supported scenario. + +## Deprecation plans for WASB Driver +We are introducing a new feature that will enable the ABFS driver to support +FNS accounts (over BlobEndpoint that WASB Driver uses) using the ABFS scheme. +This feature will enable us to use the ABFS driver to interact with data stored in GPv2 +(General Purpose v2) storage accounts. + +With this feature, the users who still use the legacy WASB driver will be able +to migrate to the ABFS driver without much re-work on their workloads. They will +however need to change the URIs from the WASB scheme to the ABFS scheme. + +Once ABFS driver has built FNS support capability to migrate WASB users, WASB +driver will be marked for removal in next major release. This will remove any ambiguity +for new users onboards as there will be only one Microsoft driver for Azure Storage +and migrating users will get SLA bound support for driver and service, +which was not guaranteed over WASB. + +We anticipate that this feature will serve as a stepping stone for users to +move to HNS enabled accounts with the ABFS driver, which is our recommended stack +for big data analytics on ADLS Gen2. + +### Impact for existing ABFS users using ADLS Gen2 (HNS enabled account) +This feature does not impact the existing users who are using ADLS Gen2 Accounts +(HNS enabled account) with ABFS driver. + +They do not need to make any changes to their workloads or configurations. They +will still enjoy the benefits of HNS, such as atomic operations, fine-grained +access control, scalability, and performance. + +### Official recommendation +Microsoft continues to recommend all Big Data and Analytics users to use +Azure Data Lake Gen2 (ADLS Gen2) using the ABFS driver and will continue to optimize +this scenario in the future, we believe that this new option will help all those +users to transition to a supported scenario immediately, while they plan to +ultimately move to ADLS Gen2 (HNS enabled account). 
+ +### New Authentication Options for a migrating user +Below auth types that WASB provides will continue to work on the new FNS over +ABFS Driver over configuration that accepts these SAS types (similar to WASB): +1. SharedKey +2. Account SAS +3. Service/Container SAS + +Below authentication types that were not supported by WASB driver but supported by +ABFS driver will continue to be available for new FNS over ABFS Driver +1. OAuth 2.0 Client Credentials +2. OAuth 2.0: Refresh Token +3. Azure Managed Identity +4. Custom OAuth 2.0 Token Provider + +Refer to [ABFS Authentication](abfs.html/authentication) for more details. + +### ABFS Features Not Available for migrating Users +Certain features of ABFS Driver will be available only to users using HNS accounts with ABFS driver. +1. ABFS Driver's SAS Token Provider plugin for UserDelegation SAS and Fixed SAS. +2. Client Provided Encryption Key (CPK) support for Data ingress and egress. From 01d257d5aa94163244cd3f1149d5ba2cb9f1e6ff Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 7 Jun 2024 17:34:01 +0100 Subject: [PATCH 023/113] HADOOP-19189. ITestS3ACommitterFactory failing (#6857) * parameterize the test run rather than do it from within the test suite. * log what the committer factory is up to (and improve its logging) * close all filesystems, then create the test filesystem with cache enabled. The cache is critical, we want the fs from cache to be used when querying filesystem properties, rather than one created from the committer jobconf, which will have the same options as the task committer, so not actually validate the override logic. Contributed by Steve Loughran --- .../commit/AbstractS3ACommitterFactory.java | 5 +- .../fs/s3a/commit/S3ACommitterFactory.java | 7 +- .../s3a/commit/ITestS3ACommitterFactory.java | 234 +++++++++++------- .../src/test/resources/log4j.properties | 2 + 4 files changed, 151 insertions(+), 97 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitterFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitterFactory.java index 6e7a99f50ef93..cbbe5fdc602d6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitterFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/AbstractS3ACommitterFactory.java @@ -51,9 +51,10 @@ public PathOutputCommitter createOutputCommitter(Path outputPath, throw new PathCommitException(outputPath, "Filesystem not supported by this committer"); } - LOG.info("Using Committer {} for {}", + LOG.info("Using Committer {} for {} created by {}", outputCommitter, - outputPath); + outputPath, + this); return outputCommitter; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/S3ACommitterFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/S3ACommitterFactory.java index 36d0af187d3c8..7f5455b6098d0 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/S3ACommitterFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/S3ACommitterFactory.java @@ -113,11 +113,14 @@ private AbstractS3ACommitterFactory chooseCommitterFactory( // job/task configurations. 
Configuration fsConf = fileSystem.getConf(); - String name = fsConf.getTrimmed(FS_S3A_COMMITTER_NAME, COMMITTER_NAME_FILE); + String name = fsConf.getTrimmed(FS_S3A_COMMITTER_NAME, ""); + LOG.debug("Committer from filesystems \"{}\"", name); + name = taskConf.getTrimmed(FS_S3A_COMMITTER_NAME, name); - LOG.debug("Committer option is {}", name); + LOG.debug("Committer option is \"{}\"", name); switch (name) { case COMMITTER_NAME_FILE: + case "": factory = null; break; case COMMITTER_NAME_DIRECTORY: diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/ITestS3ACommitterFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/ITestS3ACommitterFactory.java index 2ad2568d5cc20..2561a69f60b59 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/ITestS3ACommitterFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/ITestS3ACommitterFactory.java @@ -19,15 +19,24 @@ package org.apache.hadoop.fs.s3a.commit; import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.commit.magic.MagicS3GuardCommitter; import org.apache.hadoop.fs.s3a.commit.staging.DirectoryStagingCommitter; import org.apache.hadoop.fs.s3a.commit.staging.PartitionedStagingCommitter; import org.apache.hadoop.fs.s3a.commit.staging.StagingCommitter; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; @@ -35,20 +44,24 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.PathOutputCommitter; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; -import org.apache.hadoop.test.LambdaTestUtils; +import org.apache.hadoop.security.UserGroupInformation; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; +import static org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants.COMMITTER_NAME_STAGING; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** - * Tests for some aspects of the committer factory. - * All tests are grouped into one single test so that only one - * S3A FS client is set up and used for the entire run. - * Saves time and money. + * Tests for the committer factory creation/override process. */ -public class ITestS3ACommitterFactory extends AbstractCommitITest { - - - protected static final String INVALID_NAME = "invalid-name"; +@RunWith(Parameterized.class) +public final class ITestS3ACommitterFactory extends AbstractCommitITest { + private static final Logger LOG = LoggerFactory.getLogger( + ITestS3ACommitterFactory.class); + /** + * Name for invalid committer: {@value}. + */ + private static final String INVALID_NAME = "invalid-name"; /** * Counter to guarantee that even in parallel test runs, no job has the same @@ -72,121 +85,156 @@ public class ITestS3ACommitterFactory extends AbstractCommitITest { * Parameterized list of bindings of committer name in config file to * expected class instantiated. 
*/ - private static final Object[][] bindings = { - {COMMITTER_NAME_FILE, FileOutputCommitter.class}, - {COMMITTER_NAME_DIRECTORY, DirectoryStagingCommitter.class}, - {COMMITTER_NAME_PARTITIONED, PartitionedStagingCommitter.class}, - {InternalCommitterConstants.COMMITTER_NAME_STAGING, - StagingCommitter.class}, - {COMMITTER_NAME_MAGIC, MagicS3GuardCommitter.class} + private static final Object[][] BINDINGS = { + {"", "", FileOutputCommitter.class, "Default Binding"}, + {COMMITTER_NAME_FILE, "", FileOutputCommitter.class, "File committer in FS"}, + {COMMITTER_NAME_PARTITIONED, "", PartitionedStagingCommitter.class, + "partitoned committer in FS"}, + {COMMITTER_NAME_STAGING, "", StagingCommitter.class, "staging committer in FS"}, + {COMMITTER_NAME_MAGIC, "", MagicS3GuardCommitter.class, "magic committer in FS"}, + {COMMITTER_NAME_DIRECTORY, "", DirectoryStagingCommitter.class, "Dir committer in FS"}, + {INVALID_NAME, "", null, "invalid committer in FS"}, + + {"", COMMITTER_NAME_FILE, FileOutputCommitter.class, "File committer in task"}, + {"", COMMITTER_NAME_PARTITIONED, PartitionedStagingCommitter.class, + "partioned committer in task"}, + {"", COMMITTER_NAME_STAGING, StagingCommitter.class, "staging committer in task"}, + {"", COMMITTER_NAME_MAGIC, MagicS3GuardCommitter.class, "magic committer in task"}, + {"", COMMITTER_NAME_DIRECTORY, DirectoryStagingCommitter.class, "Dir committer in task"}, + {"", INVALID_NAME, null, "invalid committer in task"}, }; /** - * This is a ref to the FS conf, so changes here are visible - * to callers querying the FS config. + * Test array for parameterized test runs. + * + * @return the committer binding for this run. */ - private Configuration filesystemConfRef; - - private Configuration taskConfRef; + @Parameterized.Parameters(name = "{3}-fs=[{0}]-task=[{1}]-[{2}]") + public static Collection params() { + return Arrays.asList(BINDINGS); + } - @Override - public void setup() throws Exception { - super.setup(); - jobId = randomJobId(); - attempt0 = "attempt_" + jobId + "_m_000000_0"; - taskAttempt0 = TaskAttemptID.forName(attempt0); + /** + * Name of committer to set in filesystem config. If "" do not set one. + */ + private final String fsCommitterName; - outDir = path(getMethodName()); - factory = new S3ACommitterFactory(); - Configuration conf = new Configuration(); - conf.set(FileOutputFormat.OUTDIR, outDir.toUri().toString()); - conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt0); - conf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1); - filesystemConfRef = getFileSystem().getConf(); - tContext = new TaskAttemptContextImpl(conf, taskAttempt0); - taskConfRef = tContext.getConfiguration(); - } + /** + * Name of committer to set in job config. + */ + private final String jobCommitterName; - @Test - public void testEverything() throws Throwable { - testImplicitFileBinding(); - testBindingsInTask(); - testBindingsInFSConfig(); - testInvalidFileBinding(); - testInvalidTaskBinding(); - } + /** + * Expected committer class. + * If null: an exception is expected + */ + private final Class committerClass; /** - * Verify that if all config options are unset, the FileOutputCommitter - * - * is returned. + * Description from parameters, simply for thread names to be more informative. 
*/ - public void testImplicitFileBinding() throws Throwable { - taskConfRef.unset(FS_S3A_COMMITTER_NAME); - filesystemConfRef.unset(FS_S3A_COMMITTER_NAME); - assertFactoryCreatesExpectedCommitter(FileOutputCommitter.class); - } + private final String description; /** - * Verify that task bindings are picked up. + * Create a parameterized instance. + * @param fsCommitterName committer to set in filesystem config + * @param jobCommitterName committer to set in job config + * @param committerClass expected committer class + * @param description debug text for thread names. */ - public void testBindingsInTask() throws Throwable { - // set this to an invalid value to be confident it is not - // being checked. - filesystemConfRef.set(FS_S3A_COMMITTER_NAME, "INVALID"); - taskConfRef.set(FS_S3A_COMMITTER_NAME, COMMITTER_NAME_FILE); - assertFactoryCreatesExpectedCommitter(FileOutputCommitter.class); - for (Object[] binding : bindings) { - taskConfRef.set(FS_S3A_COMMITTER_NAME, - (String) binding[0]); - assertFactoryCreatesExpectedCommitter((Class) binding[1]); - } + public ITestS3ACommitterFactory( + final String fsCommitterName, + final String jobCommitterName, + final Class committerClass, + final String description) { + this.fsCommitterName = fsCommitterName; + this.jobCommitterName = jobCommitterName; + this.committerClass = committerClass; + this.description = description; + } + + @Override + protected Configuration createConfiguration() { + final Configuration conf = super.createConfiguration(); + // do not cache, because we want the committer one to pick up + // the fs with fs-specific configuration + conf.setBoolean(FS_S3A_IMPL_DISABLE_CACHE, false); + removeBaseAndBucketOverrides(conf, FS_S3A_COMMITTER_NAME); + maybeSetCommitterName(conf, fsCommitterName); + return conf; } /** - * Verify that FS bindings are picked up. + * Set a committer name in a configuration. + * @param conf configuration to patch. + * @param name name. If "" the option is unset. */ - public void testBindingsInFSConfig() throws Throwable { - taskConfRef.unset(FS_S3A_COMMITTER_NAME); - filesystemConfRef.set(FS_S3A_COMMITTER_NAME, COMMITTER_NAME_FILE); - assertFactoryCreatesExpectedCommitter(FileOutputCommitter.class); - for (Object[] binding : bindings) { - taskConfRef.set(FS_S3A_COMMITTER_NAME, (String) binding[0]); - assertFactoryCreatesExpectedCommitter((Class) binding[1]); + private static void maybeSetCommitterName(final Configuration conf, final String name) { + if (!name.isEmpty()) { + conf.set(FS_S3A_COMMITTER_NAME, name); + } else { + conf.unset(FS_S3A_COMMITTER_NAME); } } - /** - * Create an invalid committer via the FS binding. - */ - public void testInvalidFileBinding() throws Throwable { - taskConfRef.unset(FS_S3A_COMMITTER_NAME); - filesystemConfRef.set(FS_S3A_COMMITTER_NAME, INVALID_NAME); - LambdaTestUtils.intercept(PathCommitException.class, INVALID_NAME, - () -> createCommitter()); + @Override + public void setup() throws Exception { + // destroy all filesystems from previous runs. 
+ FileSystem.closeAllForUGI(UserGroupInformation.getCurrentUser()); + super.setup(); + jobId = randomJobId(); + attempt0 = "attempt_" + jobId + "_m_000000_0"; + taskAttempt0 = TaskAttemptID.forName(attempt0); + + outDir = methodPath(); + factory = new S3ACommitterFactory(); + final Configuration fsConf = getConfiguration(); + JobConf jobConf = new JobConf(fsConf); + jobConf.set(FileOutputFormat.OUTDIR, outDir.toUri().toString()); + jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt0); + jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 1); + maybeSetCommitterName(jobConf, jobCommitterName); + tContext = new TaskAttemptContextImpl(jobConf, taskAttempt0); + + LOG.info("{}: Filesystem Committer='{}'; task='{}'", + description, + fsConf.get(FS_S3A_COMMITTER_NAME), + jobConf.get(FS_S3A_COMMITTER_NAME)); + } + + + @Override + protected void deleteTestDirInTeardown() { + // no-op } /** - * Create an invalid committer via the task attempt. + * Verify that if all config options are unset, the FileOutputCommitter + * is returned. */ - public void testInvalidTaskBinding() throws Throwable { - filesystemConfRef.unset(FS_S3A_COMMITTER_NAME); - taskConfRef.set(FS_S3A_COMMITTER_NAME, INVALID_NAME); - LambdaTestUtils.intercept(PathCommitException.class, INVALID_NAME, - () -> createCommitter()); + @Test + public void testBinding() throws Throwable { + assertFactoryCreatesExpectedCommitter(committerClass); } /** * Assert that the factory creates the expected committer. + * If a null committer is passed in, a {@link PathIOException} + * is expected. * @param expected expected committer class. - * @throws IOException IO failure. + * @throws Exception IO failure. */ - protected void assertFactoryCreatesExpectedCommitter( + private void assertFactoryCreatesExpectedCommitter( final Class expected) - throws IOException { - assertEquals("Wrong Committer from factory", - expected, - createCommitter().getClass()); + throws Exception { + describe("Creating committer: expected class \"%s\"", expected); + if (expected != null) { + assertEquals("Wrong Committer from factory", + expected, + createCommitter().getClass()); + } else { + intercept(PathCommitException.class, this::createCommitter); + } } /** diff --git a/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties b/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties index 25247aaaabd32..7442a357f9777 100644 --- a/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties +++ b/hadoop-tools/hadoop-aws/src/test/resources/log4j.properties @@ -49,6 +49,8 @@ log4j.logger.org.apache.hadoop.mapred.ShuffleHandler=WARN log4j.logger.org.apache.hadoop.ipc.Server=WARN #log4j.logger.=WARN +# information about origin of committers +log4j.logger.org.apache.hadoop.mapreduce.lib.output.PathOutputCommitterFactory=DEBUG # for debugging low level S3a operations, uncomment these lines # Log all S3A classes From 10df59e4210206508da648d5676f1c7d423b0353 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Sat, 8 Jun 2024 14:51:28 +0800 Subject: [PATCH 024/113] Revert "HADOOP-19071. Update maven-surefire-plugin from 3.0.0 to 3.2.5. (#6664)" (#6875) This reverts commit 88ad7db80de6f5e2d6185d5c397b32218d6c20ea. 
Signed-off-by: Shilun Fan --- hadoop-project/pom.xml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index a8ef068bf8da5..5b63dd129d6e9 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -170,7 +170,7 @@ -Xmx2048m -XX:+HeapDumpOnOutOfMemoryError - 3.2.5 + 3.0.0-M1 ${maven-surefire-plugin.version} ${maven-surefire-plugin.version} @@ -2450,7 +2450,6 @@ ${env.DYLD_LIBRARY_PATH}:${project.build.directory}/native/target/usr/local/lib:${hadoop.common.build.dir}/native/target/usr/local/lib 4 - false false From 2e1deee87a5ecedbab09db1e546da733771aa11a Mon Sep 17 00:00:00 2001 From: Pranav Saxena <108325433+saxenapranav@users.noreply.github.com> Date: Tue, 11 Jun 2024 01:33:41 +0530 Subject: [PATCH 025/113] HADOOP-19137. [ABFS] Prevent ABFS initialization for non-hierarchal-namespace account if Customer-provided-key configs given. (#6752) Customer-provided-keys (CPK) configs are not allowed with non-hierarchal-namespace (non-HNS) accounts for ABFS. This patch aims to prevent ABFS initialization for non-HNS accounts if CPK configs are provided. Contributed by: Pranav Saxena --- .../fs/azurebfs/AzureBlobFileSystem.java | 31 +++++++ .../fs/azurebfs/AzureBlobFileSystemStore.java | 50 ++++++++++- .../azurebfs/constants/AbfsHttpConstants.java | 34 +++++++ .../fs/azurebfs/services/AbfsClient.java | 19 ---- .../fs/azurebfs/utils/NamespaceUtil.java | 88 ------------------- .../azurebfs/AbstractAbfsIntegrationTest.java | 3 - .../azurebfs/ITestAbfsCustomEncryption.java | 86 +++++++++++++----- .../ITestAzureBlobFileSystemCreate.java | 2 - ...ITestAzureBlobFileSystemInitAndCreate.java | 47 ++++++++++ .../fs/azurebfs/services/AbfsClientUtils.java | 3 - 10 files changed, 221 insertions(+), 142 deletions(-) delete mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/NamespaceUtil.java diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java index 5475ff30651bd..9d7d3cd50782c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java @@ -43,6 +43,7 @@ import java.util.concurrent.Future; import javax.annotation.Nullable; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.impl.BackReference; import org.apache.hadoop.security.ProviderUtils; @@ -113,6 +114,7 @@ import static org.apache.hadoop.fs.CommonConfigurationKeys.IOSTATISTICS_LOGGING_LEVEL_DEFAULT; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.*; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CPK_IN_NON_HNS_ACCOUNT_ERROR_MESSAGE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.DATA_BLOCKS_BUFFER; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BLOCK_UPLOAD_ACTIVE_BLOCKS; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BLOCK_UPLOAD_BUFFER_DIR; @@ -221,6 +223,26 @@ public void initialize(URI uri, Configuration configuration) } } + /* + * Non-hierarchical-namespace account can not have a customer-provided-key(CPK). 
+ * Fail initialization of filesystem if the configs are provided. CPK is of + * two types: GLOBAL_KEY, and ENCRYPTION_CONTEXT. + */ + if ((isEncryptionContextCPK(abfsConfiguration) || isGlobalKeyCPK( + abfsConfiguration)) + && !getIsNamespaceEnabled( + new TracingContext(clientCorrelationId, fileSystemId, + FSOperationType.CREATE_FILESYSTEM, tracingHeaderFormat, + listener))) { + /* + * Close the filesystem gracefully before throwing exception. Graceful close + * will ensure that all resources are released properly. + */ + close(); + throw new PathIOException(uri.getPath(), + CPK_IN_NON_HNS_ACCOUNT_ERROR_MESSAGE); + } + LOG.trace("Initiate check for delegation token manager"); if (UserGroupInformation.isSecurityEnabled()) { this.delegationTokenEnabled = abfsConfiguration.isDelegationTokenManagerEnabled(); @@ -237,6 +259,15 @@ public void initialize(URI uri, Configuration configuration) LOG.debug("Initializing AzureBlobFileSystem for {} complete", uri); } + private boolean isGlobalKeyCPK(final AbfsConfiguration abfsConfiguration) { + return StringUtils.isNotEmpty( + abfsConfiguration.getEncodedClientProvidedEncryptionKey()); + } + + private boolean isEncryptionContextCPK(final AbfsConfiguration abfsConfiguration) { + return abfsConfiguration.createEncryptionContextProvider() != null; + } + @Override public String toString() { final StringBuilder sb = new StringBuilder( diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 85d9d96ac2ddb..ac564f082c9e4 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -60,7 +60,6 @@ import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.security.NoContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.utils.EncryptionType; -import org.apache.hadoop.fs.azurebfs.utils.NamespaceUtil; import org.apache.hadoop.fs.impl.BackReference; import org.apache.hadoop.fs.PathIOException; @@ -182,7 +181,7 @@ public class AzureBlobFileSystemStore implements Closeable, ListingSupport { private final AbfsConfiguration abfsConfiguration; private final Set azureAtomicRenameDirSet; private Set azureInfiniteLeaseDirSet; - private Trilean isNamespaceEnabled; + private volatile Trilean isNamespaceEnabled; private final AuthType authType; private final UserGroupInformation userGroupInformation; private final IdentityTransformerInterface identityTransformer; @@ -364,19 +363,62 @@ private String[] authorityParts(URI uri) throws InvalidUriAuthorityException, In return authorityParts; } + /** + * Resolves namespace information of the filesystem from the state of {@link #isNamespaceEnabled}. + * if the state is UNKNOWN, it will be determined by making a GET_ACL request + * to the root of the filesystem. GET_ACL call is synchronized to ensure a single + * call is made to determine the namespace information in case multiple threads are + * calling this method at the same time. The resolution of namespace information + * would be stored back as state of {@link #isNamespaceEnabled}. + * + * @param tracingContext tracing context + * @return true if namespace is enabled, false otherwise. + * @throws AzureBlobFileSystemException server errors. 
+ */ public boolean getIsNamespaceEnabled(TracingContext tracingContext) throws AzureBlobFileSystemException { try { - return this.isNamespaceEnabled.toBoolean(); + return isNamespaceEnabled(); } catch (TrileanConversionException e) { LOG.debug("isNamespaceEnabled is UNKNOWN; fall back and determine through" + " getAcl server call", e); } - isNamespaceEnabled = Trilean.getTrilean(NamespaceUtil.isNamespaceEnabled(client, tracingContext)); + return getNamespaceEnabledInformationFromServer(tracingContext); + } + + private synchronized boolean getNamespaceEnabledInformationFromServer( + final TracingContext tracingContext) throws AzureBlobFileSystemException { + if (isNamespaceEnabled != Trilean.UNKNOWN) { + return isNamespaceEnabled.toBoolean(); + } + try { + LOG.debug("Get root ACL status"); + getClient().getAclStatus(AbfsHttpConstants.ROOT_PATH, tracingContext); + isNamespaceEnabled = Trilean.getTrilean(true); + } catch (AbfsRestOperationException ex) { + // Get ACL status is a HEAD request, its response doesn't contain + // errorCode + // So can only rely on its status code to determine its account type. + if (HttpURLConnection.HTTP_BAD_REQUEST != ex.getStatusCode()) { + throw ex; + } + isNamespaceEnabled = Trilean.getTrilean(false); + } catch (AzureBlobFileSystemException ex) { + throw ex; + } return isNamespaceEnabled.toBoolean(); } + /** + * @return true if namespace is enabled, false otherwise. + * @throws TrileanConversionException if namespaceEnabled information is UNKNOWN + */ + @VisibleForTesting + boolean isNamespaceEnabled() throws TrileanConversionException { + return this.isNamespaceEnabled.toBoolean(); + } + @VisibleForTesting URIBuilder getURIBuilder(final String hostName, boolean isSecure) { String scheme = isSecure ? FileSystemUriSchemes.HTTPS_SCHEME : FileSystemUriSchemes.HTTP_SCHEME; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java index 4f5ee5f9683fc..f16d315e4d62d 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java @@ -22,6 +22,10 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.util.VersionInfo; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENCRYPTION_CONTEXT_PROVIDER_TYPE; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENCRYPTION_ENCODED_CLIENT_PROVIDED_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENCRYPTION_ENCODED_CLIENT_PROVIDED_KEY_SHA; + /** * Responsible to keep all constant keys used in abfs rest client here. */ @@ -165,5 +169,35 @@ public static ApiVersion getCurrentVersion() { */ public static final Integer HTTP_STATUS_CATEGORY_QUOTIENT = 100; + /** + * List of configurations that are related to Customer-Provided-Keys. + *

+ * <ol>
+ *   <li>
+ *     {@value ConfigurationKeys#FS_AZURE_ENCRYPTION_CONTEXT_PROVIDER_TYPE}
+ *     for ENCRYPTION_CONTEXT cpk-type.
+ *   </li>
+ *   <li>
+ *     {@value ConfigurationKeys#FS_AZURE_ENCRYPTION_ENCODED_CLIENT_PROVIDED_KEY} and
+ *     {@value ConfigurationKeys#FS_AZURE_ENCRYPTION_ENCODED_CLIENT_PROVIDED_KEY_SHA}
+ *     for GLOBAL_KEY cpk-type.
+ *   </li>
+ * </ol>
+ * List: {@value} + */ + private static final String CPK_CONFIG_LIST = + FS_AZURE_ENCRYPTION_CONTEXT_PROVIDER_TYPE + ", " + + FS_AZURE_ENCRYPTION_ENCODED_CLIENT_PROVIDED_KEY + ", " + + FS_AZURE_ENCRYPTION_ENCODED_CLIENT_PROVIDED_KEY_SHA; + + /** + * Exception message on filesystem init if customer-provided-keys configs are provided + * for a non-hierarchical-namespace account: {@value} + */ + public static final String CPK_IN_NON_HNS_ACCOUNT_ERROR_MESSAGE = + "Non hierarchical-namespace account can not have configs enabled for " + + "Customer Provided Keys. Following configs can not be given with " + + "non-hierarchical-namespace account:" + + CPK_CONFIG_LIST; + private AbfsHttpConstants() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index f76f0ca6e87f7..8ba550e06deb9 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -47,7 +47,6 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; -import org.apache.hadoop.fs.azurebfs.utils.NamespaceUtil; import org.apache.hadoop.fs.store.LogExactlyOnce; import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.Permissions; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; @@ -134,7 +133,6 @@ public class AbfsClient implements Closeable { private final AbfsThrottlingIntercept intercept; private final ListeningScheduledExecutorService executorService; - private Boolean isNamespaceEnabled; private boolean renameResilience; private TimerTask runningTimerTask; @@ -359,9 +357,6 @@ private void addEncryptionKeyRequestHeaders(String path, List requestHeaders, boolean isCreateFileRequest, ContextEncryptionAdapter contextEncryptionAdapter, TracingContext tracingContext) throws AzureBlobFileSystemException { - if (!getIsNamespaceEnabled(tracingContext)) { - return; - } String encodedKey, encodedKeySHA256; switch (encryptionType) { case GLOBAL_KEY: @@ -1550,15 +1545,6 @@ public synchronized String getAccessToken() throws IOException { } } - private synchronized Boolean getIsNamespaceEnabled(TracingContext tracingContext) - throws AzureBlobFileSystemException { - if (isNamespaceEnabled == null) { - setIsNamespaceEnabled(NamespaceUtil.isNamespaceEnabled(this, - tracingContext)); - } - return isNamespaceEnabled; - } - protected Boolean getIsPaginatedDeleteEnabled() { return abfsConfiguration.isPaginatedDeleteEnabled(); } @@ -1748,11 +1734,6 @@ void setEncryptionContextProvider(EncryptionContextProvider provider) { encryptionContextProvider = provider; } - @VisibleForTesting - void setIsNamespaceEnabled(final Boolean isNamespaceEnabled) { - this.isNamespaceEnabled = isNamespaceEnabled; - } - /** * Getter for abfsCounters from AbfsClient. * @return AbfsCounters instance. 
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/NamespaceUtil.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/NamespaceUtil.java deleted file mode 100644 index 67225efa14323..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/NamespaceUtil.java +++ /dev/null @@ -1,88 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0
- * <p>
- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azurebfs.utils; - -import java.net.HttpURLConnection; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; -import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; -import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; -import org.apache.hadoop.fs.azurebfs.services.AbfsClient; - -/** - * Utility class to provide method which can return if the account is namespace - * enabled or not. - */ -public final class NamespaceUtil { - - public static final Logger LOG = LoggerFactory.getLogger(NamespaceUtil.class); - - private NamespaceUtil() { - - } - - /** - * Return if the account used in the provided abfsClient object namespace enabled - * or not. - * It would call {@link org.apache.hadoop.fs.azurebfs.services.AbfsClient#getAclStatus(String, TracingContext)}. - *

- * <ol>
- *   <li>
- *     If the API call is successful, then the account is namespace enabled.
- *   </li>
- *   <li>
- *     If the server returns with {@link java.net.HttpURLConnection#HTTP_BAD_REQUEST}, the account is non-namespace enabled.
- *   </li>
- *   <li>
- *     If the server call gets some other exception, then the method would throw the exception.
- *   </li>
- * </ol>
- * @param abfsClient client for which namespace-enabled to be checked. - * @param tracingContext object to correlate Store requests. - * @return if the account corresponding to the given client is namespace-enabled - * or not. - * @throws AzureBlobFileSystemException throws back the exception the method receives - * from the {@link AbfsClient#getAclStatus(String, TracingContext)}. In case it gets - * {@link AbfsRestOperationException}, it checks if the exception statusCode is - * BAD_REQUEST or not. If not, then it will pass the exception to the calling method. - */ - public static Boolean isNamespaceEnabled(final AbfsClient abfsClient, - final TracingContext tracingContext) - throws AzureBlobFileSystemException { - Boolean isNamespaceEnabled; - try { - LOG.debug("Get root ACL status"); - abfsClient.getAclStatus(AbfsHttpConstants.ROOT_PATH, tracingContext); - isNamespaceEnabled = true; - } catch (AbfsRestOperationException ex) { - // Get ACL status is a HEAD request, its response doesn't contain - // errorCode - // So can only rely on its status code to determine its account type. - if (HttpURLConnection.HTTP_BAD_REQUEST != ex.getStatusCode()) { - throw ex; - } - isNamespaceEnabled = false; - } catch (AzureBlobFileSystemException ex) { - throw ex; - } - return isNamespaceEnabled; - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java index 2f0d52f056bd9..05c1f5db3149a 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java @@ -41,7 +41,6 @@ import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; -import org.apache.hadoop.fs.azurebfs.services.AbfsClientUtils; import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; @@ -209,8 +208,6 @@ public void setup() throws Exception { wasb = new NativeAzureFileSystem(azureNativeFileSystemStore); wasb.initialize(wasbUri, rawConfig); } - // Todo: To be fixed in HADOOP-19137 - AbfsClientUtils.setIsNamespaceEnabled(abfs.getAbfsClient(), true); } @After diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java index 33b05be59d5a8..89504ea461b23 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java @@ -35,7 +35,7 @@ import org.apache.hadoop.fs.azurebfs.services.AbfsClientUtils; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.assertj.core.api.Assertions; -import org.junit.Assume; +import org.assertj.core.api.Assumptions; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -57,9 +57,11 @@ import org.apache.hadoop.fs.impl.OpenFileParameters; import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.FsPermission; +import 
org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.util.Lists; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CPK_IN_NON_HNS_ACCOUNT_ERROR_MESSAGE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENCRYPTION_CONTEXT_PROVIDER_TYPE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENCRYPTION_ENCODED_CLIENT_PROVIDED_KEY; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENCRYPTION_ENCODED_CLIENT_PROVIDED_KEY_SHA; @@ -171,9 +173,6 @@ public static Iterable params() { } public ITestAbfsCustomEncryption() throws Exception { - Assume.assumeTrue("Account should be HNS enabled for CPK", - getConfiguration().getBoolean(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, - false)); new Random().nextBytes(cpk); cpkSHAEncoded = EncodingHelper.getBase64EncodedString( EncodingHelper.getSHA256Hash(cpk)); @@ -181,7 +180,13 @@ public ITestAbfsCustomEncryption() throws Exception { @Test public void testCustomEncryptionCombinations() throws Exception { - AzureBlobFileSystem fs = getOrCreateFS(); + try (AzureBlobFileSystem fs = getOrCreateFS()) { + validateCpkResponseHeadersForCombination(fs); + } + } + + private void validateCpkResponseHeadersForCombination(final AzureBlobFileSystem fs) + throws Exception { Path testPath = path("/testFile"); String relativePath = fs.getAbfsStore().getRelativePath(testPath); MockEncryptionContextProvider ecp = @@ -375,9 +380,7 @@ private AzureBlobFileSystem getECProviderEnabledFS() throws Exception { + getAccountName()); configuration.unset(FS_AZURE_ENCRYPTION_ENCODED_CLIENT_PROVIDED_KEY_SHA + "." + getAccountName()); - AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(configuration); - fileSystemsOpenedInTest.add(fs); - return fs; + return getAzureBlobFileSystem(configuration); } private AzureBlobFileSystem getCPKEnabledFS() throws IOException { @@ -390,9 +393,34 @@ private AzureBlobFileSystem getCPKEnabledFS() throws IOException { conf.set(FS_AZURE_ENCRYPTION_ENCODED_CLIENT_PROVIDED_KEY_SHA + "." + getAccountName(), cpkEncodedSHA); conf.unset(FS_AZURE_ENCRYPTION_CONTEXT_PROVIDER_TYPE); - AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(conf); - fileSystemsOpenedInTest.add(fs); - return fs; + return getAzureBlobFileSystem(conf); + } + + private AzureBlobFileSystem getAzureBlobFileSystem(final Configuration conf) { + try { + AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance( + conf); + fileSystemsOpenedInTest.add(fs); + Assertions.assertThat( + getConfiguration().getBoolean(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, + false)) + .describedAs("Encryption tests should run only on namespace enabled account") + .isTrue(); + return fs; + } catch (IOException ex) { + GenericTestUtils.assertExceptionContains( + CPK_IN_NON_HNS_ACCOUNT_ERROR_MESSAGE, ex, + "Exception message should contain the expected message"); + Assertions.assertThat( + getConfiguration().getBoolean(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, + false)) + .describedAs("Encryption tests should run only on namespace enabled account") + .isFalse(); + + //Skip the test + Assumptions.assumeThat(true).isFalse(); + return null; + } } private AzureBlobFileSystem getOrCreateFS() throws Exception { @@ -423,18 +451,18 @@ private AzureBlobFileSystem getOrCreateFS() throws Exception { * was used to create the x-ms-encryption-context value used for creating the file. 
*/ private EncryptionContextProvider createEncryptedFile(Path testPath) throws Exception { - AzureBlobFileSystem fs; - if (getFileSystem().getAbfsClient().getEncryptionType() == fileEncryptionType) { - fs = getFileSystem(); - } else { - fs = fileEncryptionType == ENCRYPTION_CONTEXT - ? getECProviderEnabledFS() - : getCPKEnabledFS(); - } - String relativePath = fs.getAbfsStore().getRelativePath(testPath); - try (FSDataOutputStream out = fs.create(new Path(relativePath))) { - out.write(SERVER_FILE_CONTENT.getBytes()); + try (AzureBlobFileSystem fs = getFileSystemForFileEncryption()) { + String relativePath = fs.getAbfsStore().getRelativePath(testPath); + try (FSDataOutputStream out = fs.create(new Path(relativePath))) { + out.write(SERVER_FILE_CONTENT.getBytes()); + } + verifyFileEncryption(fs, relativePath); + return fs.getAbfsClient().getEncryptionContextProvider(); } + } + + private void verifyFileEncryption(final AzureBlobFileSystem fs, + final String relativePath) throws Exception { // verify file is encrypted by calling getPathStatus (with properties) // without encryption headers in request if (fileEncryptionType != EncryptionType.NONE) { @@ -448,7 +476,19 @@ private EncryptionContextProvider createEncryptedFile(Path testPath) throws Exce getTestTracingContext(fs, false), abfsClient))); fs.getAbfsClient().setEncryptionType(fileEncryptionType); } - return fs.getAbfsClient().getEncryptionContextProvider(); + } + + private AzureBlobFileSystem getFileSystemForFileEncryption() throws Exception { + AzureBlobFileSystem fs; + if (getFileSystem().getAbfsClient().getEncryptionType() == fileEncryptionType) { + fs = (AzureBlobFileSystem) FileSystem.newInstance( + getConfiguration().getRawConfiguration()); + } else { + fs = fileEncryptionType == ENCRYPTION_CONTEXT + ? 
getECProviderEnabledFS() + : getCPKEnabledFS(); + } + return fs; } @Override diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index f972fb03b88a9..5a6d3785fb660 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -36,7 +36,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; -import org.apache.hadoop.fs.azurebfs.services.AbfsClientUtils; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.test.GenericTestUtils; @@ -281,7 +280,6 @@ public void testCreateFileOverwrite(boolean enableConditionalCreateOverwrite) final AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance(currentFs.getUri(), config); - AbfsClientUtils.setIsNamespaceEnabled(fs.getAbfsClient(), true); long totalConnectionMadeBeforeTest = fs.getInstrumentationMap() .get(CONNECTIONS_MADE.getStatName()); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemInitAndCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemInitAndCreate.java index 5c4b87b0d2f4a..dcd73cc3e982a 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemInitAndCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemInitAndCreate.java @@ -21,10 +21,16 @@ import java.io.FileNotFoundException; import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.junit.Test; +import org.mockito.Mockito; import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.TrileanConversionException; +import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; /** * Test filesystem initialization and creation. 
@@ -49,4 +55,45 @@ public void ensureFilesystemWillNotBeCreatedIfCreationConfigIsNotSet() throws Ex final AzureBlobFileSystem fs = this.createFileSystem(); FileStatus[] fileStatuses = fs.listStatus(new Path("/")); } + + @Test + public void testGetAclCallOnHnsConfigAbsence() throws Exception { + AzureBlobFileSystem fs = ((AzureBlobFileSystem) FileSystem.newInstance( + getRawConfiguration())); + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + AbfsClient client = Mockito.spy(fs.getAbfsClient()); + Mockito.doReturn(client).when(store).getClient(); + + Mockito.doThrow(TrileanConversionException.class) + .when(store) + .isNamespaceEnabled(); + + TracingContext tracingContext = getSampleTracingContext(fs, true); + Mockito.doReturn(Mockito.mock(AbfsRestOperation.class)) + .when(client) + .getAclStatus(Mockito.anyString(), Mockito.any(TracingContext.class)); + store.getIsNamespaceEnabled(tracingContext); + + Mockito.verify(client, Mockito.times(1)) + .getAclStatus(Mockito.anyString(), Mockito.any(TracingContext.class)); + } + + @Test + public void testNoGetAclCallOnHnsConfigPresence() throws Exception { + AzureBlobFileSystem fs = ((AzureBlobFileSystem) FileSystem.newInstance( + getRawConfiguration())); + AzureBlobFileSystemStore store = Mockito.spy(fs.getAbfsStore()); + AbfsClient client = Mockito.spy(fs.getAbfsClient()); + Mockito.doReturn(client).when(store).getClient(); + + Mockito.doReturn(true) + .when(store) + .isNamespaceEnabled(); + + TracingContext tracingContext = getSampleTracingContext(fs, true); + store.getIsNamespaceEnabled(tracingContext); + + Mockito.verify(client, Mockito.times(0)) + .getAclStatus(Mockito.anyString(), Mockito.any(TracingContext.class)); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientUtils.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientUtils.java index b1ac30d33805c..46ad755a1cf08 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientUtils.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientUtils.java @@ -26,9 +26,6 @@ public final class AbfsClientUtils { private AbfsClientUtils() { } - public static void setIsNamespaceEnabled(final AbfsClient abfsClient, final Boolean isNamespaceEnabled) { - abfsClient.setIsNamespaceEnabled(isNamespaceEnabled); - } public static void setEncryptionContextProvider(final AbfsClient abfsClient, final EncryptionContextProvider provider) { abfsClient.setEncryptionContextProvider(provider); From 776c0a3ab9565efca22c17f41e53f27415f23b30 Mon Sep 17 00:00:00 2001 From: Felix Nguyen <23214709+kokonguyen191@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:26:21 +0800 Subject: [PATCH 026/113] HDFS-17539. 
Make TestFileChecksum fields static (#6853) --- .../apache/hadoop/hdfs/TestFileChecksum.java | 55 +++++++++---------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java index 29ab1baa705ef..86f71da63d7db 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileChecksum.java @@ -30,9 +30,7 @@ import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.DataNodeFaultInjector; import org.apache.hadoop.test.GenericTestUtils; -import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; @@ -60,34 +58,34 @@ public class TestFileChecksum { private static final Logger LOG = LoggerFactory .getLogger(TestFileChecksum.class); - private final ErasureCodingPolicy ecPolicy = + private static final ErasureCodingPolicy ecPolicy = StripedFileTestUtil.getDefaultECPolicy(); - private int dataBlocks = ecPolicy.getNumDataUnits(); - private int parityBlocks = ecPolicy.getNumParityUnits(); + private static final int dataBlocks = ecPolicy.getNumDataUnits(); + private static final int parityBlocks = ecPolicy.getNumParityUnits(); - private MiniDFSCluster cluster; - private DistributedFileSystem fs; - private Configuration conf; - private DFSClient client; + private static MiniDFSCluster cluster; + private static DistributedFileSystem fs; + private static Configuration conf; + private static DFSClient client; - private int cellSize = ecPolicy.getCellSize(); - private int stripesPerBlock = 6; - private int blockSize = cellSize * stripesPerBlock; - private int numBlockGroups = 10; - private int stripSize = cellSize * dataBlocks; - private int blockGroupSize = stripesPerBlock * stripSize; - private int fileSize = numBlockGroups * blockGroupSize; - private int bytesPerCRC; + private static final int cellSize = ecPolicy.getCellSize(); + private static final int stripesPerBlock = 6; + private static final int blockSize = cellSize * stripesPerBlock; + private static final int numBlockGroups = 10; + private static final int stripSize = cellSize * dataBlocks; + private static final int blockGroupSize = stripesPerBlock * stripSize; + private static final int fileSize = numBlockGroups * blockGroupSize; + private static int bytesPerCRC; - private String ecDir = "/striped"; - private String stripedFile1 = ecDir + "/stripedFileChecksum1"; - private String stripedFile2 = ecDir + "/stripedFileChecksum2"; - private String replicatedFile = "/replicatedFileChecksum"; + private static final String ecDir = "/striped"; + private static final String stripedFile1 = ecDir + "/stripedFileChecksum1"; + private static final String stripedFile2 = ecDir + "/stripedFileChecksum2"; + private static final String replicatedFile = "/replicatedFileChecksum"; - private String checksumCombineMode; + private static String checksumCombineMode; - public TestFileChecksum(String checksumCombineMode) { - this.checksumCombineMode = checksumCombineMode; + public TestFileChecksum(String mode) { + checksumCombineMode = mode; } @Parameterized.Parameters @@ -100,8 +98,9 @@ public static Object[] getParameters() { @Rule public ExpectedException exception = ExpectedException.none(); - @Before - public void 
setup() throws IOException { + @Parameterized.BeforeParam + public static void setup(String mode) throws IOException { + checksumCombineMode = mode; int numDNs = dataBlocks + parityBlocks + 2; conf = new Configuration(); conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize); @@ -124,8 +123,8 @@ public void setup() throws IOException { GenericTestUtils.setLogLevel(FileChecksumHelper.LOG, Level.DEBUG); } - @After - public void tearDown() { + @Parameterized.AfterParam + public static void tearDown() { if (cluster != null) { cluster.shutdown(); cluster = null; From bb30545583c5c78199143d9cb9dd84cd3dfa8068 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Tue, 11 Jun 2024 17:10:00 +0100 Subject: [PATCH 027/113] HADOOP-19163. Use hadoop-shaded-protobuf_3_25 (#6858) Contributed by PJ Fanning --- hadoop-common-project/hadoop-common/pom.xml | 2 +- hadoop-project/pom.xml | 2 +- hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index a7dcbb24a9b40..7521cec6a1db4 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -40,7 +40,7 @@ org.apache.hadoop.thirdparty - hadoop-shaded-protobuf_3_23 + hadoop-shaded-protobuf_3_25 org.apache.hadoop diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 5b63dd129d6e9..be0f58aef63e0 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -250,7 +250,7 @@ org.apache.hadoop.thirdparty - hadoop-shaded-protobuf_3_23 + hadoop-shaded-protobuf_3_25 ${hadoop-thirdparty-protobuf.version} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml index 217335323a79c..7cef2ec4db35c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml @@ -51,7 +51,7 @@ org.apache.hadoop.thirdparty - hadoop-shaded-protobuf_3_23 + hadoop-shaded-protobuf_3_25 @@ -64,7 +64,7 @@ org.apache.hadoop.thirdparty - hadoop-shaded-protobuf_3_23 + hadoop-shaded-protobuf_3_25 @@ -75,7 +75,7 @@ org.apache.hadoop.thirdparty - hadoop-shaded-protobuf_3_23 + hadoop-shaded-protobuf_3_25 From 005030f7a0db90ac936e61f0e8f263283623e30e Mon Sep 17 00:00:00 2001 From: Anuj Modi <128447756+anujmodi2021@users.noreply.github.com> Date: Tue, 11 Jun 2024 23:36:39 +0530 Subject: [PATCH 028/113] HADOOP-18610: [ABFS] OAuth2 Token Provider support for Azure Workload Identity (#6787) Add support for Azure Active Directory (Azure AD) workload identities which integrate with the Kubernetes's native capabilities to federate with any external identity provider. 
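
As a usage illustration (not part of this patch), the sketch below shows how a client could wire up the new provider programmatically; the configuration keys are the ones introduced or reused by this change, while the container URI and the AZURE_* environment variable names (conventionally injected by the Azure workload identity webhook) are assumptions.

```java
// Hypothetical wiring of the new WorkloadIdentityTokenProvider. The config keys
// come from this patch; the URI and environment variable names are assumed.
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WorkloadIdentityAbfsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.azure.account.auth.type", "OAuth");
    conf.set("fs.azure.account.oauth.provider.type",
        "org.apache.hadoop.fs.azurebfs.oauth2.WorkloadIdentityTokenProvider");
    // Tenant and client of the federated identity; values are injected into the
    // pod by the workload identity webhook (assumed variable names).
    conf.set("fs.azure.account.oauth2.msi.tenant", System.getenv("AZURE_TENANT_ID"));
    conf.set("fs.azure.account.oauth2.client.id", System.getenv("AZURE_CLIENT_ID"));
    // Projected token file; the patch defaults this to
    // /var/run/secrets/azure/tokens/azure-identity-token when unset.
    conf.set("fs.azure.account.oauth2.token.file",
        System.getenv("AZURE_FEDERATED_TOKEN_FILE"));

    // Example container URI only; replace with a real account.
    URI container = new URI("abfss://container@myaccount.dfs.core.windows.net/");
    try (FileSystem fs = FileSystem.newInstance(container, conf)) {
      for (FileStatus status : fs.listStatus(new Path("/"))) {
        System.out.println(status.getPath());
      }
    }
  }
}
```
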
Contributed By: Anuj Modi --- .../hadoop/fs/azurebfs/AbfsConfiguration.java | 15 ++ .../constants/AuthConfigurations.java | 4 + .../azurebfs/constants/ConfigurationKeys.java | 2 + .../azurebfs/oauth2/AzureADAuthenticator.java | 52 ++++++- .../oauth2/WorkloadIdentityTokenProvider.java | 142 +++++++++++++++++ .../hadoop-azure/src/site/markdown/abfs.md | 55 ++++++- .../src/site/markdown/testing_azure.md | 36 +++++ .../azurebfs/AbstractAbfsIntegrationTest.java | 11 +- ...ITestAzureBlobFileSystemAuthorization.java | 3 +- .../fs/azurebfs/TestAccountConfiguration.java | 103 ++++++++++++- .../TestWorkloadIdentityTokenProvider.java | 144 ++++++++++++++++++ .../fs/azurebfs/oauth2/package-info.java | 22 +++ .../test/resources/workload-identity-pod.yaml | 32 ++++ 13 files changed, 594 insertions(+), 27 deletions(-) create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/WorkloadIdentityTokenProvider.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/oauth2/TestWorkloadIdentityTokenProvider.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/oauth2/package-info.java create mode 100644 hadoop-tools/hadoop-azure/src/test/resources/workload-identity-pod.yaml diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index bf9008bfe6dee..5df46eb883da1 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -59,6 +59,7 @@ import org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider; import org.apache.hadoop.fs.azurebfs.oauth2.RefreshTokenBasedTokenProvider; import org.apache.hadoop.fs.azurebfs.oauth2.UserPasswordTokenProvider; +import org.apache.hadoop.fs.azurebfs.oauth2.WorkloadIdentityTokenProvider; import org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenManager; import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.fs.azurebfs.services.ExponentialRetryPolicy; @@ -983,6 +984,20 @@ public AccessTokenProvider getTokenProvider() throws TokenAccessProviderExceptio tokenProvider = new RefreshTokenBasedTokenProvider(authEndpoint, clientId, refreshToken); LOG.trace("RefreshTokenBasedTokenProvider initialized"); + } else if (tokenProviderClass == WorkloadIdentityTokenProvider.class) { + String authority = appendSlashIfNeeded( + getTrimmedPasswordString(FS_AZURE_ACCOUNT_OAUTH_MSI_AUTHORITY, + AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_MSI_AUTHORITY)); + String tenantGuid = + getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_MSI_TENANT); + String clientId = + getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID); + String tokenFile = + getTrimmedPasswordString(FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE, + AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE); + tokenProvider = new WorkloadIdentityTokenProvider( + authority, tenantGuid, clientId, tokenFile); + LOG.trace("WorkloadIdentityTokenProvider initialized"); } else { throw new IllegalArgumentException("Failed to initialize " + tokenProviderClass); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AuthConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AuthConfigurations.java index 
4fd8ddf0b4fe3..5daab03d14ed8 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AuthConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AuthConfigurations.java @@ -39,6 +39,10 @@ public final class AuthConfigurations { public static final String DEFAULT_FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN_ENDPOINT = "https://login.microsoftonline.com/Common/oauth2/token"; + /** Default OAuth token file path for the workload identity flow. */ + public static final String + DEFAULT_FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE = + "/var/run/secrets/azure/tokens/azure-identity-token"; private AuthConfigurations() { } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index 2ccc6ade876f1..55d3f6ab4e2bc 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -273,6 +273,8 @@ public final class ConfigurationKeys { public static final String FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN = "fs.azure.account.oauth2.refresh.token"; /** Key for oauth AAD refresh token endpoint: {@value}. */ public static final String FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN_ENDPOINT = "fs.azure.account.oauth2.refresh.token.endpoint"; + /** Key for oauth AAD workload identity token file path: {@value}. */ + public static final String FS_AZURE_ACCOUNT_OAUTH_TOKEN_FILE = "fs.azure.account.oauth2.token.file"; /** Key for enabling the tracking of ABFS API latency and sending the latency numbers to the ABFS API service */ public static final String FS_AZURE_ABFS_LATENCY_TRACK = "fs.azure.abfs.latency.track"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java index 1a1a27c53b641..dab4d79658451 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java @@ -57,6 +57,9 @@ public final class AzureADAuthenticator { private static final Logger LOG = LoggerFactory.getLogger(AzureADAuthenticator.class); private static final String RESOURCE_NAME = "https://storage.azure.com/"; private static final String SCOPE = "https://storage.azure.com/.default"; + private static final String JWT_BEARER_ASSERTION = "urn:ietf:params:oauth:client-assertion-type:jwt-bearer"; + private static final String CLIENT_CREDENTIALS = "client_credentials"; + private static final String OAUTH_VERSION_2_0 = "/oauth2/v2.0/"; private static final int CONNECT_TIMEOUT = 30 * 1000; private static final int READ_TIMEOUT = 30 * 1000; @@ -95,15 +98,14 @@ public static AzureADToken getTokenUsingClientCreds(String authEndpoint, Preconditions.checkNotNull(authEndpoint, "authEndpoint"); Preconditions.checkNotNull(clientId, "clientId"); Preconditions.checkNotNull(clientSecret, "clientSecret"); - boolean isVersion2AuthenticationEndpoint = authEndpoint.contains("/oauth2/v2.0/"); QueryParams qp = new QueryParams(); - if (isVersion2AuthenticationEndpoint) { + if (isVersion2AuthenticationEndpoint(authEndpoint)) { qp.add("scope", SCOPE); } else { 
qp.add("resource", RESOURCE_NAME); } - qp.add("grant_type", "client_credentials"); + qp.add("grant_type", CLIENT_CREDENTIALS); qp.add("client_id", clientId); qp.add("client_secret", clientSecret); LOG.debug("AADToken: starting to fetch token using client creds for client ID " + clientId); @@ -111,6 +113,46 @@ public static AzureADToken getTokenUsingClientCreds(String authEndpoint, return getTokenCall(authEndpoint, qp.serialize(), null, null); } + /** + * Gets Azure Active Directory token using the user ID and a JWT assertion + * generated by a federated authentication process. + * + * The federation process uses a feature from Azure Active Directory + * called workload identity. A workload identity is an identity used + * by a software workload (such as an application, service, script, + * or container) to authenticate and access other services and resources. + * + * + * @param authEndpoint the OAuth 2.0 token endpoint associated + * with the user's directory (obtain from + * Active Directory configuration) + * @param clientId the client ID (GUID) of the client web app + * obtained from Azure Active Directory configuration + * @param clientAssertion the JWT assertion token + * @return {@link AzureADToken} obtained using the creds + * @throws IOException throws IOException if there is a failure in connecting to Azure AD + */ + public static AzureADToken getTokenUsingJWTAssertion(String authEndpoint, + String clientId, String clientAssertion) throws IOException { + Preconditions.checkNotNull(authEndpoint, "authEndpoint"); + Preconditions.checkNotNull(clientId, "clientId"); + Preconditions.checkNotNull(clientAssertion, "clientAssertion"); + + QueryParams qp = new QueryParams(); + if (isVersion2AuthenticationEndpoint(authEndpoint)) { + qp.add("scope", SCOPE); + } else { + qp.add("resource", RESOURCE_NAME); + } + qp.add("grant_type", CLIENT_CREDENTIALS); + qp.add("client_id", clientId); + qp.add("client_assertion", clientAssertion); + qp.add("client_assertion_type", JWT_BEARER_ASSERTION); + LOG.debug("AADToken: starting to fetch token using client assertion for client ID " + clientId); + + return getTokenCall(authEndpoint, qp.serialize(), null, "POST"); + } + /** * Gets AAD token from the local virtual machine's VM extension. This only works on * an Azure VM with MSI extension @@ -523,4 +565,8 @@ private static String consumeInputStream(InputStream inStream, int length) throw return new String(b, 0, totalBytesRead, StandardCharsets.UTF_8); } + + private static boolean isVersion2AuthenticationEndpoint(String authEndpoint) { + return authEndpoint.contains(OAUTH_VERSION_2_0); + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/WorkloadIdentityTokenProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/WorkloadIdentityTokenProvider.java new file mode 100644 index 0000000000000..21d5f66f694ed --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/WorkloadIdentityTokenProvider.java @@ -0,0 +1,142 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.oauth2; + +import java.io.File; +import java.io.IOException; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.thirdparty.com.google.common.base.Strings; +import org.apache.hadoop.util.Preconditions; + +/** + * Provides tokens based on Azure AD Workload Identity. + */ +public class WorkloadIdentityTokenProvider extends AccessTokenProvider { + + private static final String OAUTH2_TOKEN_PATH = "/oauth2/v2.0/token"; + private static final Logger LOG = LoggerFactory.getLogger(AccessTokenProvider.class); + private static final String EMPTY_TOKEN_FILE_ERROR = "Empty token file found at specified path: "; + private static final String TOKEN_FILE_READ_ERROR = "Error reading token file at specified path: "; + + private final String authEndpoint; + private final String clientId; + private final String tokenFile; + private long tokenFetchTime = -1; + + public WorkloadIdentityTokenProvider(final String authority, final String tenantId, + final String clientId, final String tokenFile) { + Preconditions.checkNotNull(authority, "authority"); + Preconditions.checkNotNull(tenantId, "tenantId"); + Preconditions.checkNotNull(clientId, "clientId"); + Preconditions.checkNotNull(tokenFile, "tokenFile"); + + this.authEndpoint = authority + tenantId + OAUTH2_TOKEN_PATH; + this.clientId = clientId; + this.tokenFile = tokenFile; + } + + @Override + protected AzureADToken refreshToken() throws IOException { + LOG.debug("AADToken: refreshing token from JWT Assertion"); + String clientAssertion = getClientAssertion(); + AzureADToken token = getTokenUsingJWTAssertion(clientAssertion); + tokenFetchTime = System.currentTimeMillis(); + return token; + } + + /** + * Checks if the token is about to expire as per base expiry logic. + * Otherwise, expire if there is a clock skew issue in the system. + * + * @return true if the token is expiring in next 1 hour or if a token has + * never been fetched + */ + @Override + protected boolean isTokenAboutToExpire() { + if (tokenFetchTime == -1 || super.isTokenAboutToExpire()) { + return true; + } + + // In case of, any clock skew issues, refresh token. + long elapsedTimeSinceLastTokenRefreshInMillis = + System.currentTimeMillis() - tokenFetchTime; + boolean expiring = elapsedTimeSinceLastTokenRefreshInMillis < 0; + if (expiring) { + // Clock Skew issue. Refresh token. + LOG.debug("JWTToken: token renewing. Time elapsed since last token fetch:" + + " {} milliseconds", elapsedTimeSinceLastTokenRefreshInMillis); + } + + return expiring; + } + + /** + * Gets the client assertion from the token file. + * The token file should contain the client assertion in JWT format. + * It should be a String containing Base64Url encoded JSON Web Token (JWT). + * See + * Azure Workload Identity FAQ. + * + * @return the client assertion. + * @throws IOException if the token file is empty. 
+ */ + private String getClientAssertion() + throws IOException { + String clientAssertion = ""; + try { + File file = new File(tokenFile); + clientAssertion = FileUtils.readFileToString(file, "UTF-8"); + } catch (Exception e) { + throw new IOException(TOKEN_FILE_READ_ERROR + tokenFile, e); + } + if (Strings.isNullOrEmpty(clientAssertion)) { + throw new IOException(EMPTY_TOKEN_FILE_ERROR + tokenFile); + } + return clientAssertion; + } + + /** + * Gets the Azure AD token from a client assertion in JWT format. + * This method exists to make unit testing possible. + * + * @param clientAssertion the client assertion. + * @return the Azure AD token. + * @throws IOException if there is a failure in connecting to Azure AD. + */ + @VisibleForTesting + AzureADToken getTokenUsingJWTAssertion(String clientAssertion) throws IOException { + return AzureADAuthenticator + .getTokenUsingJWTAssertion(authEndpoint, clientId, clientAssertion); + } + + /** + * Returns the last time the token was fetched from the token file. + * This method exists to make unit testing possible. + * + * @return the time the token was last fetched. + */ + @VisibleForTesting + long getTokenFetchTime() { + return tokenFetchTime; + } +} diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md index 3ab8eee3ac49d..37904808ec659 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md @@ -321,10 +321,9 @@ What can be changed is what secrets/credentials are used to authenticate the cal The authentication mechanism is set in `fs.azure.account.auth.type` (or the account specific variant). The possible values are SharedKey, OAuth, Custom -and SAS. For the various OAuth options use the config `fs.azure.account -.oauth.provider.type`. Following are the implementations supported -ClientCredsTokenProvider, UserPasswordTokenProvider, MsiTokenProvider and -RefreshTokenBasedTokenProvider. An IllegalArgumentException is thrown if +and SAS. For the various OAuth options use the config `fs.azure.account.oauth.provider.type`. Following are the implementations supported +ClientCredsTokenProvider, UserPasswordTokenProvider, MsiTokenProvider, +RefreshTokenBasedTokenProvider and WorkloadIdentityTokenProvider. An IllegalArgumentException is thrown if the specified provider type is not one of the supported. All secrets can be stored in JCEKS files. These are encrypted and password @@ -561,6 +560,54 @@ The Azure Portal/CLI is used to create the service identity. ``` +### Azure Workload Identity + +[Azure Workload Identities](https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/overview), formerly "Azure AD pod identity". + +OAuth 2.0 tokens are written to a file that is only accessible +from the executing pod (`/var/run/secrets/azure/tokens/azure-identity-token`). +The issued credentials can be used to authenticate. + +The Azure Portal/CLI is used to create the service identity. 
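
Under the hood this is a standard OAuth 2.0 client-credentials exchange in which the projected Kubernetes token is presented as a JWT client assertion; the endpoint path, grant type, assertion type and storage scope used below match the constants added by this patch. The standalone sketch uses java.net.http purely for illustration (the connector itself goes through AzureADAuthenticator.getTokenUsingJWTAssertion), and the AZURE_* environment variable names are assumptions. The declarative configuration sample follows.

```java
// Illustrative token exchange only; the connector performs the equivalent call
// via AzureADAuthenticator.getTokenUsingJWTAssertion(). Env var names are assumed.
import java.net.URI;
import java.net.URLEncoder;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;

public class WorkloadIdentityTokenExchangeSketch {
  public static void main(String[] args) throws Exception {
    String tenant = System.getenv("AZURE_TENANT_ID");
    String clientId = System.getenv("AZURE_CLIENT_ID");
    // The projected JWT written into the pod for the workload identity.
    String assertion = Files.readString(
        Paths.get(System.getenv("AZURE_FEDERATED_TOKEN_FILE"))).trim();

    String form = "grant_type=client_credentials"
        + "&scope=" + encode("https://storage.azure.com/.default")
        + "&client_id=" + encode(clientId)
        + "&client_assertion_type="
        + encode("urn:ietf:params:oauth:client-assertion-type:jwt-bearer")
        + "&client_assertion=" + encode(assertion);

    HttpRequest request = HttpRequest.newBuilder()
        .uri(URI.create("https://login.microsoftonline.com/" + tenant + "/oauth2/v2.0/token"))
        .header("Content-Type", "application/x-www-form-urlencoded")
        .POST(HttpRequest.BodyPublishers.ofString(form))
        .build();

    HttpResponse<String> response = HttpClient.newHttpClient()
        .send(request, HttpResponse.BodyHandlers.ofString());
    // On success the JSON body carries access_token and expires_in.
    System.out.println(response.body());
  }

  private static String encode(String s) {
    return URLEncoder.encode(s, StandardCharsets.UTF_8);
  }
}
```
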
+ +```xml + + fs.azure.account.auth.type + OAuth + + Use OAuth authentication + + + + fs.azure.account.oauth.provider.type + org.apache.hadoop.fs.azurebfs.oauth2.WorkloadIdentityTokenProvider + + Use Workload Identity for issuing OAuth tokens + + + + fs.azure.account.oauth2.msi.tenant + ${env.AZURE_TENANT_ID} + + Optional MSI Tenant ID + + + + fs.azure.account.oauth2.client.id + ${env.AZURE_CLIENT_ID} + + Optional Client ID + + + + fs.azure.account.oauth2.token.file + ${env.AZURE_FEDERATED_TOKEN_FILE} + + Token file path + + +``` + ### Custom OAuth 2.0 Token Provider A Custom OAuth 2.0 token provider supplies the ABFS connector with an OAuth 2.0 diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md index 04bc073461d3b..f8e4dde3e86e8 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md @@ -879,6 +879,42 @@ hierarchical namespace enabled, and set the following configuration settings: --> + + + unset + + + 00 + + unset @@ -115,14 +120,8 @@ ${test.build.data}/${surefire.forkNumber} ${test.build.dir}/${surefire.forkNumber} ${hadoop.tmp.dir}/${surefire.forkNumber} + job-${job.id}-fork-000${surefire.forkNumber} - - - - - - - fork-000${surefire.forkNumber} ${fs.s3a.scale.test.enabled} ${fs.s3a.scale.test.huge.filesize} @@ -163,7 +162,7 @@ - fork-000${surefire.forkNumber} + job-${job.id}-fork-000${surefire.forkNumber} ${fs.s3a.scale.test.enabled} ${fs.s3a.scale.test.huge.filesize} @@ -174,14 +173,14 @@ ${test.integration.timeout} ${fs.s3a.prefetch.enabled} + + ${root.tests.enabled} + - - - - + @@ -228,6 +227,9 @@ ${fs.s3a.directory.marker.audit} ${fs.s3a.prefetch.enabled} + + ${root.tests.enabled} + job-${job.id} @@ -289,6 +291,7 @@ ${fs.s3a.directory.marker.audit} ${fs.s3a.prefetch.enabled} + job-${job.id} ${fs.s3a.scale.test.timeout} diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md index 24c4c322ca143..45d1c8476578c 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md @@ -43,7 +43,7 @@ is a more specific lie and harder to make. And, if you get caught out: you lose all credibility with the project. You don't need to test from a VM within the AWS infrastructure; with the -`-Dparallel=tests` option the non-scale tests complete in under ten minutes. +`-Dparallel-tests` option the non-scale tests complete in under twenty minutes. Because the tests clean up after themselves, they are also designed to be low cost. It's neither hard nor expensive to run the tests; if you can't, there's no guarantee your patch works. The reviewers have enough to do, and @@ -539,12 +539,51 @@ Otherwise, set a large timeout in `fs.s3a.scale.test.timeout` The tests are executed in an order to only clean up created files after the end of all the tests. If the tests are interrupted, the test data will remain. +## Testing through continuous integration + +### Parallel CI builds. +For CI testing of the module, including the integration tests, +it is generally necessary to support testing multiple PRs simultaneously. + +To do this +1. A job ID must be supplied in the `job.id` property, so each job works on an isolated directory + tree. 
This should be a number or unique string, which will be used within a path element, so + must only contain characters valid in an S3/hadoop path element. +2. Root directory tests need to be disabled by setting `fs.s3a.root.tests.enabled` to + `false`, either in the command line to maven or in the XML configurations. + +``` +mvn verify -T 1C -Dparallel-tests -DtestsThreadCount=14 -Dscale -Dfs.s3a.root.tests.enabled=false -Djob.id=001 +``` + +This parallel execution feature is only for isolated builds sharing a single S3 bucket; it does +not support parallel builds and tests from the same local source tree. + +Without the root tests being executed, set up a scheduled job to purge the test bucket of all +data on a regular basis, to keep costs down. +The easiest way to do this is to have a bucket lifecycle rule for the bucket to delete all files more than a few days old, +alongside one to abort all pending uploads more than 24h old. + + +### Securing CI builds + +It's clearly unsafe to have CI infrastructure testing PRs submitted to apache github account +with AWS credentials -which is why it isn't done by the Yetus-initiated builds. + +Anyone doing this privately should: +* Review incoming patches before triggering the tests. +* Have a dedicated IAM role with restricted access to the test bucket, any KMS keys used, and the + external bucket containing the CSV test file. +* Have a build process which generates short-lived session credentials for this role. +* Run the tests in an EC2 VM/container which collects the restricted IAM credentials + from the IAM instance/container credentials provider. + ## Load tests. -Some are designed to overload AWS services with more +Some tests are designed to overload AWS services with more requests per second than an AWS account is permitted. -The operation of these test maybe observable to other users of the same +The operation of these tests may be observable to other users of the same account -especially if they are working in the AWS region to which the tests are targeted. @@ -556,6 +595,10 @@ They do not run automatically: they must be explicitly run from the command line Look in the source for these and reads the Javadocs before executing. +Note: one fear here was that asking for two many session/role credentials in a short period +of time would actually lock an account out of a region. It doesn't: it simply triggers +throttling of STS requests. + ## Testing against non-AWS S3 Stores. The S3A filesystem is designed to work with S3 stores which implement diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRootDir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRootDir.java index 5335de1b324d0..cd5c078a9ed2b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRootDir.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractRootDir.java @@ -27,6 +27,8 @@ import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeSkipRootTests; + /** * root dir operations against an S3 bucket. 
*/ @@ -36,6 +38,12 @@ public class ITestS3AContractRootDir extends private static final Logger LOG = LoggerFactory.getLogger(ITestS3AContractRootDir.class); + @Override + public void setup() throws Exception { + super.setup(); + maybeSkipRootTests(getFileSystem().getConf()); + } + @Override protected AbstractFSContract createContract(Configuration conf) { return new S3AContract(conf); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index 73bba9d62cbd8..a3b994054e4d3 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -386,8 +386,10 @@ public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty() s3Configuration.pathStyleAccessEnabled()); byte[] file = ContractTestUtils.toAsciiByteArray("test file"); ContractTestUtils.writeAndRead(fs, - new Path("/path/style/access/testFile"), file, file.length, - (int) conf.getLongBytes(Constants.FS_S3A_BLOCK_SIZE, file.length), false, true); + createTestPath(new Path("/path/style/access/testFile")), + file, file.length, + (int) conf.getLongBytes(Constants.FS_S3A_BLOCK_SIZE, file.length), + false, true); } catch (final AWSRedirectException e) { LOG.error("Caught exception: ", e); // Catch/pass standard path style access behaviour when live bucket diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java index 321f831c0a8dc..d22de3b06d81b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEC.java @@ -20,13 +20,9 @@ import java.io.IOException; import java.nio.file.AccessDeniedException; -import java.util.Arrays; -import java.util.Collection; import org.assertj.core.api.Assertions; import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -37,19 +33,14 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; -import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY; -import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY_DELETE; -import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY_KEEP; import static org.apache.hadoop.fs.s3a.Constants.ETAG_CHECKSUM_ENABLED; import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_ALGORITHM; import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_KEY; import static org.apache.hadoop.fs.s3a.Constants.SERVER_SIDE_ENCRYPTION_ALGORITHM; import static org.apache.hadoop.fs.s3a.Constants.SERVER_SIDE_ENCRYPTION_KEY; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.createTestPath; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeSkipRootTests; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.skipIfEncryptionTestsDisabled; import static 
org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -60,7 +51,6 @@ * Equally "vexing" has been the optimizations of getFileStatus(), wherein * LIST comes before HEAD path + / */ -@RunWith(Parameterized.class) public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption { private static final String SERVICE_AMAZON_S3_STATUS_CODE_403 @@ -75,31 +65,11 @@ public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption { = "msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8="; private static final int TEST_FILE_LEN = 2048; - /** - * Parameterization. - */ - @Parameterized.Parameters(name = "{0}") - public static Collection params() { - return Arrays.asList(new Object[][]{ - {"keep-markers", true}, - {"delete-markers", false} - }); - } - - /** - * Parameter: should directory markers be retained? - */ - private final boolean keepMarkers; - /** * Filesystem created with a different key. */ private S3AFileSystem fsKeyB; - public ITestS3AEncryptionSSEC(final String name, - final boolean keepMarkers) { - this.keepMarkers = keepMarkers; - } @SuppressWarnings("deprecation") @Override @@ -108,16 +78,11 @@ protected Configuration createConfiguration() { String bucketName = getTestBucketName(conf); // directory marker options removeBaseAndBucketOverrides(bucketName, conf, - DIRECTORY_MARKER_POLICY, ETAG_CHECKSUM_ENABLED, S3_ENCRYPTION_ALGORITHM, S3_ENCRYPTION_KEY, SERVER_SIDE_ENCRYPTION_ALGORITHM, SERVER_SIDE_ENCRYPTION_KEY); - conf.set(DIRECTORY_MARKER_POLICY, - keepMarkers - ? DIRECTORY_MARKER_POLICY_KEEP - : DIRECTORY_MARKER_POLICY_DELETE); conf.set(S3_ENCRYPTION_ALGORITHM, getSSEAlgorithm().getMethod()); conf.set(S3_ENCRYPTION_KEY, KEY_1); @@ -129,6 +94,9 @@ protected Configuration createConfiguration() { public void setup() throws Exception { super.setup(); assumeEnabled(); + // although not a root dir test, this confuses paths enough it shouldn't be run in + // parallel with other jobs + maybeSkipRootTests(getConfiguration()); } @Override @@ -154,7 +122,7 @@ public void testCreateFileAndReadWithDifferentEncryptionKey() throws () -> { int len = TEST_FILE_LEN; describe("Create an encrypted file of size " + len); - Path src = path("testCreateFileAndReadWithDifferentEncryptionKey"); + Path src = methodPath(); writeThenReadFile(src, len); //extract the test FS @@ -174,7 +142,7 @@ public void testCreateFileAndReadWithDifferentEncryptionKey() throws */ @Test public void testCreateSubdirWithDifferentKey() throws Exception { - Path base = path("testCreateSubdirWithDifferentKey"); + Path base = methodPath(); Path nestedDirectory = new Path(base, "nestedDir"); fsKeyB = createNewFileSystemWithSSECKey( KEY_2); @@ -213,9 +181,10 @@ public void testCreateFileThenMoveWithDifferentSSECKey() throws Exception { */ @Test public void testRenameFile() throws Exception { - Path src = path("original-path.txt"); + final Path base = methodPath(); + Path src = new Path(base, "original-path.txt"); writeThenReadFile(src, TEST_FILE_LEN); - Path newPath = path("different-path.txt"); + Path newPath = new Path(base, "different-path.txt"); getFileSystem().rename(src, newPath); byte[] data = dataset(TEST_FILE_LEN, 'a', 'z'); ContractTestUtils.verifyFileContents(getFileSystem(), newPath, data); @@ -228,11 +197,11 @@ public void testRenameFile() throws Exception { @Test public void testListEncryptedDir() throws Exception { - Path pathABC = path("testListEncryptedDir/a/b/c/"); + Path pathABC = new Path(methodPath(), "a/b/c/"); Path pathAB = pathABC.getParent(); Path pathA = pathAB.getParent(); - Path nestedDirectory = 
createTestPath(pathABC); + Path nestedDirectory = pathABC; assertTrue(getFileSystem().mkdirs(nestedDirectory)); fsKeyB = createNewFileSystemWithSSECKey(KEY_4); @@ -261,7 +230,7 @@ public void testListEncryptedDir() throws Exception { @Test public void testListStatusEncryptedDir() throws Exception { - Path pathABC = path("testListStatusEncryptedDir/a/b/c/"); + Path pathABC = new Path(methodPath(), "a/b/c/"); Path pathAB = pathABC.getParent(); Path pathA = pathAB.getParent(); assertTrue(getFileSystem().mkdirs(pathABC)); @@ -296,7 +265,7 @@ public void testListStatusEncryptedDir() throws Exception { */ @Test public void testListStatusEncryptedFile() throws Exception { - Path pathABC = path("testListStatusEncryptedFile/a/b/c/"); + Path pathABC = new Path(methodPath(), "a/b/c/"); assertTrue("mkdirs failed", getFileSystem().mkdirs(pathABC)); Path fileToStat = new Path(pathABC, "fileToStat.txt"); @@ -305,23 +274,9 @@ public void testListStatusEncryptedFile() throws Exception { fsKeyB = createNewFileSystemWithSSECKey(KEY_4); //Until this point, no exception is thrown about access - if (statusProbesCheckS3(fsKeyB, fileToStat)) { - intercept(AccessDeniedException.class, - SERVICE_AMAZON_S3_STATUS_CODE_403, - () -> fsKeyB.listStatus(fileToStat)); - } else { - fsKeyB.listStatus(fileToStat); - } - } - - /** - * Do file status probes check S3? - * @param fs filesystem - * @param path file path - * @return true if check for a path being a file will issue a HEAD request. - */ - private boolean statusProbesCheckS3(S3AFileSystem fs, Path path) { - return true; + intercept(AccessDeniedException.class, + SERVICE_AMAZON_S3_STATUS_CODE_403, + () -> fsKeyB.listStatus(fileToStat)); } /** @@ -332,22 +287,17 @@ private boolean statusProbesCheckS3(S3AFileSystem fs, Path path) { */ @Test public void testDeleteEncryptedObjectWithDifferentKey() throws Exception { - //requireUnguardedFilesystem(); - Path pathABC = path("testDeleteEncryptedObjectWithDifferentKey/a/b/c/"); + Path pathABC = new Path(methodPath(), "a/b/c/"); Path pathAB = pathABC.getParent(); Path pathA = pathAB.getParent(); assertTrue(getFileSystem().mkdirs(pathABC)); Path fileToDelete = new Path(pathABC, "filetobedeleted.txt"); writeThenReadFile(fileToDelete, TEST_FILE_LEN); fsKeyB = createNewFileSystemWithSSECKey(KEY_4); - if (statusProbesCheckS3(fsKeyB, fileToDelete)) { - intercept(AccessDeniedException.class, - SERVICE_AMAZON_S3_STATUS_CODE_403, - () -> fsKeyB.delete(fileToDelete, false)); - } else { - fsKeyB.delete(fileToDelete, false); - } + intercept(AccessDeniedException.class, + SERVICE_AMAZON_S3_STATUS_CODE_403, + () -> fsKeyB.delete(fileToDelete, false)); //This is possible fsKeyB.delete(pathABC, true); fsKeyB.delete(pathAB, true); @@ -360,7 +310,7 @@ public void testDeleteEncryptedObjectWithDifferentKey() throws Exception { */ @Test public void testChecksumRequiresReadAccess() throws Throwable { - Path path = path("tagged-file"); + Path path = methodPath(); S3AFileSystem fs = getFileSystem(); touch(fs, path); Assertions.assertThat(fs.getFileChecksum(path)) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java index af04c2fa634e9..9ab1768b2aba1 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java @@ -193,6 +193,8 @@ public interface S3ATestConstants { /** * Fork ID passed down from 
maven if the test is running in parallel. + * If a build was also executed with job.id set, this is included in + * the fork ID. */ String TEST_UNIQUE_FORK_ID = "test.unique.fork.id"; String TEST_STS_ENABLED = "test.fs.s3a.sts.enabled"; @@ -273,4 +275,14 @@ public interface S3ATestConstants { * AWS ireland region. */ String EU_WEST_1 = "eu-west-1"; + + /** + * System property for root tests being enabled: {@value}. + */ + String ROOT_TESTS_ENABLED = "fs.s3a.root.tests.enabled"; + + /** + * Default policy on root tests: {@value}. + */ + boolean DEFAULT_ROOT_TESTS_ENABLED = true; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index d91bbe4656ec2..caff545eadfcb 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -601,12 +601,13 @@ public static void assumePathCapabilityFalse(FileSystem fs, String capability) { /** * Create a test path, using the value of * {@link S3ATestConstants#TEST_UNIQUE_FORK_ID} if it is set. + * This path is *not* qualified. * @param defVal default value * @return a path */ public static Path createTestPath(Path defVal) { String testUniqueForkId = - System.getProperty(S3ATestConstants.TEST_UNIQUE_FORK_ID); + System.getProperty(TEST_UNIQUE_FORK_ID); return testUniqueForkId == null ? defVal : new Path("/" + testUniqueForkId, "test"); } @@ -1738,6 +1739,15 @@ public static void disablePrefetching(Configuration conf) { removeBaseAndBucketOverrides(conf, PREFETCH_ENABLED_KEY); } + /** + * Skip root tests if the system properties/config says so. + * @param conf configuration to check + */ + public static void maybeSkipRootTests(Configuration conf) { + assume("Root tests disabled", + getTestPropertyBool(conf, ROOT_TESTS_ENABLED, DEFAULT_ROOT_TESTS_ENABLED)); + } + /** * Does this FS support multi object delete? * @param fs filesystem diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/terasort/ITestTerasortOnS3A.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/terasort/ITestTerasortOnS3A.java index be52220833784..da1580076dbb8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/terasort/ITestTerasortOnS3A.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/terasort/ITestTerasortOnS3A.java @@ -20,6 +20,7 @@ import java.io.File; import java.io.FileNotFoundException; +import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collection; @@ -43,6 +44,7 @@ import org.apache.hadoop.examples.terasort.TeraSortConfigKeys; import org.apache.hadoop.examples.terasort.TeraValidate; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.apache.hadoop.fs.s3a.commit.AbstractYarnClusterITest; import org.apache.hadoop.fs.s3a.commit.CommitConstants; import org.apache.hadoop.fs.s3a.commit.magic.MagicS3GuardCommitter; @@ -118,7 +120,7 @@ public class ITestTerasortOnS3A extends AbstractYarnClusterITest { * * @return the committer binding for this run. 
*/ - @Parameterized.Parameters(name = "{0}") + @Parameterized.Parameters(name = "{0}-memory={1}") public static Collection params() { return Arrays.asList(new Object[][]{ {DirectoryStagingCommitter.NAME, false}, @@ -143,6 +145,11 @@ public void setup() throws Exception { prepareToTerasort(); } + @Override + protected void deleteTestDirInTeardown() throws IOException { + /* no-op */ + } + /** * Set up the job conf with the options for terasort chosen by the scale * options. @@ -180,14 +187,14 @@ protected int getRowCount() { * The paths used must be unique across parameterized runs but * common across all test cases in a single parameterized run. */ - private void prepareToTerasort() { + private void prepareToTerasort() throws IOException { // small sample size for faster runs - terasortPath = new Path("/terasort-" + committerName + "-" + trackCommitsInMemory) - .makeQualified(getFileSystem()); + terasortPath = getFileSystem().qualify( + new Path(S3ATestUtils.createTestPath(new Path("terasort-test")), + "terasort-" + committerName + "-" + trackCommitsInMemory)); sortInput = new Path(terasortPath, "sortin"); sortOutput = new Path(terasortPath, "sortout"); sortValidate = new Path(terasortPath, "validate"); - } /** @@ -254,7 +261,7 @@ private void executeStage( */ @Test public void test_100_terasort_setup() throws Throwable { - describe("Setting up for a terasort"); + describe("Setting up for a terasort with path of %s", terasortPath); getFileSystem().delete(terasortPath, true); completedStages = new HashMap<>(); @@ -339,7 +346,8 @@ public void test_140_teracomplete() throws Throwable { stage.accept("teravalidate"); stage.accept("overall"); String text = results.toString(); - File resultsFile = new File(getReportDir(), committerName + ".csv"); + File resultsFile = new File(getReportDir(), + String.format("%s-%s.csv", committerName, trackCommitsInMemory)); FileUtils.write(resultsFile, text, StandardCharsets.UTF_8); LOG.info("Results are in {}\n{}", resultsFile, text); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextMainOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextMainOperations.java index 3b4eaf4a80667..b28f88e43b42e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextMainOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextMainOperations.java @@ -3,7 +3,7 @@ * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -11,21 +11,28 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + package org.apache.hadoop.fs.s3a.fileContext; import java.io.IOException; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileContextMainOperationsBaseTest; -import org.apache.hadoop.fs.s3a.S3ATestUtils; +import java.util.UUID; + import org.junit.Before; import org.junit.Ignore; import org.junit.Test; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContextMainOperationsBaseTest; +import org.apache.hadoop.fs.FileContextTestHelper; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3ATestUtils; + /** * S3A implementation of FileContextMainOperationsBaseTest. */ public class ITestS3AFileContextMainOperations - extends FileContextMainOperationsBaseTest { + extends FileContextMainOperationsBaseTest { + @Before public void setUp() throws IOException, Exception { @@ -34,6 +41,19 @@ public void setUp() throws IOException, Exception { super.setUp(); } + + /** + * Called before even our own constructor and fields are + * inited. + * @return a test helper using the s3a test path. + */ + @Override + protected FileContextTestHelper createFileContextHelper() { + Path testPath = + S3ATestUtils.createTestPath(new Path("/" + UUID.randomUUID())); + return new FileContextTestHelper(testPath.toUri().toString()); + } + @Override protected boolean listCorruptedBlocksSupported() { return false; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java index dc81077257bcc..c2c941798e77a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/AbstractSTestS3AHugeFiles.java @@ -113,6 +113,16 @@ public void setup() throws Exception { DEFAULT_HUGE_FILESIZE); } + /** + * Test dir deletion is removed from test case teardown so the + * subsequent tests see the output. + * @throws IOException failure + */ + @Override + protected void deleteTestDirInTeardown() throws IOException { + /* no-op */ + } + /** * Get the name of this test suite, which is used in path generation. * Base implementation uses {@link #getBlockOutputBufferName()} for this. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AConcurrentOps.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AConcurrentOps.java index df5cd46fffae1..c7e65d70fd340 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AConcurrentOps.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AConcurrentOps.java @@ -37,7 +37,6 @@ import org.apache.hadoop.fs.contract.ContractTestUtils.NanoTimer; import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.S3AFileSystem; -import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.junit.After; import org.junit.Test; @@ -67,8 +66,8 @@ public void setup() throws Exception { super.setup(); auxFs = getNormalFileSystem(); - testRoot = path("/ITestS3AConcurrentOps"); - testRoot = S3ATestUtils.createTestPath(testRoot); + // this is set to the method path, even in test setup. 
+ testRoot = methodPath(); } private S3AFileSystem getNormalFileSystem() throws Exception { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java index 514c6cf886918..38839ba0ddce7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/S3AScaleTestBase.java @@ -83,7 +83,7 @@ public Configuration getConf() { @Override public void setup() throws Exception { super.setup(); - testPath = path("/tests3ascale"); + testPath = path("tests3ascale"); LOG.debug("Scale test operation count = {}", getOperationCount()); enabled = getTestPropertyBool( getConf(), diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerToolRootOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerToolRootOperations.java index 02fec81513fca..6d50fa7230335 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerToolRootOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/ITestMarkerToolRootOperations.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.Path; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeSkipRootTests; import static org.apache.hadoop.fs.s3a.tools.MarkerTool.AUDIT; import static org.apache.hadoop.fs.s3a.tools.MarkerTool.CLEAN; import static org.apache.hadoop.fs.s3a.tools.MarkerTool.MARKERS; @@ -42,6 +43,7 @@ public class ITestMarkerToolRootOperations extends AbstractMarkerToolTest { @Override public void setup() throws Exception { super.setup(); + maybeSkipRootTests(getConfiguration()); rootPath = getFileSystem().makeQualified(new Path("/")); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/yarn/ITestS3A.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/yarn/ITestS3A.java index 7d2c1dc302377..037eda974276d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/yarn/ITestS3A.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/yarn/ITestS3A.java @@ -17,49 +17,34 @@ */ package org.apache.hadoop.fs.s3a.yarn; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CreateFlag; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileContext; -import org.apache.hadoop.fs.FsStatus; -import org.apache.hadoop.fs.Path; +import java.util.EnumSet; -import org.junit.After; -import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; -import java.util.EnumSet; +import org.apache.hadoop.fs.CreateFlag; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.FsStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.AbstractS3ATestBase; import org.apache.hadoop.fs.s3a.S3ATestUtils; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; - /** * S3A tests through the {@link FileContext} API. 
*/ -public class ITestS3A { +public class ITestS3A extends AbstractS3ATestBase { private FileContext fc; @Rule public final Timeout testTimeout = new Timeout(90000); - @Before - public void setUp() throws Exception { - Configuration conf = new Configuration(); - fc = S3ATestUtils.createTestFileContext(conf); - } - - @After - public void tearDown() throws Exception { - if (fc != null) { - fc.delete(getTestPath(), true); - } - } - protected Path getTestPath() { - return S3ATestUtils.createTestPath(new Path("/tests3afc")); + @Override + public void setup() throws Exception { + super.setup(); + fc = S3ATestUtils.createTestFileContext(getConfiguration()); } @Test @@ -77,7 +62,7 @@ public void testS3AStatus() throws Exception { @Test public void testS3ACreateFileInSubDir() throws Exception { - Path dirPath = getTestPath(); + Path dirPath = methodPath(); fc.mkdir(dirPath, FileContext.DIR_DEFAULT_PERM, true); Path filePath = new Path(dirPath, "file"); try (FSDataOutputStream file = fc.create(filePath, EnumSet.of(CreateFlag From 2fbbfe3cc93574818690373a26e1c3e0f88c560a Mon Sep 17 00:00:00 2001 From: Heagan A Date: Fri, 14 Jun 2024 20:47:21 -0700 Subject: [PATCH 035/113] HDFS-17546. Implementing HostsFileReader timeout (#6873) --- .../hdfs/util/CombinedHostsFileReader.java | 38 +++++++++++++ .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 2 + .../CombinedHostFileManager.java | 11 ++-- .../src/main/resources/hdfs-default.xml | 9 ++- .../util/TestCombinedHostsFileReader.java | 57 +++++++++++++++++++ 5 files changed, 112 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java index c7724ce6db486..33f4934e5489d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java @@ -33,6 +33,11 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.List; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.FutureTask; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.Callable; +import java.util.concurrent.TimeoutException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -120,4 +125,37 @@ private CombinedHostsFileReader() { } return allDNs; } + + /** + * Wrapper to call readFile with timeout via Future Tasks. 
+ * If timeout is reached, it will throw IOException + * @param hostsFile the input json file to read from + * @param readTimeout timeout for FutureTask execution in milliseconds + * @return the set of DatanodeAdminProperties + * @throws IOException + */ + public static DatanodeAdminProperties[] + readFileWithTimeout(final String hostsFile, final int readTimeout) throws IOException { + FutureTask futureTask = new FutureTask<>( + new Callable() { + @Override + public DatanodeAdminProperties[] call() throws Exception { + return readFile(hostsFile); + } + }); + + Thread thread = new Thread(futureTask); + thread.start(); + + try { + return futureTask.get(readTimeout, TimeUnit.MILLISECONDS); + } catch (TimeoutException e) { + futureTask.cancel(true); + LOG.error("refresh File read operation timed out"); + throw new IOException("host file read operation timed out"); + } catch (InterruptedException | ExecutionException e) { + LOG.error("File read operation interrupted : " + e.getMessage()); + throw new IOException("host file read operation timed out"); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index f92a2ad56581b..7d136c5c83751 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -757,6 +757,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { "dfs.namenode.hosts.provider.classname"; public static final String DFS_HOSTS = "dfs.hosts"; public static final String DFS_HOSTS_EXCLUDE = "dfs.hosts.exclude"; + public static final String DFS_HOSTS_TIMEOUT = "dfs.hosts.timeout"; + public static final int DFS_HOSTS_TIMEOUT_DEFAULT = 0; public static final String DFS_NAMENODE_AUDIT_LOGGERS_KEY = "dfs.namenode.audit.loggers"; public static final String DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME = "default"; public static final String DFS_NAMENODE_AUDIT_LOG_TOKEN_TRACKING_ID_KEY = "dfs.namenode.audit.log.token.tracking.id"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CombinedHostFileManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CombinedHostFileManager.java index d92f14c8c66ac..5b1a2dee99078 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CombinedHostFileManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CombinedHostFileManager.java @@ -179,12 +179,15 @@ public Configuration getConf() { @Override public void refresh() throws IOException { - refresh(conf.get(DFSConfigKeys.DFS_HOSTS, "")); + refresh(conf.get(DFSConfigKeys.DFS_HOSTS, ""), + conf.getInt(DFSConfigKeys.DFS_HOSTS_TIMEOUT, DFSConfigKeys.DFS_HOSTS_TIMEOUT_DEFAULT) + ); } - private void refresh(final String hostsFile) throws IOException { + private void refresh(final String hostsFile, final int readTimeout) throws IOException { HostProperties hostProps = new HostProperties(); - DatanodeAdminProperties[] all = - CombinedHostsFileReader.readFile(hostsFile); + DatanodeAdminProperties[] all = readTimeout != DFSConfigKeys.DFS_HOSTS_TIMEOUT_DEFAULT + ? 
CombinedHostsFileReader.readFileWithTimeout(hostsFile, readTimeout) + : CombinedHostsFileReader.readFile(hostsFile); for(DatanodeAdminProperties properties : all) { InetSocketAddress addr = parseEntry(hostsFile, properties.getHostName(), properties.getPort()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index e6dc8c5ba1ac4..3e362de198176 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -1131,7 +1131,14 @@ not permitted to connect to the namenode. The full pathname of the file must be specified. If the value is empty, no hosts are excluded. - + + + + dfs.hosts.timeout + 0 + Specifies a timeout (in milliseconds) for reading the dfs.hosts file. + A value of zero indicates no timeout to be set. + dfs.namenode.max.objects diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestCombinedHostsFileReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestCombinedHostsFileReader.java index cf021805518df..9c536dbf26a08 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestCombinedHostsFileReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestCombinedHostsFileReader.java @@ -19,14 +19,21 @@ import java.io.File; import java.io.FileWriter; +import java.io.IOException; +import java.util.concurrent.Callable; import org.apache.hadoop.hdfs.protocol.DatanodeAdminProperties; import org.apache.hadoop.test.GenericTestUtils; import org.junit.Before; import org.junit.After; import org.junit.Test; +import org.mockito.invocation.InvocationOnMock; +import org.mockito.stubbing.Answer; +import org.mockito.Mock; +import org.mockito.Mockito; import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.when; /** * Test for JSON based HostsFileReader. 
@@ -44,8 +51,12 @@ public class TestCombinedHostsFileReader { private final File legacyFile = new File(TESTCACHEDATADIR, "legacy.dfs.hosts.json"); + @Mock + private Callable callable; + @Before public void setUp() throws Exception { + callable = Mockito.mock(Callable.class); } @After @@ -87,4 +98,50 @@ public void testEmptyCombinedHostsFileReader() throws Exception { CombinedHostsFileReader.readFile(newFile.getAbsolutePath()); assertEquals(0, all.length); } + + /* + * When timeout is enabled, test for success when reading file within timeout + * limits + */ + @Test + public void testReadFileWithTimeoutSuccess() throws Exception { + + DatanodeAdminProperties[] all = CombinedHostsFileReader.readFileWithTimeout( + jsonFile.getAbsolutePath(), 1000); + assertEquals(7, all.length); + } + + /* + * When timeout is enabled, test for IOException when reading file exceeds + * timeout limits + */ + @Test(expected = IOException.class) + public void testReadFileWithTimeoutTimeoutException() throws Exception { + when(callable.call()).thenAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + Thread.sleep(2000); + return null; + } + }); + + CombinedHostsFileReader.readFileWithTimeout( + jsonFile.getAbsolutePath(), 1); + } + + /* + * When timeout is enabled, test for IOException when execution is interrupted + */ + @Test(expected = IOException.class) + public void testReadFileWithTimeoutInterruptedException() throws Exception { + when(callable.call()).thenAnswer(new Answer() { + @Override + public Void answer(InvocationOnMock invocation) throws Throwable { + throw new InterruptedException(); + } + }); + + CombinedHostsFileReader.readFileWithTimeout( + jsonFile.getAbsolutePath(), 1); + } } From 90024d8cb137e59be648da937fd0dbcff1704df9 Mon Sep 17 00:00:00 2001 From: Fateh Singh Date: Tue, 18 Jun 2024 05:52:24 -0700 Subject: [PATCH 036/113] HDFS-17439. Support -nonSuperUser for NNThroughputBenchmark: useful for testing auth frameworks such as Ranger (#6677) --- .../src/site/markdown/Benchmarking.md | 1 + .../namenode/NNThroughputBenchmark.java | 160 ++++++++++++------ .../namenode/TestNNThroughputBenchmark.java | 25 ++- 3 files changed, 134 insertions(+), 52 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Benchmarking.md b/hadoop-common-project/hadoop-common/src/site/markdown/Benchmarking.md index 2449ab5cdeda5..bac80dc856906 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Benchmarking.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Benchmarking.md @@ -46,6 +46,7 @@ The following are all supported command options: |`-logLevel` | Specify the logging level when the benchmark runs. The default logging level is ERROR. | |`-UGCacheRefreshCount` | After every specified number of operations, the benchmark purges the name-node's user group cache. By default the refresh is never called. | |`-keepResults` | If specified, do not clean up the name-space after execution. By default the name-space will be removed after test. | +|`-nonSuperUser` | If specified, non super user can use the tool and can be helpful for bringing authorization time into benchmarking calculations. 
| ##### Operations Supported diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java index a4e88d759fb4e..031a744f29fd9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java @@ -89,10 +89,10 @@ /** * Main class for a series of name-node benchmarks. - * + * * Each benchmark measures throughput and average execution time * of a specific name-node operation, e.g. file creation or block reports. - * + * * The benchmark does not involve any other hadoop components * except for the name-node. Each operation is executed * by calling directly the respective name-node method. @@ -107,7 +107,7 @@ public class NNThroughputBenchmark implements Tool { LoggerFactory.getLogger(NNThroughputBenchmark.class); private static final int BLOCK_SIZE = 16; private static final String GENERAL_OPTIONS_USAGE = - "[-keepResults] | [-logLevel L] | [-UGCacheRefreshCount G]"; + "[-keepResults] | [-logLevel L] | [-UGCacheRefreshCount G] [-nonSuperUser]"; static Configuration config; static NameNode nameNode; @@ -157,7 +157,7 @@ static void setNameNodeLoggingLevel(Level logLevel) { /** * Base class for collecting operation statistics. - * + * * Overload this class in order to run statistics for a * specific name-node operation. */ @@ -175,6 +175,7 @@ abstract class OperationStatsBase { protected long cumulativeTime = 0; // sum of times for each op protected long elapsedTime = 0; // time from start to finish protected boolean keepResults = false;// don't clean base directory on exit + protected boolean nonSuperUser = false; // enter/exit safe mode protected Level logLevel; // logging level, ERROR by default protected int ugcRefreshCount = 0; // user group cache refresh count @@ -187,7 +188,7 @@ abstract class OperationStatsBase { /** * Parse command line arguments. - * + * * @param args arguments * @throws IOException */ @@ -195,7 +196,7 @@ abstract class OperationStatsBase { /** * Generate inputs for each daemon thread. - * + * * @param opsPerThread number of inputs for each thread. * @throws IOException */ @@ -205,7 +206,7 @@ abstract class OperationStatsBase { * This corresponds to the arg1 argument of * {@link #executeOp(int, int, String)}, which can have different meanings * depending on the operation performed. - * + * * @param daemonId id of the daemon calling this method * @return the argument */ @@ -213,7 +214,7 @@ abstract class OperationStatsBase { /** * Execute name-node operation. - * + * * @param daemonId id of the daemon calling this method. * @param inputIdx serial index of the operation called by the deamon. * @param arg1 operation specific argument. 
@@ -247,7 +248,7 @@ void benchmark() throws IOException { return; int tIdx = 0; // thread index < nrThreads int opsPerThread[] = new int[numThreads]; - for(int opsScheduled = 0; opsScheduled < numOpsRequired; + for(int opsScheduled = 0; opsScheduled < numOpsRequired; opsScheduled += opsPerThread[tIdx++]) { // execute in a separate thread opsPerThread[tIdx] = (numOpsRequired-opsScheduled)/(numThreads-tIdx); @@ -285,14 +286,30 @@ private boolean isInProgress() { } void cleanUp() throws IOException { - clientProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, - false); + if (!nonSuperUser) { + try { + clientProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, + false); + } catch (Exception e){ + LOG.error("Potentially insufficient permission: try running the tool" + + "with -nonSuperUser argument or login as super user"); + throw e; + } + } if(!keepResults) clientProto.delete(getBaseDir(), true); else { - clientProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER, - true); - clientProto.saveNamespace(0, 0); + if (!nonSuperUser) { + try { + clientProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_ENTER, + true); + clientProto.saveNamespace(0, 0); + } catch (Exception e){ + LOG.error("Potentially insufficient permission: try running the tool" + + " with -nonSuperUser argument or login as super user"); + throw e; + } + } } } public String getBaseDirName() { @@ -341,7 +358,7 @@ void incrementStats(int ops, long time) { /** * Parse first 2 arguments, corresponding to the "-op" option. - * + * * @param args argument list * @return true if operation is all, which means that options not related * to this operation should be ignored, or false otherwise, meaning @@ -358,6 +375,12 @@ protected boolean verifyOpArgument(List args) { args.remove(krIndex); } + int nonSuperUserIndex = args.indexOf("-nonSuperUser"); + nonSuperUser = (nonSuperUserIndex >= 0); + if(nonSuperUser) { + args.remove(nonSuperUserIndex); + } + int llIndex = args.indexOf("-logLevel"); if(llIndex >= 0) { if(args.size() <= llIndex + 1) @@ -422,7 +445,7 @@ public void run() { try { benchmarkOne(); } catch(IOException ex) { - LOG.error("StatsDaemon " + daemonId + " failed: \n" + LOG.error("StatsDaemon " + daemonId + " failed: \n" + StringUtils.stringifyException(ex)); } } @@ -499,10 +522,18 @@ String getExecutionArgument(int daemonId) { * Remove entire benchmark directory. */ @Override - long executeOp(int daemonId, int inputIdx, String ignore) + long executeOp(int daemonId, int inputIdx, String ignore) throws IOException { - clientProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, - false); + if (!nonSuperUser) { + try{ + clientProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, + false); + } catch (Exception e){ + LOG.error("Potentially insufficient permission: try running the tool" + + " with -nonSuperUser argument or login as super user"); + throw e; + } + } long start = Time.now(); clientProto.delete(getBaseDirName(), true); long end = Time.now(); @@ -519,7 +550,7 @@ void printResults() { /** * File creation statistics. - * + * * Each thread creates the same (+ or -1) number of files. * File names are pre-generated during initialization. * The created files do not have blocks. 
@@ -578,9 +609,17 @@ void parseArguments(List args) { @Override void generateInputs(int[] opsPerThread) throws IOException { - assert opsPerThread.length == numThreads : "Error opsPerThread.length"; - clientProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, - false); + assert opsPerThread.length == numThreads : "Error opsPerThread.length"; + if (!nonSuperUser) { + try{ + clientProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, + false); + } catch (Exception e){ + LOG.error("Potentially insufficient permission: try running the tool" + + "with -nonSuperUser argument or login as super user"); + throw e; + } + } // int generatedFileIdx = 0; LOG.info("Generate " + numOpsRequired + " intputs for " + getOpName()); LOG.info("basedir: " + getBaseDir()); @@ -614,7 +653,7 @@ String getExecutionArgument(int daemonId) { * Do file create. */ @Override - long executeOp(int daemonId, int inputIdx, String clientName) + long executeOp(int daemonId, int inputIdx, String clientName) throws IOException { long start = Time.now(); clientProto.create(fileNames[daemonId][inputIdx], @@ -695,8 +734,16 @@ void parseArguments(List args) { @Override void generateInputs(int[] opsPerThread) throws IOException { assert opsPerThread.length == numThreads : "Error opsPerThread.length"; - clientProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, - false); + if (!nonSuperUser) { + try { + clientProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, + false); + } catch (Exception e){ + LOG.error("Potentially insufficient permission: try running the tool" + + " with -nonSuperUser argument or login as super user"); + throw e; + } + } LOG.info("Generate " + numOpsRequired + " inputs for " + getOpName()); dirPaths = new String[numThreads][]; try { @@ -750,14 +797,14 @@ void printResults() { /** * Open file statistics. - * - * Measure how many open calls (getBlockLocations()) + * + * Measure how many open calls (getBlockLocations()) * the name-node can handle per second. */ class OpenFileStats extends CreateFileStats { // Operation types static final String OP_OPEN_NAME = "open"; - static final String OP_USAGE_ARGS = + static final String OP_USAGE_ARGS = " [-threads T] [-files N] [-blockSize S] [-filesPerDir P]" + " [-useExisting] [-baseDirName D]"; static final String OP_OPEN_USAGE = @@ -796,13 +843,16 @@ void generateInputs(int[] opsPerThread) throws IOException { String.valueOf(nameGenerator.getFilesPerDirectory()), "-baseDirName", getBaseDirName(), "-close"}; - CreateFileStats opCreate = new CreateFileStats(Arrays.asList(createArgs)); - + List createArgsList = new ArrayList(Arrays.asList(createArgs)); + if (this.nonSuperUser){ + createArgsList.add("-nonSuperUser"); + } + CreateFileStats opCreate = new CreateFileStats(createArgsList); if(!useExisting) { // create files if they were not created before opCreate.benchmark(); LOG.info("Created " + numOpsRequired + " files."); } else { - LOG.info("useExisting = true. Assuming " + LOG.info("useExisting = true. Assuming " + numOpsRequired + " files have been created before."); } // use the same files for open @@ -820,7 +870,7 @@ void generateInputs(int[] opsPerThread) throws IOException { * Do file open. 
*/ @Override - long executeOp(int daemonId, int inputIdx, String ignore) + long executeOp(int daemonId, int inputIdx, String ignore) throws IOException { long start = Time.now(); clientProto.getBlockLocations(fileNames[daemonId][inputIdx], 0L, @@ -832,13 +882,13 @@ long executeOp(int daemonId, int inputIdx, String ignore) /** * Delete file statistics. - * + * * Measure how many delete calls the name-node can handle per second. */ class DeleteFileStats extends OpenFileStats { // Operation types static final String OP_DELETE_NAME = "delete"; - static final String OP_DELETE_USAGE = + static final String OP_DELETE_USAGE = "-op " + OP_DELETE_NAME + OP_USAGE_ARGS; DeleteFileStats(List args) { @@ -851,7 +901,7 @@ String getOpName() { } @Override - long executeOp(int daemonId, int inputIdx, String ignore) + long executeOp(int daemonId, int inputIdx, String ignore) throws IOException { long start = Time.now(); clientProto.delete(fileNames[daemonId][inputIdx], false); @@ -909,13 +959,13 @@ long executeOp(int daemonId, int inputIdx, String ignore) /** * List file status statistics. - * + * * Measure how many get-file-status calls the name-node can handle per second. */ class FileStatusStats extends OpenFileStats { // Operation types static final String OP_FILE_STATUS_NAME = "fileStatus"; - static final String OP_FILE_STATUS_USAGE = + static final String OP_FILE_STATUS_USAGE = "-op " + OP_FILE_STATUS_NAME + OP_USAGE_ARGS; FileStatusStats(List args) { @@ -928,7 +978,7 @@ String getOpName() { } @Override - long executeOp(int daemonId, int inputIdx, String ignore) + long executeOp(int daemonId, int inputIdx, String ignore) throws IOException { long start = Time.now(); clientProto.getFileInfo(fileNames[daemonId][inputIdx]); @@ -939,13 +989,13 @@ long executeOp(int daemonId, int inputIdx, String ignore) /** * Rename file statistics. - * + * * Measure how many rename calls the name-node can handle per second. */ class RenameFileStats extends OpenFileStats { // Operation types static final String OP_RENAME_NAME = "rename"; - static final String OP_RENAME_USAGE = + static final String OP_RENAME_USAGE = "-op " + OP_RENAME_NAME + OP_USAGE_ARGS; protected String[][] destNames; @@ -972,7 +1022,7 @@ void generateInputs(int[] opsPerThread) throws IOException { } @Override - long executeOp(int daemonId, int inputIdx, String ignore) + long executeOp(int daemonId, int inputIdx, String ignore) throws IOException { long start = Time.now(); clientProto.rename(fileNames[daemonId][inputIdx], @@ -988,10 +1038,10 @@ long executeOp(int daemonId, int inputIdx, String ignore) private static class TinyDatanode implements Comparable { private static final long DF_CAPACITY = 100*1024*1024; private static final long DF_USED = 0; - + NamespaceInfo nsInfo; DatanodeRegistration dnRegistration; - DatanodeStorage storage; //only one storage + DatanodeStorage storage; //only one storage final List blocks; int nrBlocks; // actual number of blocks BlockListAsLongs blockReportList; @@ -1124,7 +1174,7 @@ int replicateBlocks() throws IOException { * Just report on behalf of the other data-node * that the blocks have been received. */ - private int transferBlocks( Block blocks[], + private int transferBlocks( Block blocks[], DatanodeInfo xferTargets[][], String targetStorageIDs[][] ) throws IOException { @@ -1152,7 +1202,7 @@ private int transferBlocks( Block blocks[], /** * Block report statistics. - * + * * Each thread here represents its own data-node. * Data-nodes send the same block report each time. 
* The block report may contain missing or non-existing blocks. @@ -1224,7 +1274,7 @@ void parseArguments(List args) { @Override void generateInputs(int[] ignore) throws IOException { int nrDatanodes = getNumDatanodes(); - int nrBlocks = (int)Math.ceil((double)blocksPerReport * nrDatanodes + int nrBlocks = (int)Math.ceil((double)blocksPerReport * nrDatanodes / replication); int nrFiles = (int)Math.ceil((double)nrBlocks / blocksPerFile); datanodes = new TinyDatanode[nrDatanodes]; @@ -1235,13 +1285,21 @@ void generateInputs(int[] ignore) throws IOException { datanodes[idx].sendHeartbeat(); } - // create files + // create files LOG.info("Creating " + nrFiles + " files with " + blocksPerFile + " blocks each."); FileNameGenerator nameGenerator; nameGenerator = new FileNameGenerator(getBaseDir(), 100); String clientName = getClientName(007); - clientProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, - false); + if (!nonSuperUser) { + try { + clientProto.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, + false); + } catch (Exception e){ + LOG.error("Potentially insufficient permission: try running the tool" + + " with -nonSuperUser argument or login as super user"); + throw e; + } + } for(int idx=0; idx < nrFiles; idx++) { String fileName = nameGenerator.getNextFileName("ThroughputBench"); clientProto.create(fileName, FsPermission.getDefault(), clientName, @@ -1375,7 +1433,7 @@ void printResults() { */ class ReplicationStats extends OperationStatsBase { static final String OP_REPLICATION_NAME = "replication"; - static final String OP_REPLICATION_USAGE = + static final String OP_REPLICATION_USAGE = "-op replication [-datanodes T] [-nodesToDecommission D] " + "[-nodeReplicationLimit C] [-totalBlocks B] [-blockSize S] " + "[-replication R] [-baseDirName D]"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNThroughputBenchmark.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNThroughputBenchmark.java index a9836e0003595..bd19dc5cf563a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNThroughputBenchmark.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNThroughputBenchmark.java @@ -100,7 +100,30 @@ public void testNNThroughputAgainstRemoteNN() throws Exception { } } } - + /** + * This test runs {@link NNThroughputBenchmark} against a mini DFS cluster with + * nonSuperUser option (useful when testing any authorization framework e.g. + * Ranger since only super user e.g. hdfs can enter/exit safemode + * but any request from super user is not sent for authorization). + */ + @Test(timeout = 120000) + public void testNNThroughputAgainstRemoteNNNonSuperUser() throws Exception { + final Configuration conf = new HdfsConfiguration(); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 16); + MiniDFSCluster cluster = null; + try { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build(); + cluster.waitActive(); + final Configuration benchConf = new HdfsConfiguration(); + benchConf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 16); + FileSystem.setDefaultUri(benchConf, cluster.getURI()); + NNThroughputBenchmark.runBenchmark(benchConf, new String[]{"-op", "all", "-nonSuperUser"}); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } /** * This test runs {@link NNThroughputBenchmark} against a mini DFS cluster * with explicit -fs option. 
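Editor's note (not part of the patch): the new `-nonSuperUser` flag simply skips the safe-mode enter/exit and saveNamespace() calls that require HDFS super-user privileges, so the remaining namespace operations flow through the authorization framework under test. A minimal caller-side sketch follows; the NameNode URI, thread and file counts are illustrative placeholders, and NNThroughputBenchmark ships in the HDFS test sources, so the test artifact is assumed to be on the classpath.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.hdfs.HdfsConfiguration;
    import org.apache.hadoop.hdfs.server.namenode.NNThroughputBenchmark;

    public class NonSuperUserBenchmarkSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new HdfsConfiguration();
        // Point the benchmark at an existing NameNode; this URI is a placeholder.
        FileSystem.setDefaultUri(conf, "hdfs://namenode.example.com:8020");
        // -nonSuperUser: do not enter/leave safe mode or save the namespace,
        // so a non-privileged (e.g. Ranger-authorized) user can run the tool.
        NNThroughputBenchmark.runBenchmark(conf,
            new String[] {"-op", "create", "-threads", "4", "-files", "1000", "-nonSuperUser"});
      }
    }

The same flags apply when launching the benchmark from the command line, as documented in the Benchmarking.md table updated by this patch.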
From 9710a8d52f9449ac8f74ef82f512f12018503c19 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Wed, 19 Jun 2024 08:34:19 +0800 Subject: [PATCH 037/113] YARN-11701. [Federation] Enhance Federation Cache Clean Conditions. (#6889) Contributed by Shilun Fan. Reviewed-by: Ayush Saxena --- .../yarn/server/federation/cache/FederationCaffeineCache.java | 4 +++- .../yarn/server/federation/cache/FederationGuavaCache.java | 4 +++- .../hadoop/yarn/server/federation/cache/FederationJCache.java | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationCaffeineCache.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationCaffeineCache.java index cbf3e9db3db3d..96a630bae3875 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationCaffeineCache.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationCaffeineCache.java @@ -79,7 +79,9 @@ public void initCache(Configuration pConf, FederationStateStore pStateStore) { @Override public void clearCache() { - this.cache.cleanUp(); + if (this.cache != null) { + this.cache.cleanUp(); + } this.cache = null; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationGuavaCache.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationGuavaCache.java index 2ba9e2869fe8d..01f38343fba40 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationGuavaCache.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationGuavaCache.java @@ -76,7 +76,9 @@ public void initCache(Configuration pConf, FederationStateStore pStateStore) { @Override public void clearCache() { - cache.invalidateAll(); + if (this.cache != null) { + cache.invalidateAll(); + } cache = null; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationJCache.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationJCache.java index 07f300e65f6be..b31a24d2dda98 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationJCache.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/cache/FederationJCache.java @@ -91,7 +91,9 @@ public void initCache(Configuration pConf, FederationStateStore pStateStore) { @Override public void clearCache() { - this.cache.clear(); + if (this.cache != null) { + this.cache.clear(); + } this.cache = null; } From 1e6411c9ec610f406f71a8d9f54d32a400c26815 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze 
Date: Wed, 19 Jun 2024 11:38:17 +0800 Subject: [PATCH 038/113] HDFS-17528. FsImageValidation: set txid when saving a new image (#6828) --- .../apache/hadoop/hdfs/server/namenode/FSImage.java | 13 +++++++++---- .../hdfs/server/namenode/FsImageValidation.java | 7 +++++-- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java index f9b796551d682..fa321fe85e57b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImage.java @@ -977,12 +977,16 @@ private void loadFSImage(File curFile, MD5Hash expectedMd5, " but expecting " + expectedMd5); } - long txId = loader.getLoadedImageTxId(); + final long txId = setLastAppliedTxId(loader); LOG.info("Loaded image for txid " + txId + " from " + curFile); - lastAppliedTxId = txId; storage.setMostRecentCheckpointInfo(txId, curFile.lastModified()); } + synchronized long setLastAppliedTxId(FSImageFormat.LoaderDelegator loader) { + lastAppliedTxId = loader.getLoadedImageTxId(); + return lastAppliedTxId; + } + /** * Save the contents of the FS image to the file. */ @@ -1215,8 +1219,9 @@ public void removeFromCheckpointing(long txid) { } void save(FSNamesystem src, File dst) throws IOException { - final SaveNamespaceContext context = new SaveNamespaceContext(src, - getCorrectLastAppliedOrWrittenTxId(), new Canceler()); + final long txid = getCorrectLastAppliedOrWrittenTxId(); + LOG.info("save fsimage with txid={} to {}", txid, dst.getAbsolutePath()); + final SaveNamespaceContext context = new SaveNamespaceContext(src, txid, new Canceler()); final Storage.StorageDirectory storageDirectory = new Storage.StorageDirectory(dst); Files.createDirectories(storageDirectory.getCurrentDir().toPath()); new FSImageSaver(context, storageDirectory, NameNodeFile.IMAGE).run(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsImageValidation.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsImageValidation.java index 275be9050511a..4dac221e4d294 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsImageValidation.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FsImageValidation.java @@ -214,6 +214,7 @@ int run(Configuration conf, AtomicInteger errorCount) throws Exception { initConf(conf); // check INodeReference + NameNode.initMetrics(conf, HdfsServerConstants.NamenodeRole.NAMENODE); // to avoid NPE final FSNamesystem namesystem = checkINodeReference(conf, errorCount); // check INodeMap @@ -276,14 +277,16 @@ public void run() { namesystem.getFSDirectory().writeLock(); try { loader.load(fsImageFile, false); + fsImage.setLastAppliedTxId(loader); } finally { namesystem.getFSDirectory().writeUnlock(); namesystem.writeUnlock("loadImage"); } } t.cancel(); - Cli.println("Loaded %s %s successfully in %s", - FS_IMAGE, fsImageFile, StringUtils.formatTime(now() - loadStart)); + Cli.println("Loaded %s %s with txid %d successfully in %s", + FS_IMAGE, fsImageFile, namesystem.getFSImage().getLastAppliedTxId(), + StringUtils.formatTime(now() - loadStart)); return namesystem; } From 56c8aa5f1c4a0336f69083c742e2504ccc828d7d Mon Sep 17 00:00:00 2001 From: Steve 
Loughran Date: Wed, 19 Jun 2024 12:05:24 +0100 Subject: [PATCH 039/113] HADOOP-19204. VectorIO regression: empty ranges are now rejected (#6887) - restore old outcome: no-op - test this - update spec This is a critical fix for vector IO and MUST be cherrypicked to all branches with that feature Contributed by Steve Loughran --- .../java/org/apache/hadoop/fs/VectoredReadUtils.java | 9 ++++++++- .../src/site/markdown/filesystem/fsdatainputstream.md | 1 - .../fs/contract/AbstractContractVectoredReadTest.java | 11 +++++++++++ .../apache/hadoop/fs/impl/TestVectoredReadUtils.java | 7 +++---- 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java index 493b8c3a33d65..fa0440620a409 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java @@ -294,7 +294,14 @@ public static List validateAndSortRanges( final Optional fileLength) throws EOFException { requireNonNull(input, "Null input list"); - checkArgument(!input.isEmpty(), "Empty input list"); + + if (input.isEmpty()) { + // this may seem a pathological case, but it was valid + // before and somehow Spark can call it through parquet. + LOG.debug("Empty input list"); + return input; + } + final List sortedRanges; if (input.size() == 1) { diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md index 6cbb54ea70108..db844a94e39e8 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstream.md @@ -474,7 +474,6 @@ No empty lists. ```python if ranges = null raise NullPointerException -if ranges.len() = 0 raise IllegalArgumentException if allocate = null raise NullPointerException ``` diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java index d6a1fb1f0b7c4..aa478f3af63f7 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractVectoredReadTest.java @@ -340,6 +340,17 @@ public void testConsecutiveRanges() throws Exception { } } + @Test + public void testEmptyRanges() throws Exception { + List fileRanges = new ArrayList<>(); + try (FSDataInputStream in = openVectorFile()) { + in.readVectored(fileRanges, allocate); + Assertions.assertThat(fileRanges) + .describedAs("Empty ranges must stay empty") + .isEmpty(); + } + } + /** * Test to validate EOF ranges. *

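Editor's note (not part of the patch): to make the restored contract concrete, a minimal caller-side sketch is shown below; the file path is a placeholder. With this fix an empty range list passed to readVectored() is a no-op again rather than raising IllegalArgumentException, which is the behaviour Parquet (called through Spark) was relying on.

    import java.nio.ByteBuffer;
    import java.util.ArrayList;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileRange;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class EmptyVectoredReadSketch {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        try (FSDataInputStream in = fs.open(new Path("/tmp/example.dat"))) {
          List<FileRange> ranges = new ArrayList<>();   // deliberately empty
          // With this fix the call returns immediately: no exception is raised
          // and no reads are scheduled.
          in.readVectored(ranges, ByteBuffer::allocate);
        }
      }
    }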
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java index 2a290058cae2b..3fd3fe4d1f451 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java @@ -702,12 +702,11 @@ private static Stream mockStreamWithReadFully() throws IOException { } /** - * Empty ranges cannot be sorted. + * Empty ranges are allowed. */ @Test - public void testEmptyRangesRaisesIllegalArgument() throws Throwable { - intercept(IllegalArgumentException.class, - () -> validateAndSortRanges(Collections.emptyList(), Optional.empty())); + public void testEmptyRangesAllowed() throws Throwable { + validateAndSortRanges(Collections.emptyList(), Optional.empty()); } /** From 6545b7eeef1ccbba526c771a6ab7da2f374a1e0b Mon Sep 17 00:00:00 2001 From: Hexiaoqiao Date: Wed, 19 Jun 2024 20:58:57 +0800 Subject: [PATCH 040/113] HDFS-17098. DatanodeManager does not handle null storage type properly. (#6840). Contributed by ConfX. Signed-off-by: Shilun Fan --- .../hdfs/server/blockmanagement/DatanodeManager.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index b3eb2fd7f97ea..a46a2ce15c660 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -678,7 +678,15 @@ private Consumer> createSecondaryNodeSorter() { Consumer> secondarySort = null; if (readConsiderStorageType) { Comparator comp = - Comparator.comparing(DatanodeInfoWithStorage::getStorageType); + Comparator.comparing(DatanodeInfoWithStorage::getStorageType, (s1, s2) -> { + if (s1 == null) { + return (s2 == null) ? 0 : -1; + } else if (s2 == null) { + return 1; + } else { + return s2.compareTo(s1); + } + }); secondarySort = list -> Collections.sort(list, comp); } if (readConsiderLoad) { From 8ac9c1839acba61d73fb2c9109e3c5b8cefb33c0 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 19 Jun 2024 18:47:29 +0100 Subject: [PATCH 041/113] HADOOP-19203. WrappedIO BulkDelete API to raise IOEs as UncheckedIOExceptions (#6885) * WrappedIO methods raise UncheckedIOExceptions *New class org.apache.hadoop.util.functional.FunctionalIO with wrap/unwrap and the ability to generate a java.util.function.Supplier around a CallableRaisingIOE. 
Contributed by Steve Loughran --- .../apache/hadoop/io/wrappedio/WrappedIO.java | 37 ++++--- .../functional/CommonCallableSupplier.java | 5 +- .../hadoop/util/functional/FunctionalIO.java | 99 +++++++++++++++++++ .../util/functional/TestFunctionalIO.java | 97 ++++++++++++++++++ 4 files changed, 221 insertions(+), 17 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionalIO.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestFunctionalIO.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java index 286557c2c378c..d6fe311fba866 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java @@ -18,7 +18,7 @@ package org.apache.hadoop.io.wrappedio; -import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Collection; import java.util.List; import java.util.Map; @@ -29,17 +29,19 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import static org.apache.hadoop.util.functional.FunctionalIO.uncheckIOExceptions; + /** * Reflection-friendly access to APIs which are not available in * some of the older Hadoop versions which libraries still * compile against. *

* The intent is to avoid the need for complex reflection operations - * including wrapping of parameter classes, direct instatiation of + * including wrapping of parameter classes, direct instantiation of * new classes etc. */ @InterfaceAudience.Public -@InterfaceStability.Evolving +@InterfaceStability.Unstable public final class WrappedIO { private WrappedIO() { @@ -52,12 +54,15 @@ private WrappedIO() { * @return a number greater than or equal to zero. * @throws UnsupportedOperationException bulk delete under that path is not supported. * @throws IllegalArgumentException path not valid. - * @throws IOException problems resolving paths + * @throws UncheckedIOException if an IOE was raised. */ - public static int bulkDelete_pageSize(FileSystem fs, Path path) throws IOException { - try (BulkDelete bulk = fs.createBulkDelete(path)) { - return bulk.pageSize(); - } + public static int bulkDelete_pageSize(FileSystem fs, Path path) { + + return uncheckIOExceptions(() -> { + try (BulkDelete bulk = fs.createBulkDelete(path)) { + return bulk.pageSize(); + } + }); } /** @@ -79,15 +84,17 @@ public static int bulkDelete_pageSize(FileSystem fs, Path path) throws IOExcepti * @param paths list of paths which must be absolute and under the base path. * @return a list of all the paths which couldn't be deleted for a reason other than "not found" and any associated error message. * @throws UnsupportedOperationException bulk delete under that path is not supported. - * @throws IOException IO problems including networking, authentication and more. + * @throws UncheckedIOException if an IOE was raised. * @throws IllegalArgumentException if a path argument is invalid. */ public static List> bulkDelete_delete(FileSystem fs, - Path base, - Collection paths) - throws IOException { - try (BulkDelete bulk = fs.createBulkDelete(base)) { - return bulk.bulkDelete(paths); - } + Path base, + Collection paths) { + + return uncheckIOExceptions(() -> { + try (BulkDelete bulk = fs.createBulkDelete(base)) { + return bulk.bulkDelete(paths); + } + }); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CommonCallableSupplier.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CommonCallableSupplier.java index 67299ef96aec6..7a3193efbf0d7 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CommonCallableSupplier.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CommonCallableSupplier.java @@ -41,7 +41,7 @@ * raised by the callable and wrapping them as appropriate. * @param return type. 
*/ -public final class CommonCallableSupplier implements Supplier { +public final class CommonCallableSupplier implements Supplier { private static final Logger LOG = LoggerFactory.getLogger(CommonCallableSupplier.class); @@ -57,7 +57,7 @@ public CommonCallableSupplier(final Callable call) { } @Override - public Object get() { + public T get() { try { return call.call(); } catch (RuntimeException e) { @@ -155,4 +155,5 @@ public static void maybeAwaitCompletion( waitForCompletion(future); } } + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionalIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionalIO.java new file mode 100644 index 0000000000000..6bc4a7103022d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionalIO.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.function.Supplier; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Functional utilities for IO operations. + */ +@InterfaceAudience.Private +public final class FunctionalIO { + + private FunctionalIO() { + } + + /** + * Invoke any operation, wrapping IOExceptions with + * {@code UncheckedIOException}. + * @param call callable + * @param type of result + * @return result + * @throws UncheckedIOException if an IOE was raised. + */ + public static T uncheckIOExceptions(CallableRaisingIOE call) { + try { + return call.apply(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + /** + * Wrap a {@link CallableRaisingIOE} as a {@link Supplier}. + * This is similar to {@link CommonCallableSupplier}, except that + * only IOExceptions are caught and wrapped; all other exceptions are + * propagated unchanged. + * @param type of result + */ + private static final class UncheckedIOExceptionSupplier implements Supplier { + + private final CallableRaisingIOE call; + + private UncheckedIOExceptionSupplier(CallableRaisingIOE call) { + this.call = call; + } + + @Override + public T get() { + return uncheckIOExceptions(call); + } + } + + /** + * Wrap a {@link CallableRaisingIOE} as a {@link Supplier}. + * @param call call to wrap + * @param type of result + * @return a supplier which invokes the call. + */ + public static Supplier toUncheckedIOExceptionSupplier(CallableRaisingIOE call) { + return new UncheckedIOExceptionSupplier<>(call); + } + + /** + * Invoke the supplier, catching any {@code UncheckedIOException} raised, + * extracting the inner IOException and rethrowing it. 
+ * @param call call to invoke + * @param type of result + * @return result + * @throws IOException if the call raised an IOException wrapped by an UncheckedIOException. + */ + public static T extractIOExceptions(Supplier call) throws IOException { + try { + return call.get(); + } catch (UncheckedIOException e) { + throw e.getCause(); + } + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestFunctionalIO.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestFunctionalIO.java new file mode 100644 index 0000000000000..25bdab8ea3203 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestFunctionalIO.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; +import java.io.UncheckedIOException; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.util.functional.FunctionalIO.extractIOExceptions; +import static org.apache.hadoop.util.functional.FunctionalIO.toUncheckedIOExceptionSupplier; +import static org.apache.hadoop.util.functional.FunctionalIO.uncheckIOExceptions; + +/** + * Test the functional IO class. + */ +public class TestFunctionalIO extends AbstractHadoopTestBase { + + /** + * Verify that IOEs are caught and wrapped. + */ + @Test + public void testUncheckIOExceptions() throws Throwable { + final IOException raised = new IOException("text"); + final UncheckedIOException ex = intercept(UncheckedIOException.class, "text", () -> + uncheckIOExceptions(() -> { + throw raised; + })); + Assertions.assertThat(ex.getCause()) + .describedAs("Cause of %s", ex) + .isSameAs(raised); + } + + /** + * Verify that UncheckedIOEs are not double wrapped. + */ + @Test + public void testUncheckIOExceptionsUnchecked() throws Throwable { + final UncheckedIOException raised = new UncheckedIOException( + new IOException("text")); + final UncheckedIOException ex = intercept(UncheckedIOException.class, "text", () -> + uncheckIOExceptions(() -> { + throw raised; + })); + Assertions.assertThat(ex) + .describedAs("Propagated Exception %s", ex) + .isSameAs(raised); + } + + /** + * Supplier will also wrap IOEs. + */ + @Test + public void testUncheckedSupplier() throws Throwable { + intercept(UncheckedIOException.class, "text", () -> + toUncheckedIOExceptionSupplier(() -> { + throw new IOException("text"); + }).get()); + } + + /** + * The wrap/unwrap code which will be used to invoke operations + * through reflection. 
+ */ + @Test + public void testUncheckAndExtract() throws Throwable { + final IOException raised = new IOException("text"); + final IOException ex = intercept(IOException.class, "text", () -> + extractIOExceptions(toUncheckedIOExceptionSupplier(() -> { + throw raised; + }))); + Assertions.assertThat(ex) + .describedAs("Propagated Exception %s", ex) + .isSameAs(raised); + } + +} From d3b98cb1b23841a57b966c5cedab312687f098cb Mon Sep 17 00:00:00 2001 From: HarshitGupta11 <50410275+HarshitGupta11@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:11:11 +0530 Subject: [PATCH 042/113] HADOOP-19194:Add test to find unshaded dependencies in the aws sdk (#6865) The new test TestAWSV2SDK scans the aws sdk bundle.jar and prints out all classes which are unshaded, so at risk of creating classpath problems It does not fail the test if this holds, because the current SDKs do ship with unshaded classes; the test would always fail. The SDK upgrade process should include inspecting the output of this test to see if it has got worse (do a before/after check). Once the AWS SDK does shade everything, we can have this test fail on any regression Contributed by Harshit Gupta --- .../site/markdown/tools/hadoop-aws/testing.md | 1 + .../apache/hadoop/fs/sdk/TestAWSV2SDK.java | 94 +++++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/sdk/TestAWSV2SDK.java diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md index 45d1c8476578c..7222eee98baeb 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md @@ -1184,6 +1184,7 @@ your IDE or via maven. 1. Run a full AWS-test suite with S3 client-side encryption enabled by setting `fs.s3a.encryption.algorithm` to 'CSE-KMS' and setting up AWS-KMS Key ID in `fs.s3a.encryption.key`. +2. Verify that the output of test `TestAWSV2SDK` doesn't contain any unshaded classes. The dependency chain of the `hadoop-aws` module should be similar to this, albeit with different version numbers: diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/sdk/TestAWSV2SDK.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/sdk/TestAWSV2SDK.java new file mode 100644 index 0000000000000..fca9fcc300cbd --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/sdk/TestAWSV2SDK.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.sdk; + +import java.io.File; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.List; +import java.util.jar.JarEntry; +import java.util.jar.JarFile; + +import org.junit.Test; +import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Tests to verify AWS SDK based issues like duplicated shaded classes and others. + */ +public class TestAWSV2SDK extends AbstractHadoopTestBase { + + private static final Logger LOG = LoggerFactory.getLogger(TestAWSV2SDK.class.getName()); + + @Test + public void testShadedClasses() throws IOException { + String allClassPath = System.getProperty("java.class.path"); + LOG.debug("Current classpath:{}", allClassPath); + String[] classPaths = allClassPath.split(File.pathSeparator); + String v2ClassPath = null; + for (String classPath : classPaths) { + //Checking for only version 2.x sdk here + if (classPath.contains("awssdk/bundle/2")) { + v2ClassPath = classPath; + break; + } + } + LOG.debug("AWS SDK V2 Classpath:{}", v2ClassPath); + assertThat(v2ClassPath) + .as("AWS V2 SDK should be present on the classpath").isNotNull(); + List listOfV2SdkClasses = getClassNamesFromJarFile(v2ClassPath); + String awsSdkPrefix = "software/amazon/awssdk"; + List unshadedClasses = new ArrayList<>(); + for (String awsSdkClass : listOfV2SdkClasses) { + if (!awsSdkClass.startsWith(awsSdkPrefix)) { + unshadedClasses.add(awsSdkClass); + } + } + if (!unshadedClasses.isEmpty()) { + LOG.warn("Unshaded Classes Found :{}", unshadedClasses.size()); + LOG.warn("List of unshaded classes:{}", unshadedClasses); + } else { + LOG.info("No Unshaded classes found in the sdk."); + } + } + + /** + * Returns the list of classes in a jar file. 
+ * @param jarFilePath: the location of the jar file from absolute path + * @return a list of classes contained by the jar file + * @throws IOException if the file is not present or the path is not readable + */ + private List getClassNamesFromJarFile(String jarFilePath) throws IOException { + List classNames = new ArrayList<>(); + try (JarFile jarFile = new JarFile(new File(jarFilePath))) { + Enumeration jarEntryEnumeration = jarFile.entries(); + while (jarEntryEnumeration.hasMoreElements()) { + JarEntry jarEntry = jarEntryEnumeration.nextElement(); + if (jarEntry.getName().endsWith(".class")) { + classNames.add(jarEntry.getName()); + } + } + } + return classNames; + } +} From b4ddb2d3bba9d139960a0053acec9cbe41ae9737 Mon Sep 17 00:00:00 2001 From: Yu Zhang Date: Mon, 24 Jun 2024 09:34:52 -0700 Subject: [PATCH 043/113] HDFS-13603: do not propagate ExecutionException and add maxRetries limit to NameNode edek cache warmup (#6774) --- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 3 + .../namenode/FSDirEncryptionZoneOp.java | 50 ++++++++-------- .../hdfs/server/namenode/FSNamesystem.java | 9 ++- .../src/main/resources/hdfs-default.xml | 8 +++ .../namenode/TestFSDirEncryptionZoneOp.java | 59 +++++++++++++++++++ 5 files changed, 102 insertions(+), 27 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirEncryptionZoneOp.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 7d136c5c83751..b9f8e07f67a5f 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -1422,6 +1422,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final int DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_DEFAULT = 1000; public static final String DFS_NAMENODE_EDEKCACHELOADER_INITIAL_DELAY_MS_KEY = "dfs.namenode.edekcacheloader.initial.delay.ms"; public static final int DFS_NAMENODE_EDEKCACHELOADER_INITIAL_DELAY_MS_DEFAULT = 3000; + public static final String DFS_NAMENODE_EDEKCACHELOADER_MAX_RETRIES_KEY = + "dfs.namenode.edekcacheloader.max-retries"; + public static final int DFS_NAMENODE_EDEKCACHELOADER_MAX_RETRIES_DEFAULT = 10; public static final String DFS_NAMENODE_REENCRYPT_SLEEP_INTERVAL_KEY = "dfs.namenode.reencrypt.sleep.interval"; public static final String DFS_NAMENODE_REENCRYPT_SLEEP_INTERVAL_DEFAULT = "1m"; public static final String DFS_NAMENODE_REENCRYPT_BATCH_SIZE_KEY = "dfs.namenode.reencrypt.batch.size"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java index 2110a408b0877..34c4216bdce40 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirEncryptionZoneOp.java @@ -533,16 +533,16 @@ static boolean isInAnEZ(final FSDirectory fsd, final INodesInPath iip) } /** - * Proactively warm up the edek cache. We'll get all the edek key names, - * then launch up a separate thread to warm them up. + * Best-effort attempt to proactively warm up the edek cache. 
We'll get all the edek key names, + * then launch up a separate thread to warm them up. Retries happen if any of keys fail to warm up. */ static void warmUpEdekCache(final ExecutorService executor, - final FSDirectory fsd, final int delay, final int interval) { + final FSDirectory fsd, final int delay, final int interval, final int maxRetries) { fsd.readLock(); try { String[] edeks = fsd.ezManager.getKeyNames(); executor.execute( - new EDEKCacheLoader(edeks, fsd.getProvider(), delay, interval)); + new EDEKCacheLoader(edeks, fsd.getProvider(), delay, interval, maxRetries)); } finally { fsd.readUnlock(); } @@ -557,19 +557,22 @@ static class EDEKCacheLoader implements Runnable { private final KeyProviderCryptoExtension kp; private int initialDelay; private int retryInterval; + private int maxRetries; EDEKCacheLoader(final String[] names, final KeyProviderCryptoExtension kp, - final int delay, final int interval) { + final int delay, final int interval, final int maxRetries) { this.keyNames = names; this.kp = kp; this.initialDelay = delay; this.retryInterval = interval; + this.maxRetries = maxRetries; } @Override public void run() { NameNode.LOG.info("Warming up {} EDEKs... (initialDelay={}, " - + "retryInterval={})", keyNames.length, initialDelay, retryInterval); + + "retryInterval={}, maxRetries={})", keyNames.length, initialDelay, retryInterval, + maxRetries); try { Thread.sleep(initialDelay); } catch (InterruptedException ie) { @@ -577,42 +580,39 @@ public void run() { return; } - final int logCoolDown = 10000; // periodically print error log (if any) - int sinceLastLog = logCoolDown; // always print the first failure boolean success = false; + int retryCount = 0; IOException lastSeenIOE = null; long warmUpEDEKStartTime = monotonicNow(); - while (true) { + + while (!success && retryCount < maxRetries) { try { kp.warmUpEncryptedKeys(keyNames); - NameNode.LOG - .info("Successfully warmed up {} EDEKs.", keyNames.length); + NameNode.LOG.info("Successfully warmed up {} EDEKs.", keyNames.length); success = true; - break; } catch (IOException ioe) { lastSeenIOE = ioe; - if (sinceLastLog >= logCoolDown) { - NameNode.LOG.info("Failed to warm up EDEKs.", ioe); - sinceLastLog = 0; - } else { - NameNode.LOG.debug("Failed to warm up EDEKs.", ioe); - } + NameNode.LOG.info("Failed to warm up EDEKs.", ioe); } catch (Exception e) { NameNode.LOG.error("Cannot warm up EDEKs.", e); throw e; } - try { - Thread.sleep(retryInterval); - } catch (InterruptedException ie) { - NameNode.LOG.info("EDEKCacheLoader interrupted during retry."); - break; + + if (!success) { + try { + Thread.sleep(retryInterval); + } catch (InterruptedException ie) { + NameNode.LOG.info("EDEKCacheLoader interrupted during retry."); + break; + } + retryCount++; } - sinceLastLog += retryInterval; } + long warmUpEDEKTime = monotonicNow() - warmUpEDEKStartTime; NameNode.getNameNodeMetrics().addWarmUpEDEKTime(warmUpEDEKTime); if (!success) { - NameNode.LOG.warn("Unable to warm up EDEKs."); + NameNode.LOG.warn("Max retry {} reached, unable to warm up EDEKs.", maxRetries); if (lastSeenIOE != null) { NameNode.LOG.warn("Last seen exception:", lastSeenIOE); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 6e5117f4db5da..edc790dbc309a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -579,6 +579,7 @@ private boolean isFromProxyUser(CallerContext ctx) { private ExecutorService edekCacheLoader = null; private final int edekCacheLoaderDelay; private final int edekCacheLoaderInterval; + private final int edekCacheLoaderMaxRetries; /** * When an active namenode will roll its own edit log, in # edits @@ -1012,6 +1013,9 @@ static FSNamesystem loadFromDisk(Configuration conf) throws IOException { this.edekCacheLoaderInterval = conf.getInt( DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_KEY, DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_INTERVAL_MS_DEFAULT); + this.edekCacheLoaderMaxRetries = conf.getInt( + DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_MAX_RETRIES_KEY, + DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_MAX_RETRIES_DEFAULT); this.leaseRecheckIntervalMs = conf.getLong( DFS_NAMENODE_LEASE_RECHECK_INTERVAL_MS_KEY, @@ -1470,8 +1474,9 @@ void startActiveServices() throws IOException { new ThreadFactoryBuilder().setDaemon(true) .setNameFormat("Warm Up EDEK Cache Thread #%d") .build()); - FSDirEncryptionZoneOp.warmUpEdekCache(edekCacheLoader, dir, - edekCacheLoaderDelay, edekCacheLoaderInterval); + FSDirEncryptionZoneOp + .warmUpEdekCache(edekCacheLoader, dir, edekCacheLoaderDelay, edekCacheLoaderInterval, + edekCacheLoaderMaxRetries); } if (blockManager.getSPSManager() != null) { blockManager.getSPSManager().start(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 3e362de198176..94c3ea0cc9b0c 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -3614,6 +3614,14 @@ + + dfs.namenode.edekcacheloader.max-retries + 10 + When KeyProvider is configured, the max retries allowed to attempt + warm up edek cache if none of key successful on NN start up / become active. + + + dfs.namenode.reencrypt.sleep.interval 1m diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirEncryptionZoneOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirEncryptionZoneOp.java new file mode 100644 index 0000000000000..2fa6a33f0b232 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSDirEncryptionZoneOp.java @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
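For reference, the retry cap introduced by this change is an ordinary NameNode configuration knob; a hypothetical override (the class name is a placeholder) could look like:

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

public class EdekWarmupRetrySketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // lower the EDEK cache warm-up retry limit from the default of 10 to 5
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_MAX_RETRIES_KEY, 5);
    System.out.println(conf.getInt(
        DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_MAX_RETRIES_KEY,
        DFSConfigKeys.DFS_NAMENODE_EDEKCACHELOADER_MAX_RETRIES_DEFAULT));
  }
}
```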
+ */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; + +import org.junit.Test; + +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; + +public class TestFSDirEncryptionZoneOp { + + @Test + public void testWarmUpEdekCacheRetries() throws IOException { + NameNode.initMetrics(new Configuration(), NamenodeRole.NAMENODE); + + final int initialDelay = 100; + final int retryInterval = 100; + final int maxRetries = 2; + + KeyProviderCryptoExtension kpMock = mock(KeyProviderCryptoExtension.class); + + doThrow(new IOException()) + .doThrow(new IOException()) + .doAnswer(invocation -> null) + .when(kpMock).warmUpEncryptedKeys(any()); + + FSDirEncryptionZoneOp.EDEKCacheLoader loader = + new FSDirEncryptionZoneOp.EDEKCacheLoader(new String[] {"edek1", "edek2"}, kpMock, + initialDelay, retryInterval, maxRetries); + + loader.run(); + + verify(kpMock, times(maxRetries)).warmUpEncryptedKeys(any()); + } +} \ No newline at end of file From 134dcf166f51a3bd47923f3a0fbad7954135cb6d Mon Sep 17 00:00:00 2001 From: K0K0V0K <109747532+K0K0V0K@users.noreply.github.com> Date: Thu, 27 Jun 2024 16:21:28 +0200 Subject: [PATCH 044/113] YARN-11703. Validate accessibility of Node Manager working directories (#6903) --- .../hadoop/yarn/conf/YarnConfiguration.java | 19 ++- .../src/main/resources/yarn-default.xml | 6 + .../nodemanager/DirectoryCollection.java | 153 +++++++++++------- .../nodemanager/TestDirectoryCollection.java | 35 +++- 4 files changed, 140 insertions(+), 73 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 7747d4cb73410..9503d47537706 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -2157,16 +2157,19 @@ public static boolean isAclEnabled(Configuration conf) { public static final String NM_MIN_PER_DISK_FREE_SPACE_MB = NM_DISK_HEALTH_CHECK_PREFIX + "min-free-space-per-disk-mb"; + /** + * By default, all the disk can be used before it is marked as offline. + */ + public static final long DEFAULT_NM_MIN_PER_DISK_FREE_SPACE_MB = 0; + /** * Enable/Disable the minimum disk free * space threshold for disk health checker. */ public static final String NM_DISK_FREE_SPACE_THRESHOLD_ENABLED = - NM_DISK_HEALTH_CHECK_PREFIX + - "disk-free-space-threshold.enabled"; + NM_DISK_HEALTH_CHECK_PREFIX + "disk-free-space-threshold.enabled"; - public static final boolean - DEFAULT_NM_DISK_FREE_SPACE_THRESHOLD_ENABLED = true; + public static final boolean DEFAULT_NM_DISK_FREE_SPACE_THRESHOLD_ENABLED = true; /** * The minimum space that must be available on an offline @@ -2180,9 +2183,13 @@ public static boolean isAclEnabled(Configuration conf) { NM_DISK_HEALTH_CHECK_PREFIX + "min-free-space-per-disk-watermark-high-mb"; /** - * By default, all of the disk can be used before it is marked as offline. 
+ * Validate content of the node manager directories can be accessed. */ - public static final long DEFAULT_NM_MIN_PER_DISK_FREE_SPACE_MB = 0; + public static final String NM_WORKING_DIR_CONTENT_ACCESSIBILITY_VALIDATION_ENABLED = + NM_DISK_HEALTH_CHECK_PREFIX + "working-dir-content-accessibility-validation.enabled"; + + public static final boolean DEFAULT_NM_WORKING_DIR_CONTENT_ACCESSIBILITY_VALIDATION_ENABLED = + true; /** The health checker scripts. */ public static final String NM_HEALTH_CHECK_SCRIPTS = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 927d0c1aa41e0..ac976b7472d3e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1995,6 +1995,12 @@ true + + Validate content of the node manager directories can be accessed + yarn.nodemanager.disk-health-checker.working-dir-content-accessibility-validation.enabled + true + + The maximum percentage of disk space utilization allowed after which a disk is marked as bad. Values can range from 0.0 to 100.0. diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java index 8ecaa6d959049..a5657ab48b440 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java @@ -21,6 +21,8 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -28,22 +30,27 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock; import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock; +import java.util.stream.Collectors; +import java.util.stream.Stream; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileContext; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.DiskChecker; import org.apache.hadoop.util.DiskValidator; import org.apache.hadoop.util.DiskValidatorFactory; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; @@ -62,6 +69,7 @@ public class DirectoryCollection { private boolean diskUtilizationThresholdEnabled; private boolean diskFreeSpaceThresholdEnabled; + private boolean subAccessibilityValidationEnabled; /** * The enum 
defines disk failure type. */ @@ -242,16 +250,15 @@ public DirectoryCollection(String[] dirs, throw new YarnRuntimeException(e); } - diskUtilizationThresholdEnabled = conf. - getBoolean(YarnConfiguration. - NM_DISK_UTILIZATION_THRESHOLD_ENABLED, - YarnConfiguration. - DEFAULT_NM_DISK_UTILIZATION_THRESHOLD_ENABLED); - diskFreeSpaceThresholdEnabled = conf. - getBoolean(YarnConfiguration. - NM_DISK_FREE_SPACE_THRESHOLD_ENABLED, - YarnConfiguration. - DEFAULT_NM_DISK_FREE_SPACE_THRESHOLD_ENABLED); + diskUtilizationThresholdEnabled = conf.getBoolean( + YarnConfiguration.NM_DISK_UTILIZATION_THRESHOLD_ENABLED, + YarnConfiguration.DEFAULT_NM_DISK_UTILIZATION_THRESHOLD_ENABLED); + diskFreeSpaceThresholdEnabled = conf.getBoolean( + YarnConfiguration.NM_DISK_FREE_SPACE_THRESHOLD_ENABLED, + YarnConfiguration.DEFAULT_NM_DISK_FREE_SPACE_THRESHOLD_ENABLED); + subAccessibilityValidationEnabled = conf.getBoolean( + YarnConfiguration.NM_WORKING_DIR_CONTENT_ACCESSIBILITY_VALIDATION_ENABLED, + YarnConfiguration.DEFAULT_NM_WORKING_DIR_CONTENT_ACCESSIBILITY_VALIDATION_ENABLED); localDirs = new ArrayList<>(Arrays.asList(dirs)); errorDirs = new ArrayList<>(); @@ -448,8 +455,7 @@ boolean checkDirs() { // move testDirs out of any lock as it could wait for very long time in // case of busy IO - Map dirsFailedCheck = testDirs(allLocalDirs, - preCheckGoodDirs); + Map dirsFailedCheck = testDirs(allLocalDirs, preCheckGoodDirs); this.writeLock.lock(); try { @@ -521,60 +527,89 @@ boolean checkDirs() { } } - Map testDirs(List dirs, - Set goodDirs) { - HashMap ret = - new HashMap(); - for (final String dir : dirs) { - String msg; - try { - File testDir = new File(dir); - diskValidator.checkStatus(testDir); - float diskUtilizationPercentageCutoff = goodDirs.contains(dir) ? - diskUtilizationPercentageCutoffHigh : diskUtilizationPercentageCutoffLow; - long diskFreeSpaceCutoff = goodDirs.contains(dir) ? 
- diskFreeSpaceCutoffLow : diskFreeSpaceCutoffHigh; - - if (diskUtilizationThresholdEnabled - && isDiskUsageOverPercentageLimit(testDir, - diskUtilizationPercentageCutoff)) { - msg = - "used space above threshold of " - + diskUtilizationPercentageCutoff - + "%"; - ret.put(dir, - new DiskErrorInformation(DiskErrorCause.DISK_FULL, msg)); - continue; - } else if (diskFreeSpaceThresholdEnabled - && isDiskFreeSpaceUnderLimit(testDir, diskFreeSpaceCutoff)) { - msg = - "free space below limit of " + diskFreeSpaceCutoff - + "MB"; - ret.put(dir, - new DiskErrorInformation(DiskErrorCause.DISK_FULL, msg)); - continue; - } - } catch (IOException ie) { - ret.put(dir, - new DiskErrorInformation(DiskErrorCause.OTHER, ie.getMessage())); - } + Map testDirs(List dirs, Set goodDirs) { + final Map ret = new HashMap<>(0); + for (String dir : dirs) { + LOG.debug("Start testing dir accessibility: {}", dir); + File testDir = new File(dir); + boolean goodDir = goodDirs.contains(dir); + Stream.of( + validateDisk(testDir), + validateUsageOverPercentageLimit(testDir, goodDir), + validateDiskFreeSpaceUnderLimit(testDir, goodDir), + validateSubsAccessibility(testDir) + ) + .filter(Objects::nonNull) + .findFirst() + .ifPresent(diskErrorInformation -> ret.put(dir, diskErrorInformation)); } return ret; } - private boolean isDiskUsageOverPercentageLimit(File dir, - float diskUtilizationPercentageCutoff) { - float freePercentage = - 100 * (dir.getUsableSpace() / (float) dir.getTotalSpace()); + private DiskErrorInformation validateDisk(File dir) { + try { + diskValidator.checkStatus(dir); + LOG.debug("Dir {} pass throw the disk validation", dir); + return null; + } catch (IOException | UncheckedIOException | SecurityException e) { + return new DiskErrorInformation(DiskErrorCause.OTHER, e.getMessage()); + } + } + + private DiskErrorInformation validateUsageOverPercentageLimit(File dir, boolean isGoodDir) { + if (!diskUtilizationThresholdEnabled) { + return null; + } + float diskUtilizationPercentageCutoff = isGoodDir + ? diskUtilizationPercentageCutoffHigh + : diskUtilizationPercentageCutoffLow; + float freePercentage = 100 * (dir.getUsableSpace() / (float) dir.getTotalSpace()); float usedPercentage = 100.0F - freePercentage; - return (usedPercentage > diskUtilizationPercentageCutoff - || usedPercentage >= 100.0F); + if (usedPercentage > diskUtilizationPercentageCutoff || usedPercentage >= 100.0F) { + return new DiskErrorInformation(DiskErrorCause.DISK_FULL, + "used space above threshold of " + diskUtilizationPercentageCutoff + "%"); + } else { + LOG.debug("Dir {} pass throw the usage over percentage validation", dir); + return null; + } } - private boolean isDiskFreeSpaceUnderLimit(File dir, - long freeSpaceCutoff) { + private DiskErrorInformation validateDiskFreeSpaceUnderLimit(File dir, boolean isGoodDir) { + if (!diskFreeSpaceThresholdEnabled) { + return null; + } + long freeSpaceCutoff = isGoodDir ? 
diskFreeSpaceCutoffLow : diskFreeSpaceCutoffHigh; long freeSpace = dir.getUsableSpace() / (1024 * 1024); - return freeSpace < freeSpaceCutoff; + if (freeSpace < freeSpaceCutoff) { + return new DiskErrorInformation(DiskErrorCause.DISK_FULL, + "free space below limit of " + freeSpaceCutoff + "MB"); + } else { + LOG.debug("Dir {} pass throw the free space validation", dir); + return null; + } + } + + private DiskErrorInformation validateSubsAccessibility(File dir) { + if (!subAccessibilityValidationEnabled) { + return null; + } + try (Stream walk = Files.walk(dir.toPath())) { + List subs = walk + .map(java.nio.file.Path::toFile) + .collect(Collectors.toList()); + for (File sub : subs) { + if (sub.isDirectory()) { + DiskChecker.checkDir(sub); + } else if (!Files.isReadable(sub.toPath())) { + return new DiskErrorInformation(DiskErrorCause.OTHER, "Can not read " + sub); + } else { + LOG.debug("{} under {} is accessible", sub, dir); + } + } + } catch (IOException | UncheckedIOException | SecurityException e) { + return new DiskErrorInformation(DiskErrorCause.OTHER, e.getMessage()); + } + return null; } private void createDir(FileContext localFs, Path dir, FsPermission perm) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java index 33bd4d92347ca..0193f844ac824 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java @@ -20,8 +20,17 @@ import java.io.File; import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.attribute.PosixFilePermissions; +import java.util.Collections; import java.util.List; import java.util.ListIterator; +import java.util.Map; + +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; @@ -32,16 +41,11 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.nodemanager.DirectoryCollection.DirsChangeListener; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; public class TestDirectoryCollection { - private static final File testDir = new File("target", - TestDirectoryCollection.class.getName()).getAbsoluteFile(); - private static final File testFile = new File(testDir, "testfile"); + private File testDir; + private File testFile; private Configuration conf; private FileContext localFs; @@ -50,7 +54,8 @@ public class TestDirectoryCollection { public void setupForTests() throws IOException { conf = new Configuration(); localFs = FileContext.getLocalFSFileContext(conf); - testDir.mkdirs(); + testDir = Files.createTempDirectory(TestDirectoryCollection.class.getName()).toFile(); + testFile = new File(testDir, "testfile"); testFile.createNewFile(); } @@ -516,6 +521,20 @@ public void testDirsChangeListener() { Assert.assertEquals(listener3.num, 1); } + @Test + public void testNonAccessibleSub() throws 
IOException { + Files.setPosixFilePermissions(testDir.toPath(), + PosixFilePermissions.fromString("rwx------")); + Files.setPosixFilePermissions(testFile.toPath(), + PosixFilePermissions.fromString("-w--w--w-")); + DirectoryCollection dc = new DirectoryCollection(new String[]{testDir.toString()}); + Map diskErrorInformationMap = + dc.testDirs(Collections.singletonList(testDir.toString()), Collections.emptySet()); + Assert.assertEquals(1, diskErrorInformationMap.size()); + Assert.assertTrue(diskErrorInformationMap.values().iterator().next() + .message.contains(testFile.getName())); + } + static class DirsChangeListenerTest implements DirsChangeListener { public int num = 0; public DirsChangeListenerTest() { From c33d86860606f972f8b743b02f629b14f83d14f2 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 2 Jul 2024 11:34:45 +0100 Subject: [PATCH 045/113] HADOOP-19210. S3A: Speed up some slow unit tests (#6907) Speed up slow tests * TestS3AAWSCredentialsProvider: decrease thread pool shutdown time * TestS3AInputStreamRetry: reduce retry limit and intervals Contributed by Steve Loughran --- .../org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java | 9 +++++++++ .../hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java | 8 +++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java index f43710cf25eb0..e76b304604836 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java @@ -80,6 +80,15 @@ public Configuration createConfiguration() { conf.setInt(ASYNC_DRAIN_THRESHOLD, Integer.MAX_VALUE); // set the region to avoid the getBucketLocation on FS init. 
conf.set(AWS_REGION, "eu-west-1"); + + // tight retry logic as all failures are simulated + final String interval = "1ms"; + final int limit = 3; + conf.set(RETRY_THROTTLE_INTERVAL, interval); + conf.setInt(RETRY_THROTTLE_LIMIT, limit); + conf.set(RETRY_INTERVAL, interval); + conf.setInt(RETRY_LIMIT, limit); + return conf; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java index 0ffd7e75b1843..d51bc954a6329 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java @@ -86,6 +86,8 @@ public class TestS3AAWSCredentialsProvider extends AbstractS3ATestBase { private static final Logger LOG = LoggerFactory.getLogger(TestS3AAWSCredentialsProvider.class); + public static final int TERMINATION_TIMEOUT = 3; + @Test public void testProviderWrongClass() throws Exception { expectProviderInstantiationFailure(this.getClass(), @@ -579,7 +581,7 @@ protected AwsCredentials createCredentials(Configuration config) throws IOExcept } } - private static final int CONCURRENT_THREADS = 10; + private static final int CONCURRENT_THREADS = 4; @Test public void testConcurrentAuthentication() throws Throwable { @@ -619,7 +621,7 @@ public void testConcurrentAuthentication() throws Throwable { "expectedSecret", credentials.secretAccessKey()); } } finally { - pool.awaitTermination(10, TimeUnit.SECONDS); + pool.awaitTermination(TERMINATION_TIMEOUT, TimeUnit.SECONDS); pool.shutdown(); } @@ -685,7 +687,7 @@ public void testConcurrentAuthenticationError() throws Throwable { ); } } finally { - pool.awaitTermination(10, TimeUnit.SECONDS); + pool.awaitTermination(TERMINATION_TIMEOUT, TimeUnit.SECONDS); pool.shutdown(); } From a57105462be57840ee0a5646be2c443228776931 Mon Sep 17 00:00:00 2001 From: hfutatzhanghb Date: Fri, 5 Jul 2024 14:41:39 +0800 Subject: [PATCH 046/113] HADOOP-19215. Fix unit tests testSlowConnection and testBadSetup failed in TestRPC. (#6912). Contributed by farmmamba. 
Reviewed-by: huhaiyang Signed-off-by: Ayush Saxena --- .../src/test/java/org/apache/hadoop/ipc/TestRPC.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java index f9b03721b50db..17f1f65261468 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java @@ -1038,7 +1038,7 @@ public int read() throws IOException { // disable ping & timeout to minimize traffic clientConf.setBoolean(CommonConfigurationKeys.IPC_CLIENT_PING_KEY, false); clientConf.setInt(CommonConfigurationKeys.IPC_CLIENT_RPC_TIMEOUT_KEY, 0); - RPC.setProtocolEngine(clientConf, TestRpcService.class, ProtobufRpcEngine.class); + RPC.setProtocolEngine(clientConf, TestRpcService.class, ProtobufRpcEngine2.class); // set async mode so that we don't need to implement the input stream final boolean wasAsync = Client.isAsynchronousMode(); TestRpcService client = null; @@ -1165,7 +1165,7 @@ public void testBadSetup() throws Exception { clientConf.set(CommonConfigurationKeys.IPC_MAXIMUM_RESPONSE_LENGTH, "xxx"); RPC.setProtocolEngine(clientConf, TestRpcService.class, - ProtobufRpcEngine.class); + ProtobufRpcEngine2.class); TestRpcService client = null; int threadCount = Thread.getAllStackTraces().size(); try { From ae76e9475cdafbe4c00f37a0d94f13b772b4d10d Mon Sep 17 00:00:00 2001 From: huhaiyang Date: Fri, 5 Jul 2024 20:45:01 +0800 Subject: [PATCH 047/113] HDFS-17564. EC: Fix the issue of inaccurate metrics when decommission mark busy DN. (#6911). Contributed by Haiyang Hu. Signed-off-by: He Xiaoqiao --- .../server/blockmanagement/BlockManager.java | 4 +- .../BlockReconstructionWork.java | 2 +- .../blockmanagement/ErasureCodingWork.java | 8 ++- .../blockmanagement/ReplicationWork.java | 3 +- .../hdfs/TestDecommissionWithStriped.java | 54 +++++++++++++++++++ 5 files changed, 66 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 8f65673806bd0..41845152514fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -2396,7 +2396,9 @@ boolean validateReconstructionWork(BlockReconstructionWork rw) { } // Add block to the datanode's task list - rw.addTaskToDatanode(numReplicas); + if (!rw.addTaskToDatanode(numReplicas)) { + return false; + } DatanodeStorageInfo.incrementBlocksScheduled(targets); // Move the block-replication into a "pending" state. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockReconstructionWork.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockReconstructionWork.java index df76a15c733f0..6ea046204fbda 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockReconstructionWork.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockReconstructionWork.java @@ -145,5 +145,5 @@ abstract void chooseTargets(BlockPlacementPolicy blockplacement, * * @param numberReplicas replica details */ - abstract void addTaskToDatanode(NumberReplicas numberReplicas); + abstract boolean addTaskToDatanode(NumberReplicas numberReplicas); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ErasureCodingWork.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ErasureCodingWork.java index b8c396696ab11..5726fac0b3d79 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ErasureCodingWork.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ErasureCodingWork.java @@ -136,11 +136,11 @@ private int chooseSource4SimpleReplication() { } @Override - void addTaskToDatanode(NumberReplicas numberReplicas) { + boolean addTaskToDatanode(NumberReplicas numberReplicas) { final DatanodeStorageInfo[] targets = getTargets(); assert targets.length > 0; BlockInfoStriped stripedBlk = (BlockInfoStriped) getBlock(); - + boolean flag = true; if (hasNotEnoughRack()) { // if we already have all the internal blocks, but not enough racks, // we only need to replicate one internal block to a new rack @@ -152,6 +152,9 @@ void addTaskToDatanode(NumberReplicas numberReplicas) { List leavingServiceSources = findLeavingServiceSources(); // decommissioningSources.size() should be >= targets.length final int num = Math.min(leavingServiceSources.size(), targets.length); + if (num == 0) { + flag = false; + } for (int i = 0; i < num; i++) { createReplicationWork(leavingServiceSources.get(i), targets[i]); } @@ -160,6 +163,7 @@ void addTaskToDatanode(NumberReplicas numberReplicas) { new ExtendedBlock(blockPoolId, stripedBlk), getSrcNodes(), targets, liveBlockIndices, excludeReconstructedIndices, stripedBlk.getErasureCodingPolicy()); } + return flag; } private void createReplicationWork(int sourceIndex, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ReplicationWork.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ReplicationWork.java index 15e5d5cdc2729..19b56f171523a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ReplicationWork.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/ReplicationWork.java @@ -61,7 +61,8 @@ assert getSrcNodes().length > 0 } @Override - void addTaskToDatanode(NumberReplicas numberReplicas) { + boolean addTaskToDatanode(NumberReplicas numberReplicas) { getSrcNodes()[0].addBlockToBeReplicated(getBlock(), getTargets()); + return true; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java index 1de8fc17ee802..83332cc3134b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommissionWithStriped.java @@ -462,6 +462,60 @@ public void testFileChecksumAfterDecommission() throws Exception { fileChecksum1.equals(fileChecksum2)); } + /** + * Test decommission when DN marked as busy. + * @throwsException + */ + @Test(timeout = 120000) + public void testBusyAfterDecommissionNode() throws Exception { + int busyDNIndex = 0; + //1. create EC file. + final Path ecFile = new Path(ecDir, "testBusyAfterDecommissionNode"); + int writeBytes = cellSize * dataBlocks; + writeStripedFile(dfs, ecFile, writeBytes); + Assert.assertEquals(0, bm.numOfUnderReplicatedBlocks()); + FileChecksum fileChecksum1 = dfs.getFileChecksum(ecFile, writeBytes); + + //2. make once DN busy. + final INodeFile fileNode = cluster.getNamesystem().getFSDirectory() + .getINode4Write(ecFile.toString()).asFile(); + BlockInfo firstBlock = fileNode.getBlocks()[0]; + DatanodeStorageInfo[] dnStorageInfos = bm.getStorages(firstBlock); + DatanodeDescriptor busyNode = + dnStorageInfos[busyDNIndex].getDatanodeDescriptor(); + for (int j = 0; j < replicationStreamsHardLimit; j++) { + busyNode.incrementPendingReplicationWithoutTargets(); + } + + //3. decomission one node. + List decommisionNodes = new ArrayList<>(); + decommisionNodes.add(busyNode); + decommissionNode(0, decommisionNodes, AdminStates.DECOMMISSION_INPROGRESS); + + final List live = new ArrayList(); + bm.getDatanodeManager().fetchDatanodes(live, null, false); + int liveDecommissioning = 0; + for (DatanodeDescriptor node : live) { + liveDecommissioning += node.isDecommissionInProgress() ? 1 : 0; + } + assertEquals(decommisionNodes.size(), liveDecommissioning); + + //4. wait for decommission block to replicate. + GenericTestUtils.waitFor(() -> bm.getLowRedundancyBlocksCount() == 1, + 100, 3000); + + int blocksScheduled = 0; + final List dnList = new ArrayList<>(); + fsn.getBlockManager().getDatanodeManager().fetchDatanodes(dnList, null, + false); + for (DatanodeDescriptor dn : dnList) { + blocksScheduled += dn.getBlocksScheduled(); + } + assertEquals(0, blocksScheduled); + assertEquals(0, bm.getPendingReconstructionBlocksCount()); + assertEquals(1, bm.getLowRedundancyBlocksCount()); + } + private void testDecommission(int writeBytes, int storageCount, int decomNodeCount, String filename) throws IOException, Exception { Path ecFile = new Path(ecDir, filename); From 4c55adbb6bc25fe76943535fd97cbd2b6d350e33 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 5 Jul 2024 16:38:37 +0100 Subject: [PATCH 048/113] HADOOP-19205. S3A: initialization/close slower than with v1 SDK (#6892) Adds new ClientManager interface/implementation which provides on-demand creation of synchronous and asynchronous s3 clients, s3 transfer manager, and in close() terminates these. S3A FS is modified to * Create a ClientManagerImpl instance and pass down to its S3Store. * Use the same ClientManager interface against S3Store to demand-create the services. * Only create the async client as part of the transfer manager creation, which will take place during the first rename() operation. * Statistics on client creation count and duration are recorded. + Statistics on the time to initialize and shutdown the S3A FS are collected in IOStatistics for reporting. 
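A sketch of how a caller might surface those statistics, using only the existing IOStatistics helpers in hadoop-common (the bucket URI and class name are placeholders):

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.statistics.IOStatistics;
import org.apache.hadoop.fs.statistics.IOStatisticsLogging;
import org.apache.hadoop.fs.statistics.IOStatisticsSupport;

public class PrintS3AStatsSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    try (FileSystem fs = FileSystem.newInstance(new URI("s3a://example-bucket/"), conf)) {
      // null is returned for filesystems which do not publish IOStatistics
      IOStatistics stats = IOStatisticsSupport.retrieveIOStatistics(fs);
      if (stats != null) {
        // duration trackers such as filesystem_initialization and
        // store_client_creation appear here once they have been recorded
        System.out.println(IOStatisticsLogging.ioStatisticsToPrettyString(stats));
      }
    }
  }
}
```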
Adds to hadoop common class LazyAtomicReference implements CallableRaisingIOE, Supplier and subclass LazyAutoCloseableReference extends LazyAtomicReference implements AutoCloseable These evaluate the Supplier/CallableRaisingIOE they were constructed with on the first (successful) read of the the value. Any exception raised during this operation will be rethrown, and on future evaluations the same operation retried. These classes implement the Supplier and CallableRaisingIOE interfaces so can actually be used for to implement lazy function evaluation as Haskell and some other functional languages do. LazyAutoCloseableReference is AutoCloseable; its close() method will close the inner reference if it is set This class is used in ClientManagerImpl for the lazy S3 Cliehnt creation and closure. Contributed by Steve Loughran. --- .../statistics/FileSystemStatisticNames.java | 45 +++ .../fs/statistics/StoreStatisticNames.java | 6 + .../hadoop/util/functional/FunctionalIO.java | 23 +- .../hadoop/util/functional/FutureIO.java | 50 +-- .../util/functional/LazyAtomicReference.java | 152 +++++++ .../LazyAutoCloseableReference.java | 102 +++++ .../util/functional/TestLazyReferences.java | 263 ++++++++++++ .../dev-support/findbugs-exclude.xml | 5 - .../apache/hadoop/fs/s3a/S3AFileSystem.java | 259 +++++++----- .../org/apache/hadoop/fs/s3a/S3AStore.java | 9 +- .../apache/hadoop/fs/s3a/S3ClientFactory.java | 2 - .../org/apache/hadoop/fs/s3a/Statistic.java | 16 + .../hadoop/fs/s3a/impl/ClientManager.java | 50 +++ .../hadoop/fs/s3a/impl/ClientManagerImpl.java | 238 +++++++++++ .../hadoop/fs/s3a/impl/S3AStoreBuilder.java | 21 +- .../hadoop/fs/s3a/impl/S3AStoreImpl.java | 121 ++++-- .../hadoop/fs/s3a/MockS3AFileSystem.java | 7 + .../s3a/commit/staging/StagingTestBase.java | 11 +- .../hadoop/fs/s3a/impl/TestClientManager.java | 379 ++++++++++++++++++ .../fs/s3a/test/StubS3ClientFactory.java | 122 ++++++ 20 files changed, 1664 insertions(+), 217 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/FileSystemStatisticNames.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/LazyAtomicReference.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/LazyAutoCloseableReference.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestLazyReferences.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ClientManager.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ClientManagerImpl.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestClientManager.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/StubS3ClientFactory.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/FileSystemStatisticNames.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/FileSystemStatisticNames.java new file mode 100644 index 0000000000000..cd8df2f853612 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/FileSystemStatisticNames.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.statistics; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Common statistic names for Filesystem-level statistics, + * including internals. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class FileSystemStatisticNames { + + private FileSystemStatisticNames() { + } + + /** + * How long did filesystem initialization take? + */ + public static final String FILESYSTEM_INITIALIZATION = "filesystem_initialization"; + + /** + * How long did filesystem close take? + */ + public static final String FILESYSTEM_CLOSE = "filesystem_close"; + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java index a513cffd849b6..44f794aa77478 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/statistics/StoreStatisticNames.java @@ -176,6 +176,11 @@ public final class StoreStatisticNames { public static final String DELEGATION_TOKENS_ISSUED = "delegation_tokens_issued"; + /** + * How long did any store client creation take? + */ + public static final String STORE_CLIENT_CREATION = "store_client_creation"; + /** Probe for store existing: {@value}. */ public static final String STORE_EXISTS_PROBE = "store_exists_probe"; @@ -200,6 +205,7 @@ public final class StoreStatisticNames { public static final String STORE_IO_RATE_LIMITED_DURATION = "store_io_rate_limited_duration"; + /** * A store's equivalent of a paged LIST request was initiated: {@value}. */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionalIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionalIO.java index 6bc4a7103022d..bc9e2ea729b97 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionalIO.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionalIO.java @@ -49,27 +49,6 @@ public static T uncheckIOExceptions(CallableRaisingIOE call) { } } - /** - * Wrap a {@link CallableRaisingIOE} as a {@link Supplier}. - * This is similar to {@link CommonCallableSupplier}, except that - * only IOExceptions are caught and wrapped; all other exceptions are - * propagated unchanged. 
- * @param type of result - */ - private static final class UncheckedIOExceptionSupplier implements Supplier { - - private final CallableRaisingIOE call; - - private UncheckedIOExceptionSupplier(CallableRaisingIOE call) { - this.call = call; - } - - @Override - public T get() { - return uncheckIOExceptions(call); - } - } - /** * Wrap a {@link CallableRaisingIOE} as a {@link Supplier}. * @param call call to wrap @@ -77,7 +56,7 @@ public T get() { * @return a supplier which invokes the call. */ public static Supplier toUncheckedIOExceptionSupplier(CallableRaisingIOE call) { - return new UncheckedIOExceptionSupplier<>(call); + return () -> uncheckIOExceptions(call); } /** diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FutureIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FutureIO.java index 2f043b6499795..0a0d023d931d0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FutureIO.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FutureIO.java @@ -38,9 +38,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - /** * Future IO Helper methods. *
<p>
@@ -62,7 +59,6 @@ @InterfaceStability.Unstable public final class FutureIO { - private static final Logger LOG = LoggerFactory.getLogger(FutureIO.class.getName()); private FutureIO() { } @@ -129,7 +125,6 @@ public static T awaitFuture(final Future future, * If any future throws an exception during its execution, this method * extracts and rethrows that exception. *
<p>
- * * @param collection collection of futures to be evaluated * @param type of the result. * @return the list of future's result, if all went well. @@ -140,19 +135,10 @@ public static T awaitFuture(final Future future, public static List awaitAllFutures(final Collection> collection) throws InterruptedIOException, IOException, RuntimeException { List results = new ArrayList<>(); - try { - for (Future future : collection) { - results.add(future.get()); - } - return results; - } catch (InterruptedException e) { - LOG.debug("Execution of future interrupted ", e); - throw (InterruptedIOException) new InterruptedIOException(e.toString()) - .initCause(e); - } catch (ExecutionException e) { - LOG.debug("Execution of future failed with exception", e.getCause()); - return raiseInnerCause(e); + for (Future future : collection) { + results.add(awaitFuture(future)); } + return results; } /** @@ -163,7 +149,6 @@ public static List awaitAllFutures(final Collection> collection * the timeout expires, whichever happens first. If any future throws an * exception during its execution, this method extracts and rethrows that exception. *
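For reference, a usage sketch of this bounded-wait overload of awaitAllFutures (the thread-pool size and timeout below are arbitrary):

```java
import java.time.Duration;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import org.apache.hadoop.util.functional.FutureIO;

public class AwaitAllFuturesSketch {
  public static void main(String[] args) throws Exception {
    ExecutorService pool = Executors.newFixedThreadPool(4);
    try {
      List<Future<Integer>> futures = IntStream.range(0, 8)
          .mapToObj(i -> pool.submit(() -> i * i))
          .collect(Collectors.toList());
      // blocks until every future completes or the timeout expires; the first
      // IOException or RuntimeException raised by a task is extracted and rethrown
      List<Integer> results = FutureIO.awaitAllFutures(futures, Duration.ofSeconds(30));
      System.out.println(results);
    } finally {
      pool.shutdown();
    }
  }
}
```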
<p>
- * * @param collection collection of futures to be evaluated * @param duration timeout duration * @param type of the result. @@ -176,21 +161,12 @@ public static List awaitAllFutures(final Collection> collection public static List awaitAllFutures(final Collection> collection, final Duration duration) throws InterruptedIOException, IOException, RuntimeException, - TimeoutException { + TimeoutException { List results = new ArrayList<>(); - try { - for (Future future : collection) { - results.add(future.get(duration.toMillis(), TimeUnit.MILLISECONDS)); - } - return results; - } catch (InterruptedException e) { - LOG.debug("Execution of future interrupted ", e); - throw (InterruptedIOException) new InterruptedIOException(e.toString()) - .initCause(e); - } catch (ExecutionException e) { - LOG.debug("Execution of future failed with exception", e.getCause()); - return raiseInnerCause(e); + for (Future future : collection) { + results.add(awaitFuture(future, duration.toMillis(), TimeUnit.MILLISECONDS)); } + return results; } /** @@ -199,7 +175,6 @@ public static List awaitAllFutures(final Collection> collection * This will always raise an exception, either the inner IOException, * an inner RuntimeException, or a new IOException wrapping the raised * exception. - * * @param e exception. * @param type of return value. * @return nothing, ever. @@ -283,12 +258,11 @@ public static IOException unwrapInnerException(final Throwable e) { * @param type of builder * @return the builder passed in. */ - public static > - FSBuilder propagateOptions( - final FSBuilder builder, - final Configuration conf, - final String optionalPrefix, - final String mandatoryPrefix) { + public static > FSBuilder propagateOptions( + final FSBuilder builder, + final Configuration conf, + final String optionalPrefix, + final String mandatoryPrefix) { propagateOptions(builder, conf, optionalPrefix, false); propagateOptions(builder, conf, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/LazyAtomicReference.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/LazyAtomicReference.java new file mode 100644 index 0000000000000..5f2d674bba5ca --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/LazyAtomicReference.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.concurrent.atomic.AtomicReference; +import java.util.function.Supplier; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.util.functional.FunctionalIO.uncheckIOExceptions; + +/** + * A lazily constructed reference, whose reference + * constructor is a {@link CallableRaisingIOE} so + * may raise IOExceptions. + *

+ * This {@code constructor} is only invoked on demand + * when the reference is first needed, + * after which the same value is returned. + * This value MUST NOT be null. + *

+ * Implements {@link CallableRaisingIOE} and {@code java.util.function.Supplier}. + * An instance of this can therefore be used in a functional IO chain. + * As such, it can act as a delayed and caching invocator of a function: + * the supplier passed in is only ever invoked once, and only when requested. + * @param type of reference + */ +public class LazyAtomicReference + implements CallableRaisingIOE, Supplier { + + /** + * Underlying reference. + */ + private final AtomicReference reference = new AtomicReference<>(); + + /** + * Constructor for lazy creation. + */ + private final CallableRaisingIOE constructor; + + /** + * Constructor for this instance. + * @param constructor method to invoke to actually construct the inner object. + */ + public LazyAtomicReference(final CallableRaisingIOE constructor) { + this.constructor = requireNonNull(constructor); + } + + /** + * Getter for the constructor. + * @return the constructor class + */ + protected CallableRaisingIOE getConstructor() { + return constructor; + } + + /** + * Get the reference. + * Subclasses working with this need to be careful working with this. + * @return the reference. + */ + protected AtomicReference getReference() { + return reference; + } + + /** + * Get the value, constructing it if needed. + * @return the value + * @throws IOException on any evaluation failure + * @throws NullPointerException if the evaluated function returned null. + */ + public synchronized T eval() throws IOException { + final T v = reference.get(); + if (v != null) { + return v; + } + reference.set(requireNonNull(constructor.apply())); + return reference.get(); + } + + /** + * Implementation of {@code CallableRaisingIOE.apply()}. + * Invoke {@link #eval()}. + * @return the value + * @throws IOException on any evaluation failure + */ + @Override + public final T apply() throws IOException { + return eval(); + } + + /** + * Implementation of {@code Supplier.get()}. + *
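A minimal usage sketch of the class above, assuming a hypothetical expensive filesystem lookup; the constructor callable runs once, on first access, and the cached value is returned afterwards:

    // The reference is built from a CallableRaisingIOE, so the factory may
    // throw IOException; eval()/apply() surface it, get() wraps it in
    // UncheckedIOException. "fs" and "path" are illustrative placeholders.
    LazyAtomicReference<FileStatus> status = new LazyAtomicReference<>(
        () -> fs.getFileStatus(path));     // hypothetical expensive call
    FileStatus first = status.eval();      // first call invokes the callable
    FileStatus same = status.get();        // cached value, no second lookup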

+ * Invoke {@link #eval()} and convert IOEs to + * UncheckedIOException. + *

+ * This is the {@code Supplier.get()} implementation, which allows + * this class to passed into anything taking a supplier. + * @return the value + * @throws UncheckedIOException if the constructor raised an IOException. + */ + @Override + public final T get() throws UncheckedIOException { + return uncheckIOExceptions(this::eval); + } + + /** + * Is the reference set? + * @return true if the reference has been set. + */ + public final boolean isSet() { + return reference.get() != null; + } + + @Override + public String toString() { + return "LazyAtomicReference{" + + "reference=" + reference + '}'; + } + + + /** + * Create from a supplier. + * This is not a constructor to avoid ambiguity when a lambda-expression is + * passed in. + * @param supplier supplier implementation. + * @return a lazy reference. + * @param type of reference + */ + public static LazyAtomicReference lazyAtomicReferenceFromSupplier( + Supplier supplier) { + return new LazyAtomicReference<>(supplier::get); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/LazyAutoCloseableReference.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/LazyAutoCloseableReference.java new file mode 100644 index 0000000000000..d6d625c125589 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/LazyAutoCloseableReference.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Supplier; + +import static org.apache.hadoop.util.Preconditions.checkState; + +/** + * A subclass of {@link LazyAtomicReference} which + * holds an {@code AutoCloseable} reference and calls {@code close()} + * when it itself is closed. + * @param type of reference. + */ +public class LazyAutoCloseableReference + extends LazyAtomicReference implements AutoCloseable { + + /** Closed flag. */ + private final AtomicBoolean closed = new AtomicBoolean(false); + + /** + * Constructor for this instance. + * @param constructor method to invoke to actually construct the inner object. + */ + public LazyAutoCloseableReference(final CallableRaisingIOE constructor) { + super(constructor); + } + + /** + * {@inheritDoc} + * @throws IllegalStateException if the reference is closed. + */ + @Override + public synchronized T eval() throws IOException { + checkState(!closed.get(), "Reference is closed"); + return super.eval(); + } + + /** + * Is the reference closed? + * @return true if the reference is closed. + */ + public boolean isClosed() { + return closed.get(); + } + + /** + * Close the reference value if it is non-null. 
+ * Sets the reference to null afterwards, even on + * a failure. + * @throws Exception failure to close. + */ + @Override + public synchronized void close() throws Exception { + if (closed.getAndSet(true)) { + // already closed + return; + } + final T v = getReference().get(); + // check the state. + // A null reference means it has not yet been evaluated, + if (v != null) { + try { + v.close(); + } finally { + // set the reference to null, even on a failure. + getReference().set(null); + } + } + } + + + /** + * Create from a supplier. + * This is not a constructor to avoid ambiguity when a lambda-expression is + * passed in. + * @param supplier supplier implementation. + * @return a lazy reference. + * @param type of reference + */ + public static LazyAutoCloseableReference lazyAutoCloseablefromSupplier(Supplier supplier) { + return new LazyAutoCloseableReference<>(supplier::get); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestLazyReferences.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestLazyReferences.java new file mode 100644 index 0000000000000..4d1dae184b7d1 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestLazyReferences.java @@ -0,0 +1,263 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.functional; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.net.UnknownHostException; +import java.util.concurrent.atomic.AtomicInteger; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.test.LambdaTestUtils.verifyCause; +import static org.apache.hadoop.util.Preconditions.checkState; + +/** + * Test {@link LazyAtomicReference} and {@link LazyAutoCloseableReference}. + */ +public class TestLazyReferences extends AbstractHadoopTestBase { + + /** + * Format of exceptions to raise. + */ + private static final String GENERATED = "generated[%d]"; + + /** + * Invocation counter, can be asserted on in {@link #assertCounterValue(int)}. + */ + private final AtomicInteger counter = new AtomicInteger(); + + /** + * Assert that {@link #counter} has a specific value. + * @param val expected value + */ + private void assertCounterValue(final int val) { + assertAtomicIntValue(counter, val); + } + + /** + * Assert an atomic integer has a specific value. 
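A usage sketch for the closeable variant, assuming a hypothetical lazily opened stream; the resource is only created if something asks for it, and close() then releases it (or does nothing if it was never created):

    // Sketch only: assumes an enclosing method that declares throws Exception,
    // since LazyAutoCloseableReference.close() may throw.
    try (LazyAutoCloseableReference<FSDataInputStream> in =
             new LazyAutoCloseableReference<>(() -> fs.open(path))) {  // hypothetical open
      if (needsData) {                     // hypothetical condition
        in.eval().read(buffer);            // stream created here, on first use
      }
    }                                      // closes the stream iff it was created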
+ * @param ai atomic integer + * @param val expected value + */ + private static void assertAtomicIntValue( + final AtomicInteger ai, final int val) { + Assertions.assertThat(ai.get()) + .describedAs("value of atomic integer %s", ai) + .isEqualTo(val); + } + + + /** + * Test the underlying {@link LazyAtomicReference} integration with java + * Supplier API. + */ + @Test + public void testLazyAtomicReference() throws Throwable { + + LazyAtomicReference ref = new LazyAtomicReference<>(counter::incrementAndGet); + + // constructor does not invoke the supplier + assertCounterValue(0); + + assertSetState(ref, false); + + // second invocation does not + Assertions.assertThat(ref.eval()) + .describedAs("first eval()") + .isEqualTo(1); + assertCounterValue(1); + assertSetState(ref, true); + + + // Callable.apply() returns the same value + Assertions.assertThat(ref.apply()) + .describedAs("second get of %s", ref) + .isEqualTo(1); + // no new counter increment + assertCounterValue(1); + } + + /** + * Assert that {@link LazyAtomicReference#isSet()} is in the expected state. + * @param ref reference + * @param expected expected value + */ + private static void assertSetState(final LazyAtomicReference ref, + final boolean expected) { + Assertions.assertThat(ref.isSet()) + .describedAs("isSet() of %s", ref) + .isEqualTo(expected); + } + + /** + * Test the underlying {@link LazyAtomicReference} integration with java + * Supplier API. + */ + @Test + public void testSupplierIntegration() throws Throwable { + + LazyAtomicReference ref = LazyAtomicReference.lazyAtomicReferenceFromSupplier(counter::incrementAndGet); + + // constructor does not invoke the supplier + assertCounterValue(0); + assertSetState(ref, false); + + // second invocation does not + Assertions.assertThat(ref.get()) + .describedAs("first get()") + .isEqualTo(1); + assertCounterValue(1); + + // Callable.apply() returns the same value + Assertions.assertThat(ref.apply()) + .describedAs("second get of %s", ref) + .isEqualTo(1); + // no new counter increment + assertCounterValue(1); + } + + /** + * Test failure handling. through the supplier API. + */ + @Test + public void testSupplierIntegrationFailureHandling() throws Throwable { + + LazyAtomicReference ref = new LazyAtomicReference<>(() -> { + throw new UnknownHostException(String.format(GENERATED, counter.incrementAndGet())); + }); + + // the get() call will wrap the raised exception, which can be extracted + // and type checked. + verifyCause(UnknownHostException.class, + intercept(UncheckedIOException.class, "[1]", ref::get)); + + assertSetState(ref, false); + + // counter goes up + intercept(UncheckedIOException.class, "[2]", ref::get); + } + + @Test + public void testAutoCloseable() throws Throwable { + final LazyAutoCloseableReference ref = + LazyAutoCloseableReference.lazyAutoCloseablefromSupplier(CloseableClass::new); + + assertSetState(ref, false); + ref.eval(); + final CloseableClass closeable = ref.get(); + Assertions.assertThat(closeable.isClosed()) + .describedAs("closed flag of %s", closeable) + .isFalse(); + + // first close will close the class. 
+ ref.close(); + Assertions.assertThat(ref.isClosed()) + .describedAs("closed flag of %s", ref) + .isTrue(); + Assertions.assertThat(closeable.isClosed()) + .describedAs("closed flag of %s", closeable) + .isTrue(); + + // second close will not raise an exception + ref.close(); + + // you cannot eval() a closed reference + intercept(IllegalStateException.class, "Reference is closed", ref::eval); + intercept(IllegalStateException.class, "Reference is closed", ref::get); + intercept(IllegalStateException.class, "Reference is closed", ref::apply); + + Assertions.assertThat(ref.getReference().get()) + .describedAs("inner reference of %s", ref) + .isNull(); + } + + /** + * Not an error to close a reference which was never evaluated. + */ + @Test + public void testCloseableUnevaluated() throws Throwable { + final LazyAutoCloseableReference ref = + new LazyAutoCloseableReference<>(CloseableRaisingException::new); + ref.close(); + ref.close(); + } + + /** + * If the close() call fails, that only raises an exception on the first attempt, + * and the reference is set to null. + */ + @Test + public void testAutoCloseableFailureHandling() throws Throwable { + final LazyAutoCloseableReference ref = + new LazyAutoCloseableReference<>(CloseableRaisingException::new); + ref.eval(); + + // close reports the failure. + intercept(IOException.class, "raised", ref::close); + + // but the reference is set to null + assertSetState(ref, false); + // a second attempt does nothing, so will not raise an exception. + ref.close(); + } + + /** + * Closeable which sets the closed flag on close(). + */ + private static final class CloseableClass implements AutoCloseable { + + /** closed flag. */ + private boolean closed; + + /** + * Close the resource. + * @throws IllegalStateException if already closed. + */ + @Override + public void close() { + checkState(!closed, "Already closed"); + closed = true; + } + + /** + * Get the closed flag. + * @return the state. + */ + private boolean isClosed() { + return closed; + } + + } + /** + * Closeable which raises an IOE in close(). 
+ */ + private static final class CloseableRaisingException implements AutoCloseable { + + @Override + public void close() throws Exception { + throw new IOException("raised"); + } + } + +} diff --git a/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml b/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml index 359ac0e80dd1b..39a9e51ac8125 100644 --- a/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml +++ b/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml @@ -64,11 +64,6 @@ - - - - - diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index d04ca70a68ded..f5937ae0a4abd 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -54,7 +54,6 @@ import software.amazon.awssdk.core.ResponseInputStream; import software.amazon.awssdk.core.exception.SdkException; -import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadResponse; @@ -88,7 +87,6 @@ import software.amazon.awssdk.transfer.s3.model.CompletedCopy; import software.amazon.awssdk.transfer.s3.model.CompletedFileUpload; import software.amazon.awssdk.transfer.s3.model.Copy; -import software.amazon.awssdk.transfer.s3.S3TransferManager; import software.amazon.awssdk.transfer.s3.model.CopyRequest; import software.amazon.awssdk.transfer.s3.model.FileUpload; import software.amazon.awssdk.transfer.s3.model.UploadFileRequest; @@ -123,6 +121,8 @@ import org.apache.hadoop.fs.s3a.impl.BulkDeleteOperation; import org.apache.hadoop.fs.s3a.impl.BulkDeleteOperationCallbacksImpl; import org.apache.hadoop.fs.s3a.impl.ChangeDetectionPolicy; +import org.apache.hadoop.fs.s3a.impl.ClientManager; +import org.apache.hadoop.fs.s3a.impl.ClientManagerImpl; import org.apache.hadoop.fs.s3a.impl.ConfigurationHelper; import org.apache.hadoop.fs.s3a.impl.ContextAccessors; import org.apache.hadoop.fs.s3a.impl.CopyFromLocalOperation; @@ -152,6 +152,7 @@ import org.apache.hadoop.fs.s3a.tools.MarkerToolOperationsImpl; import org.apache.hadoop.fs.statistics.DurationTracker; import org.apache.hadoop.fs.statistics.DurationTrackerFactory; +import org.apache.hadoop.fs.statistics.FileSystemStatisticNames; import org.apache.hadoop.fs.statistics.IOStatistics; import org.apache.hadoop.fs.statistics.IOStatisticsSource; import org.apache.hadoop.fs.statistics.IOStatisticsContext; @@ -305,11 +306,13 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, */ private S3AStore store; + /** + * The core S3 client is created and managed by the ClientManager. + * It is copied here within {@link #initialize(URI, Configuration)}. + * Some mocking tests modify this so take care with changes. + */ private S3Client s3Client; - /** Async client is used for transfer manager. */ - private S3AsyncClient s3AsyncClient; - // initial callback policy is fail-once; it's there just to assist // some mock tests and other codepaths trying to call the low level // APIs on an uninitialized filesystem. 
@@ -328,7 +331,6 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private Listing listing; private long partSize; private boolean enableMultiObjectsDelete; - private S3TransferManager transferManager; private ExecutorService boundedThreadPool; private ThreadPoolExecutor unboundedThreadPool; @@ -548,6 +550,9 @@ public void initialize(URI name, Configuration originalConf) // get the host; this is guaranteed to be non-null, non-empty bucket = name.getHost(); AuditSpan span = null; + // track initialization duration; will only be set after + // statistics are set up. + Optional trackInitialization = Optional.empty(); try { LOG.debug("Initializing S3AFileSystem for {}", bucket); if (LOG.isTraceEnabled()) { @@ -592,6 +597,18 @@ public void initialize(URI name, Configuration originalConf) super.initialize(uri, conf); setConf(conf); + // initialize statistics, after which statistics + // can be collected. + instrumentation = new S3AInstrumentation(uri); + initializeStatisticsBinding(); + + // track initialization duration. + // this should really be done in a onceTrackingDuration() call, + // but then all methods below would need to be in the lambda and + // it would create a merge/backport headache for all. + trackInitialization = Optional.of( + instrumentation.trackDuration(FileSystemStatisticNames.FILESYSTEM_INITIALIZATION)); + s3aInternals = createS3AInternals(); // look for encryption data @@ -600,8 +617,7 @@ public void initialize(URI name, Configuration originalConf) buildEncryptionSecrets(bucket, conf)); invoker = new Invoker(new S3ARetryPolicy(getConf()), onRetry); - instrumentation = new S3AInstrumentation(uri); - initializeStatisticsBinding(); + // If CSE-KMS method is set then CSE is enabled. isCSEEnabled = S3AEncryptionMethods.CSE_KMS.getMethod() .equals(getS3EncryptionAlgorithm().getMethod()); @@ -687,7 +703,7 @@ public void initialize(URI name, Configuration originalConf) // the FS came with a DT // this may do some patching of the configuration (e.g. setting // the encryption algorithms) - bindAWSClient(name, delegationTokensEnabled); + ClientManager clientManager = createClientManager(name, delegationTokensEnabled); inputPolicy = S3AInputPolicy.getPolicy( conf.getTrimmed(INPUT_FADVISE, @@ -762,36 +778,55 @@ public void initialize(URI name, Configuration originalConf) int rateLimitCapacity = intOption(conf, S3A_IO_RATE_LIMIT, DEFAULT_S3A_IO_RATE_LIMIT, 0); // now create the store - store = new S3AStoreBuilder() - .withS3Client(s3Client) - .withDurationTrackerFactory(getDurationTrackerFactory()) - .withStoreContextFactory(this) - .withAuditSpanSource(getAuditManager()) - .withInstrumentation(getInstrumentation()) - .withStatisticsContext(statisticsContext) - .withStorageStatistics(getStorageStatistics()) - .withReadRateLimiter(unlimitedRate()) - .withWriteRateLimiter(RateLimitingFactory.create(rateLimitCapacity)) - .build(); - + store = createS3AStore(clientManager, rateLimitCapacity); + // the s3 client is created through the store, rather than + // directly through the client manager. + // this is to aid mocking. + s3Client = store.getOrCreateS3Client(); // The filesystem is now ready to perform operations against // S3 // This initiates a probe against S3 for the bucket existing. 
doBucketProbing(); initMultipartUploads(conf); + trackInitialization.ifPresent(DurationTracker::close); } catch (SdkException e) { // amazon client exception: stop all services then throw the translation cleanupWithLogger(LOG, span); stopAllServices(); + trackInitialization.ifPresent(DurationTracker::failed); throw translateException("initializing ", new Path(name), e); } catch (IOException | RuntimeException e) { // other exceptions: stop the services. cleanupWithLogger(LOG, span); stopAllServices(); + trackInitialization.ifPresent(DurationTracker::failed); throw e; } } + /** + * Create the S3AStore instance. + * This is protected so that tests can override it. + * @param clientManager client manager + * @param rateLimitCapacity rate limit + * @return a new store instance + */ + @VisibleForTesting + protected S3AStore createS3AStore(final ClientManager clientManager, + final int rateLimitCapacity) { + return new S3AStoreBuilder() + .withClientManager(clientManager) + .withDurationTrackerFactory(getDurationTrackerFactory()) + .withStoreContextFactory(this) + .withAuditSpanSource(getAuditManager()) + .withInstrumentation(getInstrumentation()) + .withStatisticsContext(statisticsContext) + .withStorageStatistics(getStorageStatistics()) + .withReadRateLimiter(unlimitedRate()) + .withWriteRateLimiter(RateLimitingFactory.create(rateLimitCapacity)) + .build(); + } + /** * Populates the configurations related to vectored IO operation * in the context which has to passed down to input streams. @@ -965,7 +1000,7 @@ protected void verifyBucketExists() throws UnknownStoreException, IOException { STORE_EXISTS_PROBE, bucket, null, () -> invoker.retry("doesBucketExist", bucket, true, () -> { try { - s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()); + getS3Client().headBucket(HeadBucketRequest.builder().bucket(bucket).build()); return true; } catch (AwsServiceException ex) { int statusCode = ex.statusCode(); @@ -1014,14 +1049,22 @@ public Listing getListing() { /** * Set up the client bindings. * If delegation tokens are enabled, the FS first looks for a DT - * ahead of any other bindings;. + * ahead of any other bindings. * If there is a DT it uses that to do the auth - * and switches to the DT authenticator automatically (and exclusively) - * @param name URI of the FS + * and switches to the DT authenticator automatically (and exclusively). + *
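Reduced to its essentials, the initialization-tracking pattern used above looks roughly like this; the statistic key and the setup method in the sketch are illustrative, not the S3A ones:

    // Track the duration of a multi-step operation without wrapping it all
    // in a lambda: close() records a success, failed() records the failure.
    Optional<DurationTracker> tracking = Optional.empty();
    try {
      tracking = Optional.of(
          instrumentation.trackDuration("filesystem_initialization"));
      doExpensiveSetup();                              // hypothetical body
      tracking.ifPresent(DurationTracker::close);      // success path
    } catch (IOException | RuntimeException e) {
      tracking.ifPresent(DurationTracker::failed);     // failure path
      throw e;
    }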

+ * Delegation tokens are configured and started, but the actual + * S3 clients are not: instead a {@link ClientManager} is created + * and returned, from which they can be created on demand. + * This is to reduce delays in FS initialization, especially + * for features (transfer manager, async client) which are not + * always used. + * @param fsURI URI of the FS * @param dtEnabled are delegation tokens enabled? + * @return the client manager which can generate the clients. * @throws IOException failure. */ - private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { + private ClientManager createClientManager(URI fsURI, boolean dtEnabled) throws IOException { Configuration conf = getConf(); credentials = null; String uaSuffix = ""; @@ -1059,7 +1102,7 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { uaSuffix = tokens.getUserAgentField(); } else { // DT support is disabled, so create the normal credential chain - credentials = createAWSCredentialProviderList(name, conf); + credentials = createAWSCredentialProviderList(fsURI, conf); } LOG.debug("Using credential provider {}", credentials); Class s3ClientFactoryClass = conf.getClass( @@ -1069,7 +1112,7 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { S3ClientFactory.S3ClientCreationParameters parameters = new S3ClientFactory.S3ClientCreationParameters() .withCredentialSet(credentials) - .withPathUri(name) + .withPathUri(fsURI) .withEndpoint(endpoint) .withMetrics(statisticsContext.newStatisticsFromAwsSdk()) .withPathStyleAccess(conf.getBoolean(PATH_STYLE_ACCESS, false)) @@ -1088,22 +1131,27 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { conf.getBoolean(CHECKSUM_VALIDATION, CHECKSUM_VALIDATION_DEFAULT)); S3ClientFactory clientFactory = ReflectionUtils.newInstance(s3ClientFactoryClass, conf); - s3Client = clientFactory.createS3Client(getUri(), parameters); - createS3AsyncClient(clientFactory, parameters); - transferManager = clientFactory.createS3TransferManager(getS3AsyncClient()); + // this is where clients and the transfer manager are created on demand. + return createClientManager(clientFactory, parameters, getDurationTrackerFactory()); } /** - * Creates and configures the S3AsyncClient. - * Uses synchronized method to suppress spotbugs error. - * - * @param clientFactory factory used to create S3AsyncClient - * @param parameters parameter object - * @throws IOException on any IO problem + * Create the Client Manager; protected to allow for mocking. + * Requires {@link #unboundedThreadPool} to be initialized. + * @param clientFactory (reflection-bonded) client factory. + * @param clientCreationParameters parameters for client creation. + * @param durationTrackerFactory factory for duration tracking. + * @return a client manager instance. */ - private void createS3AsyncClient(S3ClientFactory clientFactory, - S3ClientFactory.S3ClientCreationParameters parameters) throws IOException { - s3AsyncClient = clientFactory.createS3AsyncClient(getUri(), parameters); + @VisibleForTesting + protected ClientManager createClientManager( + final S3ClientFactory clientFactory, + final S3ClientFactory.S3ClientCreationParameters clientCreationParameters, + final DurationTrackerFactory durationTrackerFactory) { + return new ClientManagerImpl(clientFactory, + clientCreationParameters, + durationTrackerFactory + ); } /** @@ -1241,14 +1289,6 @@ public RequestFactory getRequestFactory() { return requestFactory; } - /** - * Get the S3 Async client. 
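Because createClientManager(...) is protected and visible for testing, a test can substitute its own manager; a hypothetical sketch, with the stub factory invented purely for illustration:

    // Hypothetical test subclass: hand back a manager built around a stub
    // factory so no real AWS clients are created during the test.
    public class StubClientS3AFileSystem extends S3AFileSystem {
      @Override
      protected ClientManager createClientManager(
          final S3ClientFactory clientFactory,
          final S3ClientFactory.S3ClientCreationParameters parameters,
          final DurationTrackerFactory durationTrackerFactory) {
        return new ClientManagerImpl(new StubS3ClientFactory(),  // invented factory
            parameters, durationTrackerFactory);
      }
    }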
- * @return the async s3 client. - */ - private S3AsyncClient getS3AsyncClient() { - return s3AsyncClient; - } - /** * Implementation of all operations used by delegation tokens. */ @@ -1335,7 +1375,8 @@ public void abortOutstandingMultipartUploads(long seconds) invoker.retry("Purging multipart uploads", bucket, true, () -> { RemoteIterator uploadIterator = - MultipartUtils.listMultipartUploads(createStoreContext(), s3Client, null, maxKeys); + MultipartUtils.listMultipartUploads(createStoreContext(), + getS3Client(), null, maxKeys); while (uploadIterator.hasNext()) { MultipartUpload upload = uploadIterator.next(); @@ -1395,12 +1436,23 @@ public int getDefaultPort() { * Set the client -used in mocking tests to force in a different client. * @param client client. */ + @VisibleForTesting protected void setAmazonS3Client(S3Client client) { Preconditions.checkNotNull(client, "clientV2"); LOG.debug("Setting S3V2 client to {}", client); s3Client = client; } + /** + * Get the S3 client created in {@link #initialize(URI, Configuration)}. + * @return the s3Client + * @throws UncheckedIOException if the client could not be created. + */ + @VisibleForTesting + protected S3Client getS3Client() { + return s3Client; + } + /** * S3AInternals method. * {@inheritDoc}. @@ -1437,7 +1489,7 @@ private final class S3AInternalsImpl implements S3AInternals { @Override public S3Client getAmazonS3Client(String reason) { LOG.debug("Access to S3 client requested, reason {}", reason); - return s3Client; + return getS3Client(); } @Override @@ -1470,7 +1522,7 @@ public String getBucketLocation(String bucketName) throws IOException { // If accessPoint then region is known from Arn accessPoint != null ? accessPoint.getRegion() - : s3Client.getBucketLocation(GetBucketLocationRequest.builder() + : getS3Client().getBucketLocation(GetBucketLocationRequest.builder() .bucket(bucketName) .build()) .locationConstraintAsString())); @@ -1859,7 +1911,7 @@ public GetObjectRequest.Builder newGetRequestBuilder(final String key) { public ResponseInputStream getObject(GetObjectRequest request) { // active the audit span used for the operation try (AuditSpan span = auditSpan.activate()) { - return s3Client.getObject(request); + return getS3Client().getObject(request); } } @@ -1888,7 +1940,7 @@ private final class WriteOperationHelperCallbacksImpl @Override public CompleteMultipartUploadResponse completeMultipartUpload( CompleteMultipartUploadRequest request) { - return s3Client.completeMultipartUpload(request); + return getS3Client().completeMultipartUpload(request); } } @@ -2926,7 +2978,8 @@ protected HeadObjectResponse getObjectMetadata(String key, if (changeTracker != null) { changeTracker.maybeApplyConstraint(requestBuilder); } - HeadObjectResponse headObjectResponse = s3Client.headObject(requestBuilder.build()); + HeadObjectResponse headObjectResponse = getS3Client() + .headObject(requestBuilder.build()); if (changeTracker != null) { changeTracker.processMetadata(headObjectResponse, operation); } @@ -2960,7 +3013,7 @@ protected HeadBucketResponse getBucketMetadata() throws IOException { final HeadBucketResponse response = trackDurationAndSpan(STORE_EXISTS_PROBE, bucket, null, () -> invoker.retry("getBucketMetadata()", bucket, true, () -> { try { - return s3Client.headBucket( + return getS3Client().headBucket( getRequestFactory().newHeadBucketRequestBuilder(bucket).build()); } catch (NoSuchBucketException e) { throw new UnknownStoreException("s3a://" + bucket + "/", " Bucket does " + "not exist"); @@ -2995,9 +3048,9 @@ protected 
S3ListResult listObjects(S3ListRequest request, OBJECT_LIST_REQUEST, () -> { if (useListV1) { - return S3ListResult.v1(s3Client.listObjects(request.getV1())); + return S3ListResult.v1(getS3Client().listObjects(request.getV1())); } else { - return S3ListResult.v2(s3Client.listObjectsV2(request.getV2())); + return S3ListResult.v2(getS3Client().listObjectsV2(request.getV2())); } })); } @@ -3050,10 +3103,10 @@ protected S3ListResult continueListObjects(S3ListRequest request, nextMarker = prevListResult.get(prevListResult.size() - 1).key(); } - return S3ListResult.v1(s3Client.listObjects( + return S3ListResult.v1(getS3Client().listObjects( request.getV1().toBuilder().marker(nextMarker).build())); } else { - return S3ListResult.v2(s3Client.listObjectsV2(request.getV2().toBuilder() + return S3ListResult.v2(getS3Client().listObjectsV2(request.getV2().toBuilder() .continuationToken(prevResult.getV2().nextContinuationToken()).build())); } })); @@ -3185,15 +3238,16 @@ public PutObjectRequest.Builder newPutObjectRequestBuilder(String key, * @param file the file to be uploaded * @param listener the progress listener for the request * @return the upload initiated + * @throws IOException if transfer manager creation failed. */ @Retries.OnceRaw public UploadInfo putObject(PutObjectRequest putObjectRequest, File file, - ProgressableProgressListener listener) { + ProgressableProgressListener listener) throws IOException { long len = getPutRequestLength(putObjectRequest); LOG.debug("PUT {} bytes to {} via transfer manager ", len, putObjectRequest.key()); incrementPutStartStatistics(len); - FileUpload upload = transferManager.uploadFile( + FileUpload upload = store.getOrCreateTransferManager().uploadFile( UploadFileRequest.builder() .putObjectRequest(putObjectRequest) .source(file) @@ -3233,9 +3287,10 @@ PutObjectResponse putObjectDirect(PutObjectRequest putObjectRequest, PutObjectResponse response = trackDurationOfSupplier(nonNullDurationTrackerFactory(durationTrackerFactory), OBJECT_PUT_REQUESTS.getSymbol(), - () -> isFile ? - s3Client.putObject(putObjectRequest, RequestBody.fromFile(uploadData.getFile())) : - s3Client.putObject(putObjectRequest, + () -> isFile + ? getS3Client().putObject(putObjectRequest, + RequestBody.fromFile(uploadData.getFile())) + : getS3Client().putObject(putObjectRequest, RequestBody.fromInputStream(uploadData.getUploadStream(), putObjectRequest.contentLength()))); incrementPutCompletedStatistics(true, len); @@ -3285,7 +3340,7 @@ UploadPartResponse uploadPart(UploadPartRequest request, RequestBody body, UploadPartResponse uploadPartResponse = trackDurationOfSupplier( nonNullDurationTrackerFactory(durationTrackerFactory), MULTIPART_UPLOAD_PART_PUT.getSymbol(), () -> - s3Client.uploadPart(request, body)); + getS3Client().uploadPart(request, body)); incrementPutCompletedStatistics(true, len); return uploadPartResponse; } catch (AwsServiceException e) { @@ -4344,35 +4399,43 @@ public void close() throws IOException { * both the expected state of this FS and of failures while being stopped. 
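The upload path above shows the shape of the change: the transfer manager is no longer a filesystem field but is obtained, and created on first use, through the store. A trimmed sketch of that call, using only the wiring shown in this patch:

    // The first caller pays the creation cost; later callers reuse the
    // same S3TransferManager instance held by the ClientManager.
    S3TransferManager tm = store.getOrCreateTransferManager();
    FileUpload upload = tm.uploadFile(
        UploadFileRequest.builder()
            .putObjectRequest(putObjectRequest)
            .source(file)
            .build());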
*/ protected synchronized void stopAllServices() { - closeAutocloseables(LOG, transferManager, - s3Client, - getS3AsyncClient()); - transferManager = null; - s3Client = null; - s3AsyncClient = null; - - // At this point the S3A client is shut down, - // now the executor pools are closed - HadoopExecutors.shutdown(boundedThreadPool, LOG, - THREAD_POOL_SHUTDOWN_DELAY_SECONDS, TimeUnit.SECONDS); - boundedThreadPool = null; - HadoopExecutors.shutdown(unboundedThreadPool, LOG, - THREAD_POOL_SHUTDOWN_DELAY_SECONDS, TimeUnit.SECONDS); - unboundedThreadPool = null; - if (futurePool != null) { - futurePool.shutdown(LOG, THREAD_POOL_SHUTDOWN_DELAY_SECONDS, TimeUnit.SECONDS); - futurePool = null; + try { + trackDuration(getDurationTrackerFactory(), FILESYSTEM_CLOSE.getSymbol(), () -> { + closeAutocloseables(LOG, store); + store = null; + s3Client = null; + + // At this point the S3A client is shut down, + // now the executor pools are closed + HadoopExecutors.shutdown(boundedThreadPool, LOG, + THREAD_POOL_SHUTDOWN_DELAY_SECONDS, TimeUnit.SECONDS); + boundedThreadPool = null; + HadoopExecutors.shutdown(unboundedThreadPool, LOG, + THREAD_POOL_SHUTDOWN_DELAY_SECONDS, TimeUnit.SECONDS); + unboundedThreadPool = null; + if (futurePool != null) { + futurePool.shutdown(LOG, THREAD_POOL_SHUTDOWN_DELAY_SECONDS, TimeUnit.SECONDS); + futurePool = null; + } + // other services are shutdown. + cleanupWithLogger(LOG, + delegationTokens.orElse(null), + signerManager, + auditManager); + closeAutocloseables(LOG, credentials); + delegationTokens = Optional.empty(); + signerManager = null; + credentials = null; + return null; + }); + } catch (IOException e) { + // failure during shutdown. + // this should only be from the signature of trackDurationAndSpan(). + LOG.warn("Failure during service shutdown", e); } + // and once this duration has been tracked, close the statistics // other services are shutdown. - cleanupWithLogger(LOG, - instrumentation, - delegationTokens.orElse(null), - signerManager, - auditManager); - closeAutocloseables(LOG, credentials); - delegationTokens = Optional.empty(); - signerManager = null; - credentials = null; + cleanupWithLogger(LOG, instrumentation); } /** @@ -4559,7 +4622,7 @@ private CopyObjectResponse copyFile(String srcKey, String dstKey, long size, () -> { incrementStatistic(OBJECT_COPY_REQUESTS); - Copy copy = transferManager.copy( + Copy copy = store.getOrCreateTransferManager().copy( CopyRequest.builder() .copyObjectRequest(copyRequest) .build()); @@ -4589,7 +4652,7 @@ private CopyObjectResponse copyFile(String srcKey, String dstKey, long size, LOG.debug("copyFile: single part copy {} -> {} of size {}", srcKey, dstKey, size); incrementStatistic(OBJECT_COPY_REQUESTS); try { - return s3Client.copyObject(copyRequest); + return getS3Client().copyObject(copyRequest); } catch (SdkException awsException) { // if this is a 412 precondition failure, it may // be converted to a RemoteFileChangedException @@ -4620,7 +4683,7 @@ CreateMultipartUploadResponse initiateMultipartUpload( LOG.debug("Initiate multipart upload to {}", request.key()); return trackDurationOfSupplier(getDurationTrackerFactory(), OBJECT_MULTIPART_UPLOAD_INITIATED.getSymbol(), - () -> s3Client.createMultipartUpload(request)); + () -> getS3Client().createMultipartUpload(request)); } /** @@ -5343,7 +5406,7 @@ public RemoteIterator listUploadsUnderPrefix( p = prefix + "/"; } // duration tracking is done in iterator. 
- return MultipartUtils.listMultipartUploads(storeContext, s3Client, p, maxKeys); + return MultipartUtils.listMultipartUploads(storeContext, getS3Client(), p, maxKeys); } /** @@ -5368,7 +5431,7 @@ public List listMultipartUploads(String prefix) final ListMultipartUploadsRequest request = getRequestFactory() .newListMultipartUploadsRequestBuilder(p).build(); return trackDuration(getInstrumentation(), MULTIPART_UPLOAD_LIST.getSymbol(), () -> - s3Client.listMultipartUploads(request).uploads()); + getS3Client().listMultipartUploads(request).uploads()); }); } @@ -5383,7 +5446,7 @@ public List listMultipartUploads(String prefix) public void abortMultipartUpload(String destKey, String uploadId) throws IOException { LOG.debug("Aborting multipart upload {} to {}", uploadId, destKey); trackDuration(getInstrumentation(), OBJECT_MULTIPART_UPLOAD_ABORTED.getSymbol(), () -> - s3Client.abortMultipartUpload( + getS3Client().abortMultipartUpload( getRequestFactory().newAbortMultipartUploadRequestBuilder( destKey, uploadId).build())); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AStore.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AStore.java index 68eacc35b1887..a11ed19670587 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AStore.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AStore.java @@ -32,6 +32,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.s3a.api.RequestFactory; +import org.apache.hadoop.fs.s3a.impl.ClientManager; import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; import org.apache.hadoop.fs.s3a.impl.StoreContext; import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext; @@ -42,10 +43,14 @@ * Interface for the S3A Store; * S3 client interactions should be via this; mocking * is possible for unit tests. + *

+ * The {@link ClientManager} interface is used to create the AWS clients; + * the base implementation forwards to the implementation of this interface + * passed in at construction time. */ @InterfaceAudience.LimitedPrivate("Extensions") @InterfaceStability.Unstable -public interface S3AStore extends IOStatisticsSource { +public interface S3AStore extends IOStatisticsSource, ClientManager { /** * Acquire write capacity for operations. @@ -71,6 +76,8 @@ public interface S3AStore extends IOStatisticsSource { RequestFactory getRequestFactory(); + ClientManager clientManager(); + /** * Perform a bulk object delete operation against S3. * Increments the {@code OBJECT_DELETE_REQUESTS} and write diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index 0b01876ae504f..e82eb4c9182e1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -47,8 +47,6 @@ * implementing only the deprecated method will work. * See https://github.com/apache/hbase-filesystem * - * @deprecated This interface will be replaced by one which uses the AWS SDK V2 S3 client as part of - * upgrading S3A to SDK V2. See HADOOP-18073. */ @InterfaceAudience.LimitedPrivate("HBoss") @InterfaceStability.Evolving diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java index 7c4883c3d9967..3bee1008ce42b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java @@ -24,6 +24,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.audit.AuditStatisticNames; import org.apache.hadoop.fs.s3a.statistics.StatisticTypeEnum; +import org.apache.hadoop.fs.statistics.FileSystemStatisticNames; import org.apache.hadoop.fs.statistics.StoreStatisticNames; import org.apache.hadoop.fs.statistics.StreamStatisticNames; @@ -65,6 +66,16 @@ public enum Statistic { TYPE_DURATION), /* FileSystem Level statistics */ + + FILESYSTEM_INITIALIZATION( + FileSystemStatisticNames.FILESYSTEM_INITIALIZATION, + "Filesystem initialization", + TYPE_DURATION), + FILESYSTEM_CLOSE( + FileSystemStatisticNames.FILESYSTEM_CLOSE, + "Filesystem close", + TYPE_DURATION), + DIRECTORIES_CREATED("directories_created", "Total number of directories created through the object store.", TYPE_COUNTER), @@ -532,6 +543,11 @@ public enum Statistic { TYPE_DURATION), /* General Store operations */ + STORE_CLIENT_CREATION( + StoreStatisticNames.STORE_CLIENT_CREATION, + "Store Client Creation", + TYPE_DURATION), + STORE_EXISTS_PROBE(StoreStatisticNames.STORE_EXISTS_PROBE, "Store Existence Probe", TYPE_DURATION), diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ClientManager.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ClientManager.java new file mode 100644 index 0000000000000..84770861cc489 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ClientManager.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.Closeable; +import java.io.IOException; + +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.transfer.s3.S3TransferManager; + +/** + * Interface for on-demand/async creation of AWS clients + * and extension services. + */ +public interface ClientManager extends Closeable { + + /** + * Get the transfer manager, creating it and any dependencies if needed. + * @return a transfer manager + * @throws IOException on any failure to create the manager + */ + S3TransferManager getOrCreateTransferManager() + throws IOException; + + S3Client getOrCreateS3Client() throws IOException; + + S3AsyncClient getOrCreateAsyncClient() throws IOException; + + /** + * Close operation is required to not raise exceptions. + */ + void close(); +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ClientManagerImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ClientManagerImpl.java new file mode 100644 index 0000000000000..ff6748e66d1d2 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ClientManagerImpl.java @@ -0,0 +1,238 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
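In caller terms, a hedged sketch of how a ClientManager is used: each client is built on demand, and the manager is closed exactly once:

    // Each getOrCreate* call builds its client only on first use;
    // close() is required not to throw, so late failures are logged.
    S3Client s3 = clientManager.getOrCreateS3Client();
    S3AsyncClient async = clientManager.getOrCreateAsyncClient();
    S3TransferManager transfers = clientManager.getOrCreateTransferManager();
    // ... use the clients ...
    clientManager.close();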
+ */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.transfer.s3.S3TransferManager; + +import org.apache.hadoop.fs.s3a.S3ClientFactory; +import org.apache.hadoop.fs.statistics.DurationTrackerFactory; +import org.apache.hadoop.util.functional.CallableRaisingIOE; +import org.apache.hadoop.util.functional.LazyAutoCloseableReference; + +import static java.util.Objects.requireNonNull; +import static java.util.concurrent.CompletableFuture.completedFuture; +import static java.util.concurrent.CompletableFuture.supplyAsync; +import static org.apache.hadoop.fs.s3a.Statistic.STORE_CLIENT_CREATION; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfOperation; +import static org.apache.hadoop.util.Preconditions.checkState; +import static org.apache.hadoop.util.functional.FutureIO.awaitAllFutures; + +/** + * Client manager for on-demand creation of S3 clients, + * with parallelized close of them in {@link #close()}. + * Updates {@link org.apache.hadoop.fs.s3a.Statistic#STORE_CLIENT_CREATION} + * to track count and duration of client creation. + */ +public class ClientManagerImpl implements ClientManager { + + public static final Logger LOG = LoggerFactory.getLogger(ClientManagerImpl.class); + + /** + * Client factory to invoke. + */ + private final S3ClientFactory clientFactory; + + /** + * Closed flag. + */ + private final AtomicBoolean closed = new AtomicBoolean(false); + + /** + * Parameters to create sync/async clients. + */ + private final S3ClientFactory.S3ClientCreationParameters clientCreationParameters; + + /** + * Duration tracker factory for creation. + */ + private final DurationTrackerFactory durationTrackerFactory; + + /** + * Core S3 client. + */ + private final LazyAutoCloseableReference s3Client; + + /** Async client is used for transfer manager. */ + private final LazyAutoCloseableReference s3AsyncClient; + + /** Transfer manager. */ + private final LazyAutoCloseableReference transferManager; + + /** + * Constructor. + * This does not create any clients. + * @param clientFactory client factory to invoke + * @param clientCreationParameters creation parameters. + * @param durationTrackerFactory duration tracker. + */ + public ClientManagerImpl( + final S3ClientFactory clientFactory, + final S3ClientFactory.S3ClientCreationParameters clientCreationParameters, + final DurationTrackerFactory durationTrackerFactory) { + this.clientFactory = requireNonNull(clientFactory); + this.clientCreationParameters = requireNonNull(clientCreationParameters); + this.durationTrackerFactory = requireNonNull(durationTrackerFactory); + this.s3Client = new LazyAutoCloseableReference<>(createS3Client()); + this.s3AsyncClient = new LazyAutoCloseableReference<>(createAyncClient()); + this.transferManager = new LazyAutoCloseableReference<>(createTransferManager()); + } + + /** + * Create the function to create the S3 client. + * @return a callable which will create the client. 
+ */ + private CallableRaisingIOE createS3Client() { + return trackDurationOfOperation( + durationTrackerFactory, + STORE_CLIENT_CREATION.getSymbol(), + () -> clientFactory.createS3Client(getUri(), clientCreationParameters)); + } + + /** + * Create the function to create the S3 Async client. + * @return a callable which will create the client. + */ + private CallableRaisingIOE createAyncClient() { + return trackDurationOfOperation( + durationTrackerFactory, + STORE_CLIENT_CREATION.getSymbol(), + () -> clientFactory.createS3AsyncClient(getUri(), clientCreationParameters)); + } + + /** + * Create the function to create the Transfer Manager. + * @return a callable which will create the component. + */ + private CallableRaisingIOE createTransferManager() { + return () -> { + final S3AsyncClient asyncClient = s3AsyncClient.eval(); + return trackDuration(durationTrackerFactory, + STORE_CLIENT_CREATION.getSymbol(), () -> + clientFactory.createS3TransferManager(asyncClient)); + }; + } + + @Override + public synchronized S3Client getOrCreateS3Client() throws IOException { + checkNotClosed(); + return s3Client.eval(); + } + + @Override + public synchronized S3AsyncClient getOrCreateAsyncClient() throws IOException { + checkNotClosed(); + return s3AsyncClient.eval(); + } + + @Override + public synchronized S3TransferManager getOrCreateTransferManager() throws IOException { + checkNotClosed(); + return transferManager.eval(); + } + + /** + * Check that the client manager is not closed. + * @throws IllegalStateException if it is closed. + */ + private void checkNotClosed() { + checkState(!closed.get(), "Client manager is closed"); + } + + /** + * Close() is synchronized to avoid race conditions between + * slow client creation and this close operation. + *

+ * The objects are all deleted in parallel + */ + @Override + public synchronized void close() { + if (closed.getAndSet(true)) { + // re-entrant close. + return; + } + // queue the closures. + List> l = new ArrayList<>(); + l.add(closeAsync(transferManager)); + l.add(closeAsync(s3AsyncClient)); + l.add(closeAsync(s3Client)); + + // once all are queued, await their completion + // and swallow any exception. + try { + awaitAllFutures(l); + } catch (Exception e) { + // should never happen. + LOG.warn("Exception in close", e); + } + } + + /** + * Get the URI of the filesystem. + * @return URI to use when creating clients. + */ + public URI getUri() { + return clientCreationParameters.getPathUri(); + } + + /** + * Queue closing a closeable, logging any exception, and returning null + * to use in when awaiting a result. + * @param reference closeable. + * @param type of closeable + * @return null + */ + private CompletableFuture closeAsync( + LazyAutoCloseableReference reference) { + if (!reference.isSet()) { + // no-op + return completedFuture(null); + } + return supplyAsync(() -> { + try { + reference.close(); + } catch (Exception e) { + LOG.warn("Failed to close {}", reference, e); + } + return null; + }); + } + + @Override + public String toString() { + return "ClientManagerImpl{" + + "closed=" + closed.get() + + ", s3Client=" + s3Client + + ", s3AsyncClient=" + s3AsyncClient + + ", transferManager=" + transferManager + + '}'; + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreBuilder.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreBuilder.java index c1a6fcffab487..ca629f16be992 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreBuilder.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreBuilder.java @@ -18,8 +18,6 @@ package org.apache.hadoop.fs.s3a.impl; -import software.amazon.awssdk.services.s3.S3Client; - import org.apache.hadoop.fs.s3a.S3AInstrumentation; import org.apache.hadoop.fs.s3a.S3AStorageStatistics; import org.apache.hadoop.fs.s3a.S3AStore; @@ -36,7 +34,7 @@ public class S3AStoreBuilder { private StoreContextFactory storeContextFactory; - private S3Client s3Client; + private ClientManager clientManager; private DurationTrackerFactory durationTrackerFactory; @@ -58,9 +56,9 @@ public S3AStoreBuilder withStoreContextFactory( return this; } - public S3AStoreBuilder withS3Client( - final S3Client s3ClientValue) { - this.s3Client = s3ClientValue; + public S3AStoreBuilder withClientManager( + final ClientManager manager) { + this.clientManager = manager; return this; } @@ -107,7 +105,14 @@ public S3AStoreBuilder withAuditSpanSource( } public S3AStore build() { - return new S3AStoreImpl(storeContextFactory, s3Client, durationTrackerFactory, instrumentation, - statisticsContext, storageStatistics, readRateLimiter, writeRateLimiter, auditSpanSource); + return new S3AStoreImpl(storeContextFactory, + clientManager, + durationTrackerFactory, + instrumentation, + statisticsContext, + storageStatistics, + readRateLimiter, + writeRateLimiter, + auditSpanSource); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreImpl.java index 6bfe42767d8b1..d0871e7af2388 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreImpl.java +++ 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AStoreImpl.java @@ -38,6 +38,7 @@ import software.amazon.awssdk.services.s3.model.DeleteObjectsResponse; import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import software.amazon.awssdk.services.s3.model.S3Error; +import software.amazon.awssdk.transfer.s3.S3TransferManager; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.Retries; @@ -57,10 +58,16 @@ import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.S3AUtils.isThrottleException; -import static org.apache.hadoop.fs.s3a.Statistic.*; +import static org.apache.hadoop.fs.s3a.Statistic.IGNORED_ERRORS; +import static org.apache.hadoop.fs.s3a.Statistic.OBJECT_BULK_DELETE_REQUEST; +import static org.apache.hadoop.fs.s3a.Statistic.OBJECT_DELETE_OBJECTS; +import static org.apache.hadoop.fs.s3a.Statistic.OBJECT_DELETE_REQUEST; +import static org.apache.hadoop.fs.s3a.Statistic.STORE_IO_RATE_LIMITED; +import static org.apache.hadoop.fs.s3a.Statistic.STORE_IO_RETRY; +import static org.apache.hadoop.fs.s3a.Statistic.STORE_IO_THROTTLED; +import static org.apache.hadoop.fs.s3a.Statistic.STORE_IO_THROTTLE_RATE; import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isObjectNotFound; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DELETE_CONSIDERED_IDEMPOTENT; -import static org.apache.hadoop.fs.statistics.StoreStatisticNames.STORE_IO_RATE_LIMITED_DURATION; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfOperation; import static org.apache.hadoop.util.Preconditions.checkArgument; @@ -76,8 +83,8 @@ public class S3AStoreImpl implements S3AStore { /** Factory to create store contexts. */ private final StoreContextFactory storeContextFactory; - /** The S3 client used to communicate with S3 bucket. */ - private final S3Client s3Client; + /** Source of the S3 clients. */ + private final ClientManager clientManager; /** The S3 bucket to communicate with. */ private final String bucket; @@ -85,9 +92,6 @@ public class S3AStoreImpl implements S3AStore { /** Request factory for creating requests. */ private final RequestFactory requestFactory; - /** Async client is used for transfer manager. */ - private S3AsyncClient s3AsyncClient; - /** Duration tracker factory. */ private final DurationTrackerFactory durationTrackerFactory; @@ -117,7 +121,7 @@ public class S3AStoreImpl implements S3AStore { /** Constructor to create S3A store. */ S3AStoreImpl(StoreContextFactory storeContextFactory, - S3Client s3Client, + ClientManager clientManager, DurationTrackerFactory durationTrackerFactory, S3AInstrumentation instrumentation, S3AStatisticsContext statisticsContext, @@ -126,7 +130,7 @@ public class S3AStoreImpl implements S3AStore { RateLimiting writeRateLimiter, AuditSpanSource auditSpanSource) { this.storeContextFactory = requireNonNull(storeContextFactory); - this.s3Client = requireNonNull(s3Client); + this.clientManager = requireNonNull(clientManager); this.durationTrackerFactory = requireNonNull(durationTrackerFactory); this.instrumentation = requireNonNull(instrumentation); this.statisticsContext = requireNonNull(statisticsContext); @@ -140,6 +144,11 @@ public class S3AStoreImpl implements S3AStore { this.requestFactory = storeContext.getRequestFactory(); } + @Override + public void close() { + clientManager.close(); + } + /** Acquire write capacity for rate limiting {@inheritDoc}. 
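For context, a hypothetical caller-side sketch of the store's bulk delete entry point; the request-factory builder name is an assumption borrowed from the existing S3A request factory, not something this hunk defines:

    // Build a bulk delete request for a page of keys and hand it to the
    // store, which applies rate limiting, statistics and retries.
    List<ObjectIdentifier> keys = new ArrayList<>();
    for (String k : keysToDelete) {                      // hypothetical input
      keys.add(ObjectIdentifier.builder().key(k).build());
    }
    DeleteObjectsRequest request = store.getRequestFactory()
        .newBulkDeleteRequestBuilder(keys)               // assumed builder method
        .build();
    Map.Entry<Duration, DeleteObjectsResponse> outcome = store.deleteObjects(request);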
*/ @Override public Duration acquireWriteCapacity(final int capacity) { @@ -166,8 +175,28 @@ public StoreContext getStoreContext() { return storeContext; } - private S3Client getS3Client() { - return s3Client; + /** + * Get the S3 client. + * @return the S3 client. + * @throws IOException on any failure to create the client. + */ + private S3Client getS3Client() throws IOException { + return clientManager.getOrCreateS3Client(); + } + + @Override + public S3TransferManager getOrCreateTransferManager() throws IOException { + return clientManager.getOrCreateTransferManager(); + } + + @Override + public S3Client getOrCreateS3Client() throws IOException { + return clientManager.getOrCreateS3Client(); + } + + @Override + public S3AsyncClient getOrCreateAsyncClient() throws IOException { + return clientManager.getOrCreateAsyncClient(); } @Override @@ -193,6 +222,15 @@ public RequestFactory getRequestFactory() { return requestFactory; } + /** + * Get the client manager. + * @return the client manager. + */ + @Override + public ClientManager clientManager() { + return clientManager; + } + /** * Increment a statistic by 1. * This increments both the instrumentation and storage statistics. @@ -298,7 +336,7 @@ private void blockRootDelete(String key) throws IllegalArgumentException { @Override @Retries.RetryRaw public Map.Entry deleteObjects( - final DeleteObjectsRequest deleteRequest) + final DeleteObjectsRequest deleteRequest) throws SdkException { DeleteObjectsResponse response; @@ -318,22 +356,23 @@ public Map.Entry deleteObjects( try (DurationInfo d = new DurationInfo(LOG, false, "DELETE %d keys", keyCount)) { response = - invoker.retryUntranslated("delete", - DELETE_CONSIDERED_IDEMPOTENT, (text, e, r, i) -> { - // handle the failure - retryHandler.bulkDeleteRetried(deleteRequest, e); - }, - // duration is tracked in the bulk delete counters - trackDurationOfOperation(getDurationTrackerFactory(), - OBJECT_BULK_DELETE_REQUEST.getSymbol(), () -> { - // acquire the write capacity for the number of keys to delete and record the duration. - Duration durationToAcquireWriteCapacity = acquireWriteCapacity(keyCount); - instrumentation.recordDuration(STORE_IO_RATE_LIMITED, - true, - durationToAcquireWriteCapacity); - incrementStatistic(OBJECT_DELETE_OBJECTS, keyCount); - return s3Client.deleteObjects(deleteRequest); - })); + invoker.retryUntranslated("delete", + DELETE_CONSIDERED_IDEMPOTENT, (text, e, r, i) -> { + // handle the failure + retryHandler.bulkDeleteRetried(deleteRequest, e); + }, + // duration is tracked in the bulk delete counters + trackDurationOfOperation(getDurationTrackerFactory(), + OBJECT_BULK_DELETE_REQUEST.getSymbol(), () -> { + // acquire the write capacity for the number of keys to delete + // and record the duration. + Duration durationToAcquireWriteCapacity = acquireWriteCapacity(keyCount); + instrumentation.recordDuration(STORE_IO_RATE_LIMITED, + true, + durationToAcquireWriteCapacity); + incrementStatistic(OBJECT_DELETE_OBJECTS, keyCount); + return getS3Client().deleteObjects(deleteRequest); + })); if (!response.errors().isEmpty()) { // one or more of the keys could not be deleted. 
// log and then throw @@ -361,25 +400,25 @@ public Map.Entry deleteObjects( @Override @Retries.RetryRaw public Map.Entry> deleteObject( - final DeleteObjectRequest request) - throws SdkException { + final DeleteObjectRequest request) + throws SdkException { String key = request.key(); blockRootDelete(key); DurationInfo d = new DurationInfo(LOG, false, "deleting %s", key); try { DeleteObjectResponse response = - invoker.retryUntranslated(String.format("Delete %s:/%s", bucket, key), - DELETE_CONSIDERED_IDEMPOTENT, - trackDurationOfOperation(getDurationTrackerFactory(), - OBJECT_DELETE_REQUEST.getSymbol(), () -> { - incrementStatistic(OBJECT_DELETE_OBJECTS); - // We try to acquire write capacity just before delete call. - Duration durationToAcquireWriteCapacity = acquireWriteCapacity(1); - instrumentation.recordDuration(STORE_IO_RATE_LIMITED, - true, durationToAcquireWriteCapacity); - return s3Client.deleteObject(request); - })); + invoker.retryUntranslated(String.format("Delete %s:/%s", bucket, key), + DELETE_CONSIDERED_IDEMPOTENT, + trackDurationOfOperation(getDurationTrackerFactory(), + OBJECT_DELETE_REQUEST.getSymbol(), () -> { + incrementStatistic(OBJECT_DELETE_OBJECTS); + // We try to acquire write capacity just before delete call. + Duration durationToAcquireWriteCapacity = acquireWriteCapacity(1); + instrumentation.recordDuration(STORE_IO_RATE_LIMITED, + true, durationToAcquireWriteCapacity); + return getS3Client().deleteObject(request); + })); d.close(); return Tuples.pair(d.asDuration(), Optional.of(response)); } catch (AwsServiceException ase) { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java index bf3fd27701ab8..fb797038601ab 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java @@ -40,6 +40,7 @@ import org.apache.hadoop.fs.s3a.audit.AuditTestSupport; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; import org.apache.hadoop.fs.s3a.commit.staging.StagingTestBase; +import org.apache.hadoop.fs.s3a.impl.ClientManager; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; import org.apache.hadoop.fs.s3a.impl.RequestFactoryImpl; import org.apache.hadoop.fs.s3a.impl.StoreContext; @@ -119,6 +120,12 @@ public MockS3AFileSystem(S3AFileSystem mock, private static void prepareRequest(SdkRequest.Builder t) {} + @Override + protected S3AStore createS3AStore(final ClientManager clientManager, + final int rateLimitCapacity) { + return super.createS3AStore(clientManager, rateLimitCapacity); + } + @Override public RequestFactory getRequestFactory() { return REQUEST_FACTORY; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java index 9fba584fbdccb..3e13f0988e0cc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java @@ -146,11 +146,18 @@ protected static S3AFileSystem createAndBindMockFSInstance(Configuration conf, return mockFs; } - private static S3AFileSystem mockS3AFileSystemRobustly(S3Client mockS3Client) { + private static S3AFileSystem mockS3AFileSystemRobustly(S3Client mockS3Client) 
throws IOException { + S3AFileSystem mockFS = mock(S3AFileSystem.class); + S3AStore store = mock(S3AStore.class); + when(store.getOrCreateS3Client()) + .thenReturn(mockS3Client); + S3AInternals s3AInternals = mock(S3AInternals.class); + when(mockFS.getS3AInternals()).thenReturn(s3AInternals); - when(s3AInternals.getStore()).thenReturn(mock(S3AStore.class)); + + when(s3AInternals.getStore()).thenReturn(store); when(s3AInternals.getAmazonS3Client(anyString())) .thenReturn(mockS3Client); doNothing().when(mockFS).incrementReadOperations(); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestClientManager.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestClientManager.java new file mode 100644 index 0000000000000..857df58f42bb1 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestClientManager.java @@ -0,0 +1,379 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.UncheckedIOException; +import java.net.URI; +import java.net.UnknownHostException; +import java.time.Duration; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Semaphore; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.transfer.s3.S3TransferManager; + +import org.apache.hadoop.fs.s3a.S3ClientFactory; +import org.apache.hadoop.fs.s3a.test.StubS3ClientFactory; +import org.apache.hadoop.fs.statistics.impl.StubDurationTrackerFactory; +import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.apache.hadoop.util.functional.InvocationRaisingIOE; +import org.apache.hadoop.util.functional.LazyAtomicReference; + +import static java.util.concurrent.CompletableFuture.supplyAsync; +import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.util.functional.FunctionalIO.toUncheckedIOExceptionSupplier; +import static org.mockito.Mockito.mock; + +/** + * Test the client manager. + *

+ * The tests with "Parallel" in the title generate delays in the second thread + * so stress the concurrency logic. + */ +public class TestClientManager extends AbstractHadoopTestBase { + + private static final Logger LOG = LoggerFactory.getLogger(TestClientManager.class); + + /** + * Factory delay for the multithreaded operations. + */ + private static final Duration FACTORY_DELAY = Duration.ofSeconds(5); + + /** + * How long for the second thread to sleep before it tries to get() + * the client. + */ + private static final Duration SECOND_THREAD_DELAY = Duration.ofSeconds(2); + + /** + * Format of exceptions raised. + */ + private static final String GENERATED = "generated[%d]"; + + private S3Client s3Client; + + private S3AsyncClient asyncClient; + + private S3TransferManager transferManager; + + private URI uri; + + @Before + public void setUp() throws Exception { + asyncClient = mock(S3AsyncClient.class); + transferManager = mock(S3TransferManager.class); + s3Client = mock(S3Client.class); + uri = new URI("https://bucket/"); + } + + /** + * Create a stub client factory where there is a specific delay. + * @param delay delay when creating a client. + * @return the factory + */ + private StubS3ClientFactory factory(final Duration delay) { + return factory(() -> sleep(delay)); + } + + /** + * Create a stub client factory, where the invocation is called before + * the operation is executed. + * @param invocationRaisingIOE invocation to call before returning a client. + * @return the factory + */ + private StubS3ClientFactory factory(final InvocationRaisingIOE invocationRaisingIOE) { + return new StubS3ClientFactory(s3Client, asyncClient, transferManager, + invocationRaisingIOE); + } + + /** + * Create a manager instance using the given factory. + * @param factory factory for clients. + * @return a client manager + */ + private ClientManager manager(final StubS3ClientFactory factory) { + return new ClientManagerImpl( + factory, + new S3ClientFactory.S3ClientCreationParameters() + .withPathUri(uri), + StubDurationTrackerFactory.STUB_DURATION_TRACKER_FACTORY); + } + + /** + * Create a single s3 client. + */ + @Test + public void testCreateS3Client() throws Throwable { + + final StubS3ClientFactory factory = factory(Duration.ZERO); + final ClientManager manager = manager(factory); + + Assertions.assertThat(manager.getOrCreateS3Client()) + .describedAs("manager %s", manager) + .isSameAs(s3Client); + Assertions.assertThat(factory.clientCreationCount()) + .describedAs("client creation count") + .isEqualTo(1); + + // second attempt returns same instance + Assertions.assertThat(manager.getOrCreateS3Client()) + .describedAs("manager %s", manager) + .isSameAs(s3Client); + + // and the factory counter is not incremented. + Assertions.assertThat(factory.clientCreationCount()) + .describedAs("client creation count") + .isEqualTo(1); + + // now close + manager.close(); + + // and expect a failure + intercept(IllegalStateException.class, manager::getOrCreateS3Client); + } + + /** + * Sleep for a given period; interrupts are swallowed. + * @param delay delay + */ + private static void sleep(final Duration delay) { + try { + Thread.sleep(delay.toMillis()); + } catch (InterruptedException e) { + + } + } + + /** + * Get an async s3 client twice and verify it is only created once.
+ */ + @Test + public void testCreateAsyncS3Client() throws Throwable { + + final StubS3ClientFactory factory = factory(Duration.ofMillis(100)); + final ClientManager manager = manager(factory); + + Assertions.assertThat(manager.getOrCreateAsyncClient()) + .describedAs("manager %s", manager) + .isSameAs(asyncClient); + + manager.getOrCreateAsyncClient(); + // and the factory counter is not incremented. + Assertions.assertThat(factory.asyncClientCreationCount()) + .describedAs("client creation count") + .isEqualTo(1); + + // now close + manager.close(); + + // and expect a failure + intercept(IllegalStateException.class, () -> + manager.getOrCreateAsyncClient()); + } + + /** + * Create a transfer manager; this will demand create an async s3 client + * if needed. + */ + @Test + public void testCreateTransferManagerAndAsyncClient() throws Throwable { + + final StubS3ClientFactory factory = factory(Duration.ZERO); + final ClientManager manager = manager(factory); + + Assertions.assertThat(manager.getOrCreateTransferManager()) + .describedAs("manager %s", manager) + .isSameAs(transferManager); + + // and we created an async client + Assertions.assertThat(factory.asyncClientCreationCount()) + .describedAs("client creation count") + .isEqualTo(1); + Assertions.assertThat(factory.transferManagerCreationCount()) + .describedAs("client creation count") + .isEqualTo(1); + + // now close + manager.close(); + + // and expect a failure + intercept(IllegalStateException.class, manager::getOrCreateTransferManager); + } + + /** + * Create a transfer manager with the async client already created. + */ + @Test + public void testCreateTransferManagerWithAsyncClientAlreadyCreated() throws Throwable { + final StubS3ClientFactory factory = factory(Duration.ZERO); + final ClientManager manager = manager(factory); + + manager.getOrCreateAsyncClient(); + Assertions.assertThat(manager.getOrCreateTransferManager()) + .describedAs("manager %s", manager) + .isSameAs(transferManager); + + // no new async client was created. + Assertions.assertThat(factory.asyncClientCreationCount()) + .describedAs("client creation count") + .isEqualTo(1); + } + + /** + * Create clients in parallel and verify that the first one blocks + * the others. + * There's a bit of ordering complexity which uses a semaphore and a sleep + * to block one of the acquisitions until the initial operation has started. + * There's then an assertion that the first client was created in a different thread. + */ + @Test + public void testParallelClientCreation() throws Throwable { + + // semaphore + Semaphore sem = new Semaphore(1); + // reference of thread where the construction took place + AtomicReference threadRef = new AtomicReference(); + // this factory releases the semaphore on its invocation then + // sleeps + final ClientManager manager = manager(factory(() -> { + threadRef.set(Thread.currentThread()); + sem.release(); + sleep(FACTORY_DELAY); + })); + + // acquire that semaphore. + sem.acquire(1); + + // execute the first creation in a separate thread. + final CompletableFuture futureClient = + supplyAsync(toUncheckedIOExceptionSupplier(() -> { + LOG.info("creating #1 s3 client"); + final S3Client client = manager.getOrCreateS3Client(); + LOG.info("#1 s3 client created"); + return client; + })); + + // wait until the async creation has started + sem.acquire(); + sleep(SECOND_THREAD_DELAY); + // expect to block.
+ LOG.info("creating #2 s3 client"); + final S3Client client2 = manager.getOrCreateS3Client(); + LOG.info("created #2 s3 client"); + + // now assert that the #1 client has succeeded, without + // even calling futureClient.get() to evaluate the result. + Assertions.assertThat(threadRef.get()) + .describedAs("Thread in which client #1 was created") + .isNotSameAs(Thread.currentThread()); + + final S3Client orig = futureClient.get(); + Assertions.assertThat(orig) + .describedAs("second getOrCreate() call to %s", manager) + .isSameAs(client2); + } + + /** + * Parallel transfer manager creation. + * This will force creation of the async client + */ + @Test + public void testParallelTransferManagerCreation() throws Throwable { + // semaphore + Semaphore sem = new Semaphore(1); + // reference of thread where the construction took place + AtomicReference threadRef = new AtomicReference(); + // this factory releases the semaphore on its invocation, then + // sleeps + final ClientManager manager = manager(factory(() -> { + threadRef.set(Thread.currentThread()); + sem.release(); + sleep(FACTORY_DELAY); + })); + + // acquire that semaphore. + sem.acquire(1); + sleep(SECOND_THREAD_DELAY); + + // execute the first creation in a separate thread. + final CompletableFuture futureClient = + supplyAsync(toUncheckedIOExceptionSupplier(() -> { + LOG.info("creating #1 instance"); + sem.release(); + final S3TransferManager r = manager.getOrCreateTransferManager(); + LOG.info("#1 instance created"); + return r; + })); + + // wait until the async creation has started + sem.acquire(); + // expect to block. + LOG.info("creating #2 s3 client"); + final S3TransferManager client2 = manager.getOrCreateTransferManager(); + LOG.info("created #2 s3 client"); + + // now assert that the #1 client has succeeded, without + // even calling futureClient.get() to evaluate the result. + Assertions.assertThat(threadRef.get()) + .describedAs("Thread in which client #1 was created") + .isNotSameAs(Thread.currentThread()); + + futureClient.get(); + } + + /** + * Verify that if an exception is thrown during creation, the + * operation will be repeated -there's no attempt to record + * that an exception was raised on the first attempt. + */ + @Test + public void testClientCreationFailure() throws Throwable { + + // counter is incremented on every eval(), so can be used to assert + // the number of invocations. 
+ final AtomicInteger counter = new AtomicInteger(0); + + final ClientManager manager = manager(factory(() -> { + throw new UnknownHostException(String.format(GENERATED, counter.incrementAndGet())); + })); + + // first attempt fails + intercept(UnknownHostException.class, + String.format(GENERATED, 1), + manager::getOrCreateS3Client); + + // subsequent tests will also retry; the exception message changes each time, + // showing that it is regenerated rather than cached + intercept(UnknownHostException.class, "[2]", manager::getOrCreateS3Client); + intercept(UnknownHostException.class, "[3]", manager::getOrCreateAsyncClient); + intercept(UnknownHostException.class, "[4]", manager::getOrCreateTransferManager); + + manager.close(); + } + +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/StubS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/StubS3ClientFactory.java new file mode 100644 index 0000000000000..edef65b7d3051 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/test/StubS3ClientFactory.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.test; + +import java.io.IOException; +import java.net.URI; +import java.util.concurrent.atomic.AtomicInteger; + +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.transfer.s3.S3TransferManager; + +import org.apache.hadoop.fs.s3a.S3ClientFactory; +import org.apache.hadoop.util.functional.InvocationRaisingIOE; + +/** + * Stub implementation of {@link S3ClientFactory}. + * returns the preconfigured clients. + * No checks for null values are made. + *

+ * The {@link #launcher} operation is invoked before creating + * the sync and async client libraries, which is where failures, + * delays etc can be added. + * It is not used in {@link #createS3TransferManager(S3AsyncClient)} + * because that is normally a fast phase. + */ +public final class StubS3ClientFactory implements S3ClientFactory { + + /** + * The class name of this factory. + */ + public static final String STUB_FACTORY = StubS3ClientFactory.class.getName(); + + private final S3Client client; + + private final S3AsyncClient asyncClient; + + private final S3TransferManager transferManager; + + private final InvocationRaisingIOE launcher; + + private AtomicInteger clientCreationCount = new AtomicInteger(0); + + private AtomicInteger asyncClientCreationCount = new AtomicInteger(0); + + private AtomicInteger transferManagerCreationCount = new AtomicInteger(0); + + public StubS3ClientFactory( + final S3Client client, + final S3AsyncClient asyncClient, + final S3TransferManager transferManager, + final InvocationRaisingIOE launcher) { + + this.client = client; + this.asyncClient = asyncClient; + this.transferManager = transferManager; + this.launcher = launcher; + } + + @Override + public S3Client createS3Client(final URI uri, final S3ClientCreationParameters parameters) + throws IOException { + clientCreationCount.incrementAndGet(); + launcher.apply(); + return client; + } + + @Override + public S3AsyncClient createS3AsyncClient(final URI uri, + final S3ClientCreationParameters parameters) + throws IOException { + asyncClientCreationCount.incrementAndGet(); + launcher.apply(); + return asyncClient; + } + + @Override + public S3TransferManager createS3TransferManager(final S3AsyncClient s3AsyncClient) { + transferManagerCreationCount.incrementAndGet(); + return transferManager; + } + + public int clientCreationCount() { + return clientCreationCount.get(); + } + + public int asyncClientCreationCount() { + return asyncClientCreationCount.get(); + } + + public int transferManagerCreationCount() { + return transferManagerCreationCount.get(); + } + + @Override + public String toString() { + return "StubS3ClientFactory{" + + "client=" + client + + ", asyncClient=" + asyncClient + + ", transferManager=" + transferManager + + ", clientCreationCount=" + clientCreationCount.get() + + ", asyncClientCreationCount=" + asyncClientCreationCount.get() + + ", transferManagerCreationCount=" + transferManagerCreationCount.get() + + '}'; + } +} From 25e28b41cc3d4cf15dc5b793fe2b6ed7820e6c86 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Sat, 6 Jul 2024 15:43:49 +0800 Subject: [PATCH 049/113] HADOOP-19216. Upgrade Guice from 4.0 to 5.1.0 to support Java 17 (#6913). Contributed by Cheng Pan. 
Signed-off-by: Ayush Saxena --- LICENSE-binary | 4 ++-- hadoop-project/pom.xml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 42e97f4875358..0b1a79672ee72 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -229,8 +229,8 @@ com.fasterxml.woodstox:woodstox-core:5.4.0 com.github.ben-manes.caffeine:caffeine:2.9.3 com.github.davidmoten:rxjava-extras:0.8.0.17 com.github.stephenc.jcip:jcip-annotations:1.0-1 -com.google:guice:4.0 -com.google:guice-servlet:4.0 +com.google:guice:5.1.0 +com.google:guice-servlet:5.1.0 com.google.api.grpc:proto-google-common-protos:1.0.0 com.google.code.gson:2.9.0 com.google.errorprone:error_prone_annotations:2.2.0 diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index be0f58aef63e0..47636f658511b 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -109,7 +109,7 @@ 3.4.0 27.0-jre - 4.2.3 + 5.1.0 1.78.1 @@ -2512,7 +2512,7 @@ cglib:cglib:3.2.0 - com.google.inject:guice:4.0 + com.google.inject:guice:5.1.0 com.sun.jersey:jersey-core:1.19.4 com.sun.jersey:jersey-servlet:1.19.4 com.github.pjfanning:jersey-json:1.22.0 From 5a8f70a72eb012e4345d8693ee939eb2237f1dd7 Mon Sep 17 00:00:00 2001 From: huhaiyang Date: Sat, 6 Jul 2024 15:46:25 +0800 Subject: [PATCH 050/113] HDFS-17559. Fix the uuid as null in NameNodeMXBean (#6906). Contributed by Haiyang Hu. Signed-off-by: Ayush Saxena --- .../hdfs/server/namenode/FSNamesystem.java | 9 +-- .../server/namenode/TestNameNodeMXBean.java | 64 +++++++++++++++++++ 2 files changed, 69 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index edc790dbc309a..2cb29dfef8e65 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -97,6 +97,7 @@ import static org.apache.hadoop.hdfs.DFSUtil.isParentEntry; import java.nio.charset.StandardCharsets; +import java.util.Optional; import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.text.CaseUtils; @@ -6674,7 +6675,7 @@ public String getLiveNodes() { .put("infoSecureAddr", node.getInfoSecureAddr()) .put("xferaddr", node.getXferAddr()) .put("location", node.getNetworkLocation()) - .put("uuid", node.getDatanodeUuid()) + .put("uuid", Optional.ofNullable(node.getDatanodeUuid()).orElse("")) .put("lastContact", getLastContact(node)) .put("usedSpace", getDfsUsed(node)) .put("adminState", node.getAdminState().toString()) @@ -6728,7 +6729,7 @@ public String getDeadNodes() { .put("adminState", node.getAdminState().toString()) .put("xferaddr", node.getXferAddr()) .put("location", node.getNetworkLocation()) - .put("uuid", node.getDatanodeUuid()) + .put("uuid", Optional.ofNullable(node.getDatanodeUuid()).orElse("")) .build(); info.put(node.getXferAddrWithHostname(), innerinfo); } @@ -6751,7 +6752,7 @@ public String getDecomNodes() { . builder() .put("xferaddr", node.getXferAddr()) .put("location", node.getNetworkLocation()) - .put("uuid", node.getDatanodeUuid()) + .put("uuid", Optional.ofNullable(node.getDatanodeUuid()).orElse("")) .put("underReplicatedBlocks", node.getLeavingServiceStatus().getUnderReplicatedBlocks()) .put("decommissionOnlyReplicas", @@ -6782,7 +6783,7 @@ public String getEnteringMaintenanceNodes() { . 
builder() .put("xferaddr", node.getXferAddr()) .put("location", node.getNetworkLocation()) - .put("uuid", node.getDatanodeUuid()) + .put("uuid", Optional.ofNullable(node.getDatanodeUuid()).orElse("")) .put("underReplicatedBlocks", node.getLeavingServiceStatus().getUnderReplicatedBlocks()) .put("maintenanceOnlyReplicas", diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java index 89ae01ddd25d3..2f8258baa5bcd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeMXBean.java @@ -18,7 +18,11 @@ package org.apache.hadoop.hdfs.server.namenode; import com.fasterxml.jackson.databind.ObjectMapper; + +import java.util.Optional; import java.util.function.Supplier; + +import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Uninterruptibles; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; @@ -1132,6 +1136,66 @@ public void testTotalBlocksMetrics() throws Exception { } } + @SuppressWarnings({ "unchecked" }) + @Test + public void testDeadNodesInNameNodeMXBean() throws Exception { + Configuration conf = new Configuration(); + conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_HEARTBEAT_RECHECK_INTERVAL_KEY, 1); + MiniDFSCluster cluster = null; + HostsFileWriter hostsFileWriter = new HostsFileWriter(); + hostsFileWriter.initialize(conf, "temp/TestNameNodeMXBean"); + + try { + cluster = new MiniDFSCluster.Builder(conf, baseDir.getRoot()).numDataNodes(3).build(); + cluster.waitActive(); + + FSNamesystem fsn = cluster.getNameNode().namesystem; + + MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); + ObjectName mxbeanName = new ObjectName( + "Hadoop:service=NameNode,name=NameNodeInfo"); + + List hosts = new ArrayList<>(); + for(DataNode dn : cluster.getDataNodes()) { + hosts.add(dn.getDisplayName()); + } + + DatanodeDescriptor mockNode = new DatanodeDescriptor( + new DatanodeID("127.0.0.2", "127.0.0.2", "", + 5000, 5001, 5002, 5003)); + + assertEquals("", Optional.ofNullable(mockNode.getDatanodeUuid()).orElse("")); + hosts.add(mockNode.getXferAddrWithHostname()); + hostsFileWriter.initIncludeHosts(hosts.toArray( + new String[hosts.size()])); + fsn.getBlockManager().getDatanodeManager().refreshNodes(conf); + DatanodeManager dm = cluster.getNameNode().getNamesystem(). 
+ getBlockManager().getDatanodeManager(); + LOG.info("Get all include nodes: {}", dm.getHostConfigManager().getIncludes()); + + // get attribute DeadNodes + String deadNodeInfo = (String) (mbs.getAttribute(mxbeanName, + "DeadNodes")); + assertEquals(fsn.getDeadNodes(), deadNodeInfo); + LOG.info("Get deadNode info: {}", deadNodeInfo); + Map> deadNodes = + (Map>) JSON.parse(deadNodeInfo); + assertEquals(1, deadNodes.size()); + for (Map deadNode : deadNodes.values()) { + assertTrue(deadNode.containsKey("lastContact")); + assertTrue(deadNode.containsKey("adminState")); + assertTrue(deadNode.containsKey("xferaddr")); + assertEquals("", deadNode.get("uuid")); + } + } finally { + if (cluster != null) { + cluster.shutdown(); + } + hostsFileWriter.cleanup(); + } + } + void verifyTotalBlocksMetrics(long expectedTotalReplicatedBlocks, long expectedTotalECBlockGroups, long actualTotalBlocks) throws Exception { From 8ca4627a0da91f61a8015589a886ecbe9c949de5 Mon Sep 17 00:00:00 2001 From: huhaiyang Date: Sat, 6 Jul 2024 15:48:12 +0800 Subject: [PATCH 051/113] HDFS-17557. Fix bug for TestRedundancyMonitor#testChooseTargetWhenAllDataNodesStop (#6897). Contributed by Haiyang Hu. Signed-off-by: Ayush Saxena --- .../hdfs/server/blockmanagement/TestRedundancyMonitor.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRedundancyMonitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRedundancyMonitor.java index 0667e2611b4f2..cfe7f41b6af82 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRedundancyMonitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestRedundancyMonitor.java @@ -72,7 +72,7 @@ public void testChooseTargetWhenAllDataNodesStop() throws Throwable { NetworkTopology clusterMap = replicator.clusterMap; NetworkTopology spyClusterMap = spy(clusterMap); replicator.clusterMap = spyClusterMap; - doAnswer(delayer).when(spyClusterMap).getNumOfRacks(); + doAnswer(delayer).when(spyClusterMap).getNumOfNonEmptyRacks(); ExecutorService pool = Executors.newFixedThreadPool(2); From b1d96f6101c624a9eeea34337f69fb80ea45c801 Mon Sep 17 00:00:00 2001 From: HarshitGupta11 <50410275+HarshitGupta11@users.noreply.github.com> Date: Mon, 8 Jul 2024 14:48:53 +0530 Subject: [PATCH 052/113] HADOOP-19195. S3A: Upgrade aws sdk v2 to 2.25.53 (#6900) Contributed by Harshit Gupta --- LICENSE-binary | 2 +- hadoop-project/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 0b1a79672ee72..ff8012096a437 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -362,7 +362,7 @@ org.objenesis:objenesis:2.6 org.xerial.snappy:snappy-java:1.1.10.4 org.yaml:snakeyaml:2.0 org.wildfly.openssl:wildfly-openssl:1.1.3.Final -software.amazon.awssdk:bundle:jar:2.24.6 +software.amazon.awssdk:bundle:jar:2.25.53 -------------------------------------------------------------------------------- diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 47636f658511b..5129f5d99ca0a 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -188,7 +188,7 @@ 1.0-beta-1 900 1.12.720 - 2.24.6 + 2.25.53 1.0.1 2.7.1 1.11.2 From 783a852029264d0556acfaa8e606dd06ca6e3c57 Mon Sep 17 00:00:00 2001 From: "gavin.wang" Date: Tue, 9 Jul 2024 16:22:15 +0800 Subject: [PATCH 053/113] HDFS-17555. 
Fix NumberFormatException of NNThroughputBenchmark when configured dfs.blocksize. (#6894). Contributed by wangzhongwei Reviewed-by: He Xiaoqiao Signed-off-by: Ayush Saxena --- .../src/site/markdown/Benchmarking.md | 1 + .../namenode/NNThroughputBenchmark.java | 10 ++--- .../namenode/TestNNThroughputBenchmark.java | 39 +++++++++++++++++++ 3 files changed, 45 insertions(+), 5 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Benchmarking.md b/hadoop-common-project/hadoop-common/src/site/markdown/Benchmarking.md index bac80dc856906..f675919331bf9 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Benchmarking.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Benchmarking.md @@ -88,6 +88,7 @@ When running benchmarks with the above operation(s), please provide operation-sp |`-totalBlocks` | Number of total blocks to operate. | |`-replication` | Replication factor. Will be adjusted to number of data-nodes if it is larger than that. | |`-baseDirName` | The base dir name for benchmarks, to support multiple clients submitting benchmark tests at the same time. | +|`-blockSize` | The block size for new files. | ### Reports diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java index 031a744f29fd9..f96b33ba2fb1c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java @@ -168,7 +168,7 @@ abstract class OperationStatsBase { private String baseDir; protected short replication; - protected int blockSize; + protected long blockSize; protected int numThreads = 0; // number of threads protected int numOpsRequired = 0; // number of operations requested protected int numOpsExecuted = 0; // number of operations executed @@ -231,7 +231,7 @@ abstract class OperationStatsBase { OperationStatsBase() { baseDir = baseDirName + "/" + getOpName(); replication = (short) config.getInt(DFSConfigKeys.DFS_REPLICATION_KEY, 3); - blockSize = config.getInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + blockSize = config.getLongBytes(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); numOpsRequired = 10; numThreads = 3; logLevel = Level.ERROR; @@ -587,7 +587,7 @@ void parseArguments(List args) { numOpsRequired = Integer.parseInt(args.get(++i)); } else if (args.get(i).equals("-blockSize")) { if(i+1 == args.size()) printUsage(); - blockSize = Integer.parseInt(args.get(++i)); + blockSize = Long.parseLong(args.get(++i)); } else if(args.get(i).equals("-threads")) { if(i+1 == args.size()) printUsage(); numThreads = Integer.parseInt(args.get(++i)); @@ -1260,7 +1260,7 @@ void parseArguments(List args) { blocksPerFile = Integer.parseInt(args.get(++i)); } else if (args.get(i).equals("-blockSize")) { if(i+1 == args.size()) printUsage(); - blockSize = Integer.parseInt(args.get(++i)); + blockSize = Long.parseLong(args.get(++i)); } else if(args.get(i).equals("-baseDirName")) { if (i + 1 == args.size()) { printUsage(); @@ -1498,7 +1498,7 @@ void parseArguments(List args) { replication = Short.parseShort(args.get(++i)); } else if (args.get(i).equals("-blockSize")) { if(i+1 == args.size()) printUsage(); - blockSize = Integer.parseInt(args.get(++i)); + blockSize = Long.parseLong(args.get(++i)); } else 
if(args.get(i).equals("-baseDirName")) { if (i + 1 == args.size()) { printUsage(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNThroughputBenchmark.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNThroughputBenchmark.java index bd19dc5cf563a..651db0c910a1a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNThroughputBenchmark.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNThroughputBenchmark.java @@ -246,4 +246,43 @@ public void testNNThroughputWithBaseDir() throws Exception { } } } + + /** + * This test runs {@link NNThroughputBenchmark} against a mini DFS cluster + * for blockSize with letter suffix. + */ + @Test(timeout = 120000) + public void testNNThroughputForBlockSizeWithLetterSuffix() throws Exception { + final Configuration conf = new HdfsConfiguration(); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 16); + conf.set(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, "1m"); + try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build()) { + cluster.waitActive(); + final Configuration benchConf = new HdfsConfiguration(); + benchConf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 16); + benchConf.set(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, "1m"); + FileSystem.setDefaultUri(benchConf, cluster.getURI()); + NNThroughputBenchmark.runBenchmark(benchConf, + new String[]{"-op", "create", "-keepResults", "-files", "3", "-close"}); + } + } + + /** + * This test runs {@link NNThroughputBenchmark} against a mini DFS cluster + * with explicit -blockSize option. + */ + @Test(timeout = 120000) + public void testNNThroughputWithBlockSize() throws Exception { + final Configuration conf = new HdfsConfiguration(); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 16); + try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build()) { + cluster.waitActive(); + final Configuration benchConf = new HdfsConfiguration(); + benchConf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 16); + FileSystem.setDefaultUri(benchConf, cluster.getURI()); + NNThroughputBenchmark.runBenchmark(benchConf, + new String[]{"-op", "create", "-keepResults", "-files", "3", + "-blockSize", "32", "-close"}); + } + } } From f211af30be95e195ba45b06aa9f4c68e6ff8e2d0 Mon Sep 17 00:00:00 2001 From: Yuanbo Liu Date: Tue, 9 Jul 2024 16:31:58 +0800 Subject: [PATCH 054/113] HDFS-17534. RBF: Support leader follower mode for multiple subclusters (#6861). Contributed by Yuanbo Liu. 
Reviewed-by: Inigo Goiri Signed-off-by: Ayush Saxena --- ...MultipleDestinationMountTableResolver.java | 5 +- .../resolver/order/DestinationOrder.java | 7 +- .../order/LeaderFollowerResolver.java | 50 ++++++++++++++ .../records/impl/pb/MountTablePBImpl.java | 4 ++ .../tools/federation/AddMountAttributes.java | 1 + .../hdfs/tools/federation/RouterAdmin.java | 10 +-- .../src/main/proto/FederationProtocol.proto | 1 + .../src/site/markdown/HDFSRouterFederation.md | 4 ++ .../TestMultipleDestinationResolver.java | 17 +++++ .../order/TestLeaderFollowerResolver.java | 66 +++++++++++++++++++ .../federation/router/TestRouterAdminCLI.java | 37 +++++++++-- 11 files changed, 188 insertions(+), 14 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/LeaderFollowerResolver.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/order/TestLeaderFollowerResolver.java diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MultipleDestinationMountTableResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MultipleDestinationMountTableResolver.java index 2cfc7cf39f8a5..6d1acf2879417 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MultipleDestinationMountTableResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/MultipleDestinationMountTableResolver.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hdfs.server.federation.resolver.order.DestinationOrder; import org.apache.hadoop.hdfs.server.federation.resolver.order.HashFirstResolver; import org.apache.hadoop.hdfs.server.federation.resolver.order.HashResolver; +import org.apache.hadoop.hdfs.server.federation.resolver.order.LeaderFollowerResolver; import org.apache.hadoop.hdfs.server.federation.resolver.order.LocalResolver; import org.apache.hadoop.hdfs.server.federation.resolver.order.OrderedResolver; import org.apache.hadoop.hdfs.server.federation.resolver.order.RandomResolver; @@ -78,8 +79,8 @@ public MultipleDestinationMountTableResolver( addResolver(DestinationOrder.LOCAL, new LocalResolver(conf, router)); addResolver(DestinationOrder.RANDOM, new RandomResolver()); addResolver(DestinationOrder.HASH_ALL, new HashResolver()); - addResolver(DestinationOrder.SPACE, - new AvailableSpaceResolver(conf, router)); + addResolver(DestinationOrder.SPACE, new AvailableSpaceResolver(conf, router)); + addResolver(DestinationOrder.LEADER_FOLLOWER, new LeaderFollowerResolver()); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/DestinationOrder.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/DestinationOrder.java index 6a637d5e46fc1..0156d3d4381e9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/DestinationOrder.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/DestinationOrder.java @@ -29,11 +29,14 @@ public enum DestinationOrder { LOCAL, // Local first RANDOM, // Random order HASH_ALL, // Follow consistent hashing - SPACE; // Available space based order + SPACE, // Available space based order + 
LEADER_FOLLOWER; // Try the leader sub-cluster first; if it fails, try followers /** Approaches that write folders in all subclusters. */ public static final EnumSet FOLDER_ALL = EnumSet.of( HASH_ALL, RANDOM, - SPACE); + SPACE, + // leader-follower mode should make sure all directories exist in case of switching + LEADER_FOLLOWER); } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/LeaderFollowerResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/LeaderFollowerResolver.java new file mode 100644 index 0000000000000..3b1ee1cc03018 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/resolver/order/LeaderFollowerResolver.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.federation.resolver.order; + +import org.apache.hadoop.hdfs.server.federation.resolver.PathLocation; +import org.apache.hadoop.hdfs.server.federation.resolver.RemoteLocation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * LEADER_FOLLOWER can be used in cross-cluster disaster tolerance, + * and the order of namespaces is always "leader,follower,follower...". + * Write data to the leader sub-cluster as much as possible. If the leader + * sub-cluster fails, try the followers; the same goes for reading data. + */ +public class LeaderFollowerResolver implements OrderedResolver { + protected static final Logger LOG = + LoggerFactory.getLogger(LeaderFollowerResolver.class); + + @Override + public String getFirstNamespace(String path, PathLocation loc) { + try { + // Always return first destination. + // In leader/follower mode, admins add sub-clusters + // in the order of leader,follower,follower... + // The first element is always the leader sub-cluster, + // so invoking getDefaultLocation is suitable here.
+ RemoteLocation remoteLocation = loc.getDefaultLocation(); + return remoteLocation.getNameserviceId(); + } catch (Exception ex) { + LOG.error("Cannot find sub-cluster for {}", loc); + return null; + } + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MountTablePBImpl.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MountTablePBImpl.java index 584c23a817a86..d709b3bfa5393 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MountTablePBImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/store/records/impl/pb/MountTablePBImpl.java @@ -343,6 +343,8 @@ private DestinationOrder convert(DestOrder order) { return DestinationOrder.HASH_ALL; case SPACE: return DestinationOrder.SPACE; + case LEADER_FOLLOWER: + return DestinationOrder.LEADER_FOLLOWER; default: return DestinationOrder.HASH; } @@ -358,6 +360,8 @@ private DestOrder convert(DestinationOrder order) { return DestOrder.HASH_ALL; case SPACE: return DestOrder.SPACE; + case LEADER_FOLLOWER: + return DestOrder.LEADER_FOLLOWER; default: return DestOrder.HASH; } diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/AddMountAttributes.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/AddMountAttributes.java index 4059e10c9bb9b..aa3ef4e19d3b1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/AddMountAttributes.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/AddMountAttributes.java @@ -148,6 +148,7 @@ public MountTable getNewOrUpdatedMountTableEntryWithAttributes(MountTable existi * @throws IOException If mount table instantiation fails. */ private MountTable getMountTableForAddRequest(String mountSrc) throws IOException { + // linked hash map can keep the order of inserting. 
Map destMap = new LinkedHashMap<>(); for (String ns : this.getNss()) { destMap.put(ns, this.getDest()); diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/RouterAdmin.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/RouterAdmin.java index b627ca35c87bd..c4c43326fa915 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/RouterAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/tools/federation/RouterAdmin.java @@ -164,23 +164,25 @@ private static String getUsage(String cmd) { if (cmd.equals("-add")) { return "\t[-add " + "[-readonly] [-faulttolerant] " - + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE] " + + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE|LEADER_FOLLOWER] " + "-owner -group -mode ]"; } else if (cmd.equals(ADD_ALL_COMMAND)) { return "\t[" + ADD_ALL_COMMAND + " " + " " - + "[-readonly] [-faulttolerant] " + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE] " + + "[-readonly] [-faulttolerant] " + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE" + + "|LEADER_FOLLOWER] " + "-owner -group -mode " + " , " + " " - + "[-readonly] [-faulttolerant] " + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE] " + + "[-readonly] [-faulttolerant] " + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE" + + "|LEADER_FOLLOWER] " + "-owner -group -mode " + " , ...]"; } else if (cmd.equals("-update")) { return "\t[-update " + " [ ] " + "[-readonly true|false] [-faulttolerant true|false] " - + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE] " + + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE|LEADER_FOLLOWER] " + "-owner -group -mode ]"; } else if (cmd.equals("-rm")) { return "\t[-rm ]"; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto index b3be714310a02..e93d1d9fc92d0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/proto/FederationProtocol.proto @@ -142,6 +142,7 @@ message MountTableRecordProto { RANDOM = 2; HASH_ALL = 3; SPACE = 4; + LEADER_FOLLOWER = 5; } optional DestOrder destOrder = 6 [default = HASH]; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md index ed62aec7209c2..2f6fe6a00d254 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/site/markdown/HDFSRouterFederation.md @@ -288,6 +288,7 @@ For deciding where to create a new file/folder it uses the order parameter, it c * RANDOM: Random subcluster. This is usually useful for balancing the load across. Folders are created in all subclusters. * HASH_ALL: Follow consistent hashing at all the levels. This approach tries to balance the reads and writes evenly across subclusters. Folders are created in all subclusters. * SPACE: Try to write data in the subcluster with the most available space. Folders are created in all subclusters. +* LEADER_FOLLOWER: Try to write data in the leader subcluster as much as possible, if failed, try follower subclusters. Folders are created in all subclusters. For the hash-based approaches, the difference is that HASH would make all the files/folders within a folder belong to the same subcluster while HASH_ALL will spread all files under a mount point. 
For example, assuming we have a HASH mount point for `/data/hash`, files and folders under `/data/hash/folder0` will all be in the same subcluster. @@ -297,6 +298,9 @@ RANDOM can be used for reading and writing data from/into different subclusters. The common use for this approach is to have the same data in multiple subclusters and balance the reads across subclusters. For example, if thousands of containers need to read the same data (e.g., a library), one can use RANDOM to read the data from any of the subclusters. +LEADER_FOLLOWER can be used in cross-cluster disaster tolerance; it is not meant for balancing load among sub-clusters. When using this mode like `-add /data ns2,ns1 /data -order LEADER_FOLLOWER`, +`ns2` is considered an active subcluster and `ns1` is considered a follower subcluster. The order of namespaces is always `leader,follower,follower...`. + To determine which subcluster contains a file: [hdfs]$ $HADOOP_HOME/bin/hdfs dfsrouteradmin -getDestination /user/user1/file.txt diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMultipleDestinationResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMultipleDestinationResolver.java index c4b211c6ee87a..01a07345e7021 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMultipleDestinationResolver.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/TestMultipleDestinationResolver.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.Map; import java.util.Random; import java.util.Set; @@ -104,6 +105,15 @@ public void setup() throws IOException { MountTable readOnlyEntry = MountTable.newInstance("/readonly", mapReadOnly); readOnlyEntry.setReadOnly(true); resolver.addEntry(readOnlyEntry); + + // leader follower mode + Map leaderFollowerMap = new LinkedHashMap<>(); + leaderFollowerMap.put("subcluster1", "/leaderfollower"); + leaderFollowerMap.put("subcluster0", "/leaderfollower"); + leaderFollowerMap.put("subcluster2", "/leaderfollower"); + MountTable leaderFollowerEntry = MountTable.newInstance("/leaderfollower", leaderFollowerMap); + leaderFollowerEntry.setDestOrder(DestinationOrder.LEADER_FOLLOWER); + resolver.addEntry(leaderFollowerEntry); } @Test @@ -340,6 +350,13 @@ public void testReadOnly() throws IOException { assertDest("subcluster1", dest12); } + @Test + public void testLeaderFollower() throws IOException { + PathLocation dest0 = + resolver.getDestinationForPath("/leaderfollower/folder0/file0.txt"); + assertDest("subcluster1", dest0); + } + @Test public void testLocalResolver() throws IOException { PathLocation dest0 = diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/order/TestLeaderFollowerResolver.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/order/TestLeaderFollowerResolver.java new file mode 100644 index 0000000000000..b4ffc9838a018 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/resolver/order/TestLeaderFollowerResolver.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements.
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.federation.resolver.order; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.server.federation.resolver.MultipleDestinationMountTableResolver; +import org.apache.hadoop.hdfs.server.federation.resolver.PathLocation; +import org.apache.hadoop.hdfs.server.federation.resolver.RemoteLocation; +import org.apache.hadoop.hdfs.server.federation.router.Router; +import org.apache.hadoop.hdfs.server.federation.store.records.MountTable; +import org.junit.Test; + +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; + +public class TestLeaderFollowerResolver { + @Test + public void testResolve() throws Exception { + // Mock the subcluster mapping + Configuration conf = new Configuration(); + Router router = mock(Router.class); + LeaderFollowerResolver leaderFollowerResolver = new LeaderFollowerResolver(); + + // Add the mocks to the resolver + MultipleDestinationMountTableResolver resolver = + new MultipleDestinationMountTableResolver(conf, router); + resolver.addResolver(DestinationOrder.LEADER_FOLLOWER, leaderFollowerResolver); + + Map mapLocal = new LinkedHashMap<>(); + mapLocal.put("subcluster2", "/local"); + mapLocal.put("subcluster0", "/local"); + mapLocal.put("subcluster1", "/local"); + MountTable localEntry = MountTable.newInstance("/local", mapLocal); + localEntry.setDestOrder(DestinationOrder.LEADER_FOLLOWER); + resolver.addEntry(localEntry); + + PathLocation dest = resolver.getDestinationForPath("/local/file0.txt"); + assertDestination("subcluster2", dest); + + } + + private static void assertDestination(String expectedNsId, PathLocation loc) { + List dests = loc.getDestinations(); + RemoteLocation dest = dests.get(0); + assertEquals(expectedNsId, dest.getNameserviceId()); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java index a0ccf80578ef1..f189d93430440 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterAdminCLI.java @@ -265,6 +265,31 @@ public void testAddOrderMountTable() throws Exception { testAddOrderMountTable(DestinationOrder.RANDOM); testAddOrderMountTable(DestinationOrder.HASH_ALL); testAddOrderMountTable(DestinationOrder.SPACE); + testAddOrderMountTable(DestinationOrder.LEADER_FOLLOWER); + } + + @Test + public void testLeaderFollower() throws Exception { + DestinationOrder order = 
DestinationOrder.LEADER_FOLLOWER; + final String mnt = "/newAdd1" + order; + final String nsId = "ns1,ns2,ns0"; + final String dest = "/changAdd"; + + String[] argv = new String[] { + "-add", mnt, nsId, dest, "-order", order.toString()}; + assertEquals(0, ToolRunner.run(admin, argv)); + + stateStore.loadCache(MountTableStoreImpl.class, true); + MountTableManager mountTable = client.getMountTableManager(); + GetMountTableEntriesRequest request = + GetMountTableEntriesRequest.newInstance(mnt); + GetMountTableEntriesResponse response = + mountTable.getMountTableEntries(request); + List entries = response.getEntries(); + assertEquals(1, entries.size()); + assertEquals("ns1", entries.get(0).getDestinations().get(0).getNameserviceId()); + assertEquals("ns2", entries.get(0).getDestinations().get(1).getNameserviceId()); + assertEquals("ns0", entries.get(0).getDestinations().get(2).getNameserviceId()); } @Test @@ -795,7 +820,7 @@ public void testInvalidArgumentMessage() throws Exception { assertTrue("Wrong message: " + out, out.toString().contains( "\t[-add " + "[-readonly] [-faulttolerant] " - + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE] " + + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE|LEADER_FOLLOWER] " + "-owner -group -mode ]")); out.reset(); @@ -804,7 +829,7 @@ public void testInvalidArgumentMessage() throws Exception { assertTrue("Wrong message: " + out, out.toString().contains( "\t[-update [ ] " + "[-readonly true|false] [-faulttolerant true|false] " - + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE] " + + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE|LEADER_FOLLOWER] " + "-owner -group -mode ]")); out.reset(); @@ -852,18 +877,18 @@ public void testInvalidArgumentMessage() throws Exception { String expected = "Usage: hdfs dfsrouteradmin :\n" + "\t[-add " + "[-readonly] [-faulttolerant] " - + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE] " + + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE|LEADER_FOLLOWER] " + "-owner -group -mode ]\n" + "\t[-addAll " - + "[-readonly] [-faulttolerant] [-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE] " + + "[-readonly] [-faulttolerant] [-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE|LEADER_FOLLOWER] " + "-owner -group -mode " + " , " - + "[-readonly] [-faulttolerant] [-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE] " + + "[-readonly] [-faulttolerant] [-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE|LEADER_FOLLOWER] " + "-owner -group -mode , ...]\n" + "\t[-update [ " + "] [-readonly true|false]" + " [-faulttolerant true|false] " - + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE] " + + "[-order HASH|LOCAL|RANDOM|HASH_ALL|SPACE|LEADER_FOLLOWER] " + "-owner -group -mode ]\n" + "\t[-rm ]\n" + "\t[-ls [-d] ]\n" + "\t[-getDestination ]\n" From 8913d379fd2d2d1de548637749d5322377395835 Mon Sep 17 00:00:00 2001 From: zhengchenyu Date: Thu, 11 Jul 2024 17:41:24 +0800 Subject: [PATCH 055/113] HDFS-17566. Got wrong sorted block order when StorageType is considered. (#6934). Contributed by Chenyu Zheng. 
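The change below swaps the comparator arguments in DatanodeManager's secondary node sorter. A generic sketch of why the argument order matters (plain strings for illustration only, not the actual StorageType ordering used by HDFS):

    String[] storages = {"SSD", "ARCHIVE", "DISK"};
    // s1.compareTo(s2) sorts ascending, s2.compareTo(s1) sorts descending.
    java.util.Arrays.sort(storages, (s1, s2) -> s1.compareTo(s2));  // -> [ARCHIVE, DISK, SSD]
    java.util.Arrays.sort(storages, (s1, s2) -> s2.compareTo(s1));  // -> [SSD, DISK, ARCHIVE]

With the arguments reversed, the secondary sort ran in the opposite direction; the one-line fix below restores the natural ordering.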
Signed-off-by: He Xiaoqiao --- .../hadoop/hdfs/server/blockmanagement/DatanodeManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java index a46a2ce15c660..081f834825865 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java @@ -684,7 +684,7 @@ private Consumer> createSecondaryNodeSorter() { } else if (s2 == null) { return 1; } else { - return s2.compareTo(s1); + return s1.compareTo(s2); } }); secondarySort = list -> Collections.sort(list, comp); From 4f0ee9d67d9e7ac4195fe2ed8227760a63064091 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Fri, 12 Jul 2024 06:16:44 +0800 Subject: [PATCH 056/113] HADOOP-19222. Switch yum repo baseurl due to CentOS 7 sunset (#6932) Contributed by Cheng Pan. Reviewed-by: Gautham Banasandra Signed-off-by: Shilun Fan --- dev-support/docker/Dockerfile_centos_7 | 16 ++++++++++++++-- .../pkg-resolver/set-vault-as-baseurl-centos.sh | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/dev-support/docker/Dockerfile_centos_7 b/dev-support/docker/Dockerfile_centos_7 index ff947637a2ce2..b97e59969a760 100644 --- a/dev-support/docker/Dockerfile_centos_7 +++ b/dev-support/docker/Dockerfile_centos_7 @@ -30,6 +30,13 @@ COPY pkg-resolver pkg-resolver RUN chmod a+x pkg-resolver/*.sh pkg-resolver/*.py \ && chmod a+r pkg-resolver/*.json +###### +# Centos 7 has reached its EOL and the packages +# are no longer available on mirror.centos.org site. 
+# Please see https://www.centos.org/centos-linux-eol/ +###### +RUN pkg-resolver/set-vault-as-baseurl-centos.sh centos:7 + ###### # Install packages from yum ###### @@ -38,8 +45,13 @@ RUN yum update -y \ && yum groupinstall -y "Development Tools" \ && yum install -y \ centos-release-scl \ - python3 \ - && yum install -y $(pkg-resolver/resolve.py centos:7) + python3 + +# Apply the script again because centos-release-scl creates new YUM repo files +RUN pkg-resolver/set-vault-as-baseurl-centos.sh centos:7 + +# hadolint ignore=DL3008,SC2046 +RUN yum install -y $(pkg-resolver/resolve.py centos:7) # Set GCC 9 as the default C/C++ compiler RUN echo "source /opt/rh/devtoolset-9/enable" >> /etc/bashrc diff --git a/dev-support/docker/pkg-resolver/set-vault-as-baseurl-centos.sh b/dev-support/docker/pkg-resolver/set-vault-as-baseurl-centos.sh index 4be4cd956b15b..905ac5077deec 100644 --- a/dev-support/docker/pkg-resolver/set-vault-as-baseurl-centos.sh +++ b/dev-support/docker/pkg-resolver/set-vault-as-baseurl-centos.sh @@ -24,7 +24,7 @@ fi if [ "$1" == "centos:7" ] || [ "$1" == "centos:8" ]; then cd /etc/yum.repos.d/ || exit && sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* && - sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* && + sed -i 's|# *baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* && yum update -y && cd /root || exit else From 51cb858cc8c23d873d4adfc21de5f2c1c22d346f Mon Sep 17 00:00:00 2001 From: Anuj Modi <128447756+anujmodi2021@users.noreply.github.com> Date: Mon, 15 Jul 2024 21:51:54 +0530 Subject: [PATCH 057/113] HADOOP-19208: [ABFS] Fixing logic to determine HNS nature of account to avoid extra getAcl() calls (#6893) --- .../fs/azurebfs/AzureBlobFileSystemStore.java | 13 +++-- ...ITestAzureBlobFileSystemInitAndCreate.java | 2 + .../fs/azurebfs/ITestGetNameSpaceEnabled.java | 48 +++++++++++++++++++ 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index ac564f082c9e4..449b123d9212a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -395,14 +395,21 @@ private synchronized boolean getNamespaceEnabledInformationFromServer( try { LOG.debug("Get root ACL status"); getClient().getAclStatus(AbfsHttpConstants.ROOT_PATH, tracingContext); + // If getAcl succeeds, namespace is enabled. isNamespaceEnabled = Trilean.getTrilean(true); } catch (AbfsRestOperationException ex) { - // Get ACL status is a HEAD request, its response doesn't contain - // errorCode - // So can only rely on its status code to determine its account type. + // Get ACL status is a HEAD request, its response doesn't contain errorCode + // So can only rely on its status code to determine account type. if (HttpURLConnection.HTTP_BAD_REQUEST != ex.getStatusCode()) { + // If getAcl fails with anything other than 400, namespace is enabled. + isNamespaceEnabled = Trilean.getTrilean(true); + // Continue to throw exception as earlier. + LOG.debug("Failed to get ACL status with non 400. Inferring namespace enabled", ex); throw ex; } + // If getAcl fails with 400, namespace is disabled. + LOG.debug("Failed to get ACL status with 400. 
" + + "Inferring namespace disabled and ignoring error", ex); isNamespaceEnabled = Trilean.getTrilean(false); } catch (AzureBlobFileSystemException ex) { throw ex; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemInitAndCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemInitAndCreate.java index dcd73cc3e982a..1ff3458fdbaac 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemInitAndCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemInitAndCreate.java @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.TrileanConversionException; +import org.apache.hadoop.fs.azurebfs.enums.Trilean; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; @@ -67,6 +68,7 @@ public void testGetAclCallOnHnsConfigAbsence() throws Exception { Mockito.doThrow(TrileanConversionException.class) .when(store) .isNamespaceEnabled(); + store.setNamespaceEnabled(Trilean.UNKNOWN); TracingContext tracingContext = getSampleTracingContext(fs, true); Mockito.doReturn(Mockito.mock(AbfsRestOperation.class)) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java index b40e317d2e32d..d168ed38844df 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java @@ -24,9 +24,11 @@ import org.junit.Assume; import org.junit.Test; import org.assertj.core.api.Assertions; +import org.mockito.Mockito; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.conf.Configuration; @@ -34,9 +36,14 @@ import org.apache.hadoop.fs.azurebfs.enums.Trilean; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import static java.net.HttpURLConnection.HTTP_BAD_REQUEST; +import static java.net.HttpURLConnection.HTTP_INTERNAL_ERROR; +import static java.net.HttpURLConnection.HTTP_NOT_FOUND; +import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; @@ -217,4 +224,45 @@ private AbfsClient callAbfsGetIsNamespaceEnabledAndReturnMockAbfsClient() return mockClient; } + @Test + public void ensureGetAclDetermineHnsStatusAccurately() throws Exception { + ensureGetAclDetermineHnsStatusAccuratelyInternal(HTTP_BAD_REQUEST, + false, false); + ensureGetAclDetermineHnsStatusAccuratelyInternal(HTTP_NOT_FOUND, + true, true); + ensureGetAclDetermineHnsStatusAccuratelyInternal(HTTP_INTERNAL_ERROR, + true, true); + 
ensureGetAclDetermineHnsStatusAccuratelyInternal(HTTP_UNAVAILABLE, + true, true); + } + + private void ensureGetAclDetermineHnsStatusAccuratelyInternal(int statusCode, + boolean expectedValue, boolean isExceptionExpected) throws Exception { + AzureBlobFileSystemStore store = Mockito.spy(getFileSystem().getAbfsStore()); + AbfsClient mockClient = mock(AbfsClient.class); + store.setNamespaceEnabled(Trilean.UNKNOWN); + doReturn(mockClient).when(store).getClient(); + AbfsRestOperationException ex = new AbfsRestOperationException( + statusCode, null, Integer.toString(statusCode), null); + doThrow(ex).when(mockClient).getAclStatus(anyString(), any(TracingContext.class)); + + if (isExceptionExpected) { + try { + store.getIsNamespaceEnabled(getTestTracingContext(getFileSystem(), false)); + Assertions.fail( + "Exception Should have been thrown with status code: " + statusCode); + } catch (AbfsRestOperationException caughtEx) { + Assertions.assertThat(caughtEx.getStatusCode()).isEqualTo(statusCode); + Assertions.assertThat(caughtEx.getErrorMessage()).isEqualTo(ex.getErrorMessage()); + } + } + // This should not trigger extra getAcl() call in case of exceptions. + boolean isHnsEnabled = store.getIsNamespaceEnabled( + getTestTracingContext(getFileSystem(), false)); + Assertions.assertThat(isHnsEnabled).isEqualTo(expectedValue); + + // GetAcl() should be called only once to determine the HNS status. + Mockito.verify(mockClient, times(1)) + .getAclStatus(anyString(), any(TracingContext.class)); + } } From 5730656660184d15ba4cc75ba4596f53feeb3e8b Mon Sep 17 00:00:00 2001 From: "gavin.wang" Date: Tue, 16 Jul 2024 20:57:50 +0800 Subject: [PATCH 058/113] HDFS-17574. Make NNThroughputBenchmark support human-friendly units about blocksize. (#6931). Contributed by wangzhongwei. 
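The change below switches the -blockSize argument from Long.parseLong to the existing StringUtils.TraditionalBinaryPrefix helper, so binary suffixes such as k, m and g are accepted. A quick sketch of what the helper returns (example values only):

    // org.apache.hadoop.util.StringUtils
    long a = StringUtils.TraditionalBinaryPrefix.string2long("1m");   // 1048576
    long b = StringUtils.TraditionalBinaryPrefix.string2long("32k");  // 32768
    long c = StringUtils.TraditionalBinaryPrefix.string2long("512");  // plain numbers still parse

So "-blockSize 1m" is now equivalent to "-blockSize 1048576".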
Signed-off-by: He Xiaoqiao --- .../namenode/NNThroughputBenchmark.java | 6 +++--- .../namenode/TestNNThroughputBenchmark.java | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java index f96b33ba2fb1c..ca29433dbd607 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NNThroughputBenchmark.java @@ -587,7 +587,7 @@ void parseArguments(List args) { numOpsRequired = Integer.parseInt(args.get(++i)); } else if (args.get(i).equals("-blockSize")) { if(i+1 == args.size()) printUsage(); - blockSize = Long.parseLong(args.get(++i)); + blockSize = StringUtils.TraditionalBinaryPrefix.string2long(args.get(++i)); } else if(args.get(i).equals("-threads")) { if(i+1 == args.size()) printUsage(); numThreads = Integer.parseInt(args.get(++i)); @@ -1260,7 +1260,7 @@ void parseArguments(List args) { blocksPerFile = Integer.parseInt(args.get(++i)); } else if (args.get(i).equals("-blockSize")) { if(i+1 == args.size()) printUsage(); - blockSize = Long.parseLong(args.get(++i)); + blockSize = StringUtils.TraditionalBinaryPrefix.string2long(args.get(++i)); } else if(args.get(i).equals("-baseDirName")) { if (i + 1 == args.size()) { printUsage(); @@ -1498,7 +1498,7 @@ void parseArguments(List args) { replication = Short.parseShort(args.get(++i)); } else if (args.get(i).equals("-blockSize")) { if(i+1 == args.size()) printUsage(); - blockSize = Long.parseLong(args.get(++i)); + blockSize = StringUtils.TraditionalBinaryPrefix.string2long(args.get(++i)); } else if(args.get(i).equals("-baseDirName")) { if (i + 1 == args.size()) { printUsage(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNThroughputBenchmark.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNThroughputBenchmark.java index 651db0c910a1a..1071564cf02aa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNThroughputBenchmark.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNNThroughputBenchmark.java @@ -285,4 +285,23 @@ public void testNNThroughputWithBlockSize() throws Exception { "-blockSize", "32", "-close"}); } } + + /** + * This test runs {@link NNThroughputBenchmark} against a mini DFS cluster + * with explicit -blockSize option like 1m. 
+ */ + @Test(timeout = 120000) + public void testNNThroughputBlockSizeArgWithLetterSuffix() throws Exception { + final Configuration conf = new HdfsConfiguration(); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 16); + try (MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build()) { + cluster.waitActive(); + final Configuration benchConf = new HdfsConfiguration(); + benchConf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 16); + FileSystem.setDefaultUri(benchConf, cluster.getURI()); + NNThroughputBenchmark.runBenchmark(benchConf, + new String[]{"-op", "create", "-keepResults", "-files", "3", + "-blockSize", "1m", "-close"}); + } + } } From 1360c7574adc5756ee432c5829d38351523c3f8e Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Tue, 16 Jul 2024 05:08:41 -0800 Subject: [PATCH 059/113] HADOOP-19218 Avoid DNS lookup while creating IPC Connection object (#6916). Contributed by Viraj Jasani. Signed-off-by: Rushabh Shah Signed-off-by: He Xiaoqiao --- .../src/main/java/org/apache/hadoop/ipc/Server.java | 13 ++++--------- .../test/java/org/apache/hadoop/ipc/TestIPC.java | 2 +- .../test/java/org/apache/hadoop/ipc/TestRPC.java | 4 ++-- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index a49b5ec692b4d..0fb902a095d50 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -2034,11 +2034,7 @@ public class Connection { * Address to which the socket is connected to. */ private final InetAddress addr; - /** - * Client Host address from where the socket connection is being established to the Server. - */ - private final String hostName; - + IpcConnectionContextProto connectionContext; String protocolName; SaslServer saslServer; @@ -2081,12 +2077,9 @@ public Connection(SocketChannel channel, long lastContact, this.isOnAuxiliaryPort = isOnAuxiliaryPort; if (addr == null) { this.hostAddress = "*Unknown*"; - this.hostName = this.hostAddress; } else { // host IP address this.hostAddress = addr.getHostAddress(); - // host name for the IP address - this.hostName = addr.getHostName(); } this.remotePort = socket.getPort(); this.responseQueue = new LinkedList(); @@ -2102,7 +2095,7 @@ public Connection(SocketChannel channel, long lastContact, @Override public String toString() { - return hostName + ":" + remotePort + " / " + hostAddress + ":" + remotePort; + return hostAddress + ":" + remotePort; } boolean setShouldClose() { @@ -2516,6 +2509,7 @@ public int readAndProcess() throws IOException, InterruptedException { } if (!RpcConstants.HEADER.equals(dataLengthBuffer)) { + final String hostName = addr == null ? this.hostAddress : addr.getHostName(); LOG.warn("Incorrect RPC Header length from {}:{} / {}:{}. Expected: {}. Actual: {}", hostName, remotePort, hostAddress, remotePort, RpcConstants.HEADER, dataLengthBuffer); @@ -2523,6 +2517,7 @@ public int readAndProcess() throws IOException, InterruptedException { return -1; } if (version != CURRENT_VERSION) { + final String hostName = addr == null ? this.hostAddress : addr.getHostName(); //Warning is ok since this is not supposed to happen. LOG.warn("Version mismatch from {}:{} / {}:{}. " + "Expected version: {}. 
Actual version: {} ", hostName, diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java index 7cfd65d482129..9165c71eb41bf 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestIPC.java @@ -1177,7 +1177,7 @@ private static void callAndVerify(Server server, InetSocketAddress addr, Connection connection = server.getConnections()[0]; LOG.info("Connection is from: {}", connection); assertEquals( - "Connection string representation should include both IP address and Host name", 2, + "Connection string representation should include only IP address for healthy connection", 1, connection.toString().split(" / ").length); int serviceClass2 = connection.getServiceClass(); assertFalse(noChanged ^ serviceClass == serviceClass2); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java index 17f1f65261468..bc72b6c126275 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java @@ -1941,8 +1941,8 @@ public RpcStatusProto getRpcStatusProto() { String connectionInfo = conns[0].toString(); LOG.info("Connection is from: {}", connectionInfo); assertEquals( - "Connection string representation should include both IP address and Host name", 2, - connectionInfo.split(" / ").length); + "Connection string representation should include only IP address for healthy " + + "connection", 1, connectionInfo.split(" / ").length); // verify whether the connection should have been reused. if (isDisconnected) { assertNotSame(reqName, lastConn, conns[0]); From ebbe9628d34476939343a94484528d3754e92eb9 Mon Sep 17 00:00:00 2001 From: fuchaohong <1783129294@qq.com> Date: Thu, 18 Jul 2024 00:43:00 +0800 Subject: [PATCH 060/113] HDFS-17558. RBF: Make maxSizeOfFederatedStateToPropagate work on setResponseHeaderState. 
(#6902) Co-authored-by: fuchaohong --- .../router/RouterStateIdContext.java | 8 ++-- .../router/TestObserverWithRouter.java | 40 +++++++++++++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterStateIdContext.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterStateIdContext.java index 5c9ceb865a5ba..e239e5e9059ff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterStateIdContext.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterStateIdContext.java @@ -104,7 +104,9 @@ public void setResponseHeaderState(RpcResponseHeaderProto.Builder headerBuilder) builder.putNamespaceStateIds(k, v.get()); } }); - headerBuilder.setRouterFederatedState(builder.build().toByteString()); + if (builder.getNamespaceStateIdsCount() <= maxSizeOfFederatedStateToPropagate) { + headerBuilder.setRouterFederatedState(builder.build().toByteString()); + } } public LongAccumulator getNamespaceStateId(String nsId) { @@ -155,9 +157,7 @@ public static long getClientStateIdFromCurrentCall(String nsId) { @Override public void updateResponseState(RpcResponseHeaderProto.Builder header) { - if (namespaceIdMap.size() <= maxSizeOfFederatedStateToPropagate) { - setResponseHeaderState(header); - } + setResponseHeaderState(header); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestObserverWithRouter.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestObserverWithRouter.java index eaee5b8b14613..3f773efd63dc0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestObserverWithRouter.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestObserverWithRouter.java @@ -641,6 +641,46 @@ public void testRouterResponseHeaderState() { Assertions.assertEquals(10L, latestFederateState.get("ns0")); } + @Test + @Tag(SKIP_BEFORE_EACH_CLUSTER_STARTUP) + public void testRouterResponseHeaderStateMaxSizeLimit() { + Configuration conf = new Configuration(); + conf.setBoolean(RBFConfigKeys.DFS_ROUTER_OBSERVER_READ_DEFAULT_KEY, true); + conf.setInt(RBFConfigKeys.DFS_ROUTER_OBSERVER_FEDERATED_STATE_PROPAGATION_MAXSIZE, 1); + + RouterStateIdContext routerStateIdContext = new RouterStateIdContext(conf); + + ConcurrentHashMap namespaceIdMap = + routerStateIdContext.getNamespaceIdMap(); + namespaceIdMap.put("ns0", new LongAccumulator(Math::max, 10)); + namespaceIdMap.put("ns1", new LongAccumulator(Math::max, Long.MIN_VALUE)); + + RpcHeaderProtos.RpcResponseHeaderProto.Builder responseHeaderBuilder = + RpcHeaderProtos.RpcResponseHeaderProto + .newBuilder() + .setCallId(1) + .setStatus(RpcHeaderProtos.RpcResponseHeaderProto.RpcStatusProto.SUCCESS); + routerStateIdContext.updateResponseState(responseHeaderBuilder); + + Map latestFederateState = RouterStateIdContext.getRouterFederatedStateMap( + responseHeaderBuilder.build().getRouterFederatedState()); + // Validate that ns0 is still part of the header + Assertions.assertEquals(1, latestFederateState.size()); + + namespaceIdMap.put("ns2", new LongAccumulator(Math::max, 20)); + // Rebuild header + responseHeaderBuilder = + RpcHeaderProtos.RpcResponseHeaderProto + 
.newBuilder() + .setCallId(1) + .setStatus(RpcHeaderProtos.RpcResponseHeaderProto.RpcStatusProto.SUCCESS); + routerStateIdContext.updateResponseState(responseHeaderBuilder); + latestFederateState = RouterStateIdContext.getRouterFederatedStateMap( + responseHeaderBuilder.build().getRouterFederatedState()); + // Validate that ns0 is still part of the header + Assertions.assertEquals(0, latestFederateState.size()); + } + @EnumSource(ConfigSetting.class) @ParameterizedTest public void testStateIdProgressionInRouter(ConfigSetting configSetting) throws Exception { From 9a9ad6cc98438908f199c11122d8c90dfd7cc934 Mon Sep 17 00:00:00 2001 From: K0K0V0K <109747532+K0K0V0K@users.noreply.github.com> Date: Thu, 18 Jul 2024 16:55:40 +0200 Subject: [PATCH 061/113] YARN-11705. Turn off Node Manager working directories validation by default (#6948) Change-Id: I011c13c79719be97c7ebc028804f1fdab5eb34c4 --- .../java/org/apache/hadoop/yarn/conf/YarnConfiguration.java | 2 +- .../hadoop-yarn-common/src/main/resources/yarn-default.xml | 2 +- .../hadoop/yarn/server/nodemanager/DirectoryCollection.java | 5 +++++ .../yarn/server/nodemanager/TestDirectoryCollection.java | 1 + 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 9503d47537706..5cab8c0a9ba4c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -2189,7 +2189,7 @@ public static boolean isAclEnabled(Configuration conf) { NM_DISK_HEALTH_CHECK_PREFIX + "working-dir-content-accessibility-validation.enabled"; public static final boolean DEFAULT_NM_WORKING_DIR_CONTENT_ACCESSIBILITY_VALIDATION_ENABLED = - true; + false; /** The health checker scripts. 
*/ public static final String NM_HEALTH_CHECK_SCRIPTS = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index ac976b7472d3e..476789637e0d0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -1998,7 +1998,7 @@ Validate content of the node manager directories can be accessed yarn.nodemanager.disk-health-checker.working-dir-content-accessibility-validation.enabled - true + false diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java index a5657ab48b440..8d8e0325bd0b9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/DirectoryCollection.java @@ -726,4 +726,9 @@ private void setGoodDirsDiskUtilizationPercentage() { public int getGoodDirsDiskUtilizationPercentage() { return goodDirsDiskUtilizationPercentage; } + + @VisibleForTesting + public void setSubAccessibilityValidationEnabled(boolean subAccessibilityValidationEnabled) { + this.subAccessibilityValidationEnabled = subAccessibilityValidationEnabled; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java index 0193f844ac824..4dbab5be8cb4a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestDirectoryCollection.java @@ -528,6 +528,7 @@ public void testNonAccessibleSub() throws IOException { Files.setPosixFilePermissions(testFile.toPath(), PosixFilePermissions.fromString("-w--w--w-")); DirectoryCollection dc = new DirectoryCollection(new String[]{testDir.toString()}); + dc.setSubAccessibilityValidationEnabled(true); Map diskErrorInformationMap = dc.testDirs(Collections.singletonList(testDir.toString()), Collections.emptySet()); Assert.assertEquals(1, diskErrorInformationMap.size()); From 7638b4727e702725bcbfeb4addf3fc80078924b8 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Sat, 20 Jul 2024 15:16:01 +0800 Subject: [PATCH 062/113] HDFS-17575. SaslDataTransferClient should use SaslParticipant to create messages. 
(#6933) --- .../sasl/SaslDataTransferClient.java | 16 ++++++++++++---- .../datatransfer/sasl/SaslParticipant.java | 10 ++++++++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java index 043439130d5dc..960a5221dd1ae 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java @@ -63,6 +63,7 @@ import org.apache.hadoop.security.token.SecretManager; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Lists; +import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -84,6 +85,8 @@ public class SaslDataTransferClient { private static final Logger LOG = LoggerFactory.getLogger( SaslDataTransferClient.class); + private static final byte[] EMPTY_BYTE_ARRAY = {}; + private final Configuration conf; private final AtomicBoolean fallbackToSimpleAuth; private final SaslPropertiesResolver saslPropsResolver; @@ -519,25 +522,29 @@ private IOStreamPair doSaslHandshake(InetAddress addr, // In which case there will be no encrypted secret sent from NN. BlockTokenIdentifier blockTokenIdentifier = accessToken.decodeIdentifier(); + final byte[] first = sasl.evaluateChallengeOrResponse(EMPTY_BYTE_ARRAY); + if (LOG.isDebugEnabled()) { + LOG.info("first: {}", first == null ? null : first.length == 0 ? "" + : StringUtils.byteToHexString(first)); + } if (blockTokenIdentifier != null) { byte[] handshakeSecret = accessToken.decodeIdentifier().getHandshakeMsg(); if (handshakeSecret == null || handshakeSecret.length == 0) { LOG.debug("Handshake secret is null, " + "sending without handshake secret."); - sendSaslMessage(out, new byte[0]); + sendSaslMessage(out, first); } else { LOG.debug("Sending handshake secret."); BlockTokenIdentifier identifier = new BlockTokenIdentifier(); identifier.readFields(new DataInputStream( new ByteArrayInputStream(accessToken.getIdentifier()))); String bpid = identifier.getBlockPoolId(); - sendSaslMessageHandshakeSecret(out, new byte[0], - handshakeSecret, bpid); + sendSaslMessageHandshakeSecret(out, first, handshakeSecret, bpid); } } else { LOG.debug("Block token id is null, sending without handshake secret."); - sendSaslMessage(out, new byte[0]); + sendSaslMessage(out, first); } // step 1 @@ -565,6 +572,7 @@ private IOStreamPair doSaslHandshake(InetAddress addr, cipherOptions.add(option); } } + LOG.debug("{}: cipherOptions={}", sasl, cipherOptions); sendSaslMessageAndNegotiationCipherOptions(out, localResponse, cipherOptions); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslParticipant.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslParticipant.java index e32f76a8ebd7d..ee8760d688adb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslParticipant.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslParticipant.java @@ -20,6 +20,7 @@ import java.io.DataInputStream; 
import java.io.DataOutputStream; import java.util.Map; +import java.util.Objects; import javax.security.auth.callback.CallbackHandler; import javax.security.sasl.Sasl; import javax.security.sasl.SaslClient; @@ -110,7 +111,7 @@ public static SaslParticipant createClientSaslParticipant(String userName, * @param saslServer to wrap */ private SaslParticipant(SaslServer saslServer) { - this.saslServer = saslServer; + this.saslServer = Objects.requireNonNull(saslServer, "saslServer == null"); this.saslClient = null; } @@ -121,7 +122,7 @@ private SaslParticipant(SaslServer saslServer) { */ private SaslParticipant(SaslClient saslClient) { this.saslServer = null; - this.saslClient = saslClient; + this.saslClient = Objects.requireNonNull(saslClient, "saslClient == null"); } /** @@ -228,4 +229,9 @@ public IOStreamPair createStreamPair(DataOutputStream out, new SaslOutputStream(out, saslServer)); } } + + @Override + public String toString() { + return "Sasl" + (saslServer != null? "Server" : "Client"); + } } From 9dad697dbcaf6f4ac92da0d176668bca7265c097 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Sat, 20 Jul 2024 15:18:22 +0800 Subject: [PATCH 063/113] HADOOP-19227. ipc.Server accelerate token negotiation only for the default mechanism. (#6949) --- .../src/main/java/org/apache/hadoop/ipc/Server.java | 4 +++- .../org/apache/hadoop/security/SaslConstants.java | 2 +- .../org/apache/hadoop/security/SaslRpcClient.java | 11 ++++++++++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 0fb902a095d50..0d9e7296d2a4c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -106,6 +106,7 @@ import org.apache.hadoop.ipc.protobuf.RpcHeaderProtos.RPCTraceInfoProto; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.SaslConstants; import org.apache.hadoop.security.SaslPropertiesResolver; import org.apache.hadoop.security.SaslRpcServer; import org.apache.hadoop.security.SaslRpcServer.AuthMethod; @@ -2604,7 +2605,8 @@ private RpcSaslProto buildSaslNegotiateResponse() RpcSaslProto negotiateMessage = negotiateResponse; // accelerate token negotiation by sending initial challenge // in the negotiation response - if (enabledAuthMethods.contains(AuthMethod.TOKEN)) { + if (enabledAuthMethods.contains(AuthMethod.TOKEN) + && SaslConstants.SASL_MECHANISM_DEFAULT.equals(AuthMethod.TOKEN.getMechanismName())) { saslServer = createSaslServer(AuthMethod.TOKEN); byte[] challenge = saslServer.evaluateResponse(new byte[0]); RpcSaslProto.Builder negotiateBuilder = diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslConstants.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslConstants.java index 71e4b44873820..6c6a2383ff103 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslConstants.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslConstants.java @@ -32,7 +32,7 @@ public class SaslConstants { private static final String SASL_MECHANISM_ENV = "HADOOP_SASL_MECHANISM"; public static final String SASL_MECHANISM; - private static final String SASL_MECHANISM_DEFAULT = 
"DIGEST-MD5"; + public static final String SASL_MECHANISM_DEFAULT = "DIGEST-MD5"; static { final String mechanism = System.getenv(SASL_MECHANISM_ENV); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java index ce7878480e22c..aabe2ed4e01b8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcClient.java @@ -39,6 +39,7 @@ import javax.security.auth.callback.PasswordCallback; import javax.security.auth.callback.UnsupportedCallbackException; import javax.security.auth.kerberos.KerberosPrincipal; +import javax.security.sasl.AuthorizeCallback; import javax.security.sasl.RealmCallback; import javax.security.sasl.RealmChoiceCallback; import javax.security.sasl.Sasl; @@ -681,9 +682,17 @@ public void handle(Callback[] callbacks) pc = (PasswordCallback) callback; } else if (callback instanceof RealmCallback) { rc = (RealmCallback) callback; + } else if (callback instanceof AuthorizeCallback) { + final AuthorizeCallback ac = (AuthorizeCallback) callback; + final String authId = ac.getAuthenticationID(); + final String authzId = ac.getAuthorizationID(); + ac.setAuthorized(authId.equals(authzId)); + if (ac.isAuthorized()) { + ac.setAuthorizedID(authzId); + } } else { throw new UnsupportedCallbackException(callback, - "Unrecognized SASL client callback"); + "Unrecognized SASL client callback " + callback.getClass()); } } if (nc != null) { From a5eb5e961174dffb53f20633280eecddc02a4aca Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Sat, 20 Jul 2024 15:21:06 +0800 Subject: [PATCH 064/113] HDFS-17576. Support user defined auth Callback. 
(#6945) --- .../hdfs/client/HdfsClientConfigKeys.java | 3 + .../sasl/DataTransferSaslUtil.java | 6 +- .../sasl/CustomizedCallbackHandler.java | 39 ++++++++++++ .../sasl/SaslDataTransferServer.java | 36 ++++++++--- .../src/main/resources/hdfs-default.xml | 9 +++ .../sasl/TestCustomizedCallbackHandler.java | 63 +++++++++++++++++++ 6 files changed, 145 insertions(+), 11 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/CustomizedCallbackHandler.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestCustomizedCallbackHandler.java diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java index 2044530506757..e951b1d7d749e 100755 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/HdfsClientConfigKeys.java @@ -236,6 +236,9 @@ public interface HdfsClientConfigKeys { String DFS_DATA_TRANSFER_SASL_PROPS_RESOLVER_CLASS_KEY = "dfs.data.transfer.saslproperties.resolver.class"; + String DFS_DATA_TRANSFER_SASL_CUSTOMIZEDCALLBACKHANDLER_CLASS_KEY + = "dfs.data.transfer.sasl.CustomizedCallbackHandler.class"; + String DFS_ENCRYPT_DATA_TRANSFER_CIPHER_KEY_BITLENGTH_KEY = "dfs.encrypt.data.transfer.cipher.key.bitlength"; int DFS_ENCRYPT_DATA_TRANSFER_CIPHER_KEY_BITLENGTH_DEFAULT = 128; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/DataTransferSaslUtil.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/DataTransferSaslUtil.java index 4749890ce139a..e4ae936b4feaf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/DataTransferSaslUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/DataTransferSaslUtil.java @@ -102,9 +102,9 @@ public static void checkSaslComplete(SaslParticipant sasl, Set requestedQop = ImmutableSet.copyOf(Arrays.asList( saslProps.get(Sasl.QOP).split(","))); String negotiatedQop = sasl.getNegotiatedQop(); - LOG.debug("Verifying QOP, requested QOP = {}, negotiated QOP = {}", - requestedQop, negotiatedQop); - if (!requestedQop.contains(negotiatedQop)) { + LOG.debug("{}: Verifying QOP: requested = {}, negotiated = {}", + sasl, requestedQop, negotiatedQop); + if (negotiatedQop != null && !requestedQop.contains(negotiatedQop)) { throw new IOException(String.format("SASL handshake completed, but " + "channel does not have acceptable quality of protection, " + "requested = %s, negotiated = %s", requestedQop, negotiatedQop)); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/CustomizedCallbackHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/CustomizedCallbackHandler.java new file mode 100644 index 0000000000000..eff093490bcd1 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/CustomizedCallbackHandler.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.protocol.datatransfer.sasl; + +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.UnsupportedCallbackException; +import java.io.IOException; +import java.util.List; + +/** For handling customized {@link Callback}. */ +public interface CustomizedCallbackHandler { + class DefaultHandler implements CustomizedCallbackHandler{ + @Override + public void handleCallback(List callbacks, String username, char[] password) + throws UnsupportedCallbackException { + if (!callbacks.isEmpty()) { + throw new UnsupportedCallbackException(callbacks.get(0)); + } + } + } + + void handleCallback(List callbacks, String name, char[] password) + throws UnsupportedCallbackException, IOException; +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java index adf3a99634567..ae79800b3ed37 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java @@ -29,6 +29,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -46,6 +47,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.CipherOption; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; @@ -178,7 +180,7 @@ private IOStreamPair getEncryptedStreams(Peer peer, dnConf.getEncryptionAlgorithm()); } - CallbackHandler callbackHandler = new SaslServerCallbackHandler( + final CallbackHandler callbackHandler = new SaslServerCallbackHandler(dnConf.getConf(), new PasswordFunction() { @Override public char[] apply(String userName) throws IOException { @@ -195,7 +197,7 @@ public char[] apply(String userName) throws IOException { * logic. It's similar to a Guava Function, but we need to let it throw * exceptions. */ - private interface PasswordFunction { + interface PasswordFunction { /** * Returns the SASL password for the given user name. @@ -210,18 +212,27 @@ private interface PasswordFunction { /** * Sets user name and password when asked by the server-side SASL object. 
*/ - private static final class SaslServerCallbackHandler + static final class SaslServerCallbackHandler implements CallbackHandler { - private final PasswordFunction passwordFunction; + private final CustomizedCallbackHandler customizedCallbackHandler; /** * Creates a new SaslServerCallbackHandler. * * @param passwordFunction for determing the user's password */ - public SaslServerCallbackHandler(PasswordFunction passwordFunction) { + SaslServerCallbackHandler(Configuration conf, PasswordFunction passwordFunction) { this.passwordFunction = passwordFunction; + + final Class clazz = conf.getClass( + HdfsClientConfigKeys.DFS_DATA_TRANSFER_SASL_CUSTOMIZEDCALLBACKHANDLER_CLASS_KEY, + CustomizedCallbackHandler.DefaultHandler.class, CustomizedCallbackHandler.class); + try { + this.customizedCallbackHandler = clazz.newInstance(); + } catch (Exception e) { + throw new IllegalStateException("Failed to create a new instance of " + clazz, e); + } } @Override @@ -230,6 +241,7 @@ public void handle(Callback[] callbacks) throws IOException, NameCallback nc = null; PasswordCallback pc = null; AuthorizeCallback ac = null; + List unknownCallbacks = null; for (Callback callback : callbacks) { if (callback instanceof AuthorizeCallback) { ac = (AuthorizeCallback) callback; @@ -240,8 +252,10 @@ public void handle(Callback[] callbacks) throws IOException, } else if (callback instanceof RealmCallback) { continue; // realm is ignored } else { - throw new UnsupportedCallbackException(callback, - "Unrecognized SASL Callback: " + callback); + if (unknownCallbacks == null) { + unknownCallbacks = new ArrayList<>(); + } + unknownCallbacks.add(callback); } } @@ -253,6 +267,12 @@ public void handle(Callback[] callbacks) throws IOException, ac.setAuthorized(true); ac.setAuthorizedID(ac.getAuthorizationID()); } + + if (unknownCallbacks != null) { + final String name = nc != null ? nc.getDefaultName() : null; + final char[] password = name != null ? passwordFunction.apply(name) : null; + customizedCallbackHandler.handleCallback(unknownCallbacks, name, password); + } } } @@ -298,7 +318,7 @@ private IOStreamPair getSaslStreams(Peer peer, OutputStream underlyingOut, Map saslProps = saslPropsResolver.getServerProperties( getPeerAddress(peer)); - CallbackHandler callbackHandler = new SaslServerCallbackHandler( + final CallbackHandler callbackHandler = new SaslServerCallbackHandler(dnConf.getConf(), new PasswordFunction() { @Override public char[] apply(String userName) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 94c3ea0cc9b0c..d6fefa4e93989 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -2641,6 +2641,15 @@ + + dfs.data.transfer.sasl.CustomizedCallbackHandler.class + + + Some security provider may define a new javax.security.auth.callback.Callback. + This property allows users to configure a customized callback handler. 
+ + + dfs.journalnode.rpc-address 0.0.0.0:8485 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestCustomizedCallbackHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestCustomizedCallbackHandler.java new file mode 100644 index 0000000000000..88d1d66bc40ff --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestCustomizedCallbackHandler.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.protocol.datatransfer.sasl; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; +import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.SaslDataTransferServer.SaslServerCallbackHandler; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.UnsupportedCallbackException; +import java.util.Arrays; +import java.util.List; + +public class TestCustomizedCallbackHandler { + public static final Logger LOG = LoggerFactory.getLogger(TestCustomizedCallbackHandler.class); + + static class MyCallback implements Callback { } + + static class MyCallbackHandler implements CustomizedCallbackHandler { + @Override + public void handleCallback(List callbacks, String name, char[] password) { + LOG.info("{}: handling {} for {}", getClass().getSimpleName(), callbacks, name); + } + } + + @Test + public void testCustomizedCallbackHandler() throws Exception { + final Configuration conf = new Configuration(); + final Callback[] callbacks = {new MyCallback()}; + + // without setting conf, expect UnsupportedCallbackException + try { + new SaslServerCallbackHandler(conf, String::toCharArray).handle(callbacks); + Assert.fail("Expected UnsupportedCallbackException for " + Arrays.asList(callbacks)); + } catch (UnsupportedCallbackException e) { + LOG.info("The failure is expected", e); + } + + // set conf and expect success + conf.setClass(HdfsClientConfigKeys.DFS_DATA_TRANSFER_SASL_CUSTOMIZEDCALLBACKHANDLER_CLASS_KEY, + MyCallbackHandler.class, CustomizedCallbackHandler.class); + new SaslServerCallbackHandler(conf, String::toCharArray).handle(callbacks); + } +} From 1577f57d4c8bc9a0ef200a9e308e1c0f2c21c18f Mon Sep 17 00:00:00 2001 From: fuchaohong <1783129294@qq.com> Date: Sat, 20 Jul 2024 16:13:33 +0800 Subject: [PATCH 065/113] HADOOP-19228. ShellCommandFencer#setConfAsEnvVars should also replace '-' with '_'. (#6936). Contributed by fuchaohong. 
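A two-line sketch of the resulting key-to-environment-variable mangling (using the key from the updated test; the regex is the one introduced in the diff below):

    String key = "in.fencing-tests";              // configuration key used in the test
    String envVar = key.replaceAll("[.-]", "_");  // -> "in_fencing_tests"

Previously only '.' was replaced, so keys containing '-' kept the dash in the environment variable name.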
Signed-off-by: He Xiaoqiao --- .../java/org/apache/hadoop/ha/HAServiceTarget.java | 2 +- .../java/org/apache/hadoop/ha/ShellCommandFencer.java | 10 +++++----- .../org/apache/hadoop/ha/TestShellCommandFencer.java | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java index 288a9dcbe0e53..701862a0f0797 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/HAServiceTarget.java @@ -183,7 +183,7 @@ public final Map getFencingParameters() { * expose to fencing implementations/scripts. Fencing methods are free * to use this map as they see fit -- notably, the shell script * implementation takes each entry, prepends 'target_', substitutes - * '_' for '.', and adds it to the environment of the script. + * '_' for '.' and '-', and adds it to the environment of the script. * * Subclass implementations should be sure to delegate to the superclass * implementation as well as adding their own keys. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java index 1ffcc3009ea8d..059c87c0d314a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ShellCommandFencer.java @@ -39,8 +39,8 @@ * (cmd.exe on Windows) and may not include any closing parentheses.

* * The shell command will be run with an environment set up to contain - * all of the current Hadoop configuration variables, with the '_' character - * replacing any '.' characters in the configuration keys.

+ * all of the current Hadoop configuration variables, with the '_' character + * replacing any '.' or '-' characters in the configuration keys.

* * If the shell command returns an exit code of 0, the fencing is * determined to be successful. If it returns any other exit code, the @@ -202,11 +202,11 @@ private static String tryGetPid(Process p) { /** * Set the environment of the subprocess to be the Configuration, - * with '.'s replaced by '_'s. + * with '.'s and '-'s replaced by '_'s. */ private void setConfAsEnvVars(Map env) { for (Map.Entry pair : getConf()) { - env.put(pair.getKey().replace('.', '_'), pair.getValue()); + env.put(pair.getKey().replaceAll("[.-]", "_"), pair.getValue()); } } @@ -237,7 +237,7 @@ private void addTargetInfoAsEnvVars(HAServiceTarget target, for (Map.Entry e : target.getFencingParameters().entrySet()) { String key = prefix + e.getKey(); - key = key.replace('.', '_'); + key = key.replaceAll("[.-]", "_"); environment.put(key, e.getValue()); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java index 88afb35a8dd9a..3eb6f42e467ed 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestShellCommandFencer.java @@ -63,7 +63,7 @@ public void resetLogSpy() { private static ShellCommandFencer createFencer() { Configuration conf = new Configuration(); - conf.set("in.fencing.tests", "yessir"); + conf.set("in.fencing-tests", "yessir"); ShellCommandFencer fencer = new ShellCommandFencer(); fencer.setConf(conf); return fencer; From e48cd0e987dfb6ef60341f0ff25a501582b7177c Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Sun, 21 Jul 2024 19:07:19 +0800 Subject: [PATCH 066/113] Revert "HDFS-17575. SaslDataTransferClient should use SaslParticipant to create messages. (#6933)" This reverts commit 7638b4727e702725bcbfeb4addf3fc80078924b8. --- .../sasl/SaslDataTransferClient.java | 16 ++++------------ .../datatransfer/sasl/SaslParticipant.java | 10 ++-------- 2 files changed, 6 insertions(+), 20 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java index 960a5221dd1ae..043439130d5dc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java @@ -63,7 +63,6 @@ import org.apache.hadoop.security.token.SecretManager; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.Lists; -import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -85,8 +84,6 @@ public class SaslDataTransferClient { private static final Logger LOG = LoggerFactory.getLogger( SaslDataTransferClient.class); - private static final byte[] EMPTY_BYTE_ARRAY = {}; - private final Configuration conf; private final AtomicBoolean fallbackToSimpleAuth; private final SaslPropertiesResolver saslPropsResolver; @@ -522,29 +519,25 @@ private IOStreamPair doSaslHandshake(InetAddress addr, // In which case there will be no encrypted secret sent from NN. 
BlockTokenIdentifier blockTokenIdentifier = accessToken.decodeIdentifier(); - final byte[] first = sasl.evaluateChallengeOrResponse(EMPTY_BYTE_ARRAY); - if (LOG.isDebugEnabled()) { - LOG.info("first: {}", first == null ? null : first.length == 0 ? "" - : StringUtils.byteToHexString(first)); - } if (blockTokenIdentifier != null) { byte[] handshakeSecret = accessToken.decodeIdentifier().getHandshakeMsg(); if (handshakeSecret == null || handshakeSecret.length == 0) { LOG.debug("Handshake secret is null, " + "sending without handshake secret."); - sendSaslMessage(out, first); + sendSaslMessage(out, new byte[0]); } else { LOG.debug("Sending handshake secret."); BlockTokenIdentifier identifier = new BlockTokenIdentifier(); identifier.readFields(new DataInputStream( new ByteArrayInputStream(accessToken.getIdentifier()))); String bpid = identifier.getBlockPoolId(); - sendSaslMessageHandshakeSecret(out, first, handshakeSecret, bpid); + sendSaslMessageHandshakeSecret(out, new byte[0], + handshakeSecret, bpid); } } else { LOG.debug("Block token id is null, sending without handshake secret."); - sendSaslMessage(out, first); + sendSaslMessage(out, new byte[0]); } // step 1 @@ -572,7 +565,6 @@ private IOStreamPair doSaslHandshake(InetAddress addr, cipherOptions.add(option); } } - LOG.debug("{}: cipherOptions={}", sasl, cipherOptions); sendSaslMessageAndNegotiationCipherOptions(out, localResponse, cipherOptions); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslParticipant.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslParticipant.java index ee8760d688adb..e32f76a8ebd7d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslParticipant.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslParticipant.java @@ -20,7 +20,6 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.util.Map; -import java.util.Objects; import javax.security.auth.callback.CallbackHandler; import javax.security.sasl.Sasl; import javax.security.sasl.SaslClient; @@ -111,7 +110,7 @@ public static SaslParticipant createClientSaslParticipant(String userName, * @param saslServer to wrap */ private SaslParticipant(SaslServer saslServer) { - this.saslServer = Objects.requireNonNull(saslServer, "saslServer == null"); + this.saslServer = saslServer; this.saslClient = null; } @@ -122,7 +121,7 @@ private SaslParticipant(SaslServer saslServer) { */ private SaslParticipant(SaslClient saslClient) { this.saslServer = null; - this.saslClient = Objects.requireNonNull(saslClient, "saslClient == null"); + this.saslClient = saslClient; } /** @@ -229,9 +228,4 @@ public IOStreamPair createStreamPair(DataOutputStream out, new SaslOutputStream(out, saslServer)); } } - - @Override - public String toString() { - return "Sasl" + (saslServer != null? "Server" : "Client"); - } } From b60497ff41e1dc149d1610f4cc6ea4e0609f9946 Mon Sep 17 00:00:00 2001 From: Pranav Saxena <108325433+saxenapranav@users.noreply.github.com> Date: Mon, 22 Jul 2024 23:33:51 +0530 Subject: [PATCH 067/113] HADOOP-19120. ApacheHttpClient adaptation in ABFS. (#6633) Apache httpclient 4.5.x is the new default implementation of http connections; this supports a large configurable pool of connections along with the ability to limit their lifespan. 
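A minimal sketch, not a definitive recipe, of how a client might select the networking library and tune the connection pool described here; the key names are taken from the ConfigurationKeys changes later in this patch, and the values simply mirror the defaults it defines.

    import org.apache.hadoop.conf.Configuration;

    public class AbfsNetworkingConfigSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Networking library: APACHE_HTTP_CLIENT (default) or JDK_HTTP_URL_CONNECTION.
        conf.set("fs.azure.networking.library", "APACHE_HTTP_CLIENT");
        // Idle TTL in milliseconds for pooled Apache connections (patch default: 5000).
        conf.setLong("fs.azure.apache.http.client.idle.connection.ttl", 5_000L);
        // Maximum cached connections at filesystem level (patch default: 5).
        conf.setInt("fs.azure.apache.http.client.max.cache.connection.size", 5);
        // IOException retries before the JVM falls back to the JDK client (patch default: 3).
        conf.setInt("fs.azure.apache.http.client.max.io.exception.retries", 3);
      }
    }
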
The networking library can be chosen using the configuration option fs.azure.networking.library The supported values are - APACHE_HTTP_CLIENT : Use Apache HttpClient [Default] - JDK_HTTP_URL_CONNECTION : Use JDK networking library Important: unless the networking library is switched back to the JDK, the apache httpcore and httpclient must be on the classpath Contributed by Pranav Saxena --- .../apache/hadoop/fs/ClosedIOException.java | 39 ++ .../hadoop/fs/azurebfs/AbfsConfiguration.java | 44 ++ .../fs/azurebfs/AzureBlobFileSystem.java | 3 +- .../fs/azurebfs/AzureBlobFileSystemStore.java | 6 +- .../azurebfs/constants/AbfsHttpConstants.java | 11 + .../azurebfs/constants/ConfigurationKeys.java | 12 + .../constants/FileSystemConfigurations.java | 9 + .../azurebfs/constants/HttpOperationType.java | 24 + .../AbfsApacheHttpExpect100Exception.java | 34 ++ .../exceptions/HttpResponseException.java | 40 ++ .../services/AbfsAHCHttpOperation.java | 394 +++++++++++++ .../services/AbfsApacheHttpClient.java | 144 +++++ .../fs/azurebfs/services/AbfsClient.java | 44 +- .../AbfsClientThrottlingIntercept.java | 3 +- .../services/AbfsConnectionManager.java | 207 +++++++ .../AbfsHttpClientConnectionFactory.java | 45 ++ .../azurebfs/services/AbfsHttpOperation.java | 536 +++++++++++------- .../services/AbfsJdkHttpOperation.java | 300 ++++++++++ .../AbfsManagedApacheHttpConnection.java | 240 ++++++++ .../AbfsManagedHttpClientContext.java | 76 +++ .../AbfsManagedHttpRequestExecutor.java | 109 ++++ .../services/AbfsNoOpThrottlingIntercept.java | 8 +- .../azurebfs/services/AbfsRestOperation.java | 142 +++-- .../services/AbfsThrottlingIntercept.java | 5 +- .../fs/azurebfs/services/KeepAliveCache.java | 306 ++++++++++ .../services/SharedKeyCredentials.java | 17 +- .../fs/azurebfs/utils/TracingContext.java | 1 + .../hadoop-azure/src/site/markdown/abfs.md | 39 ++ .../azurebfs/ITestAbfsCustomEncryption.java | 2 +- .../azurebfs/ITestAbfsReadWriteAndSeek.java | 64 ++- .../ITestAzureBlobFileSystemCreate.java | 7 +- ...ITestAzureBlobFileSystemDelegationSAS.java | 2 +- .../ITestAzureBlobFileSystemDelete.java | 4 +- .../azurebfs/ITestAzureBlobFileSystemE2E.java | 17 +- .../ITestAzureBlobFileSystemLease.java | 25 +- .../fs/azurebfs/TestTracingContext.java | 14 +- .../MockDelegationSASTokenProvider.java | 9 +- .../azurebfs/services/AbfsClientTestUtil.java | 8 +- .../fs/azurebfs/services/ITestAbfsClient.java | 131 +++-- .../ITestAbfsHttpClientRequestExecutor.java | 406 +++++++++++++ .../services/ITestAbfsOutputStream.java | 46 +- .../services/ITestAbfsRestOperation.java | 138 +++-- .../ITestApacheClientConnectionPool.java | 63 ++ .../services/TestAbfsPerfTracker.java | 36 +- .../services/TestAbfsRenameRetryRecovery.java | 27 +- .../services/TestAbfsRestOperation.java | 7 +- .../TestAbfsRestOperationMockFailures.java | 15 +- .../TestApacheClientConnectionPool.java | 290 ++++++++++ .../TestApacheHttpClientFallback.java | 226 ++++++++ .../utils/TracingHeaderValidator.java | 2 +- 50 files changed, 3934 insertions(+), 443 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ClosedIOException.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpOperationType.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsApacheHttpExpect100Exception.java create mode 100644 
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/HttpResponseException.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsApacheHttpClient.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsConnectionManager.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpClientConnectionFactory.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsJdkHttpOperation.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsManagedApacheHttpConnection.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsManagedHttpClientContext.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsManagedHttpRequestExecutor.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/KeepAliveCache.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsHttpClientRequestExecutor.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestApacheClientConnectionPool.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestApacheClientConnectionPool.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestApacheHttpClientFallback.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ClosedIOException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ClosedIOException.java new file mode 100644 index 0000000000000..e27346e333198 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ClosedIOException.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Exception to denote if the underlying stream, cache or other closable resource + * is closed. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public class ClosedIOException extends PathIOException { + + /** + * Appends the custom error-message to the default error message. + * @param path path that encountered the closed resource. 
+ * @param message custom error message. + */ + public ClosedIOException(String path, String message) { + super(path, message); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index 5df46eb883da1..996539182225c 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -23,6 +23,7 @@ import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.azurebfs.services.FixedSASTokenProvider; +import org.apache.hadoop.fs.azurebfs.constants.HttpOperationType; import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; import org.apache.hadoop.util.Preconditions; @@ -390,6 +391,20 @@ public class AbfsConfiguration{ FS_AZURE_ENABLE_PAGINATED_DELETE, DefaultValue = DEFAULT_ENABLE_PAGINATED_DELETE) private boolean isPaginatedDeleteEnabled; + @IntegerConfigurationValidatorAnnotation(ConfigurationKey = + FS_AZURE_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES, DefaultValue = DEFAULT_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES) + private int maxApacheHttpClientIoExceptionsRetries; + + /** + * Max idle TTL configuration for connection given in + * {@value org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys#FS_AZURE_APACHE_HTTP_CLIENT_IDLE_CONNECTION_TTL} + * with default of + * {@value org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations#DEFAULT_HTTP_CLIENT_CONN_MAX_IDLE_TIME} + */ + @LongConfigurationValidatorAnnotation(ConfigurationKey = FS_AZURE_APACHE_HTTP_CLIENT_IDLE_CONNECTION_TTL, + DefaultValue = DEFAULT_HTTP_CLIENT_CONN_MAX_IDLE_TIME) + private long maxApacheHttpClientConnectionIdleTime; + private String clientProvidedEncryptionKey; private String clientProvidedEncryptionKeySHA; @@ -491,6 +506,17 @@ public long getLong(String key, long defaultValue) { return rawConfig.getLong(accountConf(key), rawConfig.getLong(key, defaultValue)); } + /** + * Returns the account-specific value if it exists, then looks for an + * account-agnostic value, and finally tries the default value. + * @param key Account-agnostic configuration key + * @param defaultValue Value returned if none is configured + * @return value if one exists, else the default value + */ + public int getInt(String key, int defaultValue) { + return rawConfig.getInt(accountConf(key), rawConfig.getInt(key, defaultValue)); + } + /** * Returns the account-specific password in string form if it exists, then * looks for an account-agnostic value. @@ -889,6 +915,24 @@ public DelegatingSSLSocketFactory.SSLChannelMode getPreferredSSLFactoryOption() return getEnum(FS_AZURE_SSL_CHANNEL_MODE_KEY, DEFAULT_FS_AZURE_SSL_CHANNEL_MODE); } + /** + * @return Config to select netlib for server communication. + */ + public HttpOperationType getPreferredHttpOperationType() { + return getEnum(FS_AZURE_NETWORKING_LIBRARY, DEFAULT_NETWORKING_LIBRARY); + } + + public int getMaxApacheHttpClientIoExceptionsRetries() { + return maxApacheHttpClientIoExceptionsRetries; + } + + /** + * @return {@link #maxApacheHttpClientConnectionIdleTime}. 
+ */ + public long getMaxApacheHttpClientConnectionIdleTime() { + return maxApacheHttpClientConnectionIdleTime; + } + /** * Enum config to allow user to pick format of x-ms-client-request-id header * @return tracingContextFormat config if valid, else default ALL_ID_FORMAT diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java index 9d7d3cd50782c..1cff03fb2c72f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java @@ -750,7 +750,8 @@ public synchronized void close() throws IOException { IOSTATISTICS_LOGGING_LEVEL_DEFAULT); logIOStatisticsAtLevel(LOG, iostatisticsLoggingLevel, getIOStatistics()); } - IOUtils.cleanupWithLogger(LOG, abfsStore, delegationTokenManager); + IOUtils.cleanupWithLogger(LOG, abfsStore, delegationTokenManager, + getAbfsClient()); this.isClosed = true; if (LOG.isDebugEnabled()) { LOG.debug("Closing Abfs: {}", toString()); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 449b123d9212a..784e3f25c621f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -55,6 +55,7 @@ import java.util.concurrent.TimeUnit; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.security.ContextProviderEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; @@ -106,7 +107,6 @@ import org.apache.hadoop.fs.azurebfs.services.AbfsClientContextBuilder; import org.apache.hadoop.fs.azurebfs.services.AbfsClientRenameResult; import org.apache.hadoop.fs.azurebfs.services.AbfsCounters; -import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsInputStream; import org.apache.hadoop.fs.azurebfs.services.AbfsInputStreamContext; import org.apache.hadoop.fs.azurebfs.services.AbfsInputStreamStatisticsImpl; @@ -694,7 +694,7 @@ public OutputStream createFile(final Path path, populateAbfsOutputStreamContext( isAppendBlob, lease, - client, + getClient(), statistics, relativePath, 0, @@ -933,7 +933,7 @@ public AbfsInputStream openFileForRead(Path path, perfInfo.registerSuccess(true); // Add statistics for InputStream - return new AbfsInputStream(client, statistics, relativePath, + return new AbfsInputStream(getClient(), statistics, relativePath, contentLength, populateAbfsInputStreamContext( parameters.map(OpenFileParameters::getOptions), contextEncryptionAdapter), diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java index f16d315e4d62d..84127d9d5738f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java +++ 
b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java @@ -199,5 +199,16 @@ public static ApiVersion getCurrentVersion() { + "non-hierarchical-namespace account:" + CPK_CONFIG_LIST; + /** + * System property that define maximum number of cached-connection per fileSystem for + * ApacheHttpClient. JDK network library uses the same property to define maximum + * number of cached-connections at JVM level. + */ + public static final String HTTP_MAX_CONN_SYS_PROP = "http.maxConnections"; + public static final String JDK_IMPL = "JDK"; + public static final String APACHE_IMPL = "Apache"; + public static final String JDK_FALLBACK = "JDK_fallback"; + public static final String KEEP_ALIVE_CACHE_CLOSED = "KeepAliveCache is closed"; + private AbfsHttpConstants() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index 55d3f6ab4e2bc..ed749c7885b39 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -321,5 +321,17 @@ public static String accountProperty(String property, String account) { * @see FileSystem#openFile(org.apache.hadoop.fs.Path) */ public static final String FS_AZURE_BUFFERED_PREAD_DISABLE = "fs.azure.buffered.pread.disable"; + /**Defines what network library to use for server IO calls: {@value}*/ + public static final String FS_AZURE_NETWORKING_LIBRARY = "fs.azure.networking.library"; + /** + * Maximum number of IOExceptions retries for a single server call on ApacheHttpClient. 
+ * Breach of this count would turn off future uses of the ApacheHttpClient library + * in the JVM lifecycle: {@value} + */ + public static final String FS_AZURE_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES = "fs.azure.apache.http.client.max.io.exception.retries"; + /**Maximum ApacheHttpClient-connection cache size at filesystem level: {@value}*/ + public static final String FS_AZURE_APACHE_HTTP_CLIENT_MAX_CACHE_CONNECTION_SIZE = "fs.azure.apache.http.client.max.cache.connection.size"; + /**Maximum idle time for a ApacheHttpClient-connection: {@value}*/ + public static final String FS_AZURE_APACHE_HTTP_CLIENT_IDLE_CONNECTION_TTL = "fs.azure.apache.http.client.idle.connection.ttl"; private ConfigurationKeys() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index ade0dc39cfe18..bd2d6e4b57334 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -167,5 +167,14 @@ public final class FileSystemConfigurations { public static final int HUNDRED = 100; public static final long THOUSAND = 1000L; + public static final HttpOperationType DEFAULT_NETWORKING_LIBRARY + = HttpOperationType.APACHE_HTTP_CLIENT; + + public static final int DEFAULT_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES = 3; + + public static final long DEFAULT_HTTP_CLIENT_CONN_MAX_IDLE_TIME = 5_000L; + + public static final int DEFAULT_HTTP_CLIENT_CONN_MAX_CACHED_CONNECTIONS = 5; + private FileSystemConfigurations() {} } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpOperationType.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpOperationType.java new file mode 100644 index 0000000000000..7b48a9d970cdc --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/HttpOperationType.java @@ -0,0 +1,24 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.constants; + +public enum HttpOperationType { + JDK_HTTP_URL_CONNECTION, + APACHE_HTTP_CLIENT; +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsApacheHttpExpect100Exception.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsApacheHttpExpect100Exception.java new file mode 100644 index 0000000000000..650ef241c6cad --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/AbfsApacheHttpExpect100Exception.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.contracts.exceptions; + +import org.apache.http.HttpResponse; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EXPECT_100_JDK_ERROR; + +/** + * Exception that marks expect100 handshake error. This exception is thrown when + * the expect100 handshake fails with ADLS server sending 4xx or 5xx status code. + */ +public class AbfsApacheHttpExpect100Exception extends HttpResponseException { + + public AbfsApacheHttpExpect100Exception(final HttpResponse httpResponse) { + super(EXPECT_100_JDK_ERROR, httpResponse); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/HttpResponseException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/HttpResponseException.java new file mode 100644 index 0000000000000..c257309c8c9fb --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/HttpResponseException.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.contracts.exceptions; + +import java.io.IOException; +import java.util.Objects; + +import org.apache.http.HttpResponse; + +/** + * Encapsulates an exception thrown from ApacheHttpClient response parsing. + */ +public class HttpResponseException extends IOException { + private final HttpResponse httpResponse; + public HttpResponseException(final String s, final HttpResponse httpResponse) { + super(s); + Objects.requireNonNull(httpResponse, "httpResponse should be non-null"); + this.httpResponse = httpResponse; + } + + public HttpResponse getHttpResponse() { + return httpResponse; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java new file mode 100644 index 0000000000000..3e8c6d22637fb --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsAHCHttpOperation.java @@ -0,0 +1,394 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.time.Duration; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsApacheHttpExpect100Exception; +import org.apache.http.Header; +import org.apache.http.HttpEntity; +import org.apache.http.HttpResponse; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpDelete; +import org.apache.http.client.methods.HttpEntityEnclosingRequestBase; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.client.methods.HttpHead; +import org.apache.http.client.methods.HttpPatch; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.client.methods.HttpRequestBase; +import org.apache.http.entity.ByteArrayEntity; +import org.apache.http.util.EntityUtils; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APACHE_IMPL; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_DELETE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_GET; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_HEAD; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_POST; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; +import static org.apache.http.entity.ContentType.TEXT_PLAIN; + +/** + * Implementation of {@link AbfsHttpOperation} for orchestrating server calls using + * Apache Http Client. + */ +public class AbfsAHCHttpOperation extends AbfsHttpOperation { + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsAHCHttpOperation.class); + + /** + * Request object for network call over ApacheHttpClient. + */ + private final HttpRequestBase httpRequestBase; + + /** + * Response object received from a server call over ApacheHttpClient. + */ + private HttpResponse httpResponse; + + /** + * Flag to indicate if the request is a payload request. HTTP methods PUT, POST, + * PATCH qualify for payload requests. + */ + private final boolean isPayloadRequest; + + /** + * ApacheHttpClient to make network calls. 
+ */ + private final AbfsApacheHttpClient abfsApacheHttpClient; + + public AbfsAHCHttpOperation(final URL url, + final String method, + final List requestHeaders, + final Duration connectionTimeout, + final Duration readTimeout, + final AbfsApacheHttpClient abfsApacheHttpClient) throws IOException { + super(LOG, url, method, requestHeaders, connectionTimeout, readTimeout); + this.isPayloadRequest = HTTP_METHOD_PUT.equals(method) + || HTTP_METHOD_PATCH.equals(method) + || HTTP_METHOD_POST.equals(method); + this.abfsApacheHttpClient = abfsApacheHttpClient; + LOG.debug("Creating AbfsAHCHttpOperation for URL: {}, method: {}", + url, method); + + final URI requestUri; + try { + requestUri = url.toURI(); + } catch (URISyntaxException e) { + throw new IOException(e); + } + switch (getMethod()) { + case HTTP_METHOD_PUT: + httpRequestBase = new HttpPut(requestUri); + break; + case HTTP_METHOD_PATCH: + httpRequestBase = new HttpPatch(requestUri); + break; + case HTTP_METHOD_POST: + httpRequestBase = new HttpPost(requestUri); + break; + case HTTP_METHOD_GET: + httpRequestBase = new HttpGet(requestUri); + break; + case HTTP_METHOD_DELETE: + httpRequestBase = new HttpDelete(requestUri); + break; + case HTTP_METHOD_HEAD: + httpRequestBase = new HttpHead(requestUri); + break; + default: + /* + * This would not happen as the AbfsClient would always be sending valid + * method. + */ + throw new PathIOException(getUrl().toString(), + "Unsupported HTTP method: " + getMethod()); + } + } + + /** + * @return AbfsManagedHttpClientContext instance that captures latencies at + * different phases of network call. + */ + @VisibleForTesting + AbfsManagedHttpClientContext getHttpClientContext() { + return new AbfsManagedHttpClientContext(); + } + + /**{@inheritDoc}*/ + @Override + protected InputStream getErrorStream() throws IOException { + HttpEntity entity = httpResponse.getEntity(); + if (entity == null) { + return null; + } + return entity.getContent(); + } + + /**{@inheritDoc}*/ + @Override + String getConnProperty(final String key) { + for (AbfsHttpHeader header : getRequestHeaders()) { + if (header.getName().equals(key)) { + return header.getValue(); + } + } + return null; + } + + /**{@inheritDoc}*/ + @Override + URL getConnUrl() { + return getUrl(); + } + + /**{@inheritDoc}*/ + @Override + Integer getConnResponseCode() throws IOException { + return getStatusCode(); + } + + /**{@inheritDoc}*/ + @Override + String getConnResponseMessage() throws IOException { + return getStatusDescription(); + } + + /**{@inheritDoc}*/ + @Override + public void processResponse(final byte[] buffer, + final int offset, + final int length) throws IOException { + try { + if (!isPayloadRequest) { + prepareRequest(); + LOG.debug("Sending request: {}", httpRequestBase); + httpResponse = executeRequest(); + LOG.debug("Request sent: {}; response {}", httpRequestBase, + httpResponse); + } + parseResponseHeaderAndBody(buffer, offset, length); + } finally { + if (httpResponse != null) { + try { + EntityUtils.consume(httpResponse.getEntity()); + } finally { + if (httpResponse instanceof CloseableHttpResponse) { + ((CloseableHttpResponse) httpResponse).close(); + } + } + } + } + } + + /** + * Parse response stream for headers and body. + * + * @param buffer byte array to store response body. + * @param offset offset in the buffer to start storing the response body. + * @param length length of the response body. 
+ * + * @throws IOException network error while read response stream + */ + @VisibleForTesting + void parseResponseHeaderAndBody(final byte[] buffer, + final int offset, + final int length) throws IOException { + setStatusCode(parseStatusCode(httpResponse)); + + setStatusDescription(httpResponse.getStatusLine().getReasonPhrase()); + setRequestId(); + + // dump the headers + if (LOG.isDebugEnabled()) { + AbfsIoUtils.dumpHeadersToDebugLog("Request Headers", + getRequestProperties()); + } + parseResponse(buffer, offset, length); + } + + /** + * Parse status code from response + * + * @param httpResponse response object + * @return status code + */ + @VisibleForTesting + int parseStatusCode(HttpResponse httpResponse) { + return httpResponse.getStatusLine().getStatusCode(); + } + + /** + * Execute network call for the request + * + * @return response object + * @throws IOException network error while executing the request + */ + @VisibleForTesting + HttpResponse executeRequest() throws IOException { + AbfsManagedHttpClientContext abfsHttpClientContext + = getHttpClientContext(); + try { + LOG.debug("Executing request: {}", httpRequestBase); + HttpResponse response = abfsApacheHttpClient.execute(httpRequestBase, + abfsHttpClientContext, getConnectionTimeout(), getReadTimeout()); + setConnectionTimeMs(abfsHttpClientContext.getConnectTime()); + setSendRequestTimeMs(abfsHttpClientContext.getSendTime()); + setRecvResponseTimeMs(abfsHttpClientContext.getReadTime()); + return response; + } catch (IOException e) { + LOG.debug("Failed to execute request: {}", httpRequestBase, e); + throw e; + } + } + + /**{@inheritDoc}*/ + @Override + public void setRequestProperty(final String key, final String value) { + List headers = getRequestHeaders(); + if (headers != null) { + headers.add(new AbfsHttpHeader(key, value)); + } + } + + /**{@inheritDoc}*/ + @Override + Map> getRequestProperties() { + Map> map = new HashMap<>(); + for (AbfsHttpHeader header : getRequestHeaders()) { + map.put(header.getName(), + new ArrayList() {{ + add(header.getValue()); + }}); + } + return map; + } + + /**{@inheritDoc}*/ + @Override + public String getResponseHeader(final String headerName) { + if (httpResponse == null) { + return null; + } + Header header = httpResponse.getFirstHeader(headerName); + if (header != null) { + return header.getValue(); + } + return null; + } + + /**{@inheritDoc}*/ + @Override + protected InputStream getContentInputStream() + throws IOException { + if (httpResponse == null || httpResponse.getEntity() == null) { + return null; + } + return httpResponse.getEntity().getContent(); + } + + /**{@inheritDoc}*/ + @Override + public void sendPayload(final byte[] buffer, + final int offset, + final int length) + throws IOException { + if (!isPayloadRequest) { + return; + } + + setExpectedBytesToBeSent(length); + if (buffer != null) { + HttpEntity httpEntity = new ByteArrayEntity(buffer, offset, length, + TEXT_PLAIN); + ((HttpEntityEnclosingRequestBase) httpRequestBase).setEntity( + httpEntity); + } + + prepareRequest(); + try { + LOG.debug("Sending request: {}", httpRequestBase); + httpResponse = executeRequest(); + } catch (AbfsApacheHttpExpect100Exception ex) { + LOG.debug( + "Getting output stream failed with expect header enabled, returning back." 
+ + "Expect 100 assertion failed for uri {} with status code: {}", + getMaskedUrl(), parseStatusCode(ex.getHttpResponse()), + ex); + setConnectionDisconnectedOnError(); + httpResponse = ex.getHttpResponse(); + } catch (IOException ex) { + LOG.debug("Getting output stream failed for uri {}, exception: {}", + getMaskedUrl(), ex); + throw ex; + } finally { + if (httpResponse != null) { + LOG.debug("Request sent: {}; response {}", httpRequestBase, + httpResponse); + } + if (!isConnectionDisconnectedOnError() + && httpRequestBase instanceof HttpEntityEnclosingRequestBase) { + setBytesSent(length); + } + } + } + + /** + * Sets the header on the request. + */ + private void prepareRequest() { + for (AbfsHttpHeader header : getRequestHeaders()) { + httpRequestBase.setHeader(header.getName(), header.getValue()); + } + } + + /**{@inheritDoc}*/ + @Override + public String getRequestProperty(String name) { + for (AbfsHttpHeader header : getRequestHeaders()) { + if (header.getName().equals(name)) { + return header.getValue(); + } + } + return EMPTY_STRING; + } + + /**{@inheritDoc}*/ + @Override + public String getTracingContextSuffix() { + return APACHE_IMPL; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsApacheHttpClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsApacheHttpClient.java new file mode 100644 index 0000000000000..8111a0a4d21c7 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsApacheHttpClient.java @@ -0,0 +1,144 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.Closeable; +import java.io.IOException; + +import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; +import org.apache.http.HttpResponse; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.HttpRequestBase; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.socket.PlainConnectionSocketFactory; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.client.HttpClients; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTPS_SCHEME; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTP_SCHEME; +import static org.apache.http.conn.ssl.SSLConnectionSocketFactory.getDefaultHostnameVerifier; + +/** + * Client for AzureBlobFileSystem to execute HTTP requests over ApacheHttpClient. + */ +final class AbfsApacheHttpClient implements Closeable { + + /** + * ApacheHttpClient instance that executes HTTP request. + */ + private final CloseableHttpClient httpClient; + + /** + * Flag to indicate if the client is usable. This is a JVM level flag, state of + * this flag is shared across all instances of fileSystems. Once switched off, + * the ApacheHttpClient would not be used for whole JVM lifecycle. + */ + private static boolean usable = true; + + /** + * Registers the switch off of ApacheHttpClient for all future use in the JVM. + */ + static void registerFallback() { + usable = false; + } + + /** + * @return if ApacheHttpClient is usable. + */ + static boolean usable() { + return usable; + } + + AbfsApacheHttpClient(DelegatingSSLSocketFactory delegatingSSLSocketFactory, + final int readTimeout, final KeepAliveCache keepAliveCache) { + final AbfsConnectionManager connMgr = new AbfsConnectionManager( + createSocketFactoryRegistry( + new SSLConnectionSocketFactory(delegatingSSLSocketFactory, + getDefaultHostnameVerifier())), + new AbfsHttpClientConnectionFactory(), keepAliveCache); + final HttpClientBuilder builder = HttpClients.custom(); + builder.setConnectionManager(connMgr) + .setRequestExecutor(new AbfsManagedHttpRequestExecutor(readTimeout)) + .disableContentCompression() + .disableRedirectHandling() + .disableAutomaticRetries() + /* + * To prevent the read of system property http.agent. The agent is set + * in request headers by AbfsClient. System property read is an + * overhead. + */ + .setUserAgent(EMPTY_STRING); + httpClient = builder.build(); + } + + @Override + public void close() throws IOException { + if (httpClient != null) { + httpClient.close(); + } + } + + /** + * Executes the HTTP request. + * + * @param httpRequest HTTP request to execute. + * @param abfsHttpClientContext HttpClient context. + * @param connectTimeout Connection timeout. + * @param readTimeout Read timeout. + * + * @return HTTP response. + * @throws IOException network error. 
+ */ + public HttpResponse execute(HttpRequestBase httpRequest, + final AbfsManagedHttpClientContext abfsHttpClientContext, + final int connectTimeout, + final int readTimeout) throws IOException { + RequestConfig.Builder requestConfigBuilder = RequestConfig + .custom() + .setConnectTimeout(connectTimeout) + .setSocketTimeout(readTimeout); + httpRequest.setConfig(requestConfigBuilder.build()); + return httpClient.execute(httpRequest, abfsHttpClientContext); + } + + /** + * Creates the socket factory registry for HTTP and HTTPS. + * + * @param sslSocketFactory SSL socket factory. + * @return Socket factory registry. + */ + private Registry createSocketFactoryRegistry( + ConnectionSocketFactory sslSocketFactory) { + if (sslSocketFactory == null) { + return RegistryBuilder.create() + .register(HTTP_SCHEME, + PlainConnectionSocketFactory.getSocketFactory()) + .build(); + } + return RegistryBuilder.create() + .register(HTTP_SCHEME, PlainConnectionSocketFactory.getSocketFactory()) + .register(HTTPS_SCHEME, sslSocketFactory) + .build(); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index 8ba550e06deb9..a2d65c145b625 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -42,6 +42,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.constants.HttpOperationType; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsInvalidChecksumException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; @@ -139,6 +140,10 @@ public class AbfsClient implements Closeable { private boolean isSendMetricCall; private SharedKeyCredentials metricSharedkeyCredentials = null; + private KeepAliveCache keepAliveCache; + + private AbfsApacheHttpClient abfsApacheHttpClient; + /** * logging the rename failure if metadata is in an incomplete state. 
*/ @@ -187,6 +192,15 @@ private AbfsClient(final URL baseUrl, + "{}", e.getMessage()); } } + if (abfsConfiguration.getPreferredHttpOperationType() + == HttpOperationType.APACHE_HTTP_CLIENT) { + keepAliveCache = new KeepAliveCache(abfsConfiguration); + + abfsApacheHttpClient = new AbfsApacheHttpClient( + DelegatingSSLSocketFactory.getDefaultFactory(), + abfsConfiguration.getHttpReadTimeout(), + keepAliveCache); + } this.userAgent = initializeUserAgent(abfsConfiguration, sslProviderName); this.abfsPerfTracker = abfsClientContext.getAbfsPerfTracker(); @@ -255,6 +269,12 @@ public void close() throws IOException { runningTimerTask.cancel(); timer.purge(); } + if (keepAliveCache != null) { + keepAliveCache.close(); + } + if (abfsApacheHttpClient != null) { + abfsApacheHttpClient.close(); + } if (tokenProvider instanceof Closeable) { IOUtils.cleanupWithLogger(LOG, (Closeable) tokenProvider); @@ -1214,7 +1234,8 @@ public AbfsRestOperation deletePath(final String path, final boolean recursive, this, HTTP_METHOD_DELETE, url, - requestHeaders); + requestHeaders, + abfsConfiguration); try { op.execute(tracingContext); } catch (AzureBlobFileSystemException e) { @@ -1600,6 +1621,9 @@ String initializeUserAgent(final AbfsConfiguration abfsConfiguration, sb.append(HUNDRED_CONTINUE); sb.append(SEMICOLON); } + sb.append(SINGLE_WHITE_SPACE) + .append(abfsConfiguration.getPreferredHttpOperationType()) + .append(SEMICOLON); sb.append(SINGLE_WHITE_SPACE); sb.append(abfsConfiguration.getClusterName()); @@ -1928,7 +1952,8 @@ AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType buffer, bufferOffset, bufferLength, - sasTokenForReuse); + sasTokenForReuse, + abfsConfiguration); } /** @@ -1949,7 +1974,8 @@ AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType this, httpMethod, url, - requestHeaders + requestHeaders, + abfsConfiguration ); } @@ -1973,6 +1999,16 @@ AbfsRestOperation getAbfsRestOperation(final AbfsRestOperationType operationType this, httpMethod, url, - requestHeaders, sasTokenForReuse); + requestHeaders, sasTokenForReuse, abfsConfiguration); + } + + @VisibleForTesting + AbfsApacheHttpClient getAbfsApacheHttpClient() { + return abfsApacheHttpClient; + } + + @VisibleForTesting + KeepAliveCache getKeepAliveCache() { + return keepAliveCache; } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java index 3bb225d4be862..05173443cdb23 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientThrottlingIntercept.java @@ -130,6 +130,7 @@ private boolean updateBytesTransferred(boolean isThrottledOperation, /** * Updates the metrics for successful and failed read and write operations. + * * @param operationType Only applicable for read and write operations. * @param abfsHttpOperation Used for status code and data transferred. 
*/ @@ -170,7 +171,7 @@ public void updateMetrics(AbfsRestOperationType operationType, } break; case ReadFile: - String range = abfsHttpOperation.getConnection().getRequestProperty(HttpHeaderConfigurations.RANGE); + String range = abfsHttpOperation.getRequestProperty(HttpHeaderConfigurations.RANGE); contentLength = getContentLengthIfKnown(range); if (contentLength > 0) { readThrottler.addBytesTransferred(contentLength, diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsConnectionManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsConnectionManager.java new file mode 100644 index 0000000000000..9b0e69accbd6f --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsConnectionManager.java @@ -0,0 +1,207 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.UUID; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.http.HttpClientConnection; +import org.apache.http.config.Registry; +import org.apache.http.config.SocketConfig; +import org.apache.http.conn.ConnectionPoolTimeoutException; +import org.apache.http.conn.ConnectionRequest; +import org.apache.http.conn.HttpClientConnectionManager; +import org.apache.http.conn.HttpClientConnectionOperator; +import org.apache.http.conn.ManagedHttpClientConnection; +import org.apache.http.conn.routing.HttpRoute; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.impl.conn.DefaultHttpClientConnectionOperator; +import org.apache.http.impl.conn.ManagedHttpClientConnectionFactory; +import org.apache.http.protocol.HttpContext; + +/** + * AbfsConnectionManager is a custom implementation of {@code HttpClientConnectionManager}. + * This implementation manages connection-pooling heuristics and custom implementation + * of {@link ManagedHttpClientConnectionFactory}. + */ +class AbfsConnectionManager implements HttpClientConnectionManager { + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsConnectionManager.class); + + /** + * Connection pool for the ABFS managed connections. + */ + private final KeepAliveCache kac; + + /** + * Factory to create new connections. + */ + private final AbfsHttpClientConnectionFactory httpConnectionFactory; + + /** + * Operator to manage the network connection state of ABFS managed connections. 
+ */ + private final HttpClientConnectionOperator connectionOperator; + + AbfsConnectionManager(Registry socketFactoryRegistry, + AbfsHttpClientConnectionFactory connectionFactory, KeepAliveCache kac) { + this.httpConnectionFactory = connectionFactory; + this.kac = kac; + this.connectionOperator = new DefaultHttpClientConnectionOperator( + socketFactoryRegistry, null, null); + } + + /** + * Returns a custom implementation of connection request for the given route. + * The implementation would return a connection from the {@link KeepAliveCache} if available, + * else it would create a new non-connected {@link AbfsManagedApacheHttpConnection}. + */ + @Override + public ConnectionRequest requestConnection(final HttpRoute route, + final Object state) { + return new ConnectionRequest() { + + /** + * Synchronously gets a connection from the {@link KeepAliveCache} or + * creates a new un-connected instance of {@link AbfsManagedApacheHttpConnection}. + */ + @Override + public HttpClientConnection get(final long timeout, + final TimeUnit timeUnit) + throws InterruptedException, ExecutionException, + ConnectionPoolTimeoutException { + String requestId = UUID.randomUUID().toString(); + logDebug("Connection requested for request {}", requestId); + try { + HttpClientConnection clientConn = kac.get(); + if (clientConn != null) { + logDebug("Connection retrieved from KAC: {} for requestId: {}", + clientConn, requestId); + return clientConn; + } + logDebug("Creating new connection for requestId: {}", requestId); + ManagedHttpClientConnection conn = httpConnectionFactory.create(route, + null); + logDebug("Connection created: {} for requestId: {}", conn, requestId); + return conn; + } catch (IOException ex) { + throw new ExecutionException(ex); + } + } + + @Override + public boolean cancel() { + return false; + } + }; + } + + /** + * Releases a connection for reuse. It can be reused only if validDuration is greater than 0. + * This method is called by {@link org.apache.http.impl.execchain} internal class `ConnectionHolder`. + * If it wants to reuse the connection, it will send a non-zero validDuration, else it will send 0. 
+ * @param conn the connection to release + * @param newState the new state of the connection + * @param validDuration the duration for which the connection is valid + * @param timeUnit the time unit for the validDuration + */ + @Override + public void releaseConnection(final HttpClientConnection conn, + final Object newState, + final long validDuration, + final TimeUnit timeUnit) { + if (validDuration == 0) { + return; + } + if (conn.isOpen() && conn instanceof AbfsManagedApacheHttpConnection) { + boolean connAddedInKac = kac.put(conn); + if (connAddedInKac) { + logDebug("Connection cached: {}", conn); + } else { + logDebug("Connection not cached, and is released: {}", conn); + } + } + } + + /**{@inheritDoc}*/ + @Override + public void connect(final HttpClientConnection conn, + final HttpRoute route, + final int connectTimeout, + final HttpContext context) throws IOException { + long start = System.currentTimeMillis(); + logDebug("Connecting {} to {}", conn, route.getTargetHost()); + connectionOperator.connect((AbfsManagedApacheHttpConnection) conn, + route.getTargetHost(), route.getLocalSocketAddress(), + connectTimeout, SocketConfig.DEFAULT, context); + logDebug("Connection established: {}", conn); + if (context instanceof AbfsManagedHttpClientContext) { + ((AbfsManagedHttpClientContext) context).setConnectTime( + System.currentTimeMillis() - start); + } + } + + /**{@inheritDoc}*/ + @Override + public void upgrade(final HttpClientConnection conn, + final HttpRoute route, + final HttpContext context) throws IOException { + connectionOperator.upgrade((AbfsManagedApacheHttpConnection) conn, + route.getTargetHost(), context); + } + + /**{@inheritDoc}*/ + @Override + public void routeComplete(final HttpClientConnection conn, + final HttpRoute route, + final HttpContext context) throws IOException { + + } + + /**{@inheritDoc}*/ + @Override + public void closeIdleConnections(final long idletime, + final TimeUnit timeUnit) { + kac.evictIdleConnection(); + } + + /**{@inheritDoc}*/ + @Override + public void closeExpiredConnections() { + kac.evictIdleConnection(); + } + + /**{@inheritDoc}*/ + @Override + public void shutdown() { + kac.close(); + } + + private void logDebug(String message, Object... args) { + if (LOG.isDebugEnabled()) { + LOG.debug(message, args); + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpClientConnectionFactory.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpClientConnectionFactory.java new file mode 100644 index 0000000000000..82a2440bca13d --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpClientConnectionFactory.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import org.apache.http.config.ConnectionConfig; +import org.apache.http.conn.ManagedHttpClientConnection; +import org.apache.http.conn.routing.HttpRoute; +import org.apache.http.impl.conn.ManagedHttpClientConnectionFactory; + +/** + * Custom implementation of {@link ManagedHttpClientConnectionFactory} and overrides + * {@link ManagedHttpClientConnectionFactory#create(HttpRoute, ConnectionConfig)} to return + * {@link AbfsManagedApacheHttpConnection}. + */ +public class AbfsHttpClientConnectionFactory extends ManagedHttpClientConnectionFactory { + + /** + * Creates a new {@link AbfsManagedApacheHttpConnection} instance which has to + * be connected. + * @param route route for which connection is required. + * @param config connection configuration. + * @return new {@link AbfsManagedApacheHttpConnection} instance. + */ + @Override + public ManagedHttpClientConnection create(final HttpRoute route, + final ConnectionConfig config) { + return new AbfsManagedApacheHttpConnection(super.create(route, config), route); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index a29eed6f42515..e2ce5c628a4b6 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -20,18 +20,12 @@ import java.io.IOException; import java.io.InputStream; -import java.io.OutputStream; import java.net.HttpURLConnection; -import java.net.ProtocolException; import java.net.URL; +import java.time.Duration; +import java.util.ArrayList; import java.util.List; - -import javax.net.ssl.HttpsURLConnection; -import javax.net.ssl.SSLSocketFactory; - -import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.hadoop.fs.azurebfs.utils.UriUtils; -import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; +import java.util.Map; import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonParser; @@ -40,37 +34,39 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.contracts.services.AbfsPerfLoggable; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; - -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EXPECT_100_JDK_ERROR; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; -import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; +import org.apache.hadoop.fs.azurebfs.utils.UriUtils; /** - * Represents an HTTP operation. + * Base Http operation class for orchestrating server IO calls. Child classes would + * define the certain orchestration implementation on the basis of network library used. + *

+ * For JDK netlib usage, the child class would be {@link AbfsJdkHttpOperation}.
+ * For ApacheHttpClient netlib usage, the child class would be {@link AbfsAHCHttpOperation}. */ -public class AbfsHttpOperation implements AbfsPerfLoggable { - private static final Logger LOG = LoggerFactory.getLogger(AbfsHttpOperation.class); +public abstract class AbfsHttpOperation implements AbfsPerfLoggable { + + private final Logger log; private static final int CLEAN_UP_BUFFER_SIZE = 64 * 1024; private static final int ONE_THOUSAND = 1000; + private static final int ONE_MILLION = ONE_THOUSAND * ONE_THOUSAND; private final String method; private final URL url; private String maskedUrl; private String maskedEncodedUrl; - - private HttpURLConnection connection; private int statusCode; private String statusDescription; private String storageErrorCode = ""; - private String storageErrorMessage = ""; - private String requestId = ""; + private String storageErrorMessage = ""; + private String requestId = ""; private String expectedAppendPos = ""; private ListResultSchema listResultSchema = null; @@ -85,6 +81,23 @@ public class AbfsHttpOperation implements AbfsPerfLoggable { private boolean shouldMask = false; private boolean connectionDisconnectedOnError = false; + /**Request headers to be sent in the request.*/ + private final List requestHeaders; + + /** + * Timeout that defines maximum allowed connection establishment time for a request. + * Timeout is in milliseconds. Not all requests need to establish a new connection, + * it depends on the connection pooling-heuristic of the networking library. + */ + private final int connectionTimeout; + + /** + * Timeout in milliseconds that defines maximum allowed time to read the response. + * This timeout starts once request is sent. It includes server reponse time, + * network latency, and time to read the response. + */ + private final int readTimeout; + public static AbfsHttpOperation getAbfsHttpOperationWithFixedResult( final URL url, final String method, @@ -94,6 +107,21 @@ public static AbfsHttpOperation getAbfsHttpOperationWithFixedResult( return httpOp; } + public AbfsHttpOperation( + final Logger log, + final URL url, + final String method, + final List requestHeaders, + final Duration connectionTimeout, + final Duration readTimeout) { + this.log = log; + this.url = url; + this.method = method; + this.requestHeaders = requestHeaders; + this.connectionTimeout = (int) connectionTimeout.toMillis(); + this.readTimeout = (int) readTimeout.toMillis(); + } + /** * Constructor for FixedResult instance, avoiding connection init. 
* @param url request url @@ -103,13 +131,25 @@ public static AbfsHttpOperation getAbfsHttpOperationWithFixedResult( protected AbfsHttpOperation(final URL url, final String method, final int httpStatus) { + this.log = LoggerFactory.getLogger(AbfsHttpOperation.class); this.url = url; this.method = method; this.statusCode = httpStatus; + this.requestHeaders = new ArrayList<>(); + this.connectionTimeout = 0; + this.readTimeout = 0; } - protected HttpURLConnection getConnection() { - return connection; + int getConnectionTimeout() { + return connectionTimeout; + } + + int getReadTimeout() { + return readTimeout; + } + + List getRequestHeaders() { + return requestHeaders; } public String getMethod() { @@ -137,8 +177,7 @@ public String getStorageErrorMessage() { } public String getClientRequestId() { - return this.connection - .getRequestProperty(HttpHeaderConfigurations.X_MS_CLIENT_REQUEST_ID); + return getRequestProperty(HttpHeaderConfigurations.X_MS_CLIENT_REQUEST_ID); } public String getExpectedAppendPos() { @@ -165,13 +204,21 @@ public long getBytesReceived() { return bytesReceived; } + public URL getUrl() { + return url; + } + public ListResultSchema getListResultSchema() { return listResultSchema; } - public String getResponseHeader(String httpHeader) { - return connection.getHeaderField(httpHeader); - } + /** + * Get response header value for the given headerKey. + * + * @param httpHeader header key. + * @return header value. + */ + public abstract String getResponseHeader(String httpHeader); // Returns a trace message for the request @Override @@ -235,6 +282,7 @@ public String getLogString() { return sb.toString(); } + @VisibleForTesting public String getMaskedUrl() { if (!shouldMask) { return url.toString(); @@ -246,7 +294,7 @@ public String getMaskedUrl() { return maskedUrl; } - public String getMaskedEncodedUrl() { + public final String getMaskedEncodedUrl() { if (maskedEncodedUrl != null) { return maskedEncodedUrl; } @@ -255,40 +303,6 @@ public String getMaskedEncodedUrl() { } /** - * Initializes a new HTTP request and opens the connection. - * - * @param url The full URL including query string parameters. - * @param method The HTTP method (PUT, PATCH, POST, GET, HEAD, or DELETE). - * @param requestHeaders The HTTP request headers.READ_TIMEOUT - * @param connectionTimeout The Connection Timeout value to be used while establishing http connection - * @param readTimeout The Read Timeout value to be used with http connection while making a request - * @throws IOException if an error occurs. - */ - public AbfsHttpOperation(final URL url, final String method, final List requestHeaders, - final int connectionTimeout, final int readTimeout) - throws IOException { - this.url = url; - this.method = method; - - this.connection = openConnection(); - if (this.connection instanceof HttpsURLConnection) { - HttpsURLConnection secureConn = (HttpsURLConnection) this.connection; - SSLSocketFactory sslSocketFactory = DelegatingSSLSocketFactory.getDefaultFactory(); - if (sslSocketFactory != null) { - secureConn.setSSLSocketFactory(sslSocketFactory); - } - } - - this.connection.setConnectTimeout(connectionTimeout); - this.connection.setReadTimeout(readTimeout); - this.connection.setRequestMethod(method); - - for (AbfsHttpHeader header : requestHeaders) { - setRequestProperty(header.getName(), header.getValue()); - } - } - - /** * Sends the HTTP request. Note that HttpUrlConnection requires that an * empty buffer be sent in order to set the "Content-Length: 0" header, which * is required by our endpoint. 
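The comment above notes that HttpUrlConnection only emits a "Content-Length: 0" header when an empty buffer is explicitly written; AbfsJdkHttpOperation.sendPayload() later in this patch relies on the same idiom. A minimal stand-alone sketch of that behaviour (illustrative only, not part of the patch; the URL and class name are hypothetical):

    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;

    public final class ContentLengthZeroSketch {
      public static void main(String[] args) throws Exception {
        URL url = new URL("https://example.invalid/resource");   // hypothetical endpoint
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("PUT");
        conn.setDoOutput(true);
        conn.setFixedLengthStreamingMode(0);   // forces "Content-Length: 0" on the request
        try (OutputStream out = conn.getOutputStream()) {
          out.write(new byte[0], 0, 0);        // empty buffer, nothing written on the wire
        }
        System.out.println("HTTP status: " + conn.getResponseCode());
      }
    }
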
@@ -299,74 +313,9 @@ public AbfsHttpOperation(final URL url, final String method, final List= HttpURLConnection.HTTP_BAD_REQUEST) { processStorageErrorResponse(); this.recvResponseTimeMs += elapsedTimeMs(startTime); - this.bytesReceived = this.connection.getHeaderFieldLong(HttpHeaderConfigurations.CONTENT_LENGTH, 0); + String contentLength = getResponseHeader( + HttpHeaderConfigurations.CONTENT_LENGTH); + if (contentLength != null) { + this.bytesReceived = Long.parseLong(contentLength); + } else { + this.bytesReceived = 0L; + } + } else { // consume the input stream to release resources int totalBytesRead = 0; - try (InputStream stream = this.connection.getInputStream()) { + try (InputStream stream = getContentInputStream()) { if (isNullInputStream(stream)) { return; } @@ -429,12 +381,15 @@ void processConnHeadersAndInputStreams(final byte[] buffer, // this is a list operation and need to retrieve the data // need a better solution - if (AbfsHttpConstants.HTTP_METHOD_GET.equals(this.method) && buffer == null) { + if (AbfsHttpConstants.HTTP_METHOD_GET.equals(this.method) + && buffer == null) { parseListFilesResponse(stream); } else { if (buffer != null) { while (totalBytesRead < length) { - int bytesRead = stream.read(buffer, offset + totalBytesRead, length - totalBytesRead); + int bytesRead = stream.read(buffer, offset + totalBytesRead, + length + - totalBytesRead); if (bytesRead == -1) { endOfStream = true; break; @@ -452,9 +407,9 @@ void processConnHeadersAndInputStreams(final byte[] buffer, } } } catch (IOException ex) { - LOG.warn("IO/Network error: {} {}: {}", + log.warn("IO/Network error: {} {}: {}", method, getMaskedUrl(), ex.getMessage()); - LOG.debug("IO Error: ", ex); + log.debug("IO Error: ", ex); throw ex; } finally { this.recvResponseTimeMs += elapsedTimeMs(startTime); @@ -463,23 +418,12 @@ void processConnHeadersAndInputStreams(final byte[] buffer, } } - public void setRequestProperty(String key, String value) { - this.connection.setRequestProperty(key, value); - } - /** - * Open the HTTP connection. - * - * @throws IOException if an error occurs. + * Get the response stream from the connection. + * @return InputStream: response stream from the connection after network call. + * @throws IOException if the response stream could not be created from the connection. */ - private HttpURLConnection openConnection() throws IOException { - long start = System.nanoTime(); - try { - return (HttpURLConnection) url.openConnection(); - } finally { - connectionTimeMs = elapsedTimeMs(start); - } - } + protected abstract InputStream getContentInputStream() throws IOException; /** * When the request fails, this function is used to parse the responseAbfsHttpClient.LOG.debug("ExpectedError: ", ex); @@ -499,7 +443,7 @@ private HttpURLConnection openConnection() throws IOException { * */ private void processStorageErrorResponse() { - try (InputStream stream = connection.getErrorStream()) { + try (InputStream stream = getErrorStream()) { if (stream == null) { return; } @@ -536,24 +480,25 @@ private void processStorageErrorResponse() { // Ignore errors that occur while attempting to parse the storage // error, since the response may have been handled by the HTTP driver // or for other reasons have an unexpected - LOG.debug("ExpectedError: ", ex); + log.debug("ExpectedError: ", ex); } } /** - * Returns the elapsed time in milliseconds. + * Get the error stream from the connection. + * @return InputStream + * @throws IOException if the error stream could not be created from the response stream. 
*/ - private long elapsedTimeMs(final long startTime) { - return (System.nanoTime() - startTime) / ONE_MILLION; - } + protected abstract InputStream getErrorStream() throws IOException; /** * Parse the list file response * * @param stream InputStream contains the list results. - * @throws IOException + * @throws IOException if the response cannot be deserialized. */ - private void parseListFilesResponse(final InputStream stream) throws IOException { + private void parseListFilesResponse(final InputStream stream) + throws IOException { if (stream == null) { return; } @@ -565,13 +510,21 @@ private void parseListFilesResponse(final InputStream stream) throws IOException try { final ObjectMapper objectMapper = new ObjectMapper(); - this.listResultSchema = objectMapper.readValue(stream, ListResultSchema.class); + this.listResultSchema = objectMapper.readValue(stream, + ListResultSchema.class); } catch (IOException ex) { - LOG.error("Unable to deserialize list results", ex); + log.error("Unable to deserialize list results", ex); throw ex; } } + /** + * Returns the elapsed time in milliseconds. + */ + final long elapsedTimeMs(final long startTime) { + return (System.nanoTime() - startTime) / ONE_MILLION; + } + /** * Check null stream, this is to pass findbugs's redundant check for NULL * @param stream InputStream @@ -585,55 +538,148 @@ private boolean isNullInputStream(InputStream stream) { * @param key The request property key. * @return request peoperty value. */ - String getConnProperty(String key) { - return connection.getRequestProperty(key); - } + abstract String getConnProperty(String key); /** * Gets the connection url. * @return url. */ - URL getConnUrl() { - return connection.getURL(); + abstract URL getConnUrl(); + + /** + * Gets the connection response code. + * @return response code. + * @throws IOException + */ + abstract Integer getConnResponseCode() throws IOException; + + + /** + * Gets the connection response message. + * @return response message. + * @throws IOException + */ + abstract String getConnResponseMessage() throws IOException; + + /** + * Get request headers. + * + * @return request headers. + */ + abstract Map> getRequestProperties(); + + /** + * Get request header value for a header name. + * + * @param headerName header name. + * @return header value. + */ + abstract String getRequestProperty(String headerName); + + boolean getConnectionDisconnectedOnError() { + return connectionDisconnectedOnError; } /** - * Gets the connection request method. - * @return request method. + * Get the suffix to add to the tracing context that defines what http-client is + * used to make the network call + * @return the suffix to distinguish http client */ - String getConnRequestMethod() { - return connection.getRequestMethod(); + public abstract String getTracingContextSuffix(); + + public final long getSendLatency() { + return sendRequestTimeMs; + } + + public final long getRecvLatency() { + return recvResponseTimeMs; } /** - * Gets the connection response code. - * @return response code. - * @throws IOException + * Set response status code for the server call. + * + * @param statusCode status code. */ - Integer getConnResponseCode() throws IOException { - return connection.getResponseCode(); + protected void setStatusCode(final int statusCode) { + this.statusCode = statusCode; } /** - * Gets the connection output stream. - * @return output stream. - * @throws IOException + * Sets response status description for the server call. + * + * @param statusDescription status description. 
*/ - OutputStream getConnOutputStream() throws IOException { - return connection.getOutputStream(); + protected void setStatusDescription(final String statusDescription) { + this.statusDescription = statusDescription; } /** - * Gets the connection response message. - * @return response message. - * @throws IOException + * Set x-ms-request-id value from the server call response header. */ - String getConnResponseMessage() throws IOException { - return connection.getResponseMessage(); + protected void setRequestId() { + requestId = getResponseHeader( + HttpHeaderConfigurations.X_MS_REQUEST_ID); + if (requestId == null) { + requestId = AbfsHttpConstants.EMPTY_STRING; + } } - @VisibleForTesting - Boolean getConnectionDisconnectedOnError() { + /** + * Sets byteSent metric. + * + * @param bytesSent bytes sent. + */ + protected void setBytesSent(final int bytesSent) { + this.bytesSent = bytesSent; + } + + /** + * Sets expected bytes to be sent. + * + * @param expectedBytesToBeSent expected bytes to be sent. + */ + protected void setExpectedBytesToBeSent(final int expectedBytesToBeSent) { + this.expectedBytesToBeSent = expectedBytesToBeSent; + } + + /** + * Sets connection time in milliseconds taken to establish the connection. + * + * @param connectionTimeMs connection time in milliseconds. + */ + protected void setConnectionTimeMs(final long connectionTimeMs) { + this.connectionTimeMs = connectionTimeMs; + } + + /** + * Sets send request time in milliseconds. + * + * @param sendRequestTimeMs send request time in milliseconds. + */ + protected void setSendRequestTimeMs(final long sendRequestTimeMs) { + this.sendRequestTimeMs = sendRequestTimeMs; + } + + /** + * Sets receive response time in milliseconds. + * + * @param recvResponseTimeMs receive response time in milliseconds. + */ + protected void setRecvResponseTimeMs(final long recvResponseTimeMs) { + this.recvResponseTimeMs = recvResponseTimeMs; + } + + /** + * Marks network error and expect100 failures for send-payload phase. 
+ */ + protected void setConnectionDisconnectedOnError() { + this.connectionDisconnectedOnError = true; + } + + /** + * @return value of {@link #connectionDisconnectedOnError} + */ + protected boolean isConnectionDisconnectedOnError() { return connectionDisconnectedOnError; } @@ -652,9 +698,75 @@ public AbfsHttpOperationWithFixedResult(final URL url, super(url, method, httpStatus); } + @Override + public void processResponse(final byte[] buffer, + final int offset, + final int length) + throws IOException { + + } + + @Override + public void setRequestProperty(final String key, final String value) { + + } + + @Override + protected InputStream getContentInputStream() throws IOException { + return null; + } + + @Override + protected InputStream getErrorStream() throws IOException { + return null; + } + + @Override + String getConnProperty(final String key) { + return null; + } + + @Override + URL getConnUrl() { + return null; + } + + @Override + Integer getConnResponseCode() throws IOException { + return null; + } + + @Override + String getConnResponseMessage() throws IOException { + return null; + } + + @Override + Map> getRequestProperties() { + return null; + } + + @Override + String getRequestProperty(final String headerName) { + return null; + } + + @Override + public String getTracingContextSuffix() { + return null; + } + @Override public String getResponseHeader(final String httpHeader) { return ""; } + + @Override + public void sendPayload(final byte[] buffer, + final int offset, + final int length) + throws IOException { + + } } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsJdkHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsJdkHttpOperation.java new file mode 100644 index 0000000000000..9628e8e338028 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsJdkHttpOperation.java @@ -0,0 +1,300 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.HttpURLConnection; +import java.net.ProtocolException; +import java.net.URL; +import java.time.Duration; +import java.util.List; +import java.util.Map; + +import javax.net.ssl.HttpsURLConnection; +import javax.net.ssl.SSLSocketFactory; + +import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EXPECT_100_JDK_ERROR; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JDK_FALLBACK; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JDK_IMPL; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; + +/** + * Implementation of {@link AbfsHttpOperation} for orchestrating calls using JDK's HttpURLConnection. + */ +public class AbfsJdkHttpOperation extends AbfsHttpOperation { + + private static final Logger LOG = LoggerFactory.getLogger( + AbfsJdkHttpOperation.class); + + private final HttpURLConnection connection; + + /** + * Initializes a new HTTP request and opens the connection. + * + * @param url The full URL including query string parameters. + * @param method The HTTP method (PUT, PATCH, POST, GET, HEAD, or DELETE). + * @param requestHeaders The HTTP request headers.READ_TIMEOUT + * @param connectionTimeout The Connection Timeout value to be used while establishing http connection + * @param readTimeout The Read Timeout value to be used with http connection while making a request + * @throws IOException if an error occurs. + */ + public AbfsJdkHttpOperation(final URL url, + final String method, + final List requestHeaders, + final Duration connectionTimeout, + final Duration readTimeout) + throws IOException { + super(LOG, url, method, requestHeaders, connectionTimeout, readTimeout); + + this.connection = openConnection(); + if (this.connection instanceof HttpsURLConnection) { + HttpsURLConnection secureConn = (HttpsURLConnection) this.connection; + SSLSocketFactory sslSocketFactory + = DelegatingSSLSocketFactory.getDefaultFactory(); + if (sslSocketFactory != null) { + secureConn.setSSLSocketFactory(sslSocketFactory); + } + } + + this.connection.setConnectTimeout(getConnectionTimeout()); + this.connection.setReadTimeout(getReadTimeout()); + this.connection.setRequestMethod(method); + + for (AbfsHttpHeader header : requestHeaders) { + setRequestProperty(header.getName(), header.getValue()); + } + } + + /**{@inheritDoc}*/ + public String getResponseHeader(String httpHeader) { + return connection.getHeaderField(httpHeader); + } + + /**{@inheritDoc}*/ + public void sendPayload(byte[] buffer, int offset, int length) + throws IOException { + this.connection.setDoOutput(true); + this.connection.setFixedLengthStreamingMode(length); + if (buffer == null) { + // An empty buffer is sent to set the "Content-Length: 0" header, which + // is required by our endpoint. + buffer = new byte[]{}; + offset = 0; + length = 0; + } + + // send the request body + + long startTime = 0; + startTime = System.nanoTime(); + OutputStream outputStream = null; + // Updates the expected bytes to be sent based on length. 
+ setExpectedBytesToBeSent(length); + try { + try { + /* Without expect header enabled, if getOutputStream() throws + an exception, it gets caught by the restOperation. But with + expect header enabled we return back without throwing an exception + for the correct response code processing. + */ + outputStream = getConnOutputStream(); + } catch (IOException e) { + setConnectionDisconnectedOnError(); + /* If getOutputStream fails with an expect-100 exception , we return back + without throwing an exception to the caller. Else, we throw back the exception. + */ + String expectHeader = getConnProperty(EXPECT); + if (expectHeader != null && expectHeader.equals(HUNDRED_CONTINUE) + && e instanceof ProtocolException + && EXPECT_100_JDK_ERROR.equals(e.getMessage())) { + LOG.debug( + "Getting output stream failed with expect header enabled, returning back ", + e); + /* + * In case expect-100 assertion has failed, headers and inputStream should not + * be parsed. Reason being, conn.getHeaderField(), conn.getHeaderFields(), + * conn.getInputStream() will lead to repeated server call. + * ref: https://bugs.openjdk.org/browse/JDK-8314978. + * Reading conn.responseCode() and conn.getResponseMessage() is safe in + * case of Expect-100 error. Reason being, in JDK, it stores the responseCode + * in the HttpUrlConnection object before throwing exception to the caller. + */ + setStatusCode(getConnResponseCode()); + setStatusDescription(getConnResponseMessage()); + return; + } else { + LOG.debug( + "Getting output stream failed without expect header enabled, throwing exception ", + e); + throw e; + } + } + // update bytes sent for successful as well as failed attempts via the + // accompanying statusCode. + setBytesSent(length); + + // If this fails with or without expect header enabled, + // it throws an IOException. + outputStream.write(buffer, offset, length); + } finally { + // Closing the opened output stream + if (outputStream != null) { + outputStream.close(); + } + setSendRequestTimeMs(elapsedTimeMs(startTime)); + } + } + + /**{@inheritDoc}*/ + @Override + String getRequestProperty(final String headerName) { + return connection.getRequestProperty(headerName); + } + + /**{@inheritDoc}*/ + @Override + Map> getRequestProperties() { + return connection.getRequestProperties(); + } + + /**{@inheritDoc}*/ + @Override + protected InputStream getContentInputStream() throws IOException { + return connection.getInputStream(); + } + + /**{@inheritDoc}*/ + public void processResponse(final byte[] buffer, + final int offset, + final int length) throws IOException { + if (isConnectionDisconnectedOnError()) { + LOG.debug("This connection was not successful or has been disconnected, " + + "hence not parsing headers and inputStream"); + return; + } + processConnHeadersAndInputStreams(buffer, offset, length); + } + + /** + * Parses headers and body of the response. Execute server call if {@link #sendPayload(byte[], int, int)} + * is not called. + * + * @param buffer buffer to store the response body. + * @param offset offset in the buffer. + * @param length length of the response body. + * + * @throws IOException network error or parsing error. 
+ */ + void processConnHeadersAndInputStreams(final byte[] buffer, + final int offset, + final int length) throws IOException { + // get the response + long startTime = 0; + startTime = System.nanoTime(); + + setStatusCode(getConnResponseCode()); + setRecvResponseTimeMs(elapsedTimeMs(startTime)); + + setStatusDescription(getConnResponseMessage()); + setRequestId(); + + // dump the headers + AbfsIoUtils.dumpHeadersToDebugLog("Response Headers", + connection.getHeaderFields()); + + if (AbfsHttpConstants.HTTP_METHOD_HEAD.equals(getMethod())) { + // If it is HEAD, and it is ERROR + return; + } + + parseResponse(buffer, offset, length); + } + + /**{@inheritDoc}*/ + public void setRequestProperty(String key, String value) { + this.connection.setRequestProperty(key, value); + } + + /** + * Creates a new {@link HttpURLConnection} instance. This instance is not connected. + * Any API call on the instance would make it reuse an existing connection or + * establish a new connection. + * + * @throws IOException if an error occurs. + */ + private HttpURLConnection openConnection() throws IOException { + long start = System.nanoTime(); + try { + return (HttpURLConnection) getUrl().openConnection(); + } finally { + setConnectionTimeMs(elapsedTimeMs(start)); + } + } + + /**{@inheritDoc}*/ + @Override + protected InputStream getErrorStream() { + return connection.getErrorStream(); + } + + /**{@inheritDoc}*/ + String getConnProperty(String key) { + return connection.getRequestProperty(key); + } + + /**{@inheritDoc}*/ + URL getConnUrl() { + return connection.getURL(); + } + + /**{@inheritDoc}*/ + Integer getConnResponseCode() throws IOException { + return connection.getResponseCode(); + } + + /** + * Gets the connection output stream. + * @return output stream. + * @throws IOException if creating outputStream on connection failed + */ + OutputStream getConnOutputStream() throws IOException { + return connection.getOutputStream(); + } + + /**{@inheritDoc}*/ + String getConnResponseMessage() throws IOException { + return connection.getResponseMessage(); + } + + /**{@inheritDoc}*/ + @Override + public String getTracingContextSuffix() { + return AbfsApacheHttpClient.usable() ? JDK_IMPL : JDK_FALLBACK; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsManagedApacheHttpConnection.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsManagedApacheHttpConnection.java new file mode 100644 index 0000000000000..04697ab561ed5 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsManagedApacheHttpConnection.java @@ -0,0 +1,240 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import javax.net.ssl.SSLSession; +import java.io.IOException; +import java.net.InetAddress; +import java.net.Socket; +import java.util.UUID; + +import org.apache.http.HttpConnectionMetrics; +import org.apache.http.HttpEntityEnclosingRequest; +import org.apache.http.HttpException; +import org.apache.http.HttpRequest; +import org.apache.http.HttpResponse; +import org.apache.http.conn.ManagedHttpClientConnection; +import org.apache.http.conn.routing.HttpRoute; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COLON; + +/** + * This class wraps the {@link ManagedHttpClientConnection} and provides + * insights onto the connection level activity. + */ +class AbfsManagedApacheHttpConnection + implements ManagedHttpClientConnection { + + /** + * Underlying ApacheHttpClient connection that actually does the work over network. + */ + private final ManagedHttpClientConnection httpClientConnection; + + /** + * Managed HTTP context to track the connection level activity. + */ + private AbfsManagedHttpClientContext managedHttpContext; + + private final int hashCode; + + AbfsManagedApacheHttpConnection(ManagedHttpClientConnection conn, + final HttpRoute route) { + this.httpClientConnection = conn; + this.hashCode = (UUID.randomUUID().toString() + + httpClientConnection.getId()).hashCode(); + } + + /** + * Sets the managed HTTP context to track the connection level activity. + */ + void setManagedHttpContext(AbfsManagedHttpClientContext managedHttpContext) { + this.managedHttpContext = managedHttpContext; + } + + /**{@inheritDoc}*/ + @Override + public void close() throws IOException { + httpClientConnection.close(); + } + + /**{@inheritDoc}*/ + + @Override + public boolean isOpen() { + return httpClientConnection.isOpen(); + } + + /**{@inheritDoc}*/ + @Override + public boolean isStale() { + return httpClientConnection.isStale(); + } + + /**{@inheritDoc}*/ + @Override + public void setSocketTimeout(final int timeout) { + httpClientConnection.setSocketTimeout(timeout); + } + + /**{@inheritDoc}*/ + @Override + public int getSocketTimeout() { + return httpClientConnection.getSocketTimeout(); + } + + /**{@inheritDoc}*/ + @Override + public void shutdown() throws IOException { + httpClientConnection.shutdown(); + } + + /**{@inheritDoc}*/ + @Override + public HttpConnectionMetrics getMetrics() { + return httpClientConnection.getMetrics(); + } + + /**{@inheritDoc}*/ + @Override + public boolean isResponseAvailable(final int timeout) throws IOException { + long start = System.currentTimeMillis(); + boolean val = httpClientConnection.isResponseAvailable(timeout); + managedHttpContext.addReadTime(System.currentTimeMillis() - start); + return val; + } + + /**{@inheritDoc}*/ + @Override + public void sendRequestHeader(final HttpRequest request) + throws HttpException, IOException { + long start = System.currentTimeMillis(); + httpClientConnection.sendRequestHeader(request); + managedHttpContext.addSendTime(System.currentTimeMillis() - start); + } + + /**{@inheritDoc}*/ + @Override + public void sendRequestEntity(final HttpEntityEnclosingRequest request) + throws HttpException, IOException { + long start = System.currentTimeMillis(); + httpClientConnection.sendRequestEntity(request); + managedHttpContext.addSendTime(System.currentTimeMillis() - start); + } + + /**{@inheritDoc}*/ + @Override + public HttpResponse receiveResponseHeader() + throws HttpException, IOException { + long start = System.currentTimeMillis(); + HttpResponse 
response = httpClientConnection.receiveResponseHeader(); + managedHttpContext.addReadTime(System.currentTimeMillis() - start); + return response; + } + + /**{@inheritDoc}*/ + @Override + public void receiveResponseEntity(final HttpResponse response) + throws HttpException, IOException { + long start = System.currentTimeMillis(); + httpClientConnection.receiveResponseEntity(response); + managedHttpContext.addReadTime(System.currentTimeMillis() - start); + } + + /**{@inheritDoc}*/ + @Override + public void flush() throws IOException { + long start = System.currentTimeMillis(); + httpClientConnection.flush(); + managedHttpContext.addSendTime(System.currentTimeMillis() - start); + } + + /**{@inheritDoc}*/ + @Override + public String getId() { + return httpClientConnection.getId(); + } + + /**{@inheritDoc}*/ + @Override + public void bind(final Socket socket) throws IOException { + httpClientConnection.bind(socket); + } + + /**{@inheritDoc}*/ + @Override + public Socket getSocket() { + return httpClientConnection.getSocket(); + } + + /**{@inheritDoc}*/ + @Override + public SSLSession getSSLSession() { + return httpClientConnection.getSSLSession(); + } + + /**Gets the local address to which the socket is bound.*/ + @Override + public InetAddress getLocalAddress() { + return httpClientConnection.getLocalAddress(); + } + + /**Gets the local port to which the socket is bound.*/ + @Override + public int getLocalPort() { + return httpClientConnection.getLocalPort(); + } + + /**Returns the address to which the socket is connected.*/ + @Override + public InetAddress getRemoteAddress() { + return httpClientConnection.getRemoteAddress(); + } + + /**Returns the remote port number to which this socket is connected.*/ + @Override + public int getRemotePort() { + return httpClientConnection.getRemotePort(); + } + + @Override + public boolean equals(final Object o) { + if (o instanceof AbfsManagedApacheHttpConnection) { + return httpClientConnection.getId().equals( + ((AbfsManagedApacheHttpConnection) o).httpClientConnection.getId()); + } + return false; + } + + @Override + public int hashCode() { + return hashCode; + } + + @Override + public String toString() { + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append( + httpClientConnection.getRemoteAddress().getHostName()) + .append(COLON) + .append(httpClientConnection.getRemotePort()) + .append(COLON) + .append(hashCode()); + return stringBuilder.toString(); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsManagedHttpClientContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsManagedHttpClientContext.java new file mode 100644 index 0000000000000..ee3fa92159c66 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsManagedHttpClientContext.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import org.apache.http.HttpClientConnection; +import org.apache.http.client.protocol.HttpClientContext; + +/** + * Registers the latency of different phases of a network call. + */ +public class AbfsManagedHttpClientContext extends HttpClientContext { + + /**Connection establishment time*/ + private long connectTime = 0L; + + /**Time taken to receive and read response*/ + private long readTime = 0L; + + /***Time taken to send request*/ + private long sendTime = 0L; + + public AbfsManagedHttpClientContext() { + } + + /** + * This to be used only in tests to get connection level activity. + * @param httpClientConnection HttpClientConnection which has to be intercepted + * by test-implementation. + * @return HttpClientConnection which is intercepted by test-implementation. For production + * implementation, it will return the same httpClientConnection. + */ + protected HttpClientConnection interceptConnectionActivity( + HttpClientConnection httpClientConnection) { + return httpClientConnection; + } + + public long getConnectTime() { + return connectTime; + } + + public void setConnectTime(long connectTime) { + this.connectTime = connectTime; + } + + public long getReadTime() { + return readTime; + } + + public long getSendTime() { + return sendTime; + } + + public void addSendTime(long sendTime) { + this.sendTime += sendTime; + } + + public void addReadTime(long readTime) { + this.readTime += readTime; + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsManagedHttpRequestExecutor.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsManagedHttpRequestExecutor.java new file mode 100644 index 0000000000000..05cf180966ac6 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsManagedHttpRequestExecutor.java @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; + +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsApacheHttpExpect100Exception; +import org.apache.http.HttpClientConnection; +import org.apache.http.HttpException; +import org.apache.http.HttpRequest; +import org.apache.http.HttpResponse; +import org.apache.http.protocol.HttpContext; +import org.apache.http.protocol.HttpRequestExecutor; + +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; + +/** + * This class extends {@link HttpRequestExecutor} to intercept the connection + * activity and register the latency of different phases of a network call. It + * also overrides the HttpRequestExecutor's expect100 failure handling as the ADLS + * can send any failure statusCode in expect100 hand-shake failure and non + * necessarily 1XX code. + */ +public class AbfsManagedHttpRequestExecutor extends HttpRequestExecutor { + + public AbfsManagedHttpRequestExecutor(final int expect100WaitTimeout) { + super(expect100WaitTimeout); + } + + /**{@inheritDoc}*/ + @Override + public HttpResponse execute(final HttpRequest request, + final HttpClientConnection conn, + final HttpContext context) throws IOException, HttpException { + if (context instanceof AbfsManagedHttpClientContext + && conn instanceof AbfsManagedApacheHttpConnection) { + ((AbfsManagedApacheHttpConnection) conn).setManagedHttpContext( + (AbfsManagedHttpClientContext) context); + } + return super.execute(request, conn, context); + } + + /**{@inheritDoc}*/ + @Override + protected HttpResponse doSendRequest(final HttpRequest request, + final HttpClientConnection conn, + final HttpContext context) throws IOException, HttpException { + final HttpClientConnection inteceptedConnection; + if (context instanceof AbfsManagedHttpClientContext) { + inteceptedConnection + = ((AbfsManagedHttpClientContext) context).interceptConnectionActivity( + conn); + } else { + inteceptedConnection = conn; + } + final HttpResponse res = super.doSendRequest(request, inteceptedConnection, + context); + + /* + * ApacheHttpClient implementation does not raise an exception if the status + * of expect100 hand-shake is not less than 200. Although it sends payload only + * if the statusCode of the expect100 hand-shake is 100. + * + * ADLS can send any failure statusCode in exect100 handshake. So, an exception + * needs to be explicitly raised if expect100 assertion is failure but the + * ApacheHttpClient has not raised an exception. + * + * Response is only returned by this method if there is no expect100 request header + * or the expect100 assertion is failed. 
+ */ + if (request != null && request.containsHeader(EXPECT) && res != null) { + throw new AbfsApacheHttpExpect100Exception(res); + } + return res; + } + + /**{@inheritDoc}*/ + @Override + protected HttpResponse doReceiveResponse(final HttpRequest request, + final HttpClientConnection conn, + final HttpContext context) throws HttpException, IOException { + final HttpClientConnection interceptedConnection; + if (context instanceof AbfsManagedHttpClientContext) { + interceptedConnection + = ((AbfsManagedHttpClientContext) context).interceptConnectionActivity( + conn); + } else { + interceptedConnection = conn; + } + return super.doReceiveResponse(request, + interceptedConnection, context); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsNoOpThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsNoOpThrottlingIntercept.java index 6b84e583c337a..58e50592997dc 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsNoOpThrottlingIntercept.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsNoOpThrottlingIntercept.java @@ -18,6 +18,10 @@ package org.apache.hadoop.fs.azurebfs.services; +/** + * Implementation of {@link AbfsThrottlingIntercept} that does not throttle + * the ABFS process. + */ final class AbfsNoOpThrottlingIntercept implements AbfsThrottlingIntercept { public static final AbfsNoOpThrottlingIntercept INSTANCE = new AbfsNoOpThrottlingIntercept(); @@ -25,11 +29,13 @@ final class AbfsNoOpThrottlingIntercept implements AbfsThrottlingIntercept { private AbfsNoOpThrottlingIntercept() { } + /**{@inheritDoc}*/ @Override public void updateMetrics(final AbfsRestOperationType operationType, - final AbfsHttpOperation abfsHttpOperation) { + final AbfsHttpOperation httpOperation) { } + /**{@inheritDoc}*/ @Override public void sendingRequest(final AbfsRestOperationType operationType, final AbfsCounters abfsCounters) { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java index c696bd8e68639..1cdc9e20c0f77 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsRestOperation.java @@ -23,14 +23,19 @@ import java.net.HttpURLConnection; import java.net.URL; import java.net.UnknownHostException; +import java.time.Duration; import java.util.List; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.ClosedIOException; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.AbfsStatistic; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.constants.HttpOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidAbfsRestOperationException; @@ -40,6 +45,7 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode; import java.util.Map; 
import org.apache.hadoop.fs.azurebfs.AbfsBackoffMetrics; +import org.apache.http.impl.execchain.RequestAbortedException; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ZERO; import static org.apache.hadoop.util.Time.now; @@ -95,11 +101,18 @@ public class AbfsRestOperation { private String failureReason; private AbfsRetryPolicy retryPolicy; + private final AbfsConfiguration abfsConfiguration; + /** * This variable stores the tracing context used for last Rest Operation. */ private TracingContext lastUsedTracingContext; + /** + * Number of retries due to IOException. + */ + private int apacheHttpClientIoExceptions = 0; + /** * Checks if there is non-null HTTP response. * @return true if there is a non-null HTTP response from the ABFS call. @@ -150,8 +163,10 @@ String getSasToken() { final AbfsClient client, final String method, final URL url, - final List requestHeaders) { - this(operationType, client, method, url, requestHeaders, null); + final List requestHeaders, + final AbfsConfiguration abfsConfiguration) { + this(operationType, client, method, url, requestHeaders, null, abfsConfiguration + ); } /** @@ -168,7 +183,8 @@ String getSasToken() { final String method, final URL url, final List requestHeaders, - final String sasToken) { + final String sasToken, + final AbfsConfiguration abfsConfiguration) { this.operationType = operationType; this.client = client; this.method = method; @@ -185,8 +201,9 @@ String getSasToken() { if (abfsBackoffMetrics != null) { this.metricsMap = abfsBackoffMetrics.getMetricsMap(); } - this.maxIoRetries = client.getAbfsConfiguration().getMaxIoRetries(); + this.maxIoRetries = abfsConfiguration.getMaxIoRetries(); this.intercept = client.getIntercept(); + this.abfsConfiguration = abfsConfiguration; this.retryPolicy = client.getExponentialRetryPolicy(); } @@ -199,7 +216,7 @@ String getSasToken() { * @param url The full URL including query string parameters. * @param requestHeaders The HTTP request headers. * @param buffer For uploads, this is the request entity body. For downloads, - * this will hold the response entity body. + * this will hold the response entity body. * @param bufferOffset An offset into the buffer where the data beings. * @param bufferLength The length of the data in the buffer. * @param sasToken A sasToken for optional re-use by AbfsInputStream/AbfsOutputStream. @@ -212,8 +229,10 @@ String getSasToken() { byte[] buffer, int bufferOffset, int bufferLength, - String sasToken) { - this(operationType, client, method, url, requestHeaders, sasToken); + String sasToken, + final AbfsConfiguration abfsConfiguration) { + this(operationType, client, method, url, requestHeaders, sasToken, abfsConfiguration + ); this.buffer = buffer; this.bufferOffset = bufferOffset; this.bufferLength = bufferLength; @@ -230,7 +249,9 @@ public void execute(TracingContext tracingContext) // triggered by a single file system call, using a new tracing context. 
lastUsedTracingContext = createNewTracingContext(tracingContext); try { - abfsCounters.getLastExecutionTime().set(now()); + if (abfsCounters != null) { + abfsCounters.getLastExecutionTime().set(now()); + } client.timerOrchestrator(TimerFunctionality.RESUME, null); IOStatisticsBinding.trackDurationOfInvocation(abfsCounters, AbfsStatistic.getStatNameFromHttpCall(method), @@ -340,7 +361,7 @@ String getClientLatency() { */ private boolean executeHttpOperation(final int retryCount, TracingContext tracingContext) throws AzureBlobFileSystemException { - AbfsHttpOperation httpOperation; + final AbfsHttpOperation httpOperation; // Used to avoid CST Metric Update in Case of UnknownHost/IO Exception. boolean wasKnownExceptionThrown = false; @@ -361,15 +382,13 @@ private boolean executeHttpOperation(final int retryCount, try { // dump the headers AbfsIoUtils.dumpHeadersToDebugLog("Request Headers", - httpOperation.getConnection().getRequestProperties()); + httpOperation.getRequestProperties()); intercept.sendingRequest(operationType, abfsCounters); if (hasRequestBody) { - // HttpUrlConnection requires - httpOperation.sendRequest(buffer, bufferOffset, bufferLength); + httpOperation.sendPayload(buffer, bufferOffset, bufferLength); incrementCounter(AbfsStatistic.SEND_REQUESTS, 1); incrementCounter(AbfsStatistic.BYTES_SENT, bufferLength); } - httpOperation.processResponse(buffer, bufferOffset, bufferLength); if (!isThrottledRequest && httpOperation.getStatusCode() >= HttpURLConnection.HTTP_INTERNAL_ERROR) { @@ -435,6 +454,9 @@ private boolean executeHttpOperation(final int retryCount, retryPolicy = client.getRetryPolicy(failureReason); LOG.warn("Unknown host name: {}. Retrying to resolve the host name...", hostname); + if (httpOperation instanceof AbfsAHCHttpOperation) { + registerApacheHttpClientIoException(); + } if (abfsBackoffMetrics != null) { synchronized (this) { abfsBackoffMetrics.incrementNumberOfNetworkFailedRequests(); @@ -457,6 +479,13 @@ private boolean executeHttpOperation(final int retryCount, } failureReason = RetryReason.getAbbreviation(ex, -1, ""); retryPolicy = client.getRetryPolicy(failureReason); + if (httpOperation instanceof AbfsAHCHttpOperation) { + registerApacheHttpClientIoException(); + if (ex instanceof RequestAbortedException + && ex.getCause() instanceof ClosedIOException) { + throw new AbfsDriverException((IOException) ex.getCause()); + } + } if (!retryPolicy.shouldRetry(retryCount, -1)) { updateBackoffMetrics(retryCount, httpOperation.getStatusCode()); throw new InvalidAbfsRestOperationException(ex, retryCount); @@ -474,6 +503,18 @@ private boolean executeHttpOperation(final int retryCount, return true; } + /** + * Registers switch off of ApacheHttpClient in case of IOException retries increases + * more than the threshold. + */ + private void registerApacheHttpClientIoException() { + apacheHttpClientIoExceptions++; + if (apacheHttpClientIoExceptions + >= abfsConfiguration.getMaxApacheHttpClientIoExceptionsRetries()) { + AbfsApacheHttpClient.registerFallback(); + } + } + /** * Sign an operation. 
* @param httpOperation operation to sign @@ -483,43 +524,64 @@ private boolean executeHttpOperation(final int retryCount, @VisibleForTesting public void signRequest(final AbfsHttpOperation httpOperation, int bytesToSign) throws IOException { if (client.isSendMetricCall()) { - client.getMetricSharedkeyCredentials().signRequest(httpOperation.getConnection(), bytesToSign); + client.getMetricSharedkeyCredentials().signRequest(httpOperation, bytesToSign); } else { switch (client.getAuthType()) { - case Custom: - case OAuth: - LOG.debug("Authenticating request with OAuth2 access token"); - httpOperation.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, - client.getAccessToken()); - break; - case SAS: - // do nothing; the SAS token should already be appended to the query string - httpOperation.setMaskForSAS(); //mask sig/oid from url for logs - break; - case SharedKey: - default: - // sign the HTTP request - LOG.debug("Signing request with shared key"); - // sign the HTTP request - client.getSharedKeyCredentials().signRequest( - httpOperation.getConnection(), - bytesToSign); - break; + case Custom: + case OAuth: + LOG.debug("Authenticating request with OAuth2 access token"); + httpOperation.setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, + client.getAccessToken()); + break; + case SAS: + // do nothing; the SAS token should already be appended to the query string + httpOperation.setMaskForSAS(); //mask sig/oid from url for logs + break; + case SharedKey: + default: + // sign the HTTP request + LOG.debug("Signing request with shared key"); + // sign the HTTP request + client.getSharedKeyCredentials().signRequest( + httpOperation, + bytesToSign); + break; } } } /** - * Creates new object of {@link AbfsHttpOperation} with the url, method, requestHeader fields and - * timeout values as set in configuration of the AbfsRestOperation object. - * - * @return {@link AbfsHttpOperation} to be used for sending requests + * Creates new object of {@link AbfsHttpOperation} with the url, method, and + * requestHeaders fields of the AbfsRestOperation object. 
*/ @VisibleForTesting AbfsHttpOperation createHttpOperation() throws IOException { - return new AbfsHttpOperation(url, method, requestHeaders, - client.getAbfsConfiguration().getHttpConnectionTimeout(), - client.getAbfsConfiguration().getHttpReadTimeout()); + HttpOperationType httpOperationType + = abfsConfiguration.getPreferredHttpOperationType(); + if (httpOperationType == HttpOperationType.APACHE_HTTP_CLIENT + && isApacheClientUsable()) { + return createAbfsAHCHttpOperation(); + } + return createAbfsHttpOperation(); + } + + private boolean isApacheClientUsable() { + return AbfsApacheHttpClient.usable(); + } + + @VisibleForTesting + AbfsJdkHttpOperation createAbfsHttpOperation() throws IOException { + return new AbfsJdkHttpOperation(url, method, requestHeaders, + Duration.ofMillis(client.getAbfsConfiguration().getHttpConnectionTimeout()), + Duration.ofMillis(client.getAbfsConfiguration().getHttpReadTimeout())); + } + + @VisibleForTesting + AbfsAHCHttpOperation createAbfsAHCHttpOperation() throws IOException { + return new AbfsAHCHttpOperation(url, method, requestHeaders, + Duration.ofMillis(client.getAbfsConfiguration().getHttpConnectionTimeout()), + Duration.ofMillis(client.getAbfsConfiguration().getHttpReadTimeout()), + client.getAbfsApacheHttpClient()); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingIntercept.java index 57b5095bb3219..725377714642b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingIntercept.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsThrottlingIntercept.java @@ -30,11 +30,12 @@ public interface AbfsThrottlingIntercept { /** * Updates the metrics for successful and failed read and write operations. + * * @param operationType Only applicable for read and write operations. - * @param abfsHttpOperation Used for status code and data transferred. + * @param httpOperation Used for status code and data transferred. */ void updateMetrics(AbfsRestOperationType operationType, - AbfsHttpOperation abfsHttpOperation); + AbfsHttpOperation httpOperation); /** * Called before the request is sent. Client-side throttling diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/KeepAliveCache.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/KeepAliveCache.java new file mode 100644 index 0000000000000..47c9ff26ca851 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/KeepAliveCache.java @@ -0,0 +1,306 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Stack; +import java.util.Timer; +import java.util.TimerTask; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.ClosedIOException; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.http.HttpClientConnection; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_MAX_CONN_SYS_PROP; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.KEEP_ALIVE_CACHE_CLOSED; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_APACHE_HTTP_CLIENT_MAX_CACHE_CONNECTION_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_HTTP_CLIENT_CONN_MAX_CACHED_CONNECTIONS; + +/** + * Connection-pooling heuristics used by {@link AbfsConnectionManager}. Each + * instance of FileSystem has its own KeepAliveCache. + *

+ * Why this implementation is required in comparison to {@link org.apache.http.impl.conn.PoolingHttpClientConnectionManager} + * connection-pooling: + *

+ * 1. The PoolingHttpClientConnectionManager heuristic caches all the reusable connections it has created, + * whereas the JDK's implementation only caches a limited number of connections. The limit is given by the JVM system + * property "http.maxConnections"; if the system property is not set, it defaults to 5.
+ * 2. PoolingHttpClientConnectionManager expects the application to provide `setMaxPerRoute` and `setMaxTotal`, + * which it uses as the total number of connections it can create. For an application using ABFS, it is not + * feasible to provide a value in the initialisation of the connection manager. The JDK's implementation has no cap on the + * number of connections it can create.
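As a minimal sketch of how this cap is resolved in practice, mirroring the KeepAliveCache(AbfsConfiguration) constructor later in this class (the class and method names below are illustrative; the configuration key and the default of 5 are the ones referenced in this file, and a plain Hadoop Configuration stands in for AbfsConfiguration):

import org.apache.hadoop.conf.Configuration;

final class ConnectionCapSketch {

  private ConnectionCapSketch() {
  }

  // Resolution order: explicit ABFS key, else the JVM system property
  // "http.maxConnections", else the library default of 5 cached connections.
  static int effectiveMaxCachedConnections(Configuration conf) {
    int sysProp = Integer.parseInt(System.getProperty("http.maxConnections", "0"));
    int fallback = sysProp > 0 ? sysProp : 5;
    return conf.getInt("fs.azure.apache.http.client.max.cache.connection.size", fallback);
  }
}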
+ */ +class KeepAliveCache extends Stack + implements + Closeable { + private static final long serialVersionUID = 1L; + + private static final Logger LOG = LoggerFactory.getLogger(KeepAliveCache.class); + + /** + * Scheduled timer that evicts idle connections. + */ + private final transient Timer timer; + + /** + * Task provided to the timer that owns eviction logic. + */ + private final transient TimerTask timerTask; + + /** + * Flag to indicate if the cache is closed. + */ + private final AtomicBoolean isClosed = new AtomicBoolean(false); + + /** + * Counter to keep track of the number of KeepAliveCache instances created. + */ + private static final AtomicInteger KAC_COUNTER = new AtomicInteger(0); + + /** + * Maximum number of connections that can be cached. + */ + private final int maxConn; + + /** + * Time-to-live for an idle connection. + */ + private final long connectionIdleTTL; + + /** + * Flag to indicate if the eviction thread is paused. + */ + private final AtomicBoolean isPaused = new AtomicBoolean(false); + + /** + * Account name for which the cache is created. To be used only in exception + * messages. + */ + private final String accountNamePath; + + @VisibleForTesting + synchronized void pauseThread() { + isPaused.set(true); + } + + @VisibleForTesting + synchronized void resumeThread() { + isPaused.set(false); + } + + /** + * @return connectionIdleTTL. + */ + @VisibleForTesting + public long getConnectionIdleTTL() { + return connectionIdleTTL; + } + + /** + * Creates an {@link KeepAliveCache} instance using filesystem's configuration. + *

+ * The size of the cache is determined by the configuration + * {@value org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys#FS_AZURE_APACHE_HTTP_CLIENT_MAX_CACHE_CONNECTION_SIZE}. + * If the configuration is not set, the system property {@value org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants#HTTP_MAX_CONN_SYS_PROP} is used. + * If the system property is not set or is set to 0, the default value + * {@value org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations#DEFAULT_HTTP_CLIENT_CONN_MAX_CACHED_CONNECTIONS} is used. + *

+ * This schedules an eviction thread to run every connectionIdleTTL milliseconds + * given by the configuration {@link AbfsConfiguration#getMaxApacheHttpClientConnectionIdleTime()}. + * @param abfsConfiguration Configuration of the filesystem. + */ + KeepAliveCache(AbfsConfiguration abfsConfiguration) { + accountNamePath = abfsConfiguration.getAccountName(); + this.timer = new Timer("abfs-kac-" + KAC_COUNTER.getAndIncrement(), true); + + int sysPropMaxConn = Integer.parseInt(System.getProperty(HTTP_MAX_CONN_SYS_PROP, "0")); + final int defaultMaxConn; + if (sysPropMaxConn > 0) { + defaultMaxConn = sysPropMaxConn; + } else { + defaultMaxConn = DEFAULT_HTTP_CLIENT_CONN_MAX_CACHED_CONNECTIONS; + } + this.maxConn = abfsConfiguration.getInt( + FS_AZURE_APACHE_HTTP_CLIENT_MAX_CACHE_CONNECTION_SIZE, + defaultMaxConn); + + this.connectionIdleTTL + = abfsConfiguration.getMaxApacheHttpClientConnectionIdleTime(); + this.timerTask = new TimerTask() { + @Override + public void run() { + if (isPaused.get() || isClosed.get()) { + return; + } + evictIdleConnection(); + } + }; + timer.schedule(timerTask, 0, connectionIdleTTL); + } + + /** + * Iterate over the cache and evict the idle connections. An idle connection is + * one that has been in the cache for more than connectionIdleTTL milliseconds. + */ + synchronized void evictIdleConnection() { + long currentTime = System.currentTimeMillis(); + int i; + for (i = 0; i < size(); i++) { + KeepAliveEntry e = elementAt(i); + if ((currentTime - e.idleStartTime) > connectionIdleTTL + || e.httpClientConnection.isStale()) { + HttpClientConnection hc = e.httpClientConnection; + closeHttpClientConnection(hc); + } else { + break; + } + } + subList(0, i).clear(); + } + + /** + * Safe close of the HttpClientConnection. + * + * @param hc HttpClientConnection to be closed + */ + private void closeHttpClientConnection(final HttpClientConnection hc) { + try { + hc.close(); + } catch (IOException ex) { + if (LOG.isDebugEnabled()) { + LOG.debug("Close failed for connection: {}", hc, ex); + } + } + } + + /** + * Close all connections in cache and cancel the eviction timer. + */ + @Override + public synchronized void close() { + boolean closed = isClosed.getAndSet(true); + if (closed) { + return; + } + closeInternal(); + } + + @VisibleForTesting + void closeInternal() { + timerTask.cancel(); + timer.purge(); + while (!empty()) { + KeepAliveEntry e = pop(); + closeHttpClientConnection(e.httpClientConnection); + } + } + + /** + *

+ * Gets the latest added HttpClientConnection from the cache. The returned connection + * is non-stale and has been in the cache for less than connectionIdleTTL milliseconds. + *

+ * The cache is checked from the top of the stack. If the connection is stale or has been + * in the cache for more than connectionIdleTTL milliseconds, it is closed and the next + * connection is checked. Once a valid connection is found, it is returned. + * @return HttpClientConnection: if a valid connection is found, else null. + * @throws IOException if the cache is closed. + */ + public synchronized HttpClientConnection get() + throws IOException { + if (isClosed.get()) { + throw new ClosedIOException(accountNamePath, KEEP_ALIVE_CACHE_CLOSED); + } + if (empty()) { + return null; + } + HttpClientConnection hc = null; + long currentTime = System.currentTimeMillis(); + do { + KeepAliveEntry e = pop(); + if ((currentTime - e.idleStartTime) > connectionIdleTTL + || e.httpClientConnection.isStale()) { + closeHttpClientConnection(e.httpClientConnection); + } else { + hc = e.httpClientConnection; + } + } while ((hc == null) && (!empty())); + return hc; + } + + /** + * Puts the HttpClientConnection in the cache. If the size of cache is equal to + * maxConn, the oldest connection is closed and removed from the cache, which + * will make space for the new connection. If the cache is closed or of zero size, + * the connection is closed and not added to the cache. + * + * @param httpClientConnection HttpClientConnection to be cached + * @return true if the HttpClientConnection is added in active cache, false otherwise. + */ + public synchronized boolean put(HttpClientConnection httpClientConnection) { + if (isClosed.get() || maxConn == 0) { + closeHttpClientConnection(httpClientConnection); + return false; + } + if (size() == maxConn) { + closeHttpClientConnection(get(0).httpClientConnection); + subList(0, 1).clear(); + } + KeepAliveEntry entry = new KeepAliveEntry(httpClientConnection, + System.currentTimeMillis()); + push(entry); + return true; + } + + @Override + public synchronized boolean equals(final Object o) { + return super.equals(o); + } + + @Override + public synchronized int hashCode() { + return super.hashCode(); + } + + /** + * Entry data-structure in the cache. 
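A minimal sketch of the lease/release pattern that get() and put() support, assuming a caller in the same package (the class is package-private) and a hypothetical factory for creating fresh connections; the helper class and method names are illustrative only:

package org.apache.hadoop.fs.azurebfs.services;

import java.io.IOException;

import org.apache.hadoop.util.functional.CallableRaisingIOE;
import org.apache.http.HttpClientConnection;

final class KeepAliveCacheUsageSketch {

  private KeepAliveCacheUsageSketch() {
  }

  /** Prefer a cached, non-stale connection; otherwise create a new one. */
  static HttpClientConnection lease(KeepAliveCache cache,
      CallableRaisingIOE<HttpClientConnection> factory) throws IOException {
    HttpClientConnection conn = cache.get();
    return conn != null ? conn : factory.apply();
  }

  /** Offer a connection back for reuse; put() closes it itself if it cannot be kept. */
  static void release(KeepAliveCache cache, HttpClientConnection conn) {
    cache.put(conn);
  }
}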
+ */ + static class KeepAliveEntry { + + /**HttpClientConnection in the cache entry.*/ + private final HttpClientConnection httpClientConnection; + + /**Time at which the HttpClientConnection was added to the cache.*/ + private final long idleStartTime; + + KeepAliveEntry(HttpClientConnection hc, long idleStartTime) { + this.httpClientConnection = hc; + this.idleStartTime = idleStartTime; + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/SharedKeyCredentials.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/SharedKeyCredentials.java index 1aee53def1e27..e9b41440ac97e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/SharedKeyCredentials.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/SharedKeyCredentials.java @@ -21,7 +21,6 @@ import javax.crypto.Mac; import javax.crypto.spec.SecretKeySpec; import java.io.UnsupportedEncodingException; -import java.net.HttpURLConnection; import java.net.URL; import java.net.URLDecoder; import java.nio.charset.StandardCharsets; @@ -81,7 +80,7 @@ public SharedKeyCredentials(final String accountName, initializeMac(); } - public void signRequest(HttpURLConnection connection, final long contentLength) throws UnsupportedEncodingException { + public void signRequest(AbfsHttpOperation connection, final long contentLength) throws UnsupportedEncodingException { String gmtTime = getGMTTime(); connection.setRequestProperty(HttpHeaderConfigurations.X_MS_DATE, gmtTime); @@ -113,7 +112,7 @@ private String computeHmac256(final String stringToSign) { * @param conn the HttpURLConnection for the operation * @param canonicalizedString the canonicalized string to add the canonicalized headerst to. */ - private static void addCanonicalizedHeaders(final HttpURLConnection conn, final StringBuilder canonicalizedString) { + private static void addCanonicalizedHeaders(final AbfsHttpOperation conn, final StringBuilder canonicalizedString) { // Look for header names that start with // HeaderNames.PrefixForStorageHeader // Then sort them in case-insensitive manner. @@ -201,13 +200,13 @@ private static void appendCanonicalizedElement(final StringBuilder builder, fina */ private static String canonicalizeHttpRequest(final URL address, final String accountName, final String method, final String contentType, - final long contentLength, final String date, final HttpURLConnection conn) + final long contentLength, final String date, final AbfsHttpOperation conn) throws UnsupportedEncodingException { // The first element should be the Method of the request. // I.e. GET, POST, PUT, or HEAD. final StringBuilder canonicalizedString = new StringBuilder(EXPECTED_BLOB_QUEUE_CANONICALIZED_STRING_LENGTH); - canonicalizedString.append(conn.getRequestMethod()); + canonicalizedString.append(conn.getMethod()); // The next elements are // If any element is missing it may be empty. @@ -446,7 +445,7 @@ private static String trimStart(final String value) { return value.substring(spaceDex); } - private static String getHeaderValue(final HttpURLConnection conn, final String headerName, final String defaultValue) { + private static String getHeaderValue(final AbfsHttpOperation conn, final String headerName, final String defaultValue) { final String headerValue = conn.getRequestProperty(headerName); return headerValue == null ? 
defaultValue : headerValue; } @@ -461,7 +460,7 @@ private static String getHeaderValue(final HttpURLConnection conn, final String * -1 if unknown * @return a canonicalized string. */ - private String canonicalize(final HttpURLConnection conn, + private String canonicalize(final AbfsHttpOperation conn, final String accountName, final Long contentLength) throws UnsupportedEncodingException { @@ -472,8 +471,8 @@ private String canonicalize(final HttpURLConnection conn, String contentType = getHeaderValue(conn, HttpHeaderConfigurations.CONTENT_TYPE, ""); - return canonicalizeHttpRequest(conn.getURL(), accountName, - conn.getRequestMethod(), contentType, contentLength, null, conn); + return canonicalizeHttpRequest(conn.getConnUrl(), accountName, + conn.getMethod(), contentType, contentLength, null, conn); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java index b0a9a021c5e47..a349894cd4f30 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/TracingContext.java @@ -192,6 +192,7 @@ public void constructHeader(AbfsHttpOperation httpOperation, String previousFail + getPrimaryRequestIdForHeader(retryCount > 0) + ":" + streamID + ":" + opType + ":" + retryCount; header = addFailureReasons(header, previousFailure, retryPolicyAbbreviation); + header += (":" + httpOperation.getTracingContextSuffix()); metricHeader += !(metricResults.trim().isEmpty()) ? metricResults : ""; break; case TWO_ID_FORMAT: diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md index 37904808ec659..7b4663b5a6140 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/abfs.md @@ -866,6 +866,45 @@ and all associated tests to see how to make use of these extension points. _Warning_ These extension points are unstable. +### Networking Layer: + +ABFS Driver can use the following networking libraries: +- ApacheHttpClient: + - Library Documentation. + - Default networking library. +- JDK networking library: + - Library documentation. + +The networking library can be configured using the configuration `fs.azure.networking.library` +while initializing the filesystem. +Following are the supported values: +- `APACHE_HTTP_CLIENT` : Use Apache HttpClient [Default] +- `JDK_HTTP_URL_CONNECTION` : Use JDK networking library + +#### ApacheHttpClient networking layer configuration Options: + +Following are the configuration options for ApacheHttpClient networking layer that +can be provided at the initialization of the filesystem: +1. `fs.azure.apache.http.client.idle.connection.ttl`: + 1. Maximum idle time in milliseconds for a connection to be kept alive in the connection pool. + If the connection is not reused within the time limit, the connection shall be closed. + 2. Default value: 5000 milliseconds. +2. `fs.azure.apache.http.client.max.cache.connection.size`: + 1. Maximum number of connections that can be cached in the connection pool for + a filesystem instance. Total number of concurrent connections has no limit. + 2. Default value: 5. +3. `fs.azure.apache.http.client.max.io.exception.retries`: + 1. Maximum number of times the client will retry on IOExceptions for a single request + with ApacheHttpClient networking-layer. 
Breach of this limit would turn off + the future uses of the ApacheHttpClient library in the current JVM instance. + 2. Default value: 3. + +#### ApacheHttpClient classpath requirements: + +ApacheHttpClient is a `compile` maven dependency in hadoop-azure and would be +included in the hadoop-azure jar. For using hadoop-azure with ApacheHttpClient no +additional information is required in the classpath. + ## Other configuration options Consult the javadocs for `org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys`, diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java index 89504ea461b23..0951ed9a0303b 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java @@ -33,6 +33,7 @@ import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; import org.apache.hadoop.fs.azurebfs.security.EncodingHelper; import org.apache.hadoop.fs.azurebfs.services.AbfsClientUtils; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.assertj.core.api.Assertions; import org.assertj.core.api.Assumptions; @@ -51,7 +52,6 @@ import org.apache.hadoop.fs.azurebfs.extensions.MockEncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.security.ContextProviderEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; -import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.utils.EncryptionType; import org.apache.hadoop.fs.impl.OpenFileParameters; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadWriteAndSeek.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadWriteAndSeek.java index bc420c6a1f8cd..c32c0147fe7da 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadWriteAndSeek.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsReadWriteAndSeek.java @@ -31,6 +31,7 @@ import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.services.AbfsInputStream; import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; +import org.apache.hadoop.fs.azurebfs.constants.HttpOperationType; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; import org.apache.hadoop.fs.statistics.IOStatisticsSource; @@ -55,22 +56,71 @@ public class ITestAbfsReadWriteAndSeek extends AbstractAbfsScaleTest { * For test performance, a full x*y test matrix is not used. 
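As a minimal sketch of wiring these options up from client code (the account URI is a placeholder and the class name is illustrative; the configuration keys and defaults are those documented in the abfs.md section above):

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AbfsNetworkingConfigSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Networking library: APACHE_HTTP_CLIENT (default) or JDK_HTTP_URL_CONNECTION.
    conf.set("fs.azure.networking.library", "APACHE_HTTP_CLIENT");
    // Evict pooled connections idle for longer than 5 seconds (default 5000 ms).
    conf.setLong("fs.azure.apache.http.client.idle.connection.ttl", 5000L);
    // Keep at most 5 idle connections per FileSystem instance (default 5).
    conf.setInt("fs.azure.apache.http.client.max.cache.connection.size", 5);
    // After 3 IOExceptions, further use of ApacheHttpClient is disabled in this JVM (default 3).
    conf.setInt("fs.azure.apache.http.client.max.io.exception.retries", 3);

    try (FileSystem fs = FileSystem.newInstance(
        new URI("abfs://container@account.dfs.core.windows.net/"), conf)) {
      fs.getFileStatus(new Path("/"));
    }
  }
}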
* @return the test parameters */ - @Parameterized.Parameters(name = "Size={0}-readahead={1}") + @Parameterized.Parameters(name = "Size={0}-readahead={1}-Client={2}") public static Iterable sizes() { - return Arrays.asList(new Object[][]{{MIN_BUFFER_SIZE, true}, - {DEFAULT_READ_BUFFER_SIZE, false}, - {DEFAULT_READ_BUFFER_SIZE, true}, - {APPENDBLOB_MAX_WRITE_BUFFER_SIZE, false}, - {MAX_BUFFER_SIZE, true}}); + return Arrays.asList(new Object[][]{ + { + MIN_BUFFER_SIZE, + true, + HttpOperationType.JDK_HTTP_URL_CONNECTION + }, + { + MIN_BUFFER_SIZE, + true, + HttpOperationType.APACHE_HTTP_CLIENT + }, + { + DEFAULT_READ_BUFFER_SIZE, + false, + HttpOperationType.JDK_HTTP_URL_CONNECTION + }, + { + DEFAULT_READ_BUFFER_SIZE, + false, + HttpOperationType.APACHE_HTTP_CLIENT + }, + { + DEFAULT_READ_BUFFER_SIZE, + true, + HttpOperationType.JDK_HTTP_URL_CONNECTION + }, + { + DEFAULT_READ_BUFFER_SIZE, + true, + HttpOperationType.APACHE_HTTP_CLIENT + }, + { + APPENDBLOB_MAX_WRITE_BUFFER_SIZE, + false, + HttpOperationType.JDK_HTTP_URL_CONNECTION + }, + { + APPENDBLOB_MAX_WRITE_BUFFER_SIZE, + false, + HttpOperationType.APACHE_HTTP_CLIENT + }, + { + MAX_BUFFER_SIZE, + true, + HttpOperationType.JDK_HTTP_URL_CONNECTION + }, + { + MAX_BUFFER_SIZE, + true, + HttpOperationType.APACHE_HTTP_CLIENT + } + }); } private final int size; private final boolean readaheadEnabled; + private final HttpOperationType httpOperationType; public ITestAbfsReadWriteAndSeek(final int size, - final boolean readaheadEnabled) throws Exception { + final boolean readaheadEnabled, final HttpOperationType httpOperationType) throws Exception { this.size = size; this.readaheadEnabled = readaheadEnabled; + this.httpOperationType = httpOperationType; } @Test diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java index 5a6d3785fb660..98e98953c40fa 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCreate.java @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; @@ -44,7 +45,6 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.ConcurrentWriteOperationDetectedException; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; -import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; @@ -56,6 +56,7 @@ import static java.net.HttpURLConnection.HTTP_OK; import static java.net.HttpURLConnection.HTTP_PRECON_FAILED; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; import static org.mockito.ArgumentMatchers.nullable; @@ -238,7 +239,9 @@ public void 
testFilterFSWriteAfterClose() throws Throwable { intercept(FileNotFoundException.class, () -> { try (FilterOutputStream fos = new FilterOutputStream(out)) { - fos.write('a'); + byte[] bytes = new byte[8*ONE_MB]; + fos.write(bytes); + fos.write(bytes); fos.flush(); out.hsync(); fs.delete(testPath, false); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java index c9f89e6643349..80dda1fa95ed1 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelegationSAS.java @@ -40,9 +40,9 @@ import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys; import org.apache.hadoop.fs.azurebfs.extensions.MockDelegationSASTokenProvider; -import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.services.AuthType; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclEntryScope; import org.apache.hadoop.fs.permission.AclStatus; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java index fd5d312176321..9e42f6ba14953 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemDelete.java @@ -37,8 +37,8 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsClientTestUtil; -import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; import org.apache.hadoop.fs.azurebfs.services.TestAbfsPerfTracker; import org.apache.hadoop.fs.azurebfs.utils.TestMockHelpers; @@ -264,7 +264,7 @@ public void testDeleteIdempotencyTriggerHttp404() throws Exception { AbfsRestOperation idempotencyRetOp = Mockito.spy(ITestAbfsClient.getRestOp( DeletePath, mockClient, HTTP_METHOD_DELETE, ITestAbfsClient.getTestUrl(mockClient, "/NonExistingPath"), - ITestAbfsClient.getTestRequestHeaders(mockClient))); + ITestAbfsClient.getTestRequestHeaders(mockClient), getConfiguration())); idempotencyRetOp.hardSetResult(HTTP_OK); doReturn(idempotencyRetOp).when(mockClient).deleteIdempotencyCheckOp(any()); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemE2E.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemE2E.java index f1673a3b38b45..c7a1fa91a98f9 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemE2E.java +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemE2E.java @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_HTTP_CONNECTION_TIMEOUT; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_HTTP_READ_TIMEOUT; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_MAX_IO_RETRIES; @@ -257,22 +258,24 @@ public void testHttpReadTimeout() throws Exception { } public void testHttpTimeouts(int connectionTimeoutMs, int readTimeoutMs) - throws Exception { + throws Exception { Configuration conf = this.getRawConfiguration(); // set to small values that will cause timeouts conf.setInt(AZURE_HTTP_CONNECTION_TIMEOUT, connectionTimeoutMs); conf.setInt(AZURE_HTTP_READ_TIMEOUT, readTimeoutMs); + conf.setBoolean(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, + false); // Reduce retry count to reduce test run time conf.setInt(AZURE_MAX_IO_RETRIES, 1); final AzureBlobFileSystem fs = getFileSystem(conf); Assertions.assertThat( - fs.getAbfsStore().getAbfsConfiguration().getHttpConnectionTimeout()) - .describedAs("HTTP connection time should be picked from config") - .isEqualTo(connectionTimeoutMs); + fs.getAbfsStore().getAbfsConfiguration().getHttpConnectionTimeout()) + .describedAs("HTTP connection time should be picked from config") + .isEqualTo(connectionTimeoutMs); Assertions.assertThat( - fs.getAbfsStore().getAbfsConfiguration().getHttpReadTimeout()) - .describedAs("HTTP Read time should be picked from config") - .isEqualTo(readTimeoutMs); + fs.getAbfsStore().getAbfsConfiguration().getHttpReadTimeout()) + .describedAs("HTTP Read time should be picked from config") + .isEqualTo(readTimeoutMs); Path testPath = path(methodName.getMethodName()); ContractTestUtils.createFile(fs, testPath, false, new byte[0]); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java index 4b961f56066df..c48b8b0d6267d 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemLease.java @@ -18,6 +18,7 @@ package org.apache.hadoop.fs.azurebfs; import java.io.IOException; +import java.util.concurrent.Callable; import java.util.concurrent.RejectedExecutionException; import org.junit.Assert; @@ -28,6 +29,8 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.constants.HttpOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsLease; @@ -302,11 +305,29 @@ public void testFileSystemClose() throws Exception { fs.close(); Assert.assertTrue("Store leases were not freed", fs.getAbfsStore().areLeasesFreed()); - LambdaTestUtils.intercept(RejectedExecutionException.class, () -> { + Callable 
exceptionRaisingCallable = () -> { try (FSDataOutputStream out2 = fs.append(testFilePath)) { } return "Expected exception on new append after closed FS"; - }); + }; + /* + * For ApacheHttpClient, the failure would happen when trying to get a connection + * from KeepAliveCache, which is not possible after the FS is closed, as that + * also closes the cache. + * + * For JDK_Client, the failure happens when trying to submit a task to the + * executor service, which is not possible after the FS is closed, as that + * also shuts down the executor service. + */ + + if (getConfiguration().getPreferredHttpOperationType() + == HttpOperationType.APACHE_HTTP_CLIENT) { + LambdaTestUtils.intercept(AbfsDriverException.class, + exceptionRaisingCallable); + } else { + LambdaTestUtils.intercept(RejectedExecutionException.class, + exceptionRaisingCallable); + } } @Test(timeout = TEST_EXECUTION_TIMEOUT) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java index 3ffa2bd49e427..506eae7598668 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestTracingContext.java @@ -38,9 +38,9 @@ import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.enums.Trilean; -import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.services.AuthType; +import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderValidator; @@ -51,6 +51,8 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_CLIENT_CORRELATIONID; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.MIN_BUFFER_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpOperationType.APACHE_HTTP_CLIENT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpOperationType.JDK_HTTP_URL_CONNECTION; import static org.apache.hadoop.fs.azurebfs.services.RetryPolicyConstants.EXPONENTIAL_RETRY_POLICY_ABBREVIATION; import static org.apache.hadoop.fs.azurebfs.services.RetryPolicyConstants.STATIC_RETRY_POLICY_ABBREVIATION; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_ABBREVIATION; @@ -135,10 +137,16 @@ public void runCorrelationTestForAllMethods() throws Exception { testClasses.put(new ITestAzureBlobFileSystemListStatus(), //liststatus ITestAzureBlobFileSystemListStatus.class.getMethod("testListPath")); - testClasses.put(new ITestAbfsReadWriteAndSeek(MIN_BUFFER_SIZE, true), //open, + testClasses.put(new ITestAbfsReadWriteAndSeek(MIN_BUFFER_SIZE, true, JDK_HTTP_URL_CONNECTION), //open, + // read, write + ITestAbfsReadWriteAndSeek.class.getMethod("testReadAheadRequestID")); + testClasses.put(new ITestAbfsReadWriteAndSeek(MIN_BUFFER_SIZE, true, APACHE_HTTP_CLIENT), //open, // read, write ITestAbfsReadWriteAndSeek.class.getMethod("testReadAheadRequestID")); - testClasses.put(new 
ITestAbfsReadWriteAndSeek(MIN_BUFFER_SIZE, false), //read (bypassreadahead) + testClasses.put(new ITestAbfsReadWriteAndSeek(MIN_BUFFER_SIZE, false, JDK_HTTP_URL_CONNECTION), //read (bypassreadahead) + ITestAbfsReadWriteAndSeek.class + .getMethod("testReadAndWriteWithDifferentBufferSizesAndSeek")); + testClasses.put(new ITestAbfsReadWriteAndSeek(MIN_BUFFER_SIZE, false, APACHE_HTTP_CLIENT), //read (bypassreadahead) ITestAbfsReadWriteAndSeek.class .getMethod("testReadAndWriteWithDifferentBufferSizesAndSeek")); testClasses.put(new ITestAzureBlobFileSystemAppend(), //append diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java index 53185606b6c80..1a663ec3c93c5 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/extensions/MockDelegationSASTokenProvider.java @@ -22,6 +22,7 @@ import java.net.MalformedURLException; import java.net.URL; import java.nio.charset.StandardCharsets; +import java.time.Duration; import java.time.Instant; import java.util.ArrayList; import java.util.List; @@ -33,7 +34,7 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidUriException; import org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider; import org.apache.hadoop.fs.azurebfs.services.AbfsHttpHeader; -import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; +import org.apache.hadoop.fs.azurebfs.services.AbfsJdkHttpOperation; import org.apache.hadoop.fs.azurebfs.utils.Base64; import org.apache.hadoop.fs.azurebfs.utils.DelegationSASGenerator; import org.apache.hadoop.fs.azurebfs.utils.SASGenerator; @@ -106,11 +107,11 @@ private byte[] getUserDelegationKey(String accountName, String appID, String app requestBody.append(ske); requestBody.append(""); - AbfsHttpOperation op = new AbfsHttpOperation(url, method, requestHeaders, - DEFAULT_HTTP_CONNECTION_TIMEOUT, DEFAULT_HTTP_READ_TIMEOUT); + AbfsJdkHttpOperation op = new AbfsJdkHttpOperation(url, method, requestHeaders, + Duration.ofMillis(DEFAULT_HTTP_CONNECTION_TIMEOUT), Duration.ofMillis(DEFAULT_HTTP_READ_TIMEOUT)); byte[] requestBuffer = requestBody.toString().getBytes(StandardCharsets.UTF_8.toString()); - op.sendRequest(requestBuffer, 0, requestBuffer.length); + op.sendPayload(requestBuffer, 0, requestBuffer.length); byte[] responseBuffer = new byte[4 * 1024]; op.processResponse(responseBuffer, 0, responseBuffer.length); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java index 2b60cb57fdf39..a153d9f3027ea 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientTestUtil.java @@ -63,18 +63,19 @@ private AbfsClientTestUtil() { public static void setMockAbfsRestOperationForListPathOperation( final AbfsClient spiedClient, - FunctionRaisingIOE functionRaisingIOE) + FunctionRaisingIOE functionRaisingIOE) throws Exception { ExponentialRetryPolicy exponentialRetryPolicy = Mockito.mock(ExponentialRetryPolicy.class); StaticRetryPolicy staticRetryPolicy = 
Mockito.mock(StaticRetryPolicy.class); AbfsThrottlingIntercept intercept = Mockito.mock(AbfsThrottlingIntercept.class); - AbfsHttpOperation httpOperation = Mockito.mock(AbfsHttpOperation.class); + AbfsJdkHttpOperation httpOperation = Mockito.mock(AbfsJdkHttpOperation.class); AbfsRestOperation abfsRestOperation = Mockito.spy(new AbfsRestOperation( AbfsRestOperationType.ListPaths, spiedClient, HTTP_METHOD_GET, null, - new ArrayList<>() + new ArrayList<>(), + spiedClient.getAbfsConfiguration() )); Mockito.doReturn(abfsRestOperation).when(spiedClient).getAbfsRestOperation( @@ -99,7 +100,6 @@ public static void addGeneralMockBehaviourToRestOpAndHttpOp(final AbfsRestOperat HttpURLConnection httpURLConnection = Mockito.mock(HttpURLConnection.class); Mockito.doNothing().when(httpURLConnection) .setRequestProperty(nullable(String.class), nullable(String.class)); - Mockito.doReturn(httpURLConnection).when(httpOperation).getConnection(); Mockito.doReturn("").when(abfsRestOperation).getClientLatency(); Mockito.doReturn(httpOperation).when(abfsRestOperation).createHttpOperation(); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java index ca2ea92388d97..909e7cf1749a1 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java @@ -20,26 +20,32 @@ import java.io.IOException; import java.lang.reflect.Field; -import java.net.HttpURLConnection; import java.net.ProtocolException; import java.net.URI; import java.net.URISyntaxException; import java.net.URL; +import java.util.Arrays; import java.util.List; import java.util.Random; import java.util.regex.Pattern; import org.apache.hadoop.fs.azurebfs.AbfsCountersImpl; import org.assertj.core.api.Assertions; +import org.junit.Assume; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.mockito.Mockito; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; import org.apache.hadoop.fs.azurebfs.TestAbfsConfigurationFieldsValidation; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.constants.HttpOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsApacheHttpExpect100Exception; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; @@ -48,6 +54,7 @@ import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; +import org.apache.http.HttpResponse; import static java.net.HttpURLConnection.HTTP_NOT_FOUND; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_ACTION; @@ -61,6 +68,8 @@ import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_ACTION; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_POSITION; import static 
org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.HttpOperationType.APACHE_HTTP_CLIENT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpOperationType.JDK_HTTP_URL_CONNECTION; import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.eq; @@ -87,6 +96,7 @@ * Test useragent of abfs client. * */ +@RunWith(Parameterized.class) public final class ITestAbfsClient extends AbstractAbfsIntegrationTest { private static final String ACCOUNT_NAME = "bogusAccountName.dfs.core.windows.net"; @@ -100,6 +110,17 @@ public final class ITestAbfsClient extends AbstractAbfsIntegrationTest { private final Pattern userAgentStringPattern; + @Parameterized.Parameter + public HttpOperationType httpOperationType; + + @Parameterized.Parameters(name = "{0}") + public static Iterable params() { + return Arrays.asList(new Object[][]{ + {HttpOperationType.JDK_HTTP_URL_CONNECTION}, + {APACHE_HTTP_CLIENT} + }); + } + public ITestAbfsClient() throws Exception { StringBuilder regEx = new StringBuilder(); regEx.append("^"); @@ -151,6 +172,7 @@ private String getUserAgentString(AbfsConfiguration config, @Test public void verifyBasicInfo() throws Exception { + Assume.assumeTrue(JDK_HTTP_URL_CONNECTION == httpOperationType); final Configuration configuration = new Configuration(); configuration.addResource(TEST_CONFIGURATION_FILE_NAME); AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, @@ -179,7 +201,8 @@ private void verifyBasicInfo(String userAgentStr) { @Test public void verifyUserAgentPrefix() - throws IOException, IllegalAccessException, URISyntaxException { + throws IOException, IllegalAccessException, URISyntaxException { + Assume.assumeTrue(JDK_HTTP_URL_CONNECTION == httpOperationType); final Configuration configuration = new Configuration(); configuration.addResource(TEST_CONFIGURATION_FILE_NAME); configuration.set(ConfigurationKeys.FS_AZURE_USER_AGENT_PREFIX_KEY, FS_AZURE_USER_AGENT_PREFIX); @@ -214,6 +237,7 @@ public void verifyUserAgentPrefix() @Test public void verifyUserAgentExpectHeader() throws IOException, IllegalAccessException, URISyntaxException { + Assume.assumeTrue(JDK_HTTP_URL_CONNECTION == httpOperationType); final Configuration configuration = new Configuration(); configuration.addResource(TEST_CONFIGURATION_FILE_NAME); configuration.set(ConfigurationKeys.FS_AZURE_USER_AGENT_PREFIX_KEY, FS_AZURE_USER_AGENT_PREFIX); @@ -240,6 +264,7 @@ public void verifyUserAgentExpectHeader() @Test public void verifyUserAgentWithoutSSLProvider() throws Exception { + Assume.assumeTrue(JDK_HTTP_URL_CONNECTION == httpOperationType); final Configuration configuration = new Configuration(); configuration.addResource(TEST_CONFIGURATION_FILE_NAME); configuration.set(ConfigurationKeys.FS_AZURE_SSL_CHANNEL_MODE_KEY, @@ -263,6 +288,7 @@ public void verifyUserAgentWithoutSSLProvider() throws Exception { @Test public void verifyUserAgentClusterName() throws Exception { + Assume.assumeTrue(JDK_HTTP_URL_CONNECTION == httpOperationType); final String clusterName = "testClusterName"; final Configuration configuration = new Configuration(); configuration.addResource(TEST_CONFIGURATION_FILE_NAME); @@ -291,6 +317,7 @@ public void verifyUserAgentClusterName() throws Exception { @Test public void verifyUserAgentClusterType() throws Exception { + Assume.assumeTrue(JDK_HTTP_URL_CONNECTION == httpOperationType); final 
String clusterType = "testClusterType"; final Configuration configuration = new Configuration(); configuration.addResource(TEST_CONFIGURATION_FILE_NAME); @@ -390,6 +417,7 @@ public static AbfsClient getMockAbfsClient(AbfsClient baseAbfsClientInstance, abfsConfig.getAccountName().substring(0, abfsConfig.getAccountName().indexOf(DOT)), abfsConfig)); when(client.getAbfsCounters()).thenReturn(abfsCounters); + Mockito.doReturn(baseAbfsClientInstance.getAbfsApacheHttpClient()).when(client).getAbfsApacheHttpClient(); // override baseurl client = ITestAbfsClient.setAbfsClientField(client, "abfsConfiguration", @@ -475,13 +503,14 @@ public static AbfsRestOperation getRestOp(AbfsRestOperationType type, AbfsClient client, String method, URL url, - List requestHeaders) { + List requestHeaders, AbfsConfiguration abfsConfiguration) { return new AbfsRestOperation( type, client, method, url, - requestHeaders); + requestHeaders, + abfsConfiguration); } public static AccessTokenProvider getAccessTokenProvider(AbfsClient client) { @@ -499,6 +528,14 @@ private byte[] getRandomBytesArray(int length) { return b; } + @Override + public AzureBlobFileSystem getFileSystem(final Configuration configuration) + throws Exception { + Configuration conf = new Configuration(configuration); + conf.set(ConfigurationKeys.FS_AZURE_NETWORKING_LIBRARY, httpOperationType.toString()); + return (AzureBlobFileSystem) FileSystem.newInstance(conf); + } + /** * Test to verify that client retries append request without * expect header enabled if append with expect header enabled fails @@ -508,9 +545,10 @@ private byte[] getRandomBytesArray(int length) { @Test public void testExpectHundredContinue() throws Exception { // Get the filesystem. - final AzureBlobFileSystem fs = getFileSystem(); + final AzureBlobFileSystem fs = getFileSystem(getRawConfiguration()); - final Configuration configuration = new Configuration(); + final Configuration configuration = fs.getAbfsStore().getAbfsConfiguration() + .getRawConfiguration(); configuration.addResource(TEST_CONFIGURATION_FILE_NAME); AbfsClient abfsClient = fs.getAbfsStore().getClient(); @@ -570,44 +608,49 @@ public void testExpectHundredContinue() throws Exception { url, requestHeaders, buffer, appendRequestParameters.getoffset(), - appendRequestParameters.getLength(), null)); - - AbfsHttpOperation abfsHttpOperation = Mockito.spy(new AbfsHttpOperation(url, - HTTP_METHOD_PUT, requestHeaders, DEFAULT_HTTP_CONNECTION_TIMEOUT, DEFAULT_HTTP_READ_TIMEOUT)); - - // Sets the expect request property if expect header is enabled. - if (appendRequestParameters.isExpectHeaderEnabled()) { - Mockito.doReturn(HUNDRED_CONTINUE).when(abfsHttpOperation) - .getConnProperty(EXPECT); - } - - HttpURLConnection urlConnection = mock(HttpURLConnection.class); - Mockito.doNothing().when(urlConnection).setRequestProperty(Mockito - .any(), Mockito.any()); - Mockito.doReturn(HTTP_METHOD_PUT).when(urlConnection).getRequestMethod(); - Mockito.doReturn(url).when(urlConnection).getURL(); - Mockito.doReturn(urlConnection).when(abfsHttpOperation).getConnection(); - - Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito - .any(), Mockito.any()); - Mockito.doReturn(url).when(abfsHttpOperation).getConnUrl(); - - // Give user error code 404 when processResponse is called. 
- Mockito.doReturn(HTTP_METHOD_PUT).when(abfsHttpOperation).getConnRequestMethod(); - Mockito.doReturn(HTTP_NOT_FOUND).when(abfsHttpOperation).getConnResponseCode(); - Mockito.doReturn("Resource Not Found") - .when(abfsHttpOperation) - .getConnResponseMessage(); - - // Make the getOutputStream throw IOException to see it returns from the sendRequest correctly. - Mockito.doThrow(new ProtocolException(EXPECT_100_JDK_ERROR)) - .when(abfsHttpOperation) - .getConnOutputStream(); - - // Sets the httpOperation for the rest operation. - Mockito.doReturn(abfsHttpOperation) - .when(op) - .createHttpOperation(); + appendRequestParameters.getLength(), null, abfsConfig)); + + Mockito.doAnswer(answer -> { + AbfsHttpOperation httpOperation = Mockito.spy((AbfsHttpOperation) answer.callRealMethod()); + // Sets the expect request property if expect header is enabled. + if (appendRequestParameters.isExpectHeaderEnabled()) { + Mockito.doReturn(HUNDRED_CONTINUE).when(httpOperation) + .getConnProperty(EXPECT); + } + Mockito.doNothing().when(httpOperation).setRequestProperty(Mockito + .any(), Mockito.any()); + Mockito.doReturn(url).when(httpOperation).getConnUrl(); + + // Give user error code 404 when processResponse is called. + Mockito.doReturn(HTTP_METHOD_PUT).when(httpOperation).getMethod(); + Mockito.doReturn(HTTP_NOT_FOUND).when(httpOperation).getStatusCode(); + Mockito.doReturn("Resource Not Found") + .when(httpOperation) + .getConnResponseMessage(); + + if (httpOperation instanceof AbfsJdkHttpOperation) { + // Make the getOutputStream throw IOException to see it returns from the sendRequest correctly. + Mockito.doThrow(new ProtocolException(EXPECT_100_JDK_ERROR)) + .when((AbfsJdkHttpOperation) httpOperation) + .getConnOutputStream(); + } + + if (httpOperation instanceof AbfsAHCHttpOperation) { + Mockito.doNothing() + .when((AbfsAHCHttpOperation) httpOperation) + .parseResponseHeaderAndBody(Mockito.any(byte[].class), + Mockito.anyInt(), Mockito.anyInt()); + Mockito.doReturn(HTTP_NOT_FOUND) + .when((AbfsAHCHttpOperation) httpOperation) + .parseStatusCode(Mockito.nullable( + HttpResponse.class)); + Mockito.doThrow( + new AbfsApacheHttpExpect100Exception(Mockito.mock(HttpResponse.class))) + .when((AbfsAHCHttpOperation) httpOperation) + .executeRequest(); + } + return httpOperation; + }).when(op).createHttpOperation(); // Mock the restOperation for the client. Mockito.doReturn(op) diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsHttpClientRequestExecutor.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsHttpClientRequestExecutor.java new file mode 100644 index 0000000000000..f3ba24ff9168b --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsHttpClientRequestExecutor.java @@ -0,0 +1,406 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.URL; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.mockito.Mockito; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.http.HttpClientConnection; +import org.apache.http.HttpEntityEnclosingRequest; +import org.apache.http.HttpException; +import org.apache.http.HttpRequest; +import org.apache.http.HttpResponse; +import org.apache.http.client.protocol.HttpClientContext; + +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_NETWORKING_LIBRARY; +import static org.apache.hadoop.fs.azurebfs.constants.HttpOperationType.APACHE_HTTP_CLIENT; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +public class ITestAbfsHttpClientRequestExecutor extends + AbstractAbfsIntegrationTest { + + public ITestAbfsHttpClientRequestExecutor() throws Exception { + super(); + } + + /** + * Verify the correctness of expect 100 continue handling by ApacheHttpClient + * with AbfsManagedHttpRequestExecutor. + */ + @Test + public void testExpect100ContinueHandling() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + Path path = new Path("/testExpect100ContinueHandling"); + + Configuration conf = new Configuration(fs.getConf()); + conf.set(FS_AZURE_NETWORKING_LIBRARY, APACHE_HTTP_CLIENT.toString()); + AzureBlobFileSystem fs2 = Mockito.spy( + (AzureBlobFileSystem) FileSystem.newInstance(conf)); + + AzureBlobFileSystemStore store = Mockito.spy(fs2.getAbfsStore()); + Mockito.doReturn(store).when(fs2).getAbfsStore(); + + AbfsClient client = Mockito.spy(store.getClient()); + Mockito.doReturn(client).when(store).getClient(); + + final int[] invocation = {0}; + Mockito.doAnswer(answer -> { + AbfsRestOperation op = Mockito.spy( + (AbfsRestOperation) answer.callRealMethod()); + final ConnectionInfo connectionInfo = new ConnectionInfo(); + + /* + * Assert that correct actions are taking place over the connection to handle + * expect100 assertions, failure and success. + * + * The test would make two calls to the server. The first two calls would + * be because of attempt to write in a non-existing file. The first call would have + * expect100 header, and the server would respond with 404. The second call would + * be a retry from AbfsOutputStream, and would not have expect100 header. + * + * The third call would be because of attempt to write in an existing file. The call + * would have expect100 assertion pass and would send the data. + * + * Following is the expectation from the first attempt: + * 1. sendHeaders should be called once. This is for expect100 assertion invocation. + * 2. 
receiveResponse should be called once. This is to receive expect100 assertion. + * 2. sendBody should not be called. + * + * Following is the expectation from the second attempt: + * 1. sendHeaders should be called once. This is not for expect100 assertion invocation. + * 2. sendBody should be called once. It will not have any expect100 assertion. + * Once headers are sent, body is sent. + * 3. receiveResponse should be called once. This is to receive the response from the server. + * + * Following is the expectation from the third attempt: + * 1. sendHeaders should be called once. This is for expect100 assertion invocation. + * 2. receiveResponse should be called. This is to receive the response from the server for expect100 assertion. + * 3. sendBody called as expect100 assertion is pass. + * 4. receiveResponse should be called. This is to receive the response from the server. + */ + mockHttpOperationBehavior(connectionInfo, op); + Mockito.doAnswer(executeAnswer -> { + invocation[0]++; + final Throwable throwable; + if (invocation[0] == 3) { + executeAnswer.callRealMethod(); + throwable = null; + } else { + throwable = intercept(IOException.class, () -> { + try { + executeAnswer.callRealMethod(); + } catch (IOException ex) { + //This exception is expected to be thrown by the op.execute() method. + throw ex; + } catch (Throwable interceptedAssertedThrowable) { + //Any other throwable thrown by Mockito's callRealMethod would be + //considered as an assertion error. + } + }); + } + /* + * The first call would be with expect headers, and expect 100 continue assertion has to happen which would fail. + * For expect100 assertion to happen, header IO happens before body IO. If assertion fails, no body IO happens. + * The second call would not be using expect headers. + * + * The third call would be with expect headers, and expect 100 continue assertion has to happen which would pass. 
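The sequencing asserted above follows standard HTTP expect-100-continue semantics: request headers go out first, and the entity body is only sent once the server answers 100 Continue rather than a final error status. A minimal, generic Apache HttpClient 4.x sketch of enabling that handshake (placeholder URL; this is not the ABFS request path, which goes through AbfsManagedHttpRequestExecutor):

import java.nio.charset.StandardCharsets;

import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.ByteArrayEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

public class Expect100ContinueSketch {
  public static void main(String[] args) throws Exception {
    // Ask the client to send "Expect: 100-continue" before transmitting the body.
    RequestConfig config = RequestConfig.custom()
        .setExpectContinueEnabled(true)
        .build();
    HttpPut put = new HttpPut("https://example.invalid/upload");
    put.setConfig(config);
    put.setEntity(new ByteArrayEntity("payload".getBytes(StandardCharsets.UTF_8)));
    try (CloseableHttpClient client = HttpClients.createDefault();
         CloseableHttpResponse response = client.execute(put)) {
      // If the server answers with a final error status instead of 100 Continue,
      // the body is skipped and only the error response is processed.
      System.out.println(response.getStatusLine());
    }
  }
}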
+ */ + if (invocation[0] == 1) { + Assertions.assertThat(connectionInfo.getSendHeaderInvocation()) + .isEqualTo(1); + Assertions.assertThat(connectionInfo.getSendBodyInvocation()) + .isEqualTo(0); + Assertions.assertThat(connectionInfo.getReceiveResponseInvocation()) + .isEqualTo(1); + Assertions.assertThat( + connectionInfo.getReceiveResponseBodyInvocation()) + .isEqualTo(1); + } + if (invocation[0] == 2) { + Assertions.assertThat(connectionInfo.getSendHeaderInvocation()) + .isEqualTo(1); + Assertions.assertThat(connectionInfo.getSendBodyInvocation()) + .isEqualTo(1); + Assertions.assertThat(connectionInfo.getReceiveResponseInvocation()) + .isEqualTo(1); + Assertions.assertThat( + connectionInfo.getReceiveResponseBodyInvocation()) + .isEqualTo(1); + } + if (invocation[0] == 3) { + Assertions.assertThat(connectionInfo.getSendHeaderInvocation()) + .isEqualTo(1); + Assertions.assertThat(connectionInfo.getSendBodyInvocation()) + .isEqualTo(1); + Assertions.assertThat(connectionInfo.getReceiveResponseInvocation()) + .isEqualTo(2); + Assertions.assertThat( + connectionInfo.getReceiveResponseBodyInvocation()) + .isEqualTo(1); + } + Assertions.assertThat(invocation[0]).isLessThanOrEqualTo(3); + if (throwable != null) { + throw throwable; + } + return null; + }).when(op).execute(Mockito.any(TracingContext.class)); + return op; + }).when(client).getAbfsRestOperation( + Mockito.any(AbfsRestOperationType.class), + Mockito.anyString(), + Mockito.any(URL.class), + Mockito.anyList(), + Mockito.any(byte[].class), + Mockito.anyInt(), + Mockito.anyInt(), + Mockito.nullable(String.class)); + + final OutputStream os = fs2.create(path); + fs.delete(path, true); + intercept(FileNotFoundException.class, () -> { + /* + * This would lead to two server calls. + * First call would be with expect headers, and expect 100 continue + * assertion has to happen which would fail with 404. + * Second call would be a retry from AbfsOutputStream, and would not be using expect headers. + */ + os.write(1); + os.close(); + }); + + final OutputStream os2 = fs2.create(path); + /* + * This would lead to third server call. This would be with expect headers, + * and the expect 100 continue assertion would pass. + */ + os2.write(1); + os2.close(); + } + + /** + * Creates a mock of HttpOperation that would be returned for AbfsRestOperation + * to use to execute server call. To make call via ApacheHttpClient, an object + * of {@link HttpClientContext} is required. This method would create a mock + * of HttpClientContext that would be able to register the actions taken on + * {@link HttpClientConnection} object. This would help in asserting the + * order of actions taken on the connection object for making an append call with + * expect100 header. 
+ */ + private void mockHttpOperationBehavior(final ConnectionInfo connectionInfo, + final AbfsRestOperation op) throws IOException { + Mockito.doAnswer(httpOpCreationAnswer -> { + AbfsAHCHttpOperation httpOperation = Mockito.spy( + (AbfsAHCHttpOperation) httpOpCreationAnswer.callRealMethod()); + + Mockito.doAnswer(createContextAnswer -> { + AbfsManagedHttpClientContext context = Mockito.spy( + (AbfsManagedHttpClientContext) createContextAnswer.callRealMethod()); + Mockito.doAnswer(connectionSpyIntercept -> { + return interceptedConn(connectionInfo, + (HttpClientConnection) connectionSpyIntercept.getArgument(0)); + }).when(context).interceptConnectionActivity(Mockito.any( + HttpClientConnection.class)); + return context; + }) + .when(httpOperation).getHttpClientContext(); + return httpOperation; + }).when(op).createHttpOperation(); + } + + private HttpClientConnection interceptedConn(final ConnectionInfo connectionInfo, + final HttpClientConnection connection) throws IOException, HttpException { + HttpClientConnection interceptedConn = Mockito.spy(connection); + + Mockito.doAnswer(answer -> { + connectionInfo.incrementSendHeaderInvocation(); + long start = System.currentTimeMillis(); + Object result = answer.callRealMethod(); + connectionInfo.addSendTime(System.currentTimeMillis() - start); + return result; + }).when(interceptedConn).sendRequestHeader(Mockito.any(HttpRequest.class)); + + Mockito.doAnswer(answer -> { + connectionInfo.incrementSendBodyInvocation(); + long start = System.currentTimeMillis(); + Object result = answer.callRealMethod(); + connectionInfo.addSendTime(System.currentTimeMillis() - start); + return result; + }).when(interceptedConn).sendRequestEntity(Mockito.any( + HttpEntityEnclosingRequest.class)); + + Mockito.doAnswer(answer -> { + connectionInfo.incrementReceiveResponseInvocation(); + long start = System.currentTimeMillis(); + Object result = answer.callRealMethod(); + connectionInfo.addReadTime(System.currentTimeMillis() - start); + return result; + }).when(interceptedConn).receiveResponseHeader(); + + Mockito.doAnswer(answer -> { + connectionInfo.incrementReceiveResponseBodyInvocation(); + long start = System.currentTimeMillis(); + Object result = answer.callRealMethod(); + connectionInfo.addReadTime(System.currentTimeMillis() - start); + return result; + }).when(interceptedConn).receiveResponseEntity(Mockito.any( + HttpResponse.class)); + return interceptedConn; + } + + @Test + public void testConnectionReadRecords() throws Exception { + AzureBlobFileSystem fs = getFileSystem(); + Path path = new Path("/testConnectionRecords"); + + Configuration conf = new Configuration(fs.getConf()); + conf.set(FS_AZURE_NETWORKING_LIBRARY, APACHE_HTTP_CLIENT.toString()); + AzureBlobFileSystem fs2 = Mockito.spy( + (AzureBlobFileSystem) FileSystem.newInstance(conf)); + + AzureBlobFileSystemStore store = Mockito.spy(fs2.getAbfsStore()); + Mockito.doReturn(store).when(fs2).getAbfsStore(); + + AbfsClient client = Mockito.spy(store.getClient()); + Mockito.doReturn(client).when(store).getClient(); + + try (OutputStream os = fs.create(path)) { + os.write(1); + } + + InputStream is = fs2.open(path); + + Mockito.doAnswer(answer -> { + AbfsRestOperation op = Mockito.spy( + (AbfsRestOperation) answer.callRealMethod()); + final ConnectionInfo connectionInfo = new ConnectionInfo(); + mockHttpOperationBehavior(connectionInfo, op); + Mockito.doAnswer(executeAnswer -> { + executeAnswer.callRealMethod(); + Assertions.assertThat(connectionInfo.getSendHeaderInvocation()) + .isEqualTo(1); + 
Assertions.assertThat(connectionInfo.getSendBodyInvocation()) + .isEqualTo(0); + Assertions.assertThat(connectionInfo.getReceiveResponseInvocation()) + .isEqualTo(1); + Assertions.assertThat(connectionInfo.getReceiveResponseBodyInvocation()) + .isEqualTo(1); + return null; + }).when(op).execute(Mockito.any(TracingContext.class)); + return op; + }).when(client).getAbfsRestOperation( + Mockito.any(AbfsRestOperationType.class), + Mockito.anyString(), + Mockito.any(URL.class), + Mockito.anyList(), + Mockito.any(byte[].class), + Mockito.anyInt(), + Mockito.anyInt(), + Mockito.nullable(String.class)); + + is.read(); + is.close(); + } + + private static class ConnectionInfo { + + private long connectTime; + + private long readTime; + + private long sendTime; + + private int sendHeaderInvocation; + + private int sendBodyInvocation; + + private int receiveResponseInvocation; + + private int receiveResponseBodyInvocation; + + private void incrementSendHeaderInvocation() { + sendHeaderInvocation++; + } + + private void incrementSendBodyInvocation() { + sendBodyInvocation++; + } + + private void incrementReceiveResponseInvocation() { + receiveResponseInvocation++; + } + + private void incrementReceiveResponseBodyInvocation() { + receiveResponseBodyInvocation++; + } + + private void addConnectTime(long connectTime) { + this.connectTime += connectTime; + } + + private void addReadTime(long readTime) { + this.readTime += readTime; + } + + private void addSendTime(long sendTime) { + this.sendTime += sendTime; + } + + private long getConnectTime() { + return connectTime; + } + + private long getReadTime() { + return readTime; + } + + private long getSendTime() { + return sendTime; + } + + private int getSendHeaderInvocation() { + return sendHeaderInvocation; + } + + private int getSendBodyInvocation() { + return sendBodyInvocation; + } + + private int getReceiveResponseInvocation() { + return receiveResponseInvocation; + } + + private int getReceiveResponseBodyInvocation() { + return receiveResponseBodyInvocation; + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java index 359846ce14dae..f0b6dc1c5aaea 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java @@ -23,9 +23,12 @@ import java.net.URI; import java.net.URISyntaxException; import java.net.URL; +import java.util.Arrays; import org.assertj.core.api.Assertions; import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; import org.mockito.Mockito; import org.apache.hadoop.conf.Configuration; @@ -36,6 +39,7 @@ import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; +import org.apache.hadoop.fs.azurebfs.constants.HttpOperationType; import org.apache.hadoop.test.LambdaTestUtils; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED; @@ -43,15 +47,36 @@ /** * Test create operation. 
*/ +@RunWith(Parameterized.class) public class ITestAbfsOutputStream extends AbstractAbfsIntegrationTest { private static final int TEST_EXECUTION_TIMEOUT = 2 * 60 * 1000; private static final String TEST_FILE_PATH = "testfile"; + @Parameterized.Parameter + public HttpOperationType httpOperationType; + + @Parameterized.Parameters(name = "{0}") + public static Iterable params() { + return Arrays.asList(new Object[][]{ + {HttpOperationType.JDK_HTTP_URL_CONNECTION}, + {HttpOperationType.APACHE_HTTP_CLIENT} + }); + } + + public ITestAbfsOutputStream() throws Exception { super(); } + @Override + public AzureBlobFileSystem getFileSystem(final Configuration configuration) + throws Exception { + Configuration conf = new Configuration(configuration); + conf.set(ConfigurationKeys.FS_AZURE_NETWORKING_LIBRARY, httpOperationType.toString()); + return (AzureBlobFileSystem) FileSystem.newInstance(conf); + } + @Test public void testMaxRequestsAndQueueCapacityDefaults() throws Exception { Configuration conf = getRawConfiguration(); @@ -158,8 +183,7 @@ public void testAbfsOutputStreamClosingFsBeforeStream() public void testExpect100ContinueFailureInAppend() throws Exception { Configuration configuration = new Configuration(getRawConfiguration()); configuration.set(FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED, "true"); - AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance( - configuration); + AzureBlobFileSystem fs = getFileSystem(configuration); Path path = new Path("/testFile"); AbfsOutputStream os = Mockito.spy( (AbfsOutputStream) fs.create(path).getWrappedStream()); @@ -175,17 +199,23 @@ public void testExpect100ContinueFailureInAppend() throws Exception { Assertions.assertThat(httpOpForAppendTest[0].getConnectionDisconnectedOnError()) .describedAs("First try from AbfsClient will have expect-100 " + "header and should fail with expect-100 error.").isTrue(); - Mockito.verify(httpOpForAppendTest[0], Mockito.times(0)) - .processConnHeadersAndInputStreams(Mockito.any(byte[].class), - Mockito.anyInt(), Mockito.anyInt()); + if (httpOpForAppendTest[0] instanceof AbfsJdkHttpOperation) { + Mockito.verify((AbfsJdkHttpOperation) httpOpForAppendTest[0], + Mockito.times(0)) + .processConnHeadersAndInputStreams(Mockito.any(byte[].class), + Mockito.anyInt(), Mockito.anyInt()); + } Assertions.assertThat(httpOpForAppendTest[1].getConnectionDisconnectedOnError()) .describedAs("The retried operation from AbfsClient should not " + "fail with expect-100 error. 
The retried operation does not have" + "expect-100 header.").isFalse(); - Mockito.verify(httpOpForAppendTest[1], Mockito.times(1)) - .processConnHeadersAndInputStreams(Mockito.any(byte[].class), - Mockito.anyInt(), Mockito.anyInt()); + if (httpOpForAppendTest[1] instanceof AbfsJdkHttpOperation) { + Mockito.verify((AbfsJdkHttpOperation) httpOpForAppendTest[1], + Mockito.times(1)) + .processConnHeadersAndInputStreams(Mockito.any(byte[].class), + Mockito.anyInt(), Mockito.anyInt()); + } } private void mockSetupForAppend(final AbfsHttpOperation[] httpOpForAppendTest, diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java index 41cbc3be3bc08..ec2c85f61d743 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.io.OutputStream; -import java.net.HttpURLConnection; import java.net.ProtocolException; import java.net.URL; import java.util.Arrays; @@ -34,16 +33,21 @@ import org.mockito.Mockito; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; import org.apache.hadoop.fs.azurebfs.TestAbfsConfigurationFieldsValidation; +import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.constants.HttpOperationType; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsApacheHttpExpect100Exception; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; +import org.apache.http.HttpResponse; import static java.net.HttpURLConnection.HTTP_NOT_FOUND; import static java.net.HttpURLConnection.HTTP_OK; @@ -53,17 +57,16 @@ import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; -import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_HTTP_CONNECTION_TIMEOUT; -import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_HTTP_READ_TIMEOUT; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_HTTP_METHOD_OVERRIDE; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_ACTION; import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_POSITION; import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_ABFS_ACCOUNT_NAME; import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_CONFIGURATION_FILE_NAME; +import static 
org.apache.hadoop.fs.azurebfs.constants.HttpOperationType.APACHE_HTTP_CLIENT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpOperationType.JDK_HTTP_URL_CONNECTION; import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.EGRESS_OVER_ACCOUNT_LIMIT; import static org.apache.hadoop.test.LambdaTestUtils.intercept; -import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; @@ -98,6 +101,9 @@ public enum ErrorType {OUTPUTSTREAM, WRITE}; @Parameterized.Parameter(3) public ErrorType errorType; + @Parameterized.Parameter(4) + public HttpOperationType httpOperationType; + // The intercept. private AbfsThrottlingIntercept intercept; @@ -108,15 +114,26 @@ public enum ErrorType {OUTPUTSTREAM, WRITE}; HTTP_EXPECTATION_FAILED = 417, HTTP_ERROR = 0. */ - @Parameterized.Parameters(name = "expect={0}-code={1}-ErrorType={3}") + @Parameterized.Parameters(name = "expect={0}-code={1}-ErrorType={3}=NetLib={4}") public static Iterable params() { return Arrays.asList(new Object[][]{ - {true, HTTP_OK, "OK", ErrorType.WRITE}, - {false, HTTP_OK, "OK", ErrorType.WRITE}, - {true, HTTP_UNAVAILABLE, "ServerBusy", ErrorType.OUTPUTSTREAM}, - {true, HTTP_NOT_FOUND, "Resource Not Found", ErrorType.OUTPUTSTREAM}, - {true, HTTP_EXPECTATION_FAILED, "Expectation Failed", ErrorType.OUTPUTSTREAM}, - {true, HTTP_ERROR, "Error", ErrorType.OUTPUTSTREAM} + {true, HTTP_OK, "OK", ErrorType.WRITE, JDK_HTTP_URL_CONNECTION}, + {true, HTTP_OK, "OK", ErrorType.WRITE, APACHE_HTTP_CLIENT}, + + {false, HTTP_OK, "OK", ErrorType.WRITE, JDK_HTTP_URL_CONNECTION}, + {false, HTTP_OK, "OK", ErrorType.WRITE, APACHE_HTTP_CLIENT}, + + {true, HTTP_UNAVAILABLE, "ServerBusy", ErrorType.OUTPUTSTREAM, JDK_HTTP_URL_CONNECTION}, + {true, HTTP_UNAVAILABLE, "ServerBusy", ErrorType.OUTPUTSTREAM, APACHE_HTTP_CLIENT}, + + {true, HTTP_NOT_FOUND, "Resource Not Found", ErrorType.OUTPUTSTREAM, JDK_HTTP_URL_CONNECTION}, + {true, HTTP_NOT_FOUND, "Resource Not Found", ErrorType.OUTPUTSTREAM, APACHE_HTTP_CLIENT}, + + {true, HTTP_EXPECTATION_FAILED, "Expectation Failed", ErrorType.OUTPUTSTREAM, JDK_HTTP_URL_CONNECTION}, + {true, HTTP_EXPECTATION_FAILED, "Expectation Failed", ErrorType.OUTPUTSTREAM, APACHE_HTTP_CLIENT}, + + {true, HTTP_ERROR, "Error", ErrorType.OUTPUTSTREAM, JDK_HTTP_URL_CONNECTION}, + {true, HTTP_ERROR, "Error", ErrorType.OUTPUTSTREAM, APACHE_HTTP_CLIENT} }); } @@ -135,15 +152,23 @@ private byte[] getRandomBytesArray(int length) { return b; } + @Override + public AzureBlobFileSystem getFileSystem(final Configuration configuration) + throws Exception { + Configuration conf = new Configuration(configuration); + conf.set(ConfigurationKeys.FS_AZURE_NETWORKING_LIBRARY, httpOperationType.toString()); + return (AzureBlobFileSystem) FileSystem.newInstance(conf); + } + /** * Gives the AbfsRestOperation. * @return abfsRestOperation. */ private AbfsRestOperation getRestOperation() throws Exception { // Get the filesystem. 
- final AzureBlobFileSystem fs = getFileSystem(); + final AzureBlobFileSystem fs = getFileSystem(getRawConfiguration()); - final Configuration configuration = new Configuration(); + final Configuration configuration = fs.getConf(); configuration.addResource(TEST_CONFIGURATION_FILE_NAME); AbfsClient abfsClient = fs.getAbfsStore().getClient(); @@ -196,36 +221,38 @@ private AbfsRestOperation getRestOperation() throws Exception { URL url = testClient.createRequestUrl(finalTestPath, abfsUriQueryBuilder.toString()); // Create a mock of the AbfsRestOperation to set the urlConnection in the corresponding httpOperation. - AbfsRestOperation op = Mockito.spy(new AbfsRestOperation( + final AbfsRestOperation op = Mockito.spy(new AbfsRestOperation( AbfsRestOperationType.Append, testClient, HTTP_METHOD_PUT, url, requestHeaders, buffer, appendRequestParameters.getoffset(), - appendRequestParameters.getLength(), null)); - - AbfsHttpOperation abfsHttpOperation = Mockito.spy(new AbfsHttpOperation(url, HTTP_METHOD_PUT, requestHeaders, - DEFAULT_HTTP_CONNECTION_TIMEOUT, DEFAULT_HTTP_READ_TIMEOUT)); + appendRequestParameters.getLength(), null, abfsConfig)); + + Mockito.doAnswer(answer -> { + AbfsHttpOperation httpOperation = Mockito.spy( + (AbfsHttpOperation) answer.callRealMethod()); + mockHttpOperation(appendRequestParameters, buffer, url, httpOperation); + Mockito.doReturn(httpOperation).when(op).getResult(); + return httpOperation; + }).when(op).createHttpOperation(); + return op; + } + private void mockHttpOperation(final AppendRequestParameters appendRequestParameters, + final byte[] buffer, + final URL url, + final AbfsHttpOperation httpOperation) throws IOException { // Sets the expect request property if expect header is enabled. if (expectHeaderEnabled) { Mockito.doReturn(HUNDRED_CONTINUE) - .when(abfsHttpOperation) + .when(httpOperation) .getConnProperty(EXPECT); } - HttpURLConnection urlConnection = mock(HttpURLConnection.class); - Mockito.doNothing().when(urlConnection).setRequestProperty(Mockito + Mockito.doNothing().when(httpOperation).setRequestProperty(Mockito .any(), Mockito.any()); - Mockito.doReturn(HTTP_METHOD_PUT).when(urlConnection).getRequestMethod(); - Mockito.doReturn(url).when(urlConnection).getURL(); - Mockito.doReturn(urlConnection).when(abfsHttpOperation).getConnection(); - - Mockito.doNothing().when(abfsHttpOperation).setRequestProperty(Mockito - .any(), Mockito.any()); - Mockito.doReturn(url).when(abfsHttpOperation).getConnUrl(); - Mockito.doReturn(HTTP_METHOD_PUT).when(abfsHttpOperation).getConnRequestMethod(); switch (errorType) { case OUTPUTSTREAM: @@ -233,28 +260,51 @@ private AbfsRestOperation getRestOperation() throws Exception { // enabled, it returns back to processResponse and hence we have // mocked the response code and the response message to check different // behaviour based on response code. 
- Mockito.doReturn(responseCode).when(abfsHttpOperation).getConnResponseCode(); + + Mockito.doReturn(responseCode).when(httpOperation).getStatusCode(); if (responseCode == HTTP_UNAVAILABLE) { Mockito.doReturn(EGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage()) - .when(abfsHttpOperation) + .when(httpOperation) .getStorageErrorMessage(); } Mockito.doReturn(responseMessage) - .when(abfsHttpOperation) + .when(httpOperation) .getConnResponseMessage(); - Mockito.doThrow(new ProtocolException(EXPECT_100_JDK_ERROR)) - .when(abfsHttpOperation) - .getConnOutputStream(); + if (httpOperation instanceof AbfsJdkHttpOperation) { + Mockito.doThrow(new ProtocolException(EXPECT_100_JDK_ERROR)) + .when((AbfsJdkHttpOperation) httpOperation) + .getConnOutputStream(); + } + if (httpOperation instanceof AbfsAHCHttpOperation) { + Mockito.doNothing() + .when((AbfsAHCHttpOperation) httpOperation) + .parseResponseHeaderAndBody(Mockito.any(byte[].class), + Mockito.anyInt(), Mockito.anyInt()); + Mockito.doReturn(HTTP_NOT_FOUND) + .when((AbfsAHCHttpOperation) httpOperation) + .parseStatusCode(Mockito.nullable( + HttpResponse.class)); + Mockito.doThrow( + new AbfsApacheHttpExpect100Exception(Mockito.mock(HttpResponse.class))) + .when((AbfsAHCHttpOperation) httpOperation).executeRequest(); + } break; case WRITE: // If write() throws IOException and Expect Header is // enabled or not, it should throw back the exception. + if (httpOperation instanceof AbfsAHCHttpOperation) { + Mockito.doThrow(new IOException()) + .when((AbfsAHCHttpOperation) httpOperation).executeRequest(); + return; + } OutputStream outputStream = Mockito.spy(new OutputStream() { @Override public void write(final int i) throws IOException { } }); - Mockito.doReturn(outputStream).when(abfsHttpOperation).getConnOutputStream(); + Mockito.doReturn(outputStream) + .when((AbfsJdkHttpOperation) httpOperation) + .getConnOutputStream(); Mockito.doThrow(new IOException()) .when(outputStream) .write(buffer, appendRequestParameters.getoffset(), @@ -263,12 +313,6 @@ public void write(final int i) throws IOException { default: break; } - - // Sets the httpOperation for the rest operation. - Mockito.doReturn(abfsHttpOperation) - .when(op) - .createHttpOperation(); - return op; } void assertTraceContextState(int retryCount, int assertRetryCount, int bytesSent, int assertBytesSent, @@ -295,8 +339,6 @@ void assertTraceContextState(int retryCount, int assertRetryCount, int bytesSent public void testExpectHundredContinue() throws Exception { // Gets the AbfsRestOperation. AbfsRestOperation op = getRestOperation(); - AbfsHttpOperation httpOperation = op.createHttpOperation(); - TracingContext tracingContext = Mockito.spy(new TracingContext("abcd", "abcde", FSOperationType.APPEND, TracingHeaderFormat.ALL_ID_FORMAT, null)); @@ -311,7 +353,7 @@ public void testExpectHundredContinue() throws Exception { () -> op.execute(tracingContext)); // Asserting update of metrics and retries. - assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, httpOperation.getBytesSent(), BUFFER_LENGTH, + assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, op.getResult().getBytesSent(), BUFFER_LENGTH, 0, 0); break; case OUTPUTSTREAM: @@ -322,8 +364,8 @@ public void testExpectHundredContinue() throws Exception { () -> op.execute(tracingContext)); // Asserting update of metrics and retries. 
- assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, httpOperation.getBytesSent(), ZERO, - httpOperation.getExpectedBytesToBeSent(), BUFFER_LENGTH); + assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, op.getResult().getBytesSent(), ZERO, + op.getResult().getExpectedBytesToBeSent(), BUFFER_LENGTH); // Verifies that update Metrics call is made for throttle case and for the first without retry + // for the retried cases as well. @@ -336,7 +378,7 @@ public void testExpectHundredContinue() throws Exception { () -> op.execute(tracingContext)); // Asserting update of metrics and retries. - assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, httpOperation.getBytesSent(), + assertTraceContextState(tracingContext.getRetryCount(), REDUCED_RETRY_COUNT, op.getResult().getBytesSent(), ZERO, 0, 0); // Verifies that update Metrics call is made for ErrorType case and for the first without retry + diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestApacheClientConnectionPool.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestApacheClientConnectionPool.java new file mode 100644 index 0000000000000..d864fc8c1d379 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestApacheClientConnectionPool.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ClosedIOException; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsDriverException; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.KEEP_ALIVE_CACHE_CLOSED; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_NETWORKING_LIBRARY; +import static org.apache.hadoop.fs.azurebfs.constants.HttpOperationType.APACHE_HTTP_CLIENT; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.test.LambdaTestUtils.verifyCause; + +/** + * This test class tests the exception handling in ABFS thrown by the + * {@link KeepAliveCache}. 
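+ * Once the cache is closed, request execution is expected to surface an
+ * {@link AbfsDriverException} carrying the KEEP_ALIVE_CACHE_CLOSED message, with a
+ * {@link ClosedIOException} as its cause (exercised by testKacIsClosed below).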
+ */ +public class ITestApacheClientConnectionPool extends + AbstractAbfsIntegrationTest { + + public ITestApacheClientConnectionPool() throws Exception { + super(); + } + + @Test + public void testKacIsClosed() throws Throwable { + Configuration configuration = new Configuration(getRawConfiguration()); + configuration.set(FS_AZURE_NETWORKING_LIBRARY, APACHE_HTTP_CLIENT.name()); + try (AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance( + configuration)) { + KeepAliveCache kac = fs.getAbfsStore().getClient().getKeepAliveCache(); + kac.close(); + AbfsDriverException ex = intercept(AbfsDriverException.class, + KEEP_ALIVE_CACHE_CLOSED, () -> { + fs.create(new Path("/test")); + }); + verifyCause(ClosedIOException.class, ex); + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsPerfTracker.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsPerfTracker.java index b7fb892362b4f..f45a333fae1f7 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsPerfTracker.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsPerfTracker.java @@ -76,8 +76,8 @@ public void verifyDisablingOfTracker() throws Exception { try (AbfsPerfInfo tracker = new AbfsPerfInfo(abfsPerfTracker, "disablingCaller", "disablingCallee")) { - AbfsHttpOperation op = new AbfsHttpOperation(url, "GET", new ArrayList<>(), - DEFAULT_HTTP_CONNECTION_TIMEOUT, DEFAULT_HTTP_READ_TIMEOUT); + AbfsJdkHttpOperation op = new AbfsJdkHttpOperation(url, "GET", new ArrayList<>(), + Duration.ofMillis(DEFAULT_HTTP_CONNECTION_TIMEOUT), Duration.ofMillis(DEFAULT_HTTP_READ_TIMEOUT)); tracker.registerResult(op).registerSuccess(true); } @@ -95,8 +95,8 @@ public void verifyTrackingForSingletonLatencyRecords() throws Exception { assertThat(latencyDetails).describedAs("AbfsPerfTracker should be empty").isNull(); List> tasks = new ArrayList<>(); - AbfsHttpOperation httpOperation = new AbfsHttpOperation(url, "GET", new ArrayList<>(), - DEFAULT_HTTP_CONNECTION_TIMEOUT, DEFAULT_HTTP_READ_TIMEOUT); + AbfsJdkHttpOperation httpOperation = new AbfsJdkHttpOperation(url, "GET", new ArrayList<>(), + Duration.ofMillis(DEFAULT_HTTP_CONNECTION_TIMEOUT), Duration.ofMillis(DEFAULT_HTTP_READ_TIMEOUT)); for (int i = 0; i < numTasks; i++) { tasks.add(() -> { @@ -135,8 +135,8 @@ public void verifyTrackingForAggregateLatencyRecords() throws Exception { assertThat(latencyDetails).describedAs("AbfsPerfTracker should be empty").isNull(); List> tasks = new ArrayList<>(); - AbfsHttpOperation httpOperation = new AbfsHttpOperation(url, "GET", new ArrayList<>(), - DEFAULT_HTTP_CONNECTION_TIMEOUT, DEFAULT_HTTP_READ_TIMEOUT); + AbfsJdkHttpOperation httpOperation = new AbfsJdkHttpOperation(url, "GET", new ArrayList<>(), + Duration.ofMillis(DEFAULT_HTTP_CONNECTION_TIMEOUT), Duration.ofMillis(DEFAULT_HTTP_READ_TIMEOUT)); for (int i = 0; i < numTasks; i++) { tasks.add(() -> { @@ -175,8 +175,8 @@ public void verifyRecordingSingletonLatencyIsCheapWhenDisabled() throws Exceptio long aggregateLatency = 0; AbfsPerfTracker abfsPerfTracker = new AbfsPerfTracker(accountName, filesystemName, false); List> tasks = new ArrayList<>(); - final AbfsHttpOperation httpOperation = new AbfsHttpOperation(url, "GET", new ArrayList<>(), - DEFAULT_HTTP_CONNECTION_TIMEOUT, DEFAULT_HTTP_READ_TIMEOUT); + final AbfsJdkHttpOperation httpOperation = new AbfsJdkHttpOperation(url, "GET", new ArrayList<>(), + 
Duration.ofMillis(DEFAULT_HTTP_CONNECTION_TIMEOUT), Duration.ofMillis(DEFAULT_HTTP_READ_TIMEOUT)); for (int i = 0; i < numTasks; i++) { tasks.add(() -> { @@ -211,8 +211,8 @@ public void verifyRecordingAggregateLatencyIsCheapWhenDisabled() throws Exceptio long aggregateLatency = 0; AbfsPerfTracker abfsPerfTracker = new AbfsPerfTracker(accountName, filesystemName, false); List> tasks = new ArrayList<>(); - final AbfsHttpOperation httpOperation = new AbfsHttpOperation(url, "GET", new ArrayList<>(), - DEFAULT_HTTP_CONNECTION_TIMEOUT, DEFAULT_HTTP_READ_TIMEOUT); + final AbfsJdkHttpOperation httpOperation = new AbfsJdkHttpOperation(url, "GET", new ArrayList<>(), + Duration.ofMillis(DEFAULT_HTTP_CONNECTION_TIMEOUT), Duration.ofMillis(DEFAULT_HTTP_READ_TIMEOUT)); for (int i = 0; i < numTasks; i++) { tasks.add(() -> { @@ -276,8 +276,8 @@ public void verifyRecordingSingletonLatencyIsCheapWhenEnabled() throws Exception long aggregateLatency = 0; AbfsPerfTracker abfsPerfTracker = new AbfsPerfTracker(accountName, filesystemName, true); List> tasks = new ArrayList<>(); - final AbfsHttpOperation httpOperation = new AbfsHttpOperation(url, "GET", new ArrayList<>(), - DEFAULT_HTTP_CONNECTION_TIMEOUT, DEFAULT_HTTP_READ_TIMEOUT); + final AbfsJdkHttpOperation httpOperation = new AbfsJdkHttpOperation(url, "GET", new ArrayList<>(), + Duration.ofMillis(DEFAULT_HTTP_CONNECTION_TIMEOUT), Duration.ofMillis(DEFAULT_HTTP_READ_TIMEOUT)); for (int i = 0; i < numTasks; i++) { tasks.add(() -> { @@ -311,8 +311,8 @@ public void verifyRecordingAggregateLatencyIsCheapWhenEnabled() throws Exception long aggregateLatency = 0; AbfsPerfTracker abfsPerfTracker = new AbfsPerfTracker(accountName, filesystemName, true); List> tasks = new ArrayList<>(); - final AbfsHttpOperation httpOperation = new AbfsHttpOperation(url, "GET", new ArrayList<>(), - DEFAULT_HTTP_CONNECTION_TIMEOUT, DEFAULT_HTTP_READ_TIMEOUT); + final AbfsJdkHttpOperation httpOperation = new AbfsJdkHttpOperation(url, "GET", new ArrayList<>(), + Duration.ofMillis(DEFAULT_HTTP_CONNECTION_TIMEOUT), Duration.ofMillis(DEFAULT_HTTP_READ_TIMEOUT)); for (int i = 0; i < numTasks; i++) { tasks.add(() -> { @@ -372,8 +372,8 @@ public void verifyNoExceptionOnInvalidInput() throws Exception { Instant testInstant = Instant.now(); AbfsPerfTracker abfsPerfTrackerDisabled = new AbfsPerfTracker(accountName, filesystemName, false); AbfsPerfTracker abfsPerfTrackerEnabled = new AbfsPerfTracker(accountName, filesystemName, true); - final AbfsHttpOperation httpOperation = new AbfsHttpOperation(url, "GET", new ArrayList(), - DEFAULT_HTTP_CONNECTION_TIMEOUT, DEFAULT_HTTP_READ_TIMEOUT); + final AbfsJdkHttpOperation httpOperation = new AbfsJdkHttpOperation(url, "GET", new ArrayList(), + Duration.ofMillis(DEFAULT_HTTP_CONNECTION_TIMEOUT), Duration.ofMillis(DEFAULT_HTTP_READ_TIMEOUT)); verifyNoException(abfsPerfTrackerDisabled); verifyNoException(abfsPerfTrackerEnabled); @@ -381,8 +381,8 @@ public void verifyNoExceptionOnInvalidInput() throws Exception { private void verifyNoException(AbfsPerfTracker abfsPerfTracker) throws Exception { Instant testInstant = Instant.now(); - final AbfsHttpOperation httpOperation = new AbfsHttpOperation(url, "GET", new ArrayList(), - DEFAULT_HTTP_CONNECTION_TIMEOUT, DEFAULT_HTTP_READ_TIMEOUT); + final AbfsJdkHttpOperation httpOperation = new AbfsJdkHttpOperation(url, "GET", new ArrayList(), + Duration.ofMillis(DEFAULT_HTTP_CONNECTION_TIMEOUT), Duration.ofMillis(DEFAULT_HTTP_READ_TIMEOUT)); try ( AbfsPerfInfo tracker01 = new AbfsPerfInfo(abfsPerfTracker, null, null); 
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java index 1c53e62dd58bc..741459254d400 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRenameRetryRecovery.java @@ -99,14 +99,14 @@ public void testRenameFailuresDueToIncompleteMetadata() throws Exception { // SuccessFul Result. AbfsRestOperation successOp = new AbfsRestOperation(AbfsRestOperationType.RenamePath, mockClient, - HTTP_METHOD_PUT, null, null); + HTTP_METHOD_PUT, null, null, mockClient.getAbfsConfiguration()); AbfsClientRenameResult successResult = mock(AbfsClientRenameResult.class); doReturn(successOp).when(successResult).getOp(); when(successResult.isIncompleteMetadataState()).thenReturn(false); // Failed Result. AbfsRestOperation failedOp = new AbfsRestOperation(AbfsRestOperationType.RenamePath, mockClient, - HTTP_METHOD_PUT, null, null); + HTTP_METHOD_PUT, null, null, mockClient.getAbfsConfiguration()); AbfsClientRenameResult recoveredMetaDataIncompleteResult = mock(AbfsClientRenameResult.class); @@ -167,12 +167,17 @@ AbfsClient getMockAbfsClient() throws IOException { Mockito.doReturn(spiedConf).when(spyClient).getAbfsConfiguration(); Mockito.doAnswer(answer -> { - AbfsRestOperation op = new AbfsRestOperation(AbfsRestOperationType.RenamePath, - spyClient, HTTP_METHOD_PUT, answer.getArgument(0), answer.getArgument(1)); - AbfsRestOperation spiedOp = Mockito.spy(op); - addSpyBehavior(spiedOp, op, spyClient); - return spiedOp; - }).when(spyClient).createRenameRestOperation(Mockito.any(URL.class), anyList()); + AbfsRestOperation op = new AbfsRestOperation( + AbfsRestOperationType.RenamePath, + spyClient, HTTP_METHOD_PUT, answer.getArgument(0), + answer.getArgument(1), + spyClient.getAbfsConfiguration()); + AbfsRestOperation spiedOp = Mockito.spy(op); + addSpyBehavior(spiedOp, op, spyClient); + return spiedOp; + }) + .when(spyClient) + .createRenameRestOperation(Mockito.any(URL.class), anyList()); return spyClient; @@ -195,7 +200,7 @@ private void addSpyBehavior(final AbfsRestOperation spiedRestOp, AbfsHttpOperation normalOp1 = normalRestOp.createHttpOperation(); executeThenFail(client, normalRestOp, failingOperation, normalOp1); AbfsHttpOperation normalOp2 = normalRestOp.createHttpOperation(); - normalOp2.getConnection().setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, + normalOp2.setRequestProperty(HttpHeaderConfigurations.AUTHORIZATION, client.getAccessToken()); Mockito.doReturn(failingOperation).doReturn(normalOp2).when(spiedRestOp).createHttpOperation(); @@ -221,14 +226,14 @@ private void executeThenFail(final AbfsClient client, final int offset = answer.getArgument(1); final int length = answer.getArgument(2); normalRestOp.signRequest(normalOp, length); - normalOp.sendRequest(buffer, offset, length); + normalOp.sendPayload(buffer, offset, length); normalOp.processResponse(buffer, offset, length); LOG.info("Actual outcome is {} \"{}\" \"{}\"; injecting failure", normalOp.getStatusCode(), normalOp.getStorageErrorCode(), normalOp.getStorageErrorMessage()); throw new SocketException("connection-reset"); - }).when(failingOperation).sendRequest(Mockito.nullable(byte[].class), + }).when(failingOperation).sendPayload(Mockito.nullable(byte[].class), 
Mockito.nullable(int.class), Mockito.nullable(int.class)); } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java index 683528b9c54d1..e99d05d41ca60 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperation.java @@ -24,8 +24,8 @@ import org.apache.hadoop.fs.azurebfs.utils.MetricFormat; import org.junit.Test; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_DELETE; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; import static org.apache.hadoop.fs.azurebfs.services.AbfsRestOperationType.DeletePath; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_METRIC_FORMAT; import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; import java.util.ArrayList; @@ -58,8 +58,9 @@ public void testBackoffRetryMetrics() throws Exception { // Get an instance of AbfsClient and AbfsRestOperation. AbfsClient testClient = super.getAbfsClient(super.getAbfsStore(fs)); AbfsRestOperation op = ITestAbfsClient.getRestOp( - DeletePath, testClient, HTTP_METHOD_DELETE, - ITestAbfsClient.getTestUrl(testClient, "/NonExistingPath"), ITestAbfsClient.getTestRequestHeaders(testClient)); + DeletePath, testClient, HTTP_METHOD_DELETE, + ITestAbfsClient.getTestUrl(testClient, "/NonExistingPath"), + ITestAbfsClient.getTestRequestHeaders(testClient), getConfiguration()); // Mock retry counts and status code. 
ArrayList retryCounts = new ArrayList<>(Arrays.asList("35", "28", "31", "45", "10", "2", "9")); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java index 966b34f872a8b..8ee3a71f358cb 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestAbfsRestOperationMockFailures.java @@ -28,6 +28,7 @@ import org.mockito.Mockito; import org.mockito.stubbing.Stubber; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; @@ -207,7 +208,8 @@ public void testRetryPolicyWithDifferentFailureReasons() throws Exception { abfsClient, "PUT", null, - new ArrayList<>() + new ArrayList<>(), + Mockito.mock(AbfsConfiguration.class) )); AbfsHttpOperation httpOperation = Mockito.mock(AbfsHttpOperation.class); @@ -224,6 +226,8 @@ public void testRetryPolicyWithDifferentFailureReasons() throws Exception { Mockito.doReturn("").when(httpOperation).getStorageErrorMessage(); Mockito.doReturn("").when(httpOperation).getStorageErrorCode(); Mockito.doReturn("HEAD").when(httpOperation).getMethod(); + Mockito.doReturn("").when(httpOperation).getMaskedUrl(); + Mockito.doReturn("").when(httpOperation).getRequestId(); Mockito.doReturn(EGRESS_OVER_ACCOUNT_LIMIT.getErrorMessage()).when(httpOperation).getStorageErrorMessage(); Mockito.doReturn(tracingContext).when(abfsRestOperation).createNewTracingContext(any()); @@ -270,7 +274,8 @@ public void testRetryPolicyWithDifferentFailureReasons() throws Exception { // Assert that intercept.updateMetrics was called 2 times. Both the retried request fails with EGR. Mockito.verify(intercept, Mockito.times(2)) - .updateMetrics(nullable(AbfsRestOperationType.class), nullable(AbfsHttpOperation.class)); + .updateMetrics(nullable(AbfsRestOperationType.class), nullable( + AbfsHttpOperation.class)); } private void testClientRequestIdForStatusRetry(int status, @@ -292,7 +297,8 @@ private void testClientRequestIdForStatusRetry(int status, abfsClient, "PUT", null, - new ArrayList<>() + new ArrayList<>(), + Mockito.mock(AbfsConfiguration.class) )); AbfsHttpOperation httpOperation = Mockito.mock(AbfsHttpOperation.class); @@ -357,7 +363,8 @@ private void testClientRequestIdForTimeoutRetry(Exception[] exceptions, abfsClient, "PUT", null, - new ArrayList<>() + new ArrayList<>(), + Mockito.mock(AbfsConfiguration.class) )); AbfsHttpOperation httpOperation = Mockito.mock(AbfsHttpOperation.class); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestApacheClientConnectionPool.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestApacheClientConnectionPool.java new file mode 100644 index 0000000000000..1e97bbca5ed5f --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestApacheClientConnectionPool.java @@ -0,0 +1,290 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.mockito.Mockito; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ClosedIOException; +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsTestWithTimeout; + +import org.apache.http.HttpClientConnection; + +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_MAX_CONN_SYS_PROP; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.KEEP_ALIVE_CACHE_CLOSED; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_APACHE_HTTP_CLIENT_IDLE_CONNECTION_TTL; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_APACHE_HTTP_CLIENT_MAX_CACHE_CONNECTION_SIZE; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_HTTP_CLIENT_CONN_MAX_CACHED_CONNECTIONS; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_HTTP_CLIENT_CONN_MAX_IDLE_TIME; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.HUNDRED; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +public class TestApacheClientConnectionPool extends + AbstractAbfsTestWithTimeout { + + public TestApacheClientConnectionPool() throws Exception { + super(); + } + + @Override + protected int getTestTimeoutMillis() { + return (int) DEFAULT_HTTP_CLIENT_CONN_MAX_IDLE_TIME * 4; + } + + @Test + public void testBasicPool() throws Exception { + System.clearProperty(HTTP_MAX_CONN_SYS_PROP); + validatePoolSize(DEFAULT_HTTP_CLIENT_CONN_MAX_CACHED_CONNECTIONS); + } + + @Test + public void testSysPropAppliedPool() throws Exception { + final String customPoolSize = "10"; + System.setProperty(HTTP_MAX_CONN_SYS_PROP, customPoolSize); + validatePoolSize(Integer.parseInt(customPoolSize)); + } + + @Test + public void testPoolWithZeroSysProp() throws Exception { + final String customPoolSize = "0"; + System.setProperty(HTTP_MAX_CONN_SYS_PROP, customPoolSize); + validatePoolSize(DEFAULT_HTTP_CLIENT_CONN_MAX_CACHED_CONNECTIONS); + } + + @Test + public void testEmptySizePool() throws Exception { + Configuration configuration = new Configuration(); + configuration.set(FS_AZURE_APACHE_HTTP_CLIENT_MAX_CACHE_CONNECTION_SIZE, + "0"); + AbfsConfiguration abfsConfiguration = new AbfsConfiguration(configuration, + EMPTY_STRING); + try (KeepAliveCache keepAliveCache = new KeepAliveCache( + abfsConfiguration)) { + assertCachePutFail(keepAliveCache, + Mockito.mock(HttpClientConnection.class)); + assertCacheGetIsNull(keepAliveCache); + } + } + + private void assertCacheGetIsNull(final KeepAliveCache keepAliveCache) + throws 
IOException { + Assertions.assertThat(keepAliveCache.get()) + .describedAs("cache.get()") + .isNull(); + } + + private void assertCacheGetIsNonNull(final KeepAliveCache keepAliveCache) + throws IOException { + Assertions.assertThat(keepAliveCache.get()) + .describedAs("cache.get()") + .isNotNull(); + } + + private void assertCachePutFail(final KeepAliveCache keepAliveCache, + final HttpClientConnection mock) { + Assertions.assertThat(keepAliveCache.put(mock)) + .describedAs("cache.put()") + .isFalse(); + } + + private void assertCachePutSuccess(final KeepAliveCache keepAliveCache, + final HttpClientConnection connections) { + Assertions.assertThat(keepAliveCache.put(connections)) + .describedAs("cache.put()") + .isTrue(); + } + + private void validatePoolSize(int size) throws Exception { + try (KeepAliveCache keepAliveCache = new KeepAliveCache( + new AbfsConfiguration(new Configuration(), EMPTY_STRING))) { + keepAliveCache.clear(); + final HttpClientConnection[] connections = new HttpClientConnection[size + * 2]; + + for (int i = 0; i < size * 2; i++) { + connections[i] = Mockito.mock(HttpClientConnection.class); + } + + for (int i = 0; i < size; i++) { + assertCachePutSuccess(keepAliveCache, connections[i]); + Mockito.verify(connections[i], Mockito.times(0)).close(); + } + + for (int i = size; i < size * 2; i++) { + assertCachePutSuccess(keepAliveCache, connections[i]); + Mockito.verify(connections[i - size], Mockito.times(1)).close(); + } + + for (int i = 0; i < size * 2; i++) { + if (i < size) { + assertCacheGetIsNonNull(keepAliveCache); + } else { + assertCacheGetIsNull(keepAliveCache); + } + } + System.clearProperty(HTTP_MAX_CONN_SYS_PROP); + } + } + + @Test + public void testKeepAliveCache() throws Exception { + try (KeepAliveCache keepAliveCache = new KeepAliveCache( + new AbfsConfiguration(new Configuration(), EMPTY_STRING))) { + keepAliveCache.clear(); + HttpClientConnection connection = Mockito.mock( + HttpClientConnection.class); + + keepAliveCache.put(connection); + + assertCacheGetIsNonNull(keepAliveCache); + } + } + + @Test + public void testKeepAliveCacheCleanup() throws Exception { + Configuration configuration = new Configuration(); + configuration.set(FS_AZURE_APACHE_HTTP_CLIENT_IDLE_CONNECTION_TTL, + HUNDRED + EMPTY_STRING); + try (KeepAliveCache keepAliveCache = new KeepAliveCache( + new AbfsConfiguration(configuration, EMPTY_STRING))) { + keepAliveCache.clear(); + HttpClientConnection connection = Mockito.mock( + HttpClientConnection.class); + + + // Eviction thread would close the TTL-elapsed connection and remove it from cache. + AtomicBoolean isConnClosed = new AtomicBoolean(false); + Mockito.doAnswer(closeInvocation -> { + isConnClosed.set(true); + return null; + }).when(connection).close(); + keepAliveCache.put(connection); + + while (!isConnClosed.get()) { + Thread.sleep(HUNDRED); + } + + // Assert that the closed connection is removed from the cache. 
+ assertCacheGetIsNull(keepAliveCache); + Mockito.verify(connection, Mockito.times(1)).close(); + } + } + + @Test + public void testKeepAliveCacheCleanupWithConnections() throws Exception { + Configuration configuration = new Configuration(); + configuration.set(FS_AZURE_APACHE_HTTP_CLIENT_IDLE_CONNECTION_TTL, + HUNDRED + EMPTY_STRING); + try (KeepAliveCache keepAliveCache = new KeepAliveCache( + new AbfsConfiguration(configuration, EMPTY_STRING))) { + keepAliveCache.pauseThread(); + keepAliveCache.clear(); + HttpClientConnection connection = Mockito.mock( + HttpClientConnection.class); + keepAliveCache.put(connection); + + Thread.sleep(2 * keepAliveCache.getConnectionIdleTTL()); + /* + * Eviction thread is switched off, the get() on the cache would close and + * remove the TTL-elapsed connection. + */ + Mockito.verify(connection, Mockito.times(0)).close(); + assertCacheGetIsNull(keepAliveCache); + Mockito.verify(connection, Mockito.times(1)).close(); + keepAliveCache.resumeThread(); + } + } + + @Test + public void testKeepAliveCacheConnectionRecache() throws Exception { + try (KeepAliveCache keepAliveCache = new KeepAliveCache( + new AbfsConfiguration(new Configuration(), EMPTY_STRING))) { + keepAliveCache.clear(); + HttpClientConnection connection = Mockito.mock( + HttpClientConnection.class); + keepAliveCache.put(connection); + + assertCacheGetIsNonNull(keepAliveCache); + keepAliveCache.put(connection); + assertCacheGetIsNonNull(keepAliveCache); + } + } + + @Test + public void testKeepAliveCacheRemoveStaleConnection() throws Exception { + try (KeepAliveCache keepAliveCache = new KeepAliveCache( + new AbfsConfiguration(new Configuration(), EMPTY_STRING))) { + keepAliveCache.clear(); + HttpClientConnection[] connections = new HttpClientConnection[5]; + + // Fill up the cache. + for (int i = 0; + i < DEFAULT_HTTP_CLIENT_CONN_MAX_CACHED_CONNECTIONS; + i++) { + connections[i] = Mockito.mock(HttpClientConnection.class); + keepAliveCache.put(connections[i]); + } + + // Mark all but the last two connections as stale. + for (int i = 0; + i < DEFAULT_HTTP_CLIENT_CONN_MAX_CACHED_CONNECTIONS - 2; + i++) { + Mockito.doReturn(true).when(connections[i]).isStale(); + } + + // Verify that the stale connections are removed. + for (int i = DEFAULT_HTTP_CLIENT_CONN_MAX_CACHED_CONNECTIONS - 1; + i >= 0; + i--) { + // The last two connections are not stale and would be returned. + if (i >= (DEFAULT_HTTP_CLIENT_CONN_MAX_CACHED_CONNECTIONS - 2)) { + assertCacheGetIsNonNull(keepAliveCache); + } else { + // Stale connections are closed and removed. 
+ assertCacheGetIsNull(keepAliveCache); + Mockito.verify(connections[i], Mockito.times(1)).close(); + } + } + } + } + + @Test + public void testKeepAliveCacheClosed() throws Exception { + KeepAliveCache keepAliveCache = Mockito.spy(new KeepAliveCache( + new AbfsConfiguration(new Configuration(), EMPTY_STRING))); + keepAliveCache.put(Mockito.mock(HttpClientConnection.class)); + keepAliveCache.close(); + intercept(ClosedIOException.class, + KEEP_ALIVE_CACHE_CLOSED, + () -> keepAliveCache.get()); + + HttpClientConnection conn = Mockito.mock(HttpClientConnection.class); + assertCachePutFail(keepAliveCache, conn); + Mockito.verify(conn, Mockito.times(1)).close(); + keepAliveCache.close(); + Mockito.verify(keepAliveCache, Mockito.times(1)).closeInternal(); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestApacheHttpClientFallback.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestApacheHttpClientFallback.java new file mode 100644 index 0000000000000..159405d86815d --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/TestApacheHttpClientFallback.java @@ -0,0 +1,226 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.net.URL; +import java.util.ArrayList; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.mockito.Mockito; + +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AbstractAbfsTestWithTimeout; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.fs.azurebfs.utils.TracingHeaderFormat; + +import static java.net.HttpURLConnection.HTTP_OK; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APACHE_IMPL; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JDK_FALLBACK; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JDK_IMPL; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES; +import static org.apache.hadoop.fs.azurebfs.constants.HttpOperationType.APACHE_HTTP_CLIENT; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + + +public class TestApacheHttpClientFallback extends AbstractAbfsTestWithTimeout { + + public TestApacheHttpClientFallback() throws Exception { + super(); + } + + private TracingContext getSampleTracingContext(int[] jdkCallsRegister, + int[] apacheCallsRegister) { + String correlationId, fsId; + TracingHeaderFormat format; + correlationId = "test-corr-id"; + fsId = "test-filesystem-id"; + format = TracingHeaderFormat.ALL_ID_FORMAT; + TracingContext tc = Mockito.spy(new TracingContext(correlationId, fsId, + FSOperationType.TEST_OP, true, format, null)); + Mockito.doAnswer(answer -> { + answer.callRealMethod(); + AbfsHttpOperation op = answer.getArgument(0); + if (op instanceof AbfsAHCHttpOperation) { + Assertions.assertThat(tc.getHeader()).endsWith(APACHE_IMPL); + apacheCallsRegister[0]++; + } + if (op instanceof AbfsJdkHttpOperation) { + jdkCallsRegister[0]++; + if (AbfsApacheHttpClient.usable()) { + Assertions.assertThat(tc.getHeader()).endsWith(JDK_IMPL); + } else { + Assertions.assertThat(tc.getHeader()).endsWith(JDK_FALLBACK); + } + } + return null; + }) + .when(tc) + .constructHeader(Mockito.any(AbfsHttpOperation.class), + Mockito.nullable(String.class), Mockito.nullable(String.class)); + return tc; + } + + @Test + public void testMultipleFailureLeadToFallback() + throws Exception { + int[] apacheCallsTest1 = {0}; + int[] jdkCallsTest1 = {0}; + TracingContext tcTest1 = getSampleTracingContext(jdkCallsTest1, + apacheCallsTest1); + int[] retryIterationTest1 = {0}; + intercept(IOException.class, () -> { + getMockRestOperation(retryIterationTest1).execute(tcTest1); + }); + Assertions.assertThat(apacheCallsTest1[0]) + .isEqualTo(DEFAULT_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES); + Assertions.assertThat(jdkCallsTest1[0]).isEqualTo(1); + + int[] retryIteration1 = {0}; + int[] apacheCallsTest2 = {0}; + int[] jdkCallsTest2 = {0}; + TracingContext tcTest2 = getSampleTracingContext(jdkCallsTest2, + apacheCallsTest2); + intercept(IOException.class, () -> { + getMockRestOperation(retryIteration1).execute(tcTest2); + }); + Assertions.assertThat(apacheCallsTest2[0]).isEqualTo(0); + Assertions.assertThat(jdkCallsTest2[0]) + .isEqualTo(DEFAULT_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES + 1); + } + + private AbfsRestOperation getMockRestOperation(int[] retryIteration) + throws IOException { + 
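+    // Builds a spied AbfsRestOperation that prefers the Apache client and whose
+    // spied http operations always throw an IOException from processResponse();
+    // once the IO-exception retry limit is crossed, createHttpOperation() is
+    // expected to hand back an AbfsJdkHttpOperation (the JDK fallback).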
AbfsConfiguration configuration = Mockito.mock(AbfsConfiguration.class); + Mockito.doReturn(APACHE_HTTP_CLIENT) + .when(configuration) + .getPreferredHttpOperationType(); + Mockito.doReturn(DEFAULT_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES) + .when(configuration) + .getMaxApacheHttpClientIoExceptionsRetries(); + AbfsClient client = Mockito.mock(AbfsClient.class); + Mockito.doReturn(Mockito.mock(ExponentialRetryPolicy.class)) + .when(client) + .getExponentialRetryPolicy(); + + AbfsRetryPolicy retryPolicy = Mockito.mock(AbfsRetryPolicy.class); + Mockito.doReturn(retryPolicy) + .when(client) + .getRetryPolicy(Mockito.nullable(String.class)); + + Mockito.doAnswer(answer -> { + if (retryIteration[0] + < DEFAULT_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES) { + retryIteration[0]++; + return true; + } else { + return false; + } + }) + .when(retryPolicy) + .shouldRetry(Mockito.anyInt(), Mockito.nullable(Integer.class)); + + AbfsThrottlingIntercept abfsThrottlingIntercept = Mockito.mock( + AbfsThrottlingIntercept.class); + Mockito.doNothing() + .when(abfsThrottlingIntercept) + .updateMetrics(Mockito.any(AbfsRestOperationType.class), + Mockito.any(AbfsHttpOperation.class)); + Mockito.doNothing() + .when(abfsThrottlingIntercept) + .sendingRequest(Mockito.any(AbfsRestOperationType.class), + Mockito.nullable(AbfsCounters.class)); + Mockito.doReturn(abfsThrottlingIntercept).when(client).getIntercept(); + + + AbfsRestOperation op = Mockito.spy(new AbfsRestOperation( + AbfsRestOperationType.ReadFile, + client, + AbfsHttpConstants.HTTP_METHOD_GET, + new URL("http://localhost"), + new ArrayList<>(), + null, + configuration + )); + + Mockito.doReturn(null).when(op).getClientLatency(); + + Mockito.doReturn(createApacheHttpOp()) + .when(op) + .createAbfsHttpOperation(); + Mockito.doReturn(createAhcHttpOp()) + .when(op) + .createAbfsAHCHttpOperation(); + + Mockito.doAnswer(answer -> { + return answer.getArgument(0); + }).when(op).createNewTracingContext(Mockito.nullable(TracingContext.class)); + + Mockito.doNothing() + .when(op) + .signRequest(Mockito.any(AbfsHttpOperation.class), Mockito.anyInt()); + + Mockito.doAnswer(answer -> { + AbfsHttpOperation operation = Mockito.spy( + (AbfsHttpOperation) answer.callRealMethod()); + Assertions.assertThat(operation).isInstanceOf( + (retryIteration[0] + < DEFAULT_APACHE_HTTP_CLIENT_MAX_IO_EXCEPTION_RETRIES + && AbfsApacheHttpClient.usable()) + ? 
AbfsAHCHttpOperation.class + : AbfsJdkHttpOperation.class); + Mockito.doReturn(HTTP_OK).when(operation).getStatusCode(); + Mockito.doThrow(new IOException("Test Exception")) + .when(operation) + .processResponse(Mockito.nullable(byte[].class), Mockito.anyInt(), + Mockito.anyInt()); + Mockito.doCallRealMethod().when(operation).getTracingContextSuffix(); + return operation; + }).when(op).createHttpOperation(); + return op; + } + + private AbfsAHCHttpOperation createAhcHttpOp() { + AbfsAHCHttpOperation ahcOp = Mockito.mock(AbfsAHCHttpOperation.class); + Mockito.doCallRealMethod().when(ahcOp).getTracingContextSuffix(); + return ahcOp; + } + + private AbfsJdkHttpOperation createApacheHttpOp() { + AbfsJdkHttpOperation httpOperationMock = Mockito.mock(AbfsJdkHttpOperation.class); + Mockito.doCallRealMethod() + .when(httpOperationMock) + .getTracingContextSuffix(); + return httpOperationMock; + } + + @Test + public void testTcHeaderOnJDKClientUse() { + int[] jdkCallsRegister = {0}; + int[] apacheCallsRegister = {0}; + TracingContext tc = getSampleTracingContext(jdkCallsRegister, + apacheCallsRegister); + AbfsJdkHttpOperation op = Mockito.mock(AbfsJdkHttpOperation.class); + Mockito.doCallRealMethod().when(op).getTracingContextSuffix(); + tc.constructHeader(op, null, null); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java index 7569c80d67c61..27a84e4978ad2 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/TracingHeaderValidator.java @@ -93,7 +93,7 @@ private void validateTracingHeader(String tracingContextHeader) { private void validateBasicFormat(String[] idList) { if (format == TracingHeaderFormat.ALL_ID_FORMAT) { Assertions.assertThat(idList) - .describedAs("header should have 7 elements").hasSize(7); + .describedAs("header should have 8 elements").hasSize(8); } else if (format == TracingHeaderFormat.TWO_ID_FORMAT) { Assertions.assertThat(idList) .describedAs("header should have 2 elements").hasSize(2); From e000cbf27718f467b5d16002ac57f87b9ca39266 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Tue, 23 Jul 2024 04:47:36 -0800 Subject: [PATCH 068/113] HADOOP-19218. Addendum. Update TestFSNamesystemLockReport to exclude hostname resolution from regex. (#6951). Contributed by Viraj Jasani. 
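For context, a small self-contained sketch (not part of this patch; the class name and the sample audit-log fragments are made up) showing what the tightened pattern accepts once the hostname component is dropped from the expected ip= field:

```java
import java.util.regex.Pattern;

// Illustrative only: contrasts the old and new ip= patterns used in the test regexes.
public class LockReportRegexSketch {
  public static void main(String[] args) {
    // Old form: required a resolved hostname before the slash.
    Pattern oldIp = Pattern.compile(
        "ip=[a-zA-Z0-9.]+/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}");
    // New form: matches the bare address emitted when no hostname is resolved.
    Pattern newIp = Pattern.compile(
        "ip=/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}");

    String unresolved = "ip=/127.0.0.1,src=/file,dst=null";        // hypothetical fragment
    String resolved = "ip=localhost/127.0.0.1,src=/file,dst=null"; // hypothetical fragment

    System.out.println(oldIp.matcher(unresolved).find()); // false: old pattern misses it
    System.out.println(oldIp.matcher(resolved).find());   // true:  old pattern needed a hostname
    System.out.println(newIp.matcher(unresolved).find()); // true:  new pattern accepts it
    System.out.println(newIp.matcher(resolved).find());   // false: hostname form no longer expected
  }
}
```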
Signed-off-by: He Xiaoqiao --- .../namenode/TestFSNamesystemLockReport.java | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemLockReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemLockReport.java index 9c77f9d92b8ba..ef1ed9b78357b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemLockReport.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSNamesystemLockReport.java @@ -103,7 +103,7 @@ public void test() throws Exception { FSDataOutputStream os = testLockReport(() -> userfs.create(new Path("/file")), ".* by create \\(ugi=bob \\(auth:SIMPLE\\)," + - "ip=[a-zA-Z0-9.]+/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file,dst=null," + + "ip=/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file,dst=null," + "perm=bob:hadoop:rw-r--r--\\) .*"); os.close(); @@ -111,7 +111,7 @@ public void test() throws Exception { // ip=/127.0.0.1,src=/file,dst=null,perm=null)" FSDataInputStream is = testLockReport(() -> userfs.open(new Path("/file")), ".* by open \\(ugi=bob \\(auth:SIMPLE\\)," + - "ip=[a-zA-Z0-9.]+/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file,dst=null," + + "ip=/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file,dst=null," + "perm=null\\) .*"); is.close(); @@ -120,49 +120,49 @@ public void test() throws Exception { testLockReport(() -> userfs.setPermission(new Path("/file"), new FsPermission(644)), ".* by setPermission \\(ugi=bob \\(auth:SIMPLE\\)," + - "ip=[a-zA-Z0-9.]+/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file,dst=null," + + "ip=/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file,dst=null," + "perm=bob:hadoop:-w----r-T\\) .*"); // The log output should contain "by setOwner (ugi=bob (auth:SIMPLE), // ip=/127.0.0.1,src=/file,dst=null,perm=alice:group1:-w----r-T)" testLockReport(() -> userfs.setOwner(new Path("/file"), "alice", "group1"), ".* by setOwner \\(ugi=bob \\(auth:SIMPLE\\)," + - "ip=[a-zA-Z0-9.]+/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file,dst=null," + + "ip=/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file,dst=null," + "perm=alice:group1:-w----r-T\\) .*"); // The log output should contain "by listStatus (ugi=bob (auth:SIMPLE), // ip=/127.0.0.1,src=/,dst=null,perm=null)" testLockReport(() -> userfs.listStatus(new Path("/")), ".* by listStatus \\(ugi=bob \\(auth:SIMPLE\\)," + - "ip=[a-zA-Z0-9.]+/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/,dst=null," + + "ip=/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/,dst=null," + "perm=null\\) .*"); // The log output should contain "by getfileinfo (ugi=bob (auth:SIMPLE), // ip=/127.0.0.1,src=/file,dst=null,perm=null)" testLockReport(() -> userfs.getFileStatus(new Path("/file")), ".* by getfileinfo \\(ugi=bob \\(auth:SIMPLE\\)," + - "ip=[a-zA-Z0-9.]+/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file,dst=null," + + "ip=/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file,dst=null," + "perm=null\\) .*"); // The log output should contain "by mkdirs (ugi=bob (auth:SIMPLE), // ip=/127.0.0.1,src=/dir,dst=null,perm=bob:hadoop:rwxr-xr-x)" testLockReport(() -> userfs.mkdirs(new Path("/dir")), ".* by mkdirs \\(ugi=bob \\(auth:SIMPLE\\)," + - "ip=[a-zA-Z0-9.]+/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/dir,dst=null," + + "ip=/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/dir,dst=null," + 
"perm=bob:hadoop:rwxr-xr-x\\) .*"); // The log output should contain "by delete (ugi=bob (auth:SIMPLE), // ip=/127.0.0.1,src=/file2,dst=null,perm=null)" testLockReport(() -> userfs.rename(new Path("/file"), new Path("/file2")), ".* by rename \\(ugi=bob \\(auth:SIMPLE\\)," + - "ip=[a-zA-Z0-9.]+/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file,dst=/file2," + + "ip=/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file,dst=/file2," + "perm=alice:group1:-w----r-T\\) .*"); // The log output should contain "by rename (ugi=bob (auth:SIMPLE), // ip=/127.0.0.1,src=/file,dst=/file2,perm=alice:group1:-w----r-T)" testLockReport(() -> userfs.delete(new Path("/file2"), false), ".* by delete \\(ugi=bob \\(auth:SIMPLE\\)," + - "ip=[a-zA-Z0-9.]+/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file2,dst=null," + + "ip=/\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3},src=/file2,dst=null," + "perm=null\\) .*"); } From e2a0dca43b57993fe8bd1be05281e126a325cf83 Mon Sep 17 00:00:00 2001 From: Aswin M Prabhu <31558262+aswinmprabhu@users.noreply.github.com> Date: Tue, 23 Jul 2024 18:25:57 +0530 Subject: [PATCH 069/113] HDFS-16690. Automatically format unformatted JNs with JournalNodeSyncer (#6925). Contributed by Aswin M Prabhu. Signed-off-by: He Xiaoqiao --- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 3 + .../protocol/InterQJournalProtocol.java | 10 +++ ...JournalProtocolServerSideTranslatorPB.java | 16 ++++ .../InterQJournalProtocolTranslatorPB.java | 14 ++++ .../qjournal/server/JournalNodeRpcServer.java | 28 ++++--- .../qjournal/server/JournalNodeSyncer.java | 81 ++++++++++++++++++- .../main/proto/InterQJournalProtocol.proto | 7 ++ .../src/main/resources/hdfs-default.xml | 10 +++ .../qjournal/server/TestJournalNodeSync.java | 44 ++++++++++ 9 files changed, 202 insertions(+), 11 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index b9f8e07f67a5f..dd3193fdadff2 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -1471,6 +1471,9 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_JOURNALNODE_SYNC_INTERVAL_KEY = "dfs.journalnode.sync.interval"; public static final long DFS_JOURNALNODE_SYNC_INTERVAL_DEFAULT = 2*60*1000L; + public static final String DFS_JOURNALNODE_ENABLE_SYNC_FORMAT_KEY = + "dfs.journalnode.enable.sync.format"; + public static final boolean DFS_JOURNALNODE_ENABLE_SYNC_FORMAT_DEFAULT = false; public static final String DFS_JOURNALNODE_EDIT_CACHE_SIZE_KEY = "dfs.journalnode.edit-cache-size.bytes"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/InterQJournalProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/InterQJournalProtocol.java index f1f7e9ce1ff47..c3eed14c3b662 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/InterQJournalProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocol/InterQJournalProtocol.java @@ -20,6 +20,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.StorageInfoProto; import 
org.apache.hadoop.hdfs.qjournal.server.JournalNode; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetEditLogManifestResponseProto; import org.apache.hadoop.security.KerberosInfo; @@ -51,4 +52,13 @@ GetEditLogManifestResponseProto getEditLogManifestFromJournal( String jid, String nameServiceId, long sinceTxId, boolean inProgressOk) throws IOException; + /** + * Get the storage info for the specified journal. + * @param jid the journal identifier + * @param nameServiceId the name service id + * @return the storage info object + */ + StorageInfoProto getStorageInfo(String jid, String nameServiceId) + throws IOException; + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/InterQJournalProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/InterQJournalProtocolServerSideTranslatorPB.java index ba5ddb1ab6770..ac67bcb0cbd17 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/InterQJournalProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/InterQJournalProtocolServerSideTranslatorPB.java @@ -24,6 +24,8 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.qjournal.protocol.InterQJournalProtocol; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.StorageInfoProto; +import org.apache.hadoop.hdfs.qjournal.protocol.InterQJournalProtocolProtos.GetStorageInfoRequestProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetEditLogManifestRequestProto; import org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocolProtos.GetEditLogManifestResponseProto; @@ -60,4 +62,18 @@ public GetEditLogManifestResponseProto getEditLogManifestFromJournal( throw new ServiceException(e); } } + + @Override + public StorageInfoProto getStorageInfo( + RpcController controller, GetStorageInfoRequestProto request) + throws ServiceException { + try { + return impl.getStorageInfo( + request.getJid().getIdentifier(), + request.hasNameServiceId() ? 
request.getNameServiceId() : null + ); + } catch (IOException e) { + throw new ServiceException(e); + } + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/InterQJournalProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/InterQJournalProtocolTranslatorPB.java index 4544308fff2fc..49ae53fceebc6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/InterQJournalProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/protocolPB/InterQJournalProtocolTranslatorPB.java @@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.qjournal.protocolPB; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.StorageInfoProto; +import org.apache.hadoop.hdfs.qjournal.protocol.InterQJournalProtocolProtos; import org.apache.hadoop.thirdparty.protobuf.RpcController; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -75,6 +77,18 @@ public GetEditLogManifestResponseProto getEditLogManifestFromJournal( req.build())); } + @Override + public StorageInfoProto getStorageInfo(String jid, String nameServiceId) + throws IOException { + InterQJournalProtocolProtos.GetStorageInfoRequestProto.Builder req = + InterQJournalProtocolProtos.GetStorageInfoRequestProto.newBuilder() + .setJid(convertJournalId(jid)); + if (nameServiceId != null) { + req.setNameServiceId(nameServiceId); + } + return ipc(() -> rpcProxy.getStorageInfo(NULL_CONTROLLER, req.build())); + } + private QJournalProtocolProtos.JournalIdProto convertJournalId(String jid) { return QJournalProtocolProtos.JournalIdProto.newBuilder() .setIdentifier(jid) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java index 7e33ab5c759f5..b09d09aed0379 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeRpcServer.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.qjournal.server; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos.StorageInfoProto; import org.apache.hadoop.thirdparty.protobuf.BlockingService; import org.slf4j.Logger; import org.apache.hadoop.classification.InterfaceAudience; @@ -71,14 +72,14 @@ public class JournalNodeRpcServer implements QJournalProtocol, JournalNodeRpcServer(Configuration conf, JournalNode jn) throws IOException { this.jn = jn; - + Configuration confCopy = new Configuration(conf); - + // Ensure that nagling doesn't kick in, which could cause latency issues. 
confCopy.setBoolean( CommonConfigurationKeysPublic.IPC_SERVER_TCPNODELAY_KEY, true); - + InetSocketAddress addr = getAddress(confCopy); String bindHost = conf.getTrimmed(DFS_JOURNALNODE_RPC_BIND_HOST_KEY, null); if (bindHost == null) { @@ -104,7 +105,7 @@ public class JournalNodeRpcServer implements QJournalProtocol, this.handlerCount = confHandlerCount; LOG.info("The number of JournalNodeRpcServer handlers is {}.", this.handlerCount); - + this.server = new RPC.Builder(confCopy) .setProtocol(QJournalProtocolPB.class) .setInstance(service) @@ -149,15 +150,15 @@ void start() { public InetSocketAddress getAddress() { return server.getListenerAddress(); } - + void join() throws InterruptedException { this.server.join(); } - + void stop() { this.server.stop(); } - + static InetSocketAddress getAddress(Configuration conf) { String addr = conf.get( DFSConfigKeys.DFS_JOURNALNODE_RPC_ADDRESS_KEY, @@ -211,7 +212,7 @@ public void journal(RequestInfo reqInfo, jn.getOrCreateJournal(reqInfo.getJournalId(), reqInfo.getNameServiceId()) .journal(reqInfo, segmentTxId, firstTxnId, numTxns, records); } - + @Override public void heartbeat(RequestInfo reqInfo) throws IOException { jn.getOrCreateJournal(reqInfo.getJournalId(), reqInfo.getNameServiceId()) @@ -245,10 +246,10 @@ public GetEditLogManifestResponseProto getEditLogManifest( String jid, String nameServiceId, long sinceTxId, boolean inProgressOk) throws IOException { - + RemoteEditLogManifest manifest = jn.getOrCreateJournal(jid, nameServiceId) .getEditLogManifest(sinceTxId, inProgressOk); - + return GetEditLogManifestResponseProto.newBuilder() .setManifest(PBHelper.convert(manifest)) .setHttpPort(jn.getBoundHttpAddress().getPort()) @@ -256,6 +257,13 @@ public GetEditLogManifestResponseProto getEditLogManifest( .build(); } + @Override + public StorageInfoProto getStorageInfo(String jid, + String nameServiceId) throws IOException { + StorageInfo storage = jn.getOrCreateJournal(jid, nameServiceId).getStorage(); + return PBHelper.convert(storage); + } + @Override public GetJournaledEditsResponseProto getJournaledEdits(String jid, String nameServiceId, long sinceTxId, int maxTxns) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeSyncer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeSyncer.java index f451b46de7b37..75010596b1ae6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeSyncer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNodeSyncer.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hdfs.qjournal.server; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.hdfs.protocol.proto.HdfsServerProtos; +import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; +import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; @@ -79,6 +82,7 @@ public class JournalNodeSyncer { private int numOtherJNs; private int journalNodeIndexForSync = 0; private final long journalSyncInterval; + private final boolean tryFormatting; private final int logSegmentTransferTimeout; private final DataTransferThrottler throttler; private final JournalMetrics metrics; @@ -98,6 +102,9 @@ public class JournalNodeSyncer { 
logSegmentTransferTimeout = conf.getInt( DFSConfigKeys.DFS_EDIT_LOG_TRANSFER_TIMEOUT_KEY, DFSConfigKeys.DFS_EDIT_LOG_TRANSFER_TIMEOUT_DEFAULT); + tryFormatting = conf.getBoolean( + DFSConfigKeys.DFS_JOURNALNODE_ENABLE_SYNC_FORMAT_KEY, + DFSConfigKeys.DFS_JOURNALNODE_ENABLE_SYNC_FORMAT_DEFAULT); throttler = getThrottler(conf); metrics = journal.getMetrics(); journalSyncerStarted = false; @@ -171,6 +178,8 @@ private void startSyncJournalsDaemon() { // Wait for journal to be formatted to create edits.sync directory while(!journal.isFormatted()) { try { + // Format the journal with namespace info from the other JNs if it is not formatted + formatWithSyncer(); Thread.sleep(journalSyncInterval); } catch (InterruptedException e) { LOG.error("JournalNodeSyncer daemon received Runtime exception.", e); @@ -187,7 +196,15 @@ private void startSyncJournalsDaemon() { while(shouldSync) { try { if (!journal.isFormatted()) { - LOG.warn("Journal cannot sync. Not formatted."); + LOG.warn("Journal cannot sync. Not formatted. Trying to format with the syncer"); + formatWithSyncer(); + if (journal.isFormatted() && !createEditsSyncDir()) { + LOG.error("Failed to create directory for downloading log " + + "segments: {}. Stopping Journal Node Sync.", + journal.getStorage().getEditsSyncDir()); + return; + } + continue; } else { syncJournals(); } @@ -233,6 +250,68 @@ private void syncJournals() { journalNodeIndexForSync = (journalNodeIndexForSync + 1) % numOtherJNs; } + private void formatWithSyncer() { + if (!tryFormatting) { + return; + } + LOG.info("Trying to format the journal with the syncer"); + try { + StorageInfo storage = null; + for (JournalNodeProxy jnProxy : otherJNProxies) { + if (!hasEditLogs(jnProxy)) { + // This avoids a race condition between `hdfs namenode -format` and + // JN syncer by checking if the other JN is not newly formatted. + continue; + } + try { + HdfsServerProtos.StorageInfoProto storageInfoResponse = + jnProxy.jnProxy.getStorageInfo(jid, nameServiceId); + storage = PBHelper.convert( + storageInfoResponse, HdfsServerConstants.NodeType.JOURNAL_NODE + ); + if (storage.getNamespaceID() == 0) { + LOG.error("Got invalid StorageInfo from " + jnProxy); + storage = null; + continue; + } + LOG.info("Got StorageInfo " + storage + " from " + jnProxy); + break; + } catch (IOException e) { + LOG.error("Could not get StorageInfo from " + jnProxy, e); + } + } + if (storage == null) { + LOG.error("Could not get StorageInfo from any JournalNode. 
" + + "JournalNodeSyncer cannot format the journal."); + return; + } + NamespaceInfo nsInfo = new NamespaceInfo(storage); + journal.format(nsInfo, true); + } catch (IOException e) { + LOG.error("Exception in formatting the journal with the syncer", e); + } + } + + private boolean hasEditLogs(JournalNodeProxy journalProxy) { + GetEditLogManifestResponseProto editLogManifest; + try { + editLogManifest = journalProxy.jnProxy.getEditLogManifestFromJournal( + jid, nameServiceId, 0, false); + } catch (IOException e) { + LOG.error("Could not get edit log manifest from " + journalProxy, e); + return false; + } + + List otherJournalEditLogs = PBHelper.convert( + editLogManifest.getManifest()).getLogs(); + if (otherJournalEditLogs == null || otherJournalEditLogs.isEmpty()) { + LOG.warn("Journal at " + journalProxy.jnAddr + " has no edit logs"); + return false; + } + + return true; + } + private void syncWithJournalAtIndex(int index) { LOG.info("Syncing Journal " + jn.getBoundIpcAddress().getAddress() + ":" + jn.getBoundIpcAddress().getPort() + " with " diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterQJournalProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterQJournalProtocol.proto index 1c78423b40990..5510eeb7c4239 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterQJournalProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/InterQJournalProtocol.proto @@ -31,8 +31,15 @@ package hadoop.hdfs.qjournal; import "HdfsServer.proto"; import "QJournalProtocol.proto"; +message GetStorageInfoRequestProto { + required JournalIdProto jid = 1; + optional string nameServiceId = 2; +} service InterQJournalProtocolService { rpc getEditLogManifestFromJournal(GetEditLogManifestRequestProto) returns (GetEditLogManifestResponseProto); + + rpc getStorageInfo(GetStorageInfoRequestProto) + returns (StorageInfoProto); } \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index d6fefa4e93989..1295c0dca8752 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -5071,6 +5071,16 @@ + + dfs.journalnode.enable.sync.format + false + + If true, the journal node syncer daemon that tries to sync edit + logs between journal nodes will try to format its journal if it is not. + It will query the other journal nodes for the storage info required to format. 
+ + + dfs.journalnode.edit-cache-size.bytes diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeSync.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeSync.java index 28e36e03bfaa5..ac250ffc4f2c0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeSync.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/server/TestJournalNodeSync.java @@ -20,6 +20,7 @@ import java.net.InetSocketAddress; import java.net.URISyntaxException; import java.util.function.Supplier; +import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -75,6 +76,7 @@ public void setUpMiniCluster() throws IOException { conf = new HdfsConfiguration(); conf.setBoolean(DFSConfigKeys.DFS_JOURNALNODE_ENABLE_SYNC_KEY, true); conf.setLong(DFSConfigKeys.DFS_JOURNALNODE_SYNC_INTERVAL_KEY, 1000L); + conf.setBoolean(DFSConfigKeys.DFS_JOURNALNODE_ENABLE_SYNC_FORMAT_KEY, true); if (testName.getMethodName().equals( "testSyncAfterJNdowntimeWithoutQJournalQueue")) { conf.setInt(DFSConfigKeys.DFS_QJOURNAL_QUEUE_SIZE_LIMIT_KEY, 0); @@ -478,6 +480,33 @@ public void testSyncDuringRollingUpgrade() throws Exception { } } + @Test(timeout=300_000) + public void testFormatWithSyncer() throws Exception { + File firstJournalDir = jCluster.getJournalDir(0, jid); + File firstJournalCurrentDir = new StorageDirectory(firstJournalDir) + .getCurrentDir(); + + // Generate some edit logs + long firstTxId = generateEditLog(); + + // Delete them from the JN01 + List missingLogs = Lists.newArrayList(); + missingLogs.add(deleteEditLog(firstJournalCurrentDir, firstTxId)); + + // Wait to ensure sync starts, delete the storage directory itself to simulate a disk wipe + // and ensure that the in-memory formatting state of JNStorage gets updated + Thread.sleep(2000); + FileUtils.deleteDirectory(firstJournalDir); + jCluster.getJournalNode(0).getOrCreateJournal(jid).getStorage().analyzeStorage(); + + // Wait for JN formatting with Syncer + GenericTestUtils.waitFor(jnFormatted(0), 500, 30000); + // Generate some more edit log so that the JN updates its committed tx id + generateEditLog(); + // Check that the missing edit logs have been synced + GenericTestUtils.waitFor(editLogExists(missingLogs), 500, 30000); + } + private File deleteEditLog(File currentDir, long startTxId) throws IOException { EditLogFile logFile = getLogFile(currentDir, startTxId); @@ -581,4 +610,19 @@ public Boolean get() { }; return supplier; } + + private Supplier jnFormatted(int jnIndex) throws Exception { + Supplier supplier = new Supplier() { + @Override + public Boolean get() { + try { + return jCluster.getJournalNode(jnIndex).getOrCreateJournal(jid) + .isFormatted(); + } catch (Exception e) { + return false; + } + } + }; + return supplier; + } } From 4525c7e35ea22d7a6350b8af10eb8d2ff68376e7 Mon Sep 17 00:00:00 2001 From: Raphael Azzolini Date: Tue, 23 Jul 2024 09:09:04 -0700 Subject: [PATCH 070/113] HADOOP-19197. S3A: Support AWS KMS Encryption Context (#6874) The new property fs.s3a.encryption.context allow users to specify the AWS KMS Encryption Context to be used in S3A. The value of the encryption context is a key/value string that will be Base64 encoded and set in the parameter ssekmsEncryptionContext from the S3 client. 
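A minimal usage sketch follows (not part of the patch; the bucket name and the key/value pairs are illustrative, and it assumes a default KMS key is configured for the account so fs.s3a.encryption.key can stay unset). It shows the comma-separated key=value format that S3A converts to JSON, Base64 encodes and attaches as the KMS encryption context:

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Sketch only: writes one object with SSE-KMS plus a non-secret encryption context.
public class EncryptionContextSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("fs.s3a.encryption.algorithm", "SSE-KMS");
    // Non-secret key=value pairs; S3A serializes them and sends them with each KMS request.
    conf.set("fs.s3a.encryption.context", "project=hadoop, team=data");

    // "example-bucket" is a placeholder; any object written now carries the context.
    try (FileSystem fs = FileSystem.get(new URI("s3a://example-bucket/"), conf)) {
      fs.create(new Path("/tmp/object-with-context")).close();
    }
  }
}
```

Since the value is looked up per bucket before the global key, the usual per-bucket override pattern (for example fs.s3a.bucket.example-bucket.encryption.context) should apply only to that bucket.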
Contributed by Raphael Azzolini --- .../fs/CommonConfigurationKeysPublic.java | 1 + .../src/main/resources/core-default.xml | 10 ++ .../org/apache/hadoop/fs/s3a/Constants.java | 10 ++ .../org/apache/hadoop/fs/s3a/S3AUtils.java | 22 +++- .../EncryptionSecretOperations.java | 16 +++ .../auth/delegation/EncryptionSecrets.java | 35 +++++- .../fs/s3a/impl/RequestFactoryImpl.java | 14 +++ .../hadoop/fs/s3a/impl/S3AEncryption.java | 106 ++++++++++++++++++ .../markdown/tools/hadoop-aws/encryption.md | 30 +++++ .../site/markdown/tools/hadoop-aws/index.md | 14 +++ .../fs/s3a/AbstractTestS3AEncryption.java | 2 + ...EncryptionSSEKMSWithEncryptionContext.java | 101 +++++++++++++++++ .../hadoop/fs/s3a/TestSSEConfiguration.java | 69 +++++++++--- .../s3a/auth/TestMarshalledCredentials.java | 3 +- .../ITestSessionDelegationTokens.java | 6 +- .../TestS3ADelegationTokenSupport.java | 24 +++- .../fs/s3a/impl/TestRequestFactory.java | 2 +- .../hadoop/fs/s3a/impl/TestS3AEncryption.java | 77 +++++++++++++ 18 files changed, 513 insertions(+), 29 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AEncryption.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSWithEncryptionContext.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestS3AEncryption.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java index d01ddd30f4705..0b36aec318dfb 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java @@ -1022,6 +1022,7 @@ public class CommonConfigurationKeysPublic { "fs.s3a.*.server-side-encryption.key", "fs.s3a.encryption.algorithm", "fs.s3a.encryption.key", + "fs.s3a.encryption.context", "fs.azure\\.account.key.*", "credential$", "oauth.*secret", diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index bd91de0f080fd..4104e3043149e 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -742,6 +742,7 @@ fs.s3a.*.server-side-encryption.key fs.s3a.encryption.algorithm fs.s3a.encryption.key + fs.s3a.encryption.context fs.s3a.secret.key fs.s3a.*.secret.key fs.s3a.session.key @@ -1760,6 +1761,15 @@ + + fs.s3a.encryption.context + Specific encryption context to use if fs.s3a.encryption.algorithm + has been set to 'SSE-KMS' or 'DSSE-KMS'. The value of this property is a set + of non-secret comma-separated key-value pairs of additional contextual + information about the data that are separated by equal operator (=). 
+ + + fs.s3a.signing-algorithm Override the default signing algorithm so legacy diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 185389739cbad..8833aeba2fc8f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -736,6 +736,16 @@ private Constants() { public static final String S3_ENCRYPTION_KEY = "fs.s3a.encryption.key"; + /** + * Set S3-SSE encryption context. + * The value of this property is a set of non-secret comma-separated key-value pairs + * of additional contextual information about the data that are separated by equal + * operator (=). + * value:{@value} + */ + public static final String S3_ENCRYPTION_CONTEXT = + "fs.s3a.encryption.context"; + /** * List of custom Signers. The signer class will be loaded, and the signer * name will be associated with this signer class in the S3 SDK. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index b7c89c4626a03..685b95dfc3d77 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -37,6 +37,7 @@ import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.impl.S3AEncryption; import org.apache.hadoop.util.functional.RemoteIterators; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; import org.apache.hadoop.fs.s3a.impl.MultiObjectDeleteException; @@ -1312,7 +1313,7 @@ static void patchSecurityCredentialProviders(Configuration conf) { * @throws IOException on any IO problem * @throws IllegalArgumentException bad arguments */ - private static String lookupBucketSecret( + public static String lookupBucketSecret( String bucket, Configuration conf, String baseKey) @@ -1458,6 +1459,8 @@ public static EncryptionSecrets buildEncryptionSecrets(String bucket, int encryptionKeyLen = StringUtils.isBlank(encryptionKey) ? 0 : encryptionKey.length(); String diagnostics = passwordDiagnostics(encryptionKey, "key"); + String encryptionContext = S3AEncryption.getS3EncryptionContextBase64Encoded(bucket, conf, + encryptionMethod.requiresSecret()); switch (encryptionMethod) { case SSE_C: LOG.debug("Using SSE-C with {}", diagnostics); @@ -1493,7 +1496,7 @@ public static EncryptionSecrets buildEncryptionSecrets(String bucket, LOG.debug("Data is unencrypted"); break; } - return new EncryptionSecrets(encryptionMethod, encryptionKey); + return new EncryptionSecrets(encryptionMethod, encryptionKey, encryptionContext); } /** @@ -1686,6 +1689,21 @@ public static Map getTrimmedStringCollectionSplitByEquals( final Configuration configuration, final String name) { String valueString = configuration.get(name); + return getTrimmedStringCollectionSplitByEquals(valueString); + } + + /** + * Get the equal op (=) delimited key-value pairs of the name property as + * a collection of pair of Strings, trimmed of the leading and trailing whitespace + * after delimiting the name by comma and new line separator. + * If no such property is specified then empty Map is returned. + * + * @param valueString the string containing the key-value pairs. 
+ * @return property value as a Map of Strings, or empty + * Map. + */ + public static Map getTrimmedStringCollectionSplitByEquals( + final String valueString) { if (null == valueString) { return new HashMap<>(); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java index 8a55a970134f3..ea5c0cf20786d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecretOperations.java @@ -61,4 +61,20 @@ public static Optional getSSEAwsKMSKey(final EncryptionSecrets secrets) return Optional.empty(); } } + + /** + * Gets the SSE-KMS context if present, else don't set it in the S3 request. + * + * @param secrets source of the encryption secrets. + * @return an optional AWS KMS encryption context to attach to a request. + */ + public static Optional getSSEAwsKMSEncryptionContext(final EncryptionSecrets secrets) { + if ((secrets.getEncryptionMethod() == S3AEncryptionMethods.SSE_KMS + || secrets.getEncryptionMethod() == S3AEncryptionMethods.DSSE_KMS) + && secrets.hasEncryptionContext()) { + return Optional.of(secrets.getEncryptionContext()); + } else { + return Optional.empty(); + } + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecrets.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecrets.java index 092653de557f0..f421ecca24cf9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecrets.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/EncryptionSecrets.java @@ -67,6 +67,11 @@ public class EncryptionSecrets implements Writable, Serializable { */ private String encryptionKey = ""; + /** + * Encryption context: base64-encoded UTF-8 string. + */ + private String encryptionContext = ""; + /** * This field isn't serialized/marshalled; it is rebuilt from the * encryptionAlgorithm field. @@ -84,23 +89,28 @@ public EncryptionSecrets() { * Create a pair of secrets. * @param encryptionAlgorithm algorithm enumeration. * @param encryptionKey key/key reference. + * @param encryptionContext base64-encoded string with the encryption context key-value pairs. * @throws IOException failure to initialize. */ public EncryptionSecrets(final S3AEncryptionMethods encryptionAlgorithm, - final String encryptionKey) throws IOException { - this(encryptionAlgorithm.getMethod(), encryptionKey); + final String encryptionKey, + final String encryptionContext) throws IOException { + this(encryptionAlgorithm.getMethod(), encryptionKey, encryptionContext); } /** * Create a pair of secrets. * @param encryptionAlgorithm algorithm name * @param encryptionKey key/key reference. + * @param encryptionContext base64-encoded string with the encryption context key-value pairs. * @throws IOException failure to initialize. 
*/ public EncryptionSecrets(final String encryptionAlgorithm, - final String encryptionKey) throws IOException { + final String encryptionKey, + final String encryptionContext) throws IOException { this.encryptionAlgorithm = encryptionAlgorithm; this.encryptionKey = encryptionKey; + this.encryptionContext = encryptionContext; init(); } @@ -114,6 +124,7 @@ public void write(final DataOutput out) throws IOException { new LongWritable(serialVersionUID).write(out); Text.writeString(out, encryptionAlgorithm); Text.writeString(out, encryptionKey); + Text.writeString(out, encryptionContext); } /** @@ -132,6 +143,7 @@ public void readFields(final DataInput in) throws IOException { } encryptionAlgorithm = Text.readString(in, MAX_SECRET_LENGTH); encryptionKey = Text.readString(in, MAX_SECRET_LENGTH); + encryptionContext = Text.readString(in); init(); } @@ -164,6 +176,10 @@ public String getEncryptionKey() { return encryptionKey; } + public String getEncryptionContext() { + return encryptionContext; + } + /** * Does this instance have encryption options? * That is: is the algorithm non-null. @@ -181,6 +197,14 @@ public boolean hasEncryptionKey() { return StringUtils.isNotEmpty(encryptionKey); } + /** + * Does this instance have an encryption context? + * @return true if there's an encryption context. + */ + public boolean hasEncryptionContext() { + return StringUtils.isNotEmpty(encryptionContext); + } + @Override public boolean equals(final Object o) { if (this == o) { @@ -191,12 +215,13 @@ public boolean equals(final Object o) { } final EncryptionSecrets that = (EncryptionSecrets) o; return Objects.equals(encryptionAlgorithm, that.encryptionAlgorithm) - && Objects.equals(encryptionKey, that.encryptionKey); + && Objects.equals(encryptionKey, that.encryptionKey) + && Objects.equals(encryptionContext, that.encryptionContext); } @Override public int hashCode() { - return Objects.hash(encryptionAlgorithm, encryptionKey); + return Objects.hash(encryptionAlgorithm, encryptionKey, encryptionContext); } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index c91324da7cb15..df2a6567dbdec 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -270,6 +270,8 @@ protected void copyEncryptionParameters(HeadObjectResponse srcom, LOG.debug("Propagating SSE-KMS settings from source {}", sourceKMSId); copyObjectRequestBuilder.ssekmsKeyId(sourceKMSId); + EncryptionSecretOperations.getSSEAwsKMSEncryptionContext(encryptionSecrets) + .ifPresent(copyObjectRequestBuilder::ssekmsEncryptionContext); return; } @@ -282,11 +284,15 @@ protected void copyEncryptionParameters(HeadObjectResponse srcom, // Set the KMS key if present, else S3 uses AWS managed key. 
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets) .ifPresent(copyObjectRequestBuilder::ssekmsKeyId); + EncryptionSecretOperations.getSSEAwsKMSEncryptionContext(encryptionSecrets) + .ifPresent(copyObjectRequestBuilder::ssekmsEncryptionContext); break; case DSSE_KMS: copyObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS_DSSE); EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets) .ifPresent(copyObjectRequestBuilder::ssekmsKeyId); + EncryptionSecretOperations.getSSEAwsKMSEncryptionContext(encryptionSecrets) + .ifPresent(copyObjectRequestBuilder::ssekmsEncryptionContext); break; case SSE_C: EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets) @@ -371,11 +377,15 @@ private void putEncryptionParameters(PutObjectRequest.Builder putObjectRequestBu // Set the KMS key if present, else S3 uses AWS managed key. EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets) .ifPresent(putObjectRequestBuilder::ssekmsKeyId); + EncryptionSecretOperations.getSSEAwsKMSEncryptionContext(encryptionSecrets) + .ifPresent(putObjectRequestBuilder::ssekmsEncryptionContext); break; case DSSE_KMS: putObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS_DSSE); EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets) .ifPresent(putObjectRequestBuilder::ssekmsKeyId); + EncryptionSecretOperations.getSSEAwsKMSEncryptionContext(encryptionSecrets) + .ifPresent(putObjectRequestBuilder::ssekmsEncryptionContext); break; case SSE_C: EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets) @@ -447,11 +457,15 @@ private void multipartUploadEncryptionParameters( // Set the KMS key if present, else S3 uses AWS managed key. EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets) .ifPresent(mpuRequestBuilder::ssekmsKeyId); + EncryptionSecretOperations.getSSEAwsKMSEncryptionContext(encryptionSecrets) + .ifPresent(mpuRequestBuilder::ssekmsEncryptionContext); break; case DSSE_KMS: mpuRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS_DSSE); EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets) .ifPresent(mpuRequestBuilder::ssekmsKeyId); + EncryptionSecretOperations.getSSEAwsKMSEncryptionContext(encryptionSecrets) + .ifPresent(mpuRequestBuilder::ssekmsEncryptionContext); break; case SSE_C: EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AEncryption.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AEncryption.java new file mode 100644 index 0000000000000..a720d2ca10000 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AEncryption.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Map; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.codec.binary.Base64; +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.S3AUtils; + +import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_CONTEXT; + +/** + * Utility methods for S3A encryption properties. + */ +public final class S3AEncryption { + + private static final Logger LOG = LoggerFactory.getLogger(S3AEncryption.class); + + private S3AEncryption() { + } + + /** + * Get any SSE context from a configuration/credential provider. + * @param bucket bucket to query for + * @param conf configuration to examine + * @return the encryption context value or "" + * @throws IOException if reading a JCEKS file raised an IOE + * @throws IllegalArgumentException bad arguments. + */ + public static String getS3EncryptionContext(String bucket, Configuration conf) + throws IOException { + // look up the per-bucket value of the encryption context + String encryptionContext = S3AUtils.lookupBucketSecret(bucket, conf, S3_ENCRYPTION_CONTEXT); + if (encryptionContext == null) { + // look up the global value of the encryption context + encryptionContext = S3AUtils.lookupPassword(null, conf, S3_ENCRYPTION_CONTEXT); + } + if (encryptionContext == null) { + // no encryption context, return "" + return ""; + } + return encryptionContext; + } + + /** + * Get any SSE context from a configuration/credential provider. + * This includes converting the values to a base64-encoded UTF-8 string + * holding JSON with the encryption context key-value pairs + * @param bucket bucket to query for + * @param conf configuration to examine + * @param propagateExceptions should IO exceptions be rethrown? + * @return the Base64 encryption context or "" + * @throws IllegalArgumentException bad arguments. 
+ * @throws IOException if propagateExceptions==true and reading a JCEKS file raised an IOE + */ + public static String getS3EncryptionContextBase64Encoded( + String bucket, + Configuration conf, + boolean propagateExceptions) throws IOException { + try { + final String encryptionContextValue = getS3EncryptionContext(bucket, conf); + if (StringUtils.isBlank(encryptionContextValue)) { + return ""; + } + final Map encryptionContextMap = S3AUtils + .getTrimmedStringCollectionSplitByEquals(encryptionContextValue); + if (encryptionContextMap.isEmpty()) { + return ""; + } + final String encryptionContextJson = new ObjectMapper().writeValueAsString( + encryptionContextMap); + return Base64.encodeBase64String(encryptionContextJson.getBytes(StandardCharsets.UTF_8)); + } catch (IOException e) { + if (propagateExceptions) { + throw e; + } + LOG.warn("Cannot retrieve {} for bucket {}", + S3_ENCRYPTION_CONTEXT, bucket, e); + return ""; + } + } +} diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md index 42ef91c032ba8..7b9e8d0412efd 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md @@ -243,6 +243,21 @@ The ID of the specific key used to encrypt the data should also be set in the pr ``` +Optionally, you can specify the encryption context in the property `fs.s3a.encryption.context`: + +```xml + + fs.s3a.encryption.context + + key1=value1, + key2=value2, + key3=value3, + key4=value4, + key5=value5 + + +``` + Organizations may define a default key in the Amazon KMS; if a default key is set, then it will be used whenever SSE-KMS encryption is chosen and the value of `fs.s3a.encryption.key` is empty. @@ -378,6 +393,21 @@ The ID of the specific key used to encrypt the data should also be set in the pr ``` +Optionally, you can specify the encryption context in the property `fs.s3a.encryption.context`: + +```xml + + fs.s3a.encryption.context + + key1=value1, + key2=value2, + key3=value3, + key4=value4, + key5=value5 + + +``` + Organizations may define a default key in the Amazon KMS; if a default key is set, then it will be used whenever SSE-KMS encryption is chosen and the value of `fs.s3a.encryption.key` is empty. diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 7412a4cebcc4f..1b4b2e8b21b38 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -625,6 +625,15 @@ Here are some the S3A properties for use in production. + + fs.s3a.encryption.context + Specific encryption context to use if fs.s3a.encryption.algorithm + has been set to 'SSE-KMS' or 'DSSE-KMS'. The value of this property is a set + of non-secret comma-separated key-value pairs of additional contextual + information about the data that are separated by equal operator (=). 
+ + + fs.s3a.signing-algorithm Override the default signing algorithm so legacy @@ -1294,6 +1303,11 @@ For a site configuration of: unset + + fs.s3a.encryption.context + unset + + ``` diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractTestS3AEncryption.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractTestS3AEncryption.java index 3a3d82d94ffaf..55cebeab8ef32 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractTestS3AEncryption.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractTestS3AEncryption.java @@ -30,6 +30,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.*; import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_ALGORITHM; +import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_CONTEXT; import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_KEY; import static org.apache.hadoop.fs.s3a.Constants.SERVER_SIDE_ENCRYPTION_ALGORITHM; import static org.apache.hadoop.fs.s3a.Constants.SERVER_SIDE_ENCRYPTION_KEY; @@ -69,6 +70,7 @@ protected void patchConfigurationEncryptionSettings( removeBaseAndBucketOverrides(conf, S3_ENCRYPTION_ALGORITHM, S3_ENCRYPTION_KEY, + S3_ENCRYPTION_CONTEXT, SERVER_SIDE_ENCRYPTION_ALGORITHM, SERVER_SIDE_ENCRYPTION_KEY); conf.set(S3_ENCRYPTION_ALGORITHM, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSWithEncryptionContext.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSWithEncryptionContext.java new file mode 100644 index 0000000000000..c3d4cd41fc5ae --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSWithEncryptionContext.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Set; + +import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableSet; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.impl.S3AEncryption; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; +import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_CONTEXT; +import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_KEY; +import static org.apache.hadoop.fs.s3a.S3AEncryptionMethods.DSSE_KMS; +import static org.apache.hadoop.fs.s3a.S3AEncryptionMethods.SSE_KMS; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; + +/** + * Concrete class that extends {@link AbstractTestS3AEncryption} + * and tests KMS encryption with encryption context. + * S3's HeadObject doesn't return the object's encryption context. + * Therefore, we don't have a way to assert its value in code. + * In order to properly test if the encryption context is being set, + * the KMS key or the IAM User need to have a deny statements like the one below in the policy: + *

+ * {
+ *     "Effect": "Deny",
+ *     "Principal": {
+ *         "AWS": "*"
+ *     },
+ *     "Action": "kms:Decrypt",
+ *     "Resource": "*",
+ *     "Condition": {
+ *         "StringNotEquals": {
+ *             "kms:EncryptionContext:project": "hadoop"
+ *         }
+ *     }
+ * }
+ *
+ * With the statement above, S3A will fail to read the object from S3 if it was encrypted + * without the key-pair "project": "hadoop" in the encryption context. + */ +public class ITestS3AEncryptionSSEKMSWithEncryptionContext + extends AbstractTestS3AEncryption { + + private static final Set KMS_ENCRYPTION_ALGORITHMS = ImmutableSet.of( + SSE_KMS, DSSE_KMS); + + private S3AEncryptionMethods encryptionAlgorithm; + + @Override + protected Configuration createConfiguration() { + try { + // get the KMS key and context for this test. + Configuration c = new Configuration(); + final String bucketName = getTestBucketName(c); + String kmsKey = S3AUtils.getS3EncryptionKey(bucketName, c); + String encryptionContext = S3AEncryption.getS3EncryptionContext(bucketName, c); + encryptionAlgorithm = S3AUtils.getEncryptionAlgorithm(bucketName, c); + assume("Expected a KMS encryption algorithm", + KMS_ENCRYPTION_ALGORITHMS.contains(encryptionAlgorithm)); + if (StringUtils.isBlank(encryptionContext)) { + skip(S3_ENCRYPTION_CONTEXT + " is not set."); + } + Configuration conf = super.createConfiguration(); + S3ATestUtils.removeBaseAndBucketOverrides(conf, S3_ENCRYPTION_KEY, S3_ENCRYPTION_CONTEXT); + conf.set(S3_ENCRYPTION_KEY, kmsKey); + conf.set(S3_ENCRYPTION_CONTEXT, encryptionContext); + return conf; + + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + protected S3AEncryptionMethods getSSEAlgorithm() { + return encryptionAlgorithm; + } +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestSSEConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestSSEConfiguration.java index 6985fa44c3bda..dcda68155195e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestSSEConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestSSEConfiguration.java @@ -29,9 +29,11 @@ import org.junit.rules.Timeout; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.impl.S3AEncryption; import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.security.alias.CredentialProvider; import org.apache.hadoop.security.alias.CredentialProviderFactory; +import org.apache.hadoop.util.StringUtils; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3AEncryptionMethods.*; @@ -48,6 +50,9 @@ public class TestSSEConfiguration extends Assert { /** Bucket to use for per-bucket options. */ public static final String BUCKET = "dataset-1"; + /** Valid set of key/value pairs for the encryption context. 
*/ + private static final String VALID_ENCRYPTION_CONTEXT = "key1=value1, key2=value2, key3=value3"; + @Rule public Timeout testTimeout = new Timeout( S3ATestConstants.S3A_TEST_TIMEOUT @@ -58,41 +63,41 @@ public class TestSSEConfiguration extends Assert { @Test public void testSSECNoKey() throws Throwable { - assertGetAlgorithmFails(SSE_C_NO_KEY_ERROR, SSE_C.getMethod(), null); + assertGetAlgorithmFails(SSE_C_NO_KEY_ERROR, SSE_C.getMethod(), null, null); } @Test public void testSSECBlankKey() throws Throwable { - assertGetAlgorithmFails(SSE_C_NO_KEY_ERROR, SSE_C.getMethod(), ""); + assertGetAlgorithmFails(SSE_C_NO_KEY_ERROR, SSE_C.getMethod(), "", null); } @Test public void testSSECGoodKey() throws Throwable { - assertEquals(SSE_C, getAlgorithm(SSE_C, "sseckey")); + assertEquals(SSE_C, getAlgorithm(SSE_C, "sseckey", null)); } @Test public void testKMSGoodKey() throws Throwable { - assertEquals(SSE_KMS, getAlgorithm(SSE_KMS, "kmskey")); + assertEquals(SSE_KMS, getAlgorithm(SSE_KMS, "kmskey", null)); } @Test public void testAESKeySet() throws Throwable { assertGetAlgorithmFails(SSE_S3_WITH_KEY_ERROR, - SSE_S3.getMethod(), "setkey"); + SSE_S3.getMethod(), "setkey", null); } @Test public void testSSEEmptyKey() { // test the internal logic of the test setup code - Configuration c = buildConf(SSE_C.getMethod(), ""); + Configuration c = buildConf(SSE_C.getMethod(), "", null); assertEquals("", getS3EncryptionKey(BUCKET, c)); } @Test public void testSSEKeyNull() throws Throwable { // test the internal logic of the test setup code - final Configuration c = buildConf(SSE_C.getMethod(), null); + final Configuration c = buildConf(SSE_C.getMethod(), null, null); assertEquals("", getS3EncryptionKey(BUCKET, c)); intercept(IOException.class, SSE_C_NO_KEY_ERROR, @@ -147,28 +152,30 @@ void setProviderOption(final Configuration conf, } /** - * Assert that the exception text from {@link #getAlgorithm(String, String)} + * Assert that the exception text from {@link #getAlgorithm(String, String, String)} * is as expected. * @param expected expected substring in error * @param alg algorithm to ask for * @param key optional key value + * @param context optional encryption context value * @throws Exception anything else which gets raised */ public void assertGetAlgorithmFails(String expected, - final String alg, final String key) throws Exception { + final String alg, final String key, final String context) throws Exception { intercept(IOException.class, expected, - () -> getAlgorithm(alg, key)); + () -> getAlgorithm(alg, key, context)); } private S3AEncryptionMethods getAlgorithm(S3AEncryptionMethods algorithm, - String key) + String key, + String encryptionContext) throws IOException { - return getAlgorithm(algorithm.getMethod(), key); + return getAlgorithm(algorithm.getMethod(), key, encryptionContext); } - private S3AEncryptionMethods getAlgorithm(String algorithm, String key) + private S3AEncryptionMethods getAlgorithm(String algorithm, String key, String encryptionContext) throws IOException { - return getEncryptionAlgorithm(BUCKET, buildConf(algorithm, key)); + return getEncryptionAlgorithm(BUCKET, buildConf(algorithm, key, encryptionContext)); } /** @@ -176,10 +183,11 @@ private S3AEncryptionMethods getAlgorithm(String algorithm, String key) * and key. * @param algorithm algorithm to use, may be null * @param key key, may be null + * @param encryptionContext encryption context, may be null * @return the new config. 
*/ @SuppressWarnings("deprecation") - private Configuration buildConf(String algorithm, String key) { + private Configuration buildConf(String algorithm, String key, String encryptionContext) { Configuration conf = emptyConf(); if (algorithm != null) { conf.set(Constants.S3_ENCRYPTION_ALGORITHM, algorithm); @@ -193,6 +201,11 @@ private Configuration buildConf(String algorithm, String key) { conf.unset(SERVER_SIDE_ENCRYPTION_KEY); conf.unset(Constants.S3_ENCRYPTION_KEY); } + if (encryptionContext != null) { + conf.set(S3_ENCRYPTION_CONTEXT, encryptionContext); + } else { + conf.unset(S3_ENCRYPTION_CONTEXT); + } return conf; } @@ -308,4 +321,30 @@ public void testNoEncryptionMethod() throws Throwable { assertEquals(NONE, getMethod(" ")); } + @Test + public void testGoodEncryptionContext() throws Throwable { + assertEquals(SSE_KMS, getAlgorithm(SSE_KMS, "kmskey", VALID_ENCRYPTION_CONTEXT)); + } + + @Test + public void testSSEEmptyEncryptionContext() throws Throwable { + // test the internal logic of the test setup code + Configuration c = buildConf(SSE_KMS.getMethod(), "kmskey", ""); + assertEquals("", S3AEncryption.getS3EncryptionContext(BUCKET, c)); + } + + @Test + public void testSSEEncryptionContextNull() throws Throwable { + // test the internal logic of the test setup code + final Configuration c = buildConf(SSE_KMS.getMethod(), "kmskey", null); + assertEquals("", S3AEncryption.getS3EncryptionContext(BUCKET, c)); + } + + @Test + public void testSSEInvalidEncryptionContext() throws Throwable { + intercept(IllegalArgumentException.class, + StringUtils.STRING_COLLECTION_SPLIT_EQUALS_INVALID_ARG, + () -> getAlgorithm(SSE_KMS.getMethod(), "kmskey", "invalid context")); + } + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java index b9d547635f7f3..71f22f4314f4f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/TestMarshalledCredentials.java @@ -80,7 +80,8 @@ public void testRoundTripNoSessionData() throws Throwable { public void testRoundTripEncryptionData() throws Throwable { EncryptionSecrets secrets = new EncryptionSecrets( S3AEncryptionMethods.SSE_KMS, - "key"); + "key", + "encryptionContext"); EncryptionSecrets result = S3ATestUtils.roundTrip(secrets, new Configuration()); assertEquals("round trip", secrets, result); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java index efc775966859d..b58ca24aaa832 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java @@ -116,7 +116,7 @@ public void testCanonicalization() throws Throwable { public void testSaveLoadTokens() throws Throwable { File tokenFile = File.createTempFile("token", "bin"); EncryptionSecrets encryptionSecrets = new EncryptionSecrets( - S3AEncryptionMethods.SSE_KMS, KMS_KEY); + S3AEncryptionMethods.SSE_KMS, KMS_KEY, ""); Token dt = delegationTokens.createDelegationToken(encryptionSecrets, null); final SessionTokenIdentifier origIdentifier @@ -171,7 +171,7 @@ public 
void testCreateAndUseDT() throws Throwable { assertNull("Current User has delegation token", delegationTokens.selectTokenFromFSOwner()); EncryptionSecrets secrets = new EncryptionSecrets( - S3AEncryptionMethods.SSE_KMS, KMS_KEY); + S3AEncryptionMethods.SSE_KMS, KMS_KEY, ""); Token originalDT = delegationTokens.createDelegationToken(secrets, null); assertEquals("Token kind mismatch", getTokenKind(), originalDT.getKind()); @@ -229,7 +229,7 @@ public void testCreateWithRenewer() throws Throwable { assertNull("Current User has delegation token", delegationTokens.selectTokenFromFSOwner()); EncryptionSecrets secrets = new EncryptionSecrets( - S3AEncryptionMethods.SSE_KMS, KMS_KEY); + S3AEncryptionMethods.SSE_KMS, KMS_KEY, ""); Token dt = delegationTokens.createDelegationToken(secrets, renewer); assertEquals("Token kind mismatch", getTokenKind(), dt.getKind()); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/TestS3ADelegationTokenSupport.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/TestS3ADelegationTokenSupport.java index af306cc5a9a5f..a06e9ac62ff71 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/TestS3ADelegationTokenSupport.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/TestS3ADelegationTokenSupport.java @@ -19,10 +19,12 @@ package org.apache.hadoop.fs.s3a.auth.delegation; import java.net.URI; +import java.nio.charset.StandardCharsets; import org.junit.BeforeClass; import org.junit.Test; +import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding; @@ -70,13 +72,17 @@ public void testSessionTokenIssueDate() throws Throwable { public void testSessionTokenDecode() throws Throwable { Text alice = new Text("alice"); Text renewer = new Text("yarn"); + String encryptionKey = "encryptionKey"; + String encryptionContextJson = "{\"key\":\"value\", \"key2\": \"value3\"}"; + String encryptionContextEncoded = Base64.encodeBase64String(encryptionContextJson.getBytes( + StandardCharsets.UTF_8)); AbstractS3ATokenIdentifier identifier = new SessionTokenIdentifier(SESSION_TOKEN_KIND, alice, renewer, new URI("s3a://anything/"), new MarshalledCredentials("a", "b", ""), - new EncryptionSecrets(S3AEncryptionMethods.SSE_S3, ""), + new EncryptionSecrets(S3AEncryptionMethods.SSE_S3, encryptionKey, encryptionContextEncoded), "origin"); Token t1 = new Token<>(identifier, @@ -100,6 +106,10 @@ public void testSessionTokenDecode() throws Throwable { assertEquals("origin", decoded.getOrigin()); assertEquals("issue date", identifier.getIssueDate(), decoded.getIssueDate()); + EncryptionSecrets encryptionSecrets = decoded.getEncryptionSecrets(); + assertEquals(S3AEncryptionMethods.SSE_S3, encryptionSecrets.getEncryptionMethod()); + assertEquals(encryptionKey, encryptionSecrets.getEncryptionKey()); + assertEquals(encryptionContextEncoded, encryptionSecrets.getEncryptionContext()); } @Test @@ -112,13 +122,19 @@ public void testFullTokenKind() throws Throwable { @Test public void testSessionTokenIdentifierRoundTrip() throws Throwable { Text renewer = new Text("yarn"); + String encryptionKey = "encryptionKey"; + String encryptionContextJson = "{\"key\":\"value\", \"key2\": \"value3\"}"; + String encryptionContextEncoded = Base64.encodeBase64String(encryptionContextJson.getBytes( + StandardCharsets.UTF_8)); 
SessionTokenIdentifier id = new SessionTokenIdentifier( SESSION_TOKEN_KIND, new Text(), renewer, externalUri, new MarshalledCredentials("a", "b", "c"), - new EncryptionSecrets(), ""); + new EncryptionSecrets(S3AEncryptionMethods.DSSE_KMS, encryptionKey, + encryptionContextEncoded), + ""); SessionTokenIdentifier result = S3ATestUtils.roundTrip(id, null); String ids = id.toString(); @@ -127,6 +143,10 @@ public void testSessionTokenIdentifierRoundTrip() throws Throwable { id.getMarshalledCredentials(), result.getMarshalledCredentials()); assertEquals("renewer in " + ids, renewer, id.getRenewer()); + EncryptionSecrets encryptionSecrets = result.getEncryptionSecrets(); + assertEquals(S3AEncryptionMethods.DSSE_KMS, encryptionSecrets.getEncryptionMethod()); + assertEquals(encryptionKey, encryptionSecrets.getEncryptionKey()); + assertEquals(encryptionContextEncoded, encryptionSecrets.getEncryptionContext()); } @Test diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java index f5e91fae2a33e..9fee2fd63a0ef 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestRequestFactory.java @@ -70,7 +70,7 @@ public void testRequestFactoryWithEncryption() throws Throwable { .withBucket("bucket") .withEncryptionSecrets( new EncryptionSecrets(S3AEncryptionMethods.SSE_KMS, - "kms:key")) + "kms:key", "")) .build(); createFactoryObjects(factory); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestS3AEncryption.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestS3AEncryption.java new file mode 100644 index 0000000000000..a9d83819fda56 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestS3AEncryption.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Map; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.junit.Assert; +import org.junit.Test; + +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.conf.Configuration; + +import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_CONTEXT; + +public class TestS3AEncryption { + + private static final String GLOBAL_CONTEXT = " project=hadoop, jira=HADOOP-19197 "; + private static final String BUCKET_CONTEXT = "component=fs/s3"; + + @Test + public void testGetS3EncryptionContextPerBucket() throws IOException { + Configuration configuration = new Configuration(false); + configuration.set("fs.s3a.bucket.bucket1.encryption.context", BUCKET_CONTEXT); + configuration.set(S3_ENCRYPTION_CONTEXT, GLOBAL_CONTEXT); + final String result = S3AEncryption.getS3EncryptionContext("bucket1", configuration); + Assert.assertEquals(BUCKET_CONTEXT, result); + } + + @Test + public void testGetS3EncryptionContextFromGlobal() throws IOException { + Configuration configuration = new Configuration(false); + configuration.set("fs.s3a.bucket.bucket1.encryption.context", BUCKET_CONTEXT); + configuration.set(S3_ENCRYPTION_CONTEXT, GLOBAL_CONTEXT); + final String result = S3AEncryption.getS3EncryptionContext("bucket2", configuration); + Assert.assertEquals(GLOBAL_CONTEXT.trim(), result); + } + + @Test + public void testGetS3EncryptionContextNoSet() throws IOException { + Configuration configuration = new Configuration(false); + final String result = S3AEncryption.getS3EncryptionContext("bucket1", configuration); + Assert.assertEquals("", result); + } + + @Test + public void testGetS3EncryptionContextBase64Encoded() throws IOException { + Configuration configuration = new Configuration(false); + configuration.set(S3_ENCRYPTION_CONTEXT, GLOBAL_CONTEXT); + final String result = S3AEncryption.getS3EncryptionContextBase64Encoded("bucket", + configuration, true); + final String decoded = new String(Base64.decodeBase64(result), StandardCharsets.UTF_8); + final TypeReference> typeRef = new TypeReference>() {}; + final Map resultMap = new ObjectMapper().readValue(decoded, typeRef); + Assert.assertEquals("hadoop", resultMap.get("project")); + Assert.assertEquals("HADOOP-19197", resultMap.get("jira")); + } +} From a5806a9e7bc6d018de84e6511f10c359f110f78c Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 29 Jul 2024 11:33:51 +0100 Subject: [PATCH 071/113] HADOOP-19161. S3A: option "fs.s3a.performance.flags" to take list of performance flags (#6789) 1. Configuration adds new method `getEnumSet()` to get a set of enum values from a configuration string. > EnumSet getEnumSet(String key, Class enumClass, boolean ignoreUnknown) Whitespace is ignored, case is ignored and the value "*" is mapped to "all values of the enum". If "ignoreUnknown" is true then when parsing, unknown values are ignored. This is recommended for forward compatiblity with later versions. 2. 
This support is implemented in org.apache.hadoop.fs.s3a.impl.ConfigurationHelper -it can be used elsewhere in the hadoop codebase. 3. A new private FlagSet class in hadoop common manages a set of enum flags. It implements StreamCapabilities and can be probed for a specific option being set (with a prefix) S3A adds an option fs.s3a.performance.flags which builds a FlagSet with enum type PerformanceFlagEnum * which initially contains {Create, Delete, Mkdir, Open} * the existing fs.s3a.create.performance option sets the flag "Create". * tests which configure fs.s3a.create.performance MUST clear fs.s3a.performance.flags in test setup. Future performance flags are planned, with different levels of safety and/or backwards compatibility. Contributed by Steve Loughran --- .../org/apache/hadoop/conf/Configuration.java | 22 + .../org/apache/hadoop/fs/impl/FlagSet.java | 327 +++++++++++++ .../hadoop/util/ConfigurationHelper.java | 126 +++++ .../apache/hadoop/fs/impl/TestFlagSet.java | 431 ++++++++++++++++++ .../hadoop/util/TestConfigurationHelper.java | 174 +++++++ .../org/apache/hadoop/fs/s3a/Constants.java | 5 + .../apache/hadoop/fs/s3a/S3AFileSystem.java | 82 +++- .../fs/s3a/api/PerformanceFlagEnum.java | 51 +++ .../hadoop/fs/s3a/impl/StoreContext.java | 19 +- .../fs/s3a/impl/StoreContextBuilder.java | 17 +- .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 9 +- .../markdown/tools/hadoop-aws/performance.md | 110 +++-- .../contract/s3a/ITestS3AContractCreate.java | 4 +- .../fs/s3a/ITestS3AFileOperationCost.java | 5 +- .../apache/hadoop/fs/s3a/S3ATestUtils.java | 5 + .../fs/s3a/impl/ITestConnectionTimeouts.java | 4 +- .../s3a/performance/AbstractS3ACostTest.java | 3 +- .../s3a/performance/ITestCreateFileCost.java | 4 +- .../ITestDirectoryMarkerListing.java | 4 +- .../s3a/performance/ITestS3ADeleteCost.java | 5 +- .../fs/s3a/tools/AbstractMarkerToolTest.java | 3 +- 21 files changed, 1350 insertions(+), 60 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FlagSet.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ConfigurationHelper.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestFlagSet.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestConfigurationHelper.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/PerformanceFlagEnum.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 8fc3a696c4aa5..94285a4dfb7e5 100755 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -49,6 +49,7 @@ import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.EnumSet; import java.util.Enumeration; import java.util.HashMap; import java.util.HashSet; @@ -99,6 +100,7 @@ import org.apache.hadoop.security.alias.CredentialProvider.CredentialEntry; import org.apache.hadoop.security.alias.CredentialProviderFactory; import org.apache.hadoop.thirdparty.com.google.common.base.Strings; +import org.apache.hadoop.util.ConfigurationHelper; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.util.ReflectionUtils; import 
org.apache.hadoop.util.StringInterner; @@ -1786,6 +1788,26 @@ public > T getEnum(String name, T defaultValue) { : Enum.valueOf(defaultValue.getDeclaringClass(), val); } + /** + * Build an enumset from a comma separated list of values. + * Case independent. + * Special handling of "*" meaning: all values. + * @param key key to look for + * @param enumClass class of enum + * @param ignoreUnknown should unknown values raise an exception? + * @return a mutable set of the identified enum values declared in the configuration + * @param enumeration type + * @throws IllegalArgumentException if one of the entries was unknown and ignoreUnknown is false, + * or there are two entries in the enum which differ only by case. + */ + public > EnumSet getEnumSet( + final String key, + final Class enumClass, + final boolean ignoreUnknown) throws IllegalArgumentException { + final String value = get(key, ""); + return ConfigurationHelper.parseEnumSet(key, value, enumClass, ignoreUnknown); + } + enum ParsedTimeDuration { NS { TimeUnit unit() { return TimeUnit.NANOSECONDS; } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FlagSet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FlagSet.java new file mode 100644 index 0000000000000..4ca4d36918ef0 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FlagSet.java @@ -0,0 +1,327 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.impl; + +import java.util.Arrays; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import javax.annotation.Nullable; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.StreamCapabilities; +import org.apache.hadoop.util.ConfigurationHelper; +import org.apache.hadoop.util.Preconditions; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.util.ConfigurationHelper.mapEnumNamesToValues; + +/** + * A set of flags, constructed from a configuration option or from a string, + * with the semantics of + * {@link ConfigurationHelper#parseEnumSet(String, String, Class, boolean)} + * and implementing {@link StreamCapabilities}. + *

+ * Thread safety: there is no synchronization on a mutable {@code FlagSet}. + * Once declared immutable, flags cannot be changed, so they + * become implicitly thread-safe. + */ +public final class FlagSet<E extends Enum<E>> implements StreamCapabilities { + + /** + * Class of the enum. + * Used for duplicating the flags as java type erasure + * loses this information otherwise. + */ + private final Class<E> enumClass; + + /** + * Prefix for path capabilities probe. + */ + private final String prefix; + + /** + * Set of flags. + */ + private final EnumSet<E> flags; + + /** + * Is the set immutable? + */ + private final AtomicBoolean immutable = new AtomicBoolean(false); + + /** + * Mapping of prefixed flag names to enum values. + */ + private final Map<String, E> namesToValues; + + /** + * Create a FlagSet. + * @param enumClass class of enum + * @param prefix prefix (with trailing ".") for path capabilities probe + * @param flags flags. A copy of these is made. + */ + private FlagSet(final Class<E> enumClass, + final String prefix, + @Nullable final EnumSet<E> flags) { + this.enumClass = requireNonNull(enumClass, "null enumClass"); + this.prefix = requireNonNull(prefix, "null prefix"); + this.flags = flags != null + ? EnumSet.copyOf(flags) + : EnumSet.noneOf(enumClass); + this.namesToValues = mapEnumNamesToValues(prefix, enumClass); + } + + /** + * Get a copy of the flags. + *

+ * This is immutable. + * @return the flags. + */ + public EnumSet flags() { + return EnumSet.copyOf(flags); + } + + /** + * Probe for the FlagSet being empty. + * @return true if there are no flags set. + */ + public boolean isEmpty() { + return flags.isEmpty(); + } + + /** + * Is a flag enabled? + * @param flag flag to check + * @return true if it is in the set of enabled flags. + */ + public boolean enabled(final E flag) { + return flags.contains(flag); + } + + /** + * Check for mutability before any mutating operation. + * @throws IllegalStateException if the set is still mutable + */ + private void checkMutable() { + Preconditions.checkState(!immutable.get(), + "FlagSet is immutable"); + } + + /** + * Enable a flag. + * @param flag flag to enable. + */ + public void enable(final E flag) { + checkMutable(); + flags.add(flag); + } + + /** + * Disable a flag. + * @param flag flag to disable + */ + public void disable(final E flag) { + checkMutable(); + flags.remove(flag); + } + + /** + * Set a flag to the chosen value. + * @param flag flag + * @param state true to enable, false to disable. + */ + public void set(final E flag, boolean state) { + if (state) { + enable(flag); + } else { + disable(flag); + } + } + + /** + * Is a flag enabled? + * @param capability string to query the stream support for. + * @return true if the capability maps to an enum value and + * that value is set. + */ + @Override + public boolean hasCapability(final String capability) { + final E e = namesToValues.get(capability); + return e != null && enabled(e); + } + + /** + * Make immutable; no-op if already set. + */ + public void makeImmutable() { + immutable.set(true); + } + + /** + * Is the FlagSet immutable? + * @return true iff the FlagSet is immutable. + */ + public boolean isImmutable() { + return immutable.get(); + } + + /** + * Get the enum class. + * @return the enum class. + */ + public Class getEnumClass() { + return enumClass; + } + + @Override + public String toString() { + return "{" + + (flags.stream() + .map(Enum::name) + .collect(Collectors.joining(", "))) + + "}"; + } + + /** + * Generate the list of capabilities. + * @return a possibly empty list. + */ + public List pathCapabilities() { + return namesToValues.keySet().stream() + .filter(this::hasCapability) + .collect(Collectors.toList()); + } + + /** + * Equality is based on the value of {@link #enumClass} and + * {@link #prefix} and the contents of the set, which must match. + *

+ * The immutability flag is not considered, nor is the + * {@link #namesToValues} map, though as that is generated from + * the enumeration and prefix, it is implicitly equal if the prefix + * and enumClass fields are equal. + * @param o other object + * @return true iff the equality condition is met. + */ + @Override + public boolean equals(final Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + FlagSet flagSet = (FlagSet) o; + return Objects.equals(enumClass, flagSet.enumClass) + && Objects.equals(prefix, flagSet.prefix) + && Objects.equals(flags, flagSet.flags); + } + + /** + * Hash code is based on the flags. + * @return a hash code. + */ + @Override + public int hashCode() { + return Objects.hashCode(flags); + } + + /** + * Create a copy of the FlagSet. + * @return a new mutable instance with a separate copy of the flags + */ + public FlagSet copy() { + return new FlagSet<>(enumClass, prefix, flags); + } + + /** + * Convert to a string which can be then set in a configuration. + * This is effectively a marshalled form of the flags. + * @return a comma separated list of flag names. + */ + public String toConfigurationString() { + return flags.stream() + .map(Enum::name) + .collect(Collectors.joining(", ")); + } + + /** + * Create a FlagSet. + * @param enumClass class of enum + * @param prefix prefix (with trailing ".") for path capabilities probe + * @param flags flags + * @param enum type + * @return a mutable FlagSet + */ + public static > FlagSet createFlagSet( + final Class enumClass, + final String prefix, + final EnumSet flags) { + return new FlagSet<>(enumClass, prefix, flags); + } + + /** + * Create a FlagSet from a list of enum values. + * @param enumClass class of enum + * @param prefix prefix (with trailing ".") for path capabilities probe + * @param enabled varags list of flags to enable. + * @param enum type + * @return a mutable FlagSet + */ + @SafeVarargs + public static > FlagSet createFlagSet( + final Class enumClass, + final String prefix, + final E... enabled) { + final FlagSet flagSet = new FlagSet<>(enumClass, prefix, null); + Arrays.stream(enabled).forEach(flag -> { + if (flag != null) { + flagSet.enable(flag); + } + }); + return flagSet; + } + + /** + * Build a FlagSet from a comma separated list of values. + * Case independent. + * Special handling of "*" meaning: all values. + * @param enumClass class of enum + * @param conf configuration + * @param key key to look for + * @param ignoreUnknown should unknown values raise an exception? + * @param enumeration type + * @return a mutable FlagSet + * @throws IllegalArgumentException if one of the entries was unknown and ignoreUnknown is false, + * or there are two entries in the enum which differ only by case. 
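A minimal usage sketch of the factory methods above (illustrative only; `PerformanceFlagEnum` is the S3A enum added later in this patch, and the prefix value is the one S3A uses):

    FlagSet<PerformanceFlagEnum> flags = FlagSet.createFlagSet(
        PerformanceFlagEnum.class,
        "fs.s3a.performance.flags.",              // prefix used for capability probes
        PerformanceFlagEnum.Create);
    flags.enable(PerformanceFlagEnum.Open);
    flags.makeImmutable();                        // enable()/disable()/set() now fail
    flags.hasCapability("fs.s3a.performance.flags.open");   // true
    flags.toConfigurationString();                // "Create, Open"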
+ */ + public static > FlagSet buildFlagSet( + final Class enumClass, + final Configuration conf, + final String key, + final boolean ignoreUnknown) { + final EnumSet flags = conf.getEnumSet(key, enumClass, ignoreUnknown); + return createFlagSet(enumClass, key + ".", flags); + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ConfigurationHelper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ConfigurationHelper.java new file mode 100644 index 0000000000000..db39bb363238b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ConfigurationHelper.java @@ -0,0 +1,126 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util; + +import java.util.EnumSet; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.conf.Configuration; + +import static java.util.EnumSet.noneOf; +import static org.apache.hadoop.util.Preconditions.checkArgument; +import static org.apache.hadoop.util.StringUtils.getTrimmedStringCollection; + +/** + * Configuration Helper class to provide advanced configuration parsing. + * Private; external code MUST use {@link Configuration} instead + */ +@InterfaceAudience.Private +public final class ConfigurationHelper { + + /** + * Error string if there are multiple enum elements which only differ + * by case: {@value}. + */ + @VisibleForTesting + static final String ERROR_MULTIPLE_ELEMENTS_MATCHING_TO_LOWER_CASE_VALUE = + "has multiple elements matching to lower case value"; + + private ConfigurationHelper() { + } + + /** + * Given a comma separated list of enum values, + * trim the list, map to enum values in the message (case insensitive) + * and return the set. + * Special handling of "*" meaning: all values. + * @param key Configuration object key -used in error messages. + * @param valueString value from Configuration + * @param enumClass class of enum + * @param ignoreUnknown should unknown values be ignored? + * @param enum type + * @return a mutable set of enum values parsed from the valueString, with any unknown + * matches stripped if {@code ignoreUnknown} is true. + * @throws IllegalArgumentException if one of the entries was unknown and ignoreUnknown is false, + * or there are two entries in the enum which differ only by case. 
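A sketch of the parsing semantics documented above: case independent, whitespace trimmed, "*" expands to every constant, and unknown entries are tolerated only when `ignoreUnknown` is true. The key and value strings here are illustrative:

    EnumSet<PerformanceFlagEnum> set = ConfigurationHelper.parseEnumSet(
        "fs.s3a.performance.flags",     // key, only used in error messages
        "Create , delete, unknown",     // raw configuration value
        PerformanceFlagEnum.class,
        true);                          // -> {Create, Delete}; "unknown" is dropped
    // the same parsing backs Configuration.getEnumSet(key, enumClass, ignoreUnknown)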
+ */ + @SuppressWarnings("unchecked") + public static > EnumSet parseEnumSet(final String key, + final String valueString, + final Class enumClass, + final boolean ignoreUnknown) throws IllegalArgumentException { + + // build a map of lower case string to enum values. + final Map mapping = mapEnumNamesToValues("", enumClass); + + // scan the input string and add all which match + final EnumSet enumSet = noneOf(enumClass); + for (String element : getTrimmedStringCollection(valueString)) { + final String item = element.toLowerCase(Locale.ROOT); + if ("*".equals(item)) { + enumSet.addAll(mapping.values()); + continue; + } + final E e = mapping.get(item); + if (e != null) { + enumSet.add(e); + } else { + // no match + // unless configured to ignore unknown values, raise an exception + checkArgument(ignoreUnknown, "%s: Unknown option value: %s in list %s." + + " Valid options for enum class %s are: %s", + key, element, valueString, + enumClass.getName(), + mapping.keySet().stream().collect(Collectors.joining(","))); + } + } + return enumSet; + } + + /** + * Given an enum class, build a map of lower case names to values. + * @param prefix prefix (with trailing ".") for path capabilities probe + * @param enumClass class of enum + * @param enum type + * @return a mutable map of lower case names to enum values + * @throws IllegalArgumentException if there are two entries which differ only by case. + */ + public static > Map mapEnumNamesToValues( + final String prefix, + final Class enumClass) { + final E[] constants = enumClass.getEnumConstants(); + Map mapping = new HashMap<>(constants.length); + for (E constant : constants) { + final String lc = constant.name().toLowerCase(Locale.ROOT); + final E orig = mapping.put(prefix + lc, constant); + checkArgument(orig == null, + "Enum %s " + + ERROR_MULTIPLE_ELEMENTS_MATCHING_TO_LOWER_CASE_VALUE + + " %s", + enumClass, lc); + } + return mapping; + } + +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestFlagSet.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestFlagSet.java new file mode 100644 index 0000000000000..c0ee3bae0f411 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestFlagSet.java @@ -0,0 +1,431 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.fs.impl; + +import java.util.EnumSet; + +import org.assertj.core.api.Assertions; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static java.util.EnumSet.allOf; +import static java.util.EnumSet.noneOf; +import static org.apache.hadoop.fs.impl.FlagSet.buildFlagSet; +import static org.apache.hadoop.fs.impl.FlagSet.createFlagSet; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Unit tests for {@link FlagSet} class. + */ +public final class TestFlagSet extends AbstractHadoopTestBase { + + private static final String KEY = "key"; + + public static final String CAPABILITY_B = KEY + ".b"; + + public static final String CAPABILITY_C = KEY + ".c"; + + public static final String CAPABILITY_A = KEY + ".a"; + + private static final String KEYDOT = KEY + "."; + + /** + * Flagset used in tests and assertions. + */ + private FlagSet flagSet = + createFlagSet(SimpleEnum.class, KEYDOT, noneOf(SimpleEnum.class)); + + /** + * Simple Enums for the tests. + */ + private enum SimpleEnum { a, b, c } + + /** + * Enum with a single value. + */ + private enum OtherEnum { a } + + /** + * Test that an entry can be enabled and disabled. + */ + @Test + public void testEntryEnableDisable() { + Assertions.assertThat(flagSet.flags()).isEmpty(); + assertDisabled(SimpleEnum.a); + flagSet.enable(SimpleEnum.a); + assertEnabled(SimpleEnum.a); + flagSet.disable(SimpleEnum.a); + assertDisabled(SimpleEnum.a); + } + + /** + * Test the setter. + */ + @Test + public void testSetMethod() { + Assertions.assertThat(flagSet.flags()).isEmpty(); + flagSet.set(SimpleEnum.a, true); + assertEnabled(SimpleEnum.a); + flagSet.set(SimpleEnum.a, false); + assertDisabled(SimpleEnum.a); + } + + /** + * Test mutability by making immutable and + * expecting setters to fail. + */ + @Test + public void testMutability() throws Throwable { + flagSet.set(SimpleEnum.a, true); + flagSet.makeImmutable(); + intercept(IllegalStateException.class, () -> + flagSet.disable(SimpleEnum.a)); + assertEnabled(SimpleEnum.a); + intercept(IllegalStateException.class, () -> + flagSet.set(SimpleEnum.a, false)); + assertEnabled(SimpleEnum.a); + // now look at the setters + intercept(IllegalStateException.class, () -> + flagSet.enable(SimpleEnum.b)); + assertDisabled(SimpleEnum.b); + intercept(IllegalStateException.class, () -> + flagSet.set(SimpleEnum.b, true)); + assertDisabled(SimpleEnum.b); + } + + /** + * Test stringification. + */ + @Test + public void testToString() throws Throwable { + // empty + assertStringValue("{}"); + assertConfigurationStringMatches(""); + + // single value + flagSet.enable(SimpleEnum.a); + assertStringValue("{a}"); + assertConfigurationStringMatches("a"); + + // add a second value. + flagSet.enable(SimpleEnum.b); + assertStringValue("{a, b}"); + } + + /** + * Assert that {@link FlagSet#toString()} matches the expected + * value. + * @param expected expected value + */ + private void assertStringValue(final String expected) { + Assertions.assertThat(flagSet.toString()) + .isEqualTo(expected); + } + + /** + * Assert the configuration string form matches that expected. + */ + public void assertConfigurationStringMatches(final String expected) { + Assertions.assertThat(flagSet.toConfigurationString()) + .describedAs("Configuration string of %s", flagSet) + .isEqualTo(expected); + } + + /** + * Test parsing from a configuration file. + * Multiple entries must be parsed, whitespace trimmed. 
+ */ + @Test + public void testConfEntry() { + flagSet = flagSetFromConfig("a\t,\nc ", true); + assertFlagSetMatches(flagSet, SimpleEnum.a, SimpleEnum.c); + assertHasCapability(CAPABILITY_A); + assertHasCapability(CAPABILITY_C); + assertLacksCapability(CAPABILITY_B); + assertPathCapabilitiesMatch(flagSet, CAPABILITY_A, CAPABILITY_C); + } + + /** + * Create a flagset from a configuration string. + * @param string configuration string. + * @param ignoreUnknown should unknown values be ignored? + * @return a flagset + */ + private static FlagSet flagSetFromConfig(final String string, + final boolean ignoreUnknown) { + final Configuration conf = mkConf(string); + return buildFlagSet(SimpleEnum.class, conf, KEY, ignoreUnknown); + } + + /** + * Test parsing from a configuration file, + * where an entry is unknown; the builder is set to ignoreUnknown. + */ + @Test + public void testConfEntryWithUnknownIgnored() { + flagSet = flagSetFromConfig("a, unknown", true); + assertFlagSetMatches(flagSet, SimpleEnum.a); + assertHasCapability(CAPABILITY_A); + assertLacksCapability(CAPABILITY_B); + assertLacksCapability(CAPABILITY_C); + } + + /** + * Test parsing from a configuration file where + * the same entry is duplicated. + */ + @Test + public void testDuplicateConfEntry() { + flagSet = flagSetFromConfig("a,\ta,\na\"", true); + assertFlagSetMatches(flagSet, SimpleEnum.a); + assertHasCapability(CAPABILITY_A); + } + + /** + * Handle an unknown configuration value. + */ + @Test + public void testConfUnknownFailure() throws Throwable { + intercept(IllegalArgumentException.class, () -> + flagSetFromConfig("a, unknown", false)); + } + + /** + * Create a configuration with {@link #KEY} set to the given value. + * @param value value to set + * @return the configuration. + */ + private static Configuration mkConf(final String value) { + final Configuration conf = new Configuration(false); + conf.set(KEY, value); + return conf; + } + + /** + * Assert that the flagset has a capability. + * @param capability capability to probe for + */ + private void assertHasCapability(final String capability) { + Assertions.assertThat(flagSet.hasCapability(capability)) + .describedAs("Capability of %s on %s", capability, flagSet) + .isTrue(); + } + + /** + * Assert that the flagset lacks a capability. + * @param capability capability to probe for + */ + private void assertLacksCapability(final String capability) { + Assertions.assertThat(flagSet.hasCapability(capability)) + .describedAs("Capability of %s on %s", capability, flagSet) + .isFalse(); + } + + /** + * Test the * binding. 
+ */ + @Test + public void testStarEntry() { + flagSet = flagSetFromConfig("*", false); + assertFlags(SimpleEnum.a, SimpleEnum.b, SimpleEnum.c); + assertHasCapability(CAPABILITY_A); + assertHasCapability(CAPABILITY_B); + Assertions.assertThat(flagSet.pathCapabilities()) + .describedAs("path capabilities of %s", flagSet) + .containsExactlyInAnyOrder(CAPABILITY_A, CAPABILITY_B, CAPABILITY_C); + } + + @Test + public void testRoundTrip() { + final FlagSet s1 = createFlagSet(SimpleEnum.class, + KEYDOT, + allOf(SimpleEnum.class)); + final FlagSet s2 = roundTrip(s1); + Assertions.assertThat(s1.flags()).isEqualTo(s2.flags()); + assertFlagSetMatches(s2, SimpleEnum.a, SimpleEnum.b, SimpleEnum.c); + } + + @Test + public void testEmptyRoundTrip() { + final FlagSet s1 = createFlagSet(SimpleEnum.class, KEYDOT, + noneOf(SimpleEnum.class)); + final FlagSet s2 = roundTrip(s1); + Assertions.assertThat(s1.flags()) + .isEqualTo(s2.flags()); + Assertions.assertThat(s2.isEmpty()) + .describedAs("empty flagset %s", s2) + .isTrue(); + assertFlagSetMatches(flagSet); + Assertions.assertThat(flagSet.pathCapabilities()) + .describedAs("path capabilities of %s", flagSet) + .isEmpty(); + } + + @Test + public void testSetIsClone() { + final EnumSet flags = noneOf(SimpleEnum.class); + final FlagSet s1 = createFlagSet(SimpleEnum.class, KEYDOT, flags); + s1.enable(SimpleEnum.b); + + // set a source flag + flags.add(SimpleEnum.a); + + // verify the derived flagset is unchanged + assertFlagSetMatches(s1, SimpleEnum.b); + } + + @Test + public void testEquality() { + final FlagSet s1 = createFlagSet(SimpleEnum.class, KEYDOT, SimpleEnum.a); + final FlagSet s2 = createFlagSet(SimpleEnum.class, KEYDOT, SimpleEnum.a); + // make one of them immutable + s2.makeImmutable(); + Assertions.assertThat(s1) + .describedAs("s1 == s2") + .isEqualTo(s2); + Assertions.assertThat(s1.hashCode()) + .describedAs("hashcode of s1 == hashcode of s2") + .isEqualTo(s2.hashCode()); + } + + @Test + public void testInequality() { + final FlagSet s1 = + createFlagSet(SimpleEnum.class, KEYDOT, noneOf(SimpleEnum.class)); + final FlagSet s2 = + createFlagSet(SimpleEnum.class, KEYDOT, SimpleEnum.a, SimpleEnum.b); + Assertions.assertThat(s1) + .describedAs("s1 == s2") + .isNotEqualTo(s2); + } + + @Test + public void testClassInequality() { + final FlagSet s1 = + createFlagSet(SimpleEnum.class, KEYDOT, noneOf(SimpleEnum.class)); + final FlagSet s2 = + createFlagSet(OtherEnum.class, KEYDOT, OtherEnum.a); + Assertions.assertThat(s1) + .describedAs("s1 == s2") + .isNotEqualTo(s2); + } + + /** + * The copy operation creates a new instance which is now mutable, + * even if the original was immutable. + */ + @Test + public void testCopy() throws Throwable { + FlagSet s1 = + createFlagSet(SimpleEnum.class, KEYDOT, SimpleEnum.a, SimpleEnum.b); + s1.makeImmutable(); + FlagSet s2 = s1.copy(); + Assertions.assertThat(s2) + .describedAs("copy of %s", s1) + .isNotSameAs(s1); + Assertions.assertThat(!s2.isImmutable()) + .describedAs("set %s is immutable", s2) + .isTrue(); + Assertions.assertThat(s1) + .describedAs("s1 == s2") + .isEqualTo(s2); + } + + @Test + public void testCreateNullEnumClass() throws Throwable { + intercept(NullPointerException.class, () -> + createFlagSet(null, KEYDOT, SimpleEnum.a)); + } + + @Test + public void testCreateNullPrefix() throws Throwable { + intercept(NullPointerException.class, () -> + createFlagSet(SimpleEnum.class, null, SimpleEnum.a)); + } + + /** + * Round trip a FlagSet. 
+ * @param flagset FlagSet to save to a configuration and retrieve. + * @return a new FlagSet. + */ + private FlagSet roundTrip(FlagSet flagset) { + final Configuration conf = new Configuration(false); + conf.set(KEY, flagset.toConfigurationString()); + return buildFlagSet(SimpleEnum.class, conf, KEY, false); + } + + /** + * Assert a flag is enabled in the {@link #flagSet} field. + * @param flag flag to check + */ + private void assertEnabled(final SimpleEnum flag) { + Assertions.assertThat(flagSet.enabled(flag)) + .describedAs("status of flag %s in %s", flag, flagSet) + .isTrue(); + } + + /** + * Assert a flag is disabled in the {@link #flagSet} field. + * @param flag flag to check + */ + private void assertDisabled(final SimpleEnum flag) { + Assertions.assertThat(flagSet.enabled(flag)) + .describedAs("status of flag %s in %s", flag, flagSet) + .isFalse(); + } + + /** + * Assert that a set of flags are enabled in the {@link #flagSet} field. + * @param flags flags which must be set. + */ + private void assertFlags(final SimpleEnum... flags) { + for (SimpleEnum flag : flags) { + assertEnabled(flag); + } + } + + /** + * Assert that a FlagSet contains an exclusive set of values. + * @param flags flags which must be set. + */ + private void assertFlagSetMatches( + FlagSet fs, + SimpleEnum... flags) { + Assertions.assertThat(fs.flags()) + .describedAs("path capabilities of %s", fs) + .containsExactly(flags); + } + + /** + * Assert that a flagset contains exactly the capabilities. + * This is calculated by getting the list of active capabilities + * and asserting on the list. + * @param fs flagset + * @param capabilities capabilities + */ + private void assertPathCapabilitiesMatch( + FlagSet fs, + String... capabilities) { + Assertions.assertThat(fs.pathCapabilities()) + .describedAs("path capabilities of %s", fs) + .containsExactlyInAnyOrder(capabilities); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestConfigurationHelper.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestConfigurationHelper.java new file mode 100644 index 0000000000000..529d231572dda --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestConfigurationHelper.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.util; + +import java.util.Set; + +import org.assertj.core.api.Assertions; +import org.assertj.core.api.IterableAssert; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.util.ConfigurationHelper.ERROR_MULTIPLE_ELEMENTS_MATCHING_TO_LOWER_CASE_VALUE; +import static org.apache.hadoop.util.ConfigurationHelper.mapEnumNamesToValues; +import static org.apache.hadoop.util.ConfigurationHelper.parseEnumSet; + +/** + * Test for {@link ConfigurationHelper}. + */ +public class TestConfigurationHelper extends AbstractHadoopTestBase { + + /** + * Simple Enums. + * "i" is included for case tests, as it is special in turkey. + */ + private enum SimpleEnum { a, b, c, i } + + + /** + * Special case: an enum with no values. + */ + private enum EmptyEnum { } + + /** + * Create assertion about the outcome of + * {@link ConfigurationHelper#parseEnumSet(String, String, Class, boolean)}. + * @param valueString value from Configuration + * @param enumClass class of enum + * @param ignoreUnknown should unknown values be ignored? + * @param enum type + * @return an assertion on the outcome. + * @throws IllegalArgumentException if one of the entries was unknown and ignoreUnknown is false, + * or there are two entries in the enum which differ only by case. + */ + private static > IterableAssert assertEnumParse( + final String valueString, + final Class enumClass, + final boolean ignoreUnknown) { + final Set enumSet = parseEnumSet("key", valueString, enumClass, ignoreUnknown); + final IterableAssert assertion = Assertions.assertThat(enumSet); + return assertion.describedAs("parsed enum set '%s'", valueString); + } + + + /** + * Create a configuration with the key {@code key} set to a {@code value}. + * @param value value for the key + * @return a configuration with only key set. 
+ */ + private Configuration confWithKey(String value) { + final Configuration conf = new Configuration(false); + conf.set("key", value); + return conf; + } + + @Test + public void testEnumParseAll() { + assertEnumParse("*", SimpleEnum.class, false) + .containsExactly(SimpleEnum.a, SimpleEnum.b, SimpleEnum.c, SimpleEnum.i); + } + + @Test + public void testEnumParse() { + assertEnumParse("a, b,c", SimpleEnum.class, false) + .containsExactly(SimpleEnum.a, SimpleEnum.b, SimpleEnum.c); + } + + @Test + public void testEnumCaseIndependence() { + assertEnumParse("A, B, C, I", SimpleEnum.class, false) + .containsExactly(SimpleEnum.a, SimpleEnum.b, SimpleEnum.c, SimpleEnum.i); + } + + @Test + public void testEmptyArguments() { + assertEnumParse(" ", SimpleEnum.class, false) + .isEmpty(); + } + + @Test + public void testUnknownEnumNotIgnored() throws Throwable { + intercept(IllegalArgumentException.class, "unrecognized", () -> + parseEnumSet("key", "c, unrecognized", SimpleEnum.class, false)); + } + + @Test + public void testUnknownEnumNotIgnoredThroughConf() throws Throwable { + intercept(IllegalArgumentException.class, "unrecognized", () -> + confWithKey("c, unrecognized") + .getEnumSet("key", SimpleEnum.class, false)); + } + + @Test + public void testUnknownEnumIgnored() { + assertEnumParse("c, d", SimpleEnum.class, true) + .containsExactly(SimpleEnum.c); + } + + @Test + public void testUnknownStarEnum() throws Throwable { + intercept(IllegalArgumentException.class, "unrecognized", () -> + parseEnumSet("key", "*, unrecognized", SimpleEnum.class, false)); + } + + @Test + public void testUnknownStarEnumIgnored() { + assertEnumParse("*, d", SimpleEnum.class, true) + .containsExactly(SimpleEnum.a, SimpleEnum.b, SimpleEnum.c, SimpleEnum.i); + } + + /** + * Unsupported enum as the same case value is present. + */ + private enum CaseConflictingEnum { a, A } + + @Test + public void testCaseConflictingEnumNotSupported() throws Throwable { + intercept(IllegalArgumentException.class, + ERROR_MULTIPLE_ELEMENTS_MATCHING_TO_LOWER_CASE_VALUE, + () -> + parseEnumSet("key", "c, unrecognized", + CaseConflictingEnum.class, false)); + } + + @Test + public void testEmptyEnumMap() { + Assertions.assertThat(mapEnumNamesToValues("", EmptyEnum.class)) + .isEmpty(); + } + + /** + * A star enum for an empty enum must be empty. + */ + @Test + public void testEmptyStarEnum() { + assertEnumParse("*", EmptyEnum.class, false) + .isEmpty(); + } + + @Test + public void testDuplicateValues() { + assertEnumParse("a, a, c, b, c", SimpleEnum.class, true) + .containsExactly(SimpleEnum.a, SimpleEnum.b, SimpleEnum.c); + } + +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 8833aeba2fc8f..078ffaa471aeb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1404,6 +1404,11 @@ private Constants() { public static final String FS_S3A_CREATE_PERFORMANCE_ENABLED = FS_S3A_CREATE_PERFORMANCE + ".enabled"; + /** + * Comma separated list of performance flags. + */ + public static final String FS_S3A_PERFORMANCE_FLAGS = + "fs.s3a.performance.flags"; /** * Prefix for adding a header to the object when created. * The actual value must have a "." suffix and then the actual header. 
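The new constant is consumed in `S3AFileSystem.initialize()` in the hunks that follow; a minimal sketch of that wiring, with illustrative configuration values:

    Configuration conf = new Configuration();
    conf.set(FS_S3A_PERFORMANCE_FLAGS, "create, delete");   // or "*" for every flag
    FlagSet<PerformanceFlagEnum> performanceFlags = FlagSet.buildFlagSet(
        PerformanceFlagEnum.class, conf, FS_S3A_PERFORMANCE_FLAGS, true);
    performanceFlags.enabled(PerformanceFlagEnum.Create);   // true
    performanceFlags.enabled(PerformanceFlagEnum.Mkdir);    // false
    // the older fs.s3a.create.performance option is folded into the Create flag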
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index f5937ae0a4abd..f8207696096fb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -109,8 +109,10 @@ import org.apache.hadoop.fs.FSDataOutputStreamBuilder; import org.apache.hadoop.fs.Globber; import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.impl.FlagSet; import org.apache.hadoop.fs.impl.OpenFileParameters; import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.s3a.api.PerformanceFlagEnum; import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; import org.apache.hadoop.fs.s3a.auth.SignerManager; import org.apache.hadoop.fs.s3a.auth.delegation.DelegationOperations; @@ -223,6 +225,7 @@ import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_DEFAULT; import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.IO_FILE_BUFFER_SIZE_KEY; import static org.apache.hadoop.fs.CommonPathCapabilities.DIRECTORY_LISTING_INCONSISTENT; +import static org.apache.hadoop.fs.impl.FlagSet.buildFlagSet; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.Invoker.*; @@ -369,8 +372,12 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private S3AStatisticsContext statisticsContext; /** Storage Statistics Bonded to the instrumentation. */ private S3AStorageStatistics storageStatistics; - /** Should all create files be "performance" unless unset. */ - private boolean performanceCreation; + + /** + * Performance flags. + */ + private FlagSet performanceFlags; + /** * Default input policy; may be overridden in * {@code openFile()}. @@ -740,10 +747,23 @@ public void initialize(URI name, Configuration originalConf) // verify there's no S3Guard in the store config. checkNoS3Guard(this.getUri(), getConf()); + // read in performance options and parse them to a list of flags. + performanceFlags = buildFlagSet( + PerformanceFlagEnum.class, + conf, + FS_S3A_PERFORMANCE_FLAGS, + true); // performance creation flag for code which wants performance // at the risk of overwrites. - performanceCreation = conf.getBoolean(FS_S3A_CREATE_PERFORMANCE, - FS_S3A_CREATE_PERFORMANCE_DEFAULT); + // this uses the performance flags as the default and then + // updates the performance flags to match. + // a bit convoluted. + boolean performanceCreation = conf.getBoolean(FS_S3A_CREATE_PERFORMANCE, + performanceFlags.enabled(PerformanceFlagEnum.Create)); + performanceFlags.set(PerformanceFlagEnum.Create, performanceCreation); + // freeze. + performanceFlags.makeImmutable(); + LOG.debug("{} = {}", FS_S3A_CREATE_PERFORMANCE, performanceCreation); allowAuthoritativePaths = S3Guard.getAuthoritativePaths(this); @@ -1289,6 +1309,14 @@ public RequestFactory getRequestFactory() { return requestFactory; } + /** + * Get the performance flags. + * @return performance flags. + */ + public FlagSet getPerformanceFlags() { + return performanceFlags; + } + /** * Implementation of all operations used by delegation tokens. 
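Alongside the store-wide flag, the create() and createFile() hunks below keep the per-file opt-in through the builder `must()` option; a hedged sketch of application usage (the path is a placeholder):

    FSDataOutputStream out = fs.createFile(new Path("s3a://example-bucket/data/out.bin"))
        .overwrite(true)
        .must(FS_S3A_CREATE_PERFORMANCE, true)   // skip overwrite and parent directory probes
        .build();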
*/ @@ -2036,9 +2064,9 @@ public FSDataOutputStream create(Path f, FsPermission permission, // work out the options to pass down CreateFileBuilder.CreateFileOptions options; - if (performanceCreation) { + if (getPerformanceFlags().enabled(PerformanceFlagEnum.Create)) { options = OPTIONS_CREATE_FILE_PERFORMANCE; - }else { + } else { options = overwrite ? OPTIONS_CREATE_FILE_OVERWRITE : OPTIONS_CREATE_FILE_NO_OVERWRITE; @@ -2209,7 +2237,8 @@ public FSDataOutputStreamBuilder createFile(final Path path) { builder .create() .overwrite(true) - .must(FS_S3A_CREATE_PERFORMANCE, performanceCreation); + .must(FS_S3A_CREATE_PERFORMANCE, + getPerformanceFlags().enabled(PerformanceFlagEnum.Create)); return builder; } catch (IOException e) { // catch any IOEs raised in span creation and convert to @@ -2274,7 +2303,8 @@ public FSDataOutputStream createNonRecursive(Path p, .withFlags(flags) .blockSize(blockSize) .bufferSize(bufferSize) - .must(FS_S3A_CREATE_PERFORMANCE, performanceCreation); + .must(FS_S3A_CREATE_PERFORMANCE, + getPerformanceFlags().enabled(PerformanceFlagEnum.Create)); if (progress != null) { builder.progress(progress); } @@ -4845,6 +4875,7 @@ public String toString() { sb.append(", partSize=").append(partSize); sb.append(", enableMultiObjectsDelete=").append(enableMultiObjectsDelete); sb.append(", maxKeys=").append(maxKeys); + sb.append(", performanceFlags=").append(performanceFlags); if (cannedACL != null) { sb.append(", cannedACL=").append(cannedACL); } @@ -5557,7 +5588,7 @@ public boolean hasPathCapability(final Path path, final String capability) // is the FS configured for create file performance case FS_S3A_CREATE_PERFORMANCE_ENABLED: - return performanceCreation; + return performanceFlags.enabled(PerformanceFlagEnum.Create); // is the optimized copy from local enabled. case OPTIMIZED_COPY_FROM_LOCAL: @@ -5572,8 +5603,15 @@ public boolean hasPathCapability(final Path path, final String capability) return s3AccessGrantsEnabled; default: - return super.hasPathCapability(p, cap); + // is it a performance flag? 
+ if (performanceFlags.hasCapability(capability)) { + return true; + } + // fall through } + + // hand off to superclass + return super.hasPathCapability(p, cap); } /** @@ -5697,23 +5735,27 @@ public S3AMultipartUploaderBuilder createMultipartUploader( @Override @InterfaceAudience.Private public StoreContext createStoreContext() { - return new StoreContextBuilder().setFsURI(getUri()) + + // please keep after setFsURI() in alphabetical order + return new StoreContextBuilder() + .setFsURI(getUri()) + .setAuditor(getAuditor()) .setBucket(getBucket()) + .setChangeDetectionPolicy(changeDetectionPolicy) .setConfiguration(getConf()) - .setUsername(getUsername()) - .setOwner(owner) + .setContextAccessors(new ContextAccessorsImpl()) + .setEnableCSE(isCSEEnabled) .setExecutor(boundedThreadPool) .setExecutorCapacity(executorCapacity) - .setInvoker(invoker) - .setInstrumentation(statisticsContext) - .setStorageStatistics(getStorageStatistics()) .setInputPolicy(getInputPolicy()) - .setChangeDetectionPolicy(changeDetectionPolicy) + .setInstrumentation(statisticsContext) + .setInvoker(invoker) .setMultiObjectDeleteEnabled(enableMultiObjectsDelete) + .setOwner(owner) + .setPerformanceFlags(performanceFlags) + .setStorageStatistics(getStorageStatistics()) .setUseListV1(useListV1) - .setContextAccessors(new ContextAccessorsImpl()) - .setAuditor(getAuditor()) - .setEnableCSE(isCSEEnabled) + .setUsername(getUsername()) .build(); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/PerformanceFlagEnum.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/PerformanceFlagEnum.java new file mode 100644 index 0000000000000..b4368692542a2 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/PerformanceFlagEnum.java @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.api; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Enum of performance flags. + *

+ * When adding new flags, please keep in alphabetical order. + */ +@InterfaceAudience.LimitedPrivate("S3A Filesystem and extensions") +@InterfaceStability.Unstable +public enum PerformanceFlagEnum { + /** + * Create performance. + */ + Create, + + /** + * Delete performance. + */ + Delete, + + /** + * Mkdir performance. + */ + Mkdir, + + /** + * Open performance. + */ + Open +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContext.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContext.java index 4b8a28f3e7bb0..323c323ef0e26 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContext.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContext.java @@ -32,6 +32,8 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.impl.FlagSet; +import org.apache.hadoop.fs.s3a.api.PerformanceFlagEnum; import org.apache.hadoop.fs.s3a.api.RequestFactory; import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; import org.apache.hadoop.fs.s3a.Invoker; @@ -117,6 +119,11 @@ public class StoreContext implements ActiveThreadSpanSource { /** Is client side encryption enabled? */ private final boolean isCSEEnabled; + /** + * Performance flags. + */ + private final FlagSet performanceFlags; + /** * Instantiate. */ @@ -137,7 +144,8 @@ public class StoreContext implements ActiveThreadSpanSource { final boolean useListV1, final ContextAccessors contextAccessors, final AuditSpanSource auditor, - final boolean isCSEEnabled) { + final boolean isCSEEnabled, + final FlagSet performanceFlags) { this.fsURI = fsURI; this.bucket = bucket; this.configuration = configuration; @@ -158,6 +166,7 @@ public class StoreContext implements ActiveThreadSpanSource { this.contextAccessors = contextAccessors; this.auditor = auditor; this.isCSEEnabled = isCSEEnabled; + this.performanceFlags = performanceFlags; } public URI getFsURI() { @@ -411,4 +420,12 @@ public RequestFactory getRequestFactory() { public boolean isCSEEnabled() { return isCSEEnabled; } + + /** + * Get the performance flags. + * @return FlagSet containing the performance flags. 
+ */ + public FlagSet getPerformanceFlags() { + return performanceFlags; + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContextBuilder.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContextBuilder.java index cff38b9fc4b7d..fd9debfba8878 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContextBuilder.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/StoreContextBuilder.java @@ -22,9 +22,11 @@ import java.util.concurrent.ExecutorService; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.impl.FlagSet; import org.apache.hadoop.fs.s3a.Invoker; import org.apache.hadoop.fs.s3a.S3AInputPolicy; import org.apache.hadoop.fs.s3a.S3AStorageStatistics; +import org.apache.hadoop.fs.s3a.api.PerformanceFlagEnum; import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext; import org.apache.hadoop.fs.store.audit.AuditSpanSource; @@ -69,6 +71,8 @@ public class StoreContextBuilder { private boolean isCSEEnabled; + private FlagSet performanceFlags; + public StoreContextBuilder setFsURI(final URI fsURI) { this.fsURI = fsURI; return this; @@ -175,6 +179,16 @@ public StoreContextBuilder setEnableCSE( return this; } + public FlagSet getPerformanceFlags() { + return performanceFlags; + } + + public StoreContextBuilder setPerformanceFlags( + final FlagSet flagSet) { + this.performanceFlags = flagSet; + return this; + } + public StoreContext build() { return new StoreContext(fsURI, bucket, @@ -192,6 +206,7 @@ public StoreContext build() { useListV1, contextAccessors, auditor, - isCSEEnabled); + isCSEEnabled, + performanceFlags); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index 51bff4228be0f..57fd879c38cf6 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -392,6 +392,8 @@ public static class BucketInfo extends S3GuardTool { "\tThe S3A connector is compatible with buckets where" + " directory markers are not deleted"; + public static final String CAPABILITY_FORMAT = "\t%s %s%n"; + public BucketInfo(Configuration conf) { super(conf, GUARDED_FLAG, UNGUARDED_FLAG, FIPS_FLAG, MAGIC_FLAG); CommandFormat format = getCommandFormat(); @@ -560,9 +562,14 @@ public int run(String[] args, PrintStream out) // and check for capabilities println(out, "%nStore Capabilities"); for (String capability : S3A_DYNAMIC_CAPABILITIES) { - out.printf("\t%s %s%n", capability, + out.printf(CAPABILITY_FORMAT, capability, fs.hasPathCapability(root, capability)); } + // the performance flags are dynamically generated + fs.createStoreContext().getPerformanceFlags().pathCapabilities() + .forEach(capability -> out.printf(CAPABILITY_FORMAT, capability, "true")); + + // finish with a newline println(out, ""); if (commands.getOpt(FIPS_FLAG) && !fs.hasPathCapability(root, FIPS_ENDPOINT)) { diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md index 954823f2172ea..876072e81e8fd 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md +++ 
b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md @@ -180,7 +180,11 @@ The S3A Filesystem client supports the notion of input policies, similar to that of the Posix `fadvise()` API call. This tunes the behavior of the S3A client to optimise HTTP GET requests for the different use cases. -### fadvise `sequential` +The list of supported options is found in +[FSDataInputStream](../../../../../../hadoop-common-project/hadoop-common/target/site/filesystem/fsdatainputstreambuilder.html). + + +### fadvise `sequential`, `whole-file` Read through the file, possibly with some short forward seeks. @@ -196,6 +200,9 @@ sequential access, as should those reading data from gzipped `.gz` files. Because the "normal" fadvise policy starts off in sequential IO mode, there is rarely any need to explicit request this policy. +Distcp will automatically request `whole-file` access, even on deployments +where the cluster configuration is for `random` IO. + ### fadvise `random` Optimised for random IO, specifically the Hadoop `PositionedReadable` @@ -243,7 +250,7 @@ basis. to set fadvise policies on input streams. Once implemented, this will become the supported mechanism used for configuring the input IO policy. -### fadvise `normal` (default) +### fadvise `normal` or `adaptive` (default) The `normal` policy starts off reading a file in `sequential` mode, but if the caller seeks backwards in the stream, it switches from @@ -276,7 +283,45 @@ Fix: Use one of the dedicated [S3A Committers](committers.md). ## Options to Tune -### Thread and connection pool settings. +### Performance Flags: `fs.s3a.performance.flag` + +This option takes a comma separated list of performance flags. +View it as the equivalent of the `-O` compiler optimization list C/C++ compilers offer. +That is a complicated list of options which deliver speed if the person setting them +understands the risks. + +* The list of flags MAY change across releases +* The semantics of specific flags SHOULD NOT change across releases. +* If an option is to be tuned which may relax semantics, a new option MUST be defined. +* Unknown flags are ignored; this is to avoid compatibility. +* The option `*` means "turn everything on". This is implicitly unstable across releases. + +| *Option* | *Meaning* | Since | +|----------|--------------------|:------| +| `create` | Create Performance | 3.4.1 | + +The `create` flag has the same semantics as [`fs.s3a.create.performance`](#create-performance) + + +### Create Performance `fs.s3a.create.performance` + + +The configuration option `fs.s3a.create.performance` has the same behavior as +the `fs.s3a.performance.flag` flag option `create`: + +* No overwrite checks are made when creating a file, even if overwrite is set to `false` in the application/library code +* No checks are made for an object being written above a path containing other objects (i.e. a "directory") +* No checks are made for a parent path containing an object which is not a directory marker (i.e. a "file") + +This saves multiple probes per operation, especially a `LIST` call. + +It may however result in +* Unintentional overwriting of data +* Creation of directory structures which can no longer be navigated through filesystem APIs. + +Use with care, and, ideally, enable versioning on the S3 store. + +### Thread and connection pool settings. Each S3A client interacting with a single bucket, as a single user, has its own dedicated pool of open HTTP connections alongside a pool of threads used @@ -441,9 +486,6 @@ killer. 1. 
As discussed [earlier](#pooling), use large values for `fs.s3a.threads.max` and `fs.s3a.connection.maximum`. -1. Make sure that the bucket is using `sequential` or `normal` fadvise seek policies, -that is, `fs.s3a.experimental.input.fadvise` is not set to `random` - 1. Perform listings in parallel by setting `-numListstatusThreads` to a higher number. Make sure that `fs.s3a.connection.maximum` is equal to or greater than the value used. @@ -451,6 +493,9 @@ is equal to or greater than the value used. 1. If using `-delete`, set `fs.trash.interval` to 0 to avoid the deleted objects from being copied to a trash directory. +1. If using distcp to upload to a new path where no existing data exists, + consider adding the option `create` to the flags in `fs.s3a.performance.flag`. + *DO NOT* switch `fs.s3a.fast.upload.buffer` to buffer in memory. If one distcp mapper runs out of memory it will fail, and that runs the risk of failing the entire job. @@ -461,12 +506,6 @@ efficient in terms of HTTP connection use, and reduce the IOP rate against the S3 bucket/shard. ```xml - - - fs.s3a.experimental.input.fadvise - normal - - fs.s3a.block.size 128M @@ -481,6 +520,12 @@ the S3 bucket/shard. fs.trash.interval 0 + + + + fs.s3a.create.performance + create + ``` ## hadoop shell commands `fs -rm` @@ -642,7 +687,7 @@ expects an immediate response. For example, a thread may block so long that other liveness checks start to fail. Consider spawning off an executor thread to do these background cleanup operations. -## Tuning SSL Performance +## Tuning SSL Performance By default, S3A uses HTTPS to communicate with AWS Services. This means that all communication with S3 is encrypted using SSL. The overhead of this encryption @@ -666,8 +711,6 @@ running with the vanilla JSSE. ### OpenSSL Acceleration -**Experimental Feature** - As of HADOOP-16050 and HADOOP-16346, `fs.s3a.ssl.channel.mode` can be set to either `default` or `openssl` to enable native OpenSSL acceleration of HTTPS requests. OpenSSL implements the SSL and TLS protocols using native code. For @@ -721,12 +764,12 @@ exception and S3A initialization will fail. Supported values for `fs.s3a.ssl.channel.mode`: -| `fs.s3a.ssl.channel.mode` Value | Description | -|-------------------------------|-------------| -| `default_jsse` | Uses Java JSSE without GCM on Java 8 | -| `default_jsse_with_gcm` | Uses Java JSSE | -| `default` | Uses OpenSSL, falls back to `default_jsse` if OpenSSL cannot be loaded | -| `openssl` | Uses OpenSSL, fails if OpenSSL cannot be loaded | +| `fs.s3a.ssl.channel.mode` Value | Description | +|---------------------------------|------------------------------------------------------------------------| +| `default_jsse` | Uses Java JSSE without GCM on Java 8 | +| `default_jsse_with_gcm` | Uses Java JSSE | +| `default` | Uses OpenSSL, falls back to `default_jsse` if OpenSSL cannot be loaded | +| `openssl` | Uses OpenSSL, fails if OpenSSL cannot be loaded | The naming convention is setup in order to preserve backwards compatibility with the ABFS support of [HADOOP-15669](https://issues.apache.org/jira/browse/HADOOP-15669). @@ -734,7 +777,7 @@ with the ABFS support of [HADOOP-15669](https://issues.apache.org/jira/browse/HA Other options may be added to `fs.s3a.ssl.channel.mode` in the future as further SSL optimizations are made. -### WildFly classpath requirements +### WildFly classpath and SSL library requirements For OpenSSL acceleration to work, a compatible version of the wildfly JAR must be on the classpath. 
This is not explicitly declared @@ -744,21 +787,28 @@ optional. If the wildfly JAR is not found, the network acceleration will fall back to the JVM, always. -Note: there have been compatibility problems with wildfly JARs and openSSL +Similarly, the `libssl` library must be compatible with wildfly. + +Wildfly requires this native library to be part of an `openssl` installation. +Third party implementations may not work correctly. +This can be an issue in FIPS-compliant deployments, where the `libssl` library +is a third-party implementation built with restricted TLS protocols. + + +There have been compatibility problems with wildfly JARs and openSSL releases in the past: version 1.0.4.Final is not compatible with openssl 1.1.1. An extra complication was older versions of the `azure-data-lake-store-sdk` JAR used in `hadoop-azure-datalake` contained an unshaded copy of the 1.0.4.Final classes, causing binding problems even when a later version was explicitly being placed on the classpath. +## Tuning FileSystem Initialization. -## Tuning FileSystem Initialization. - -### Disabling bucket existence checks +### Bucket existence checks When an S3A Filesystem instance is created and initialized, the client -checks if the bucket provided is valid. This can be slow. -You can ignore bucket validation by configuring `fs.s3a.bucket.probe` as follows: +can check if the bucket provided is valid. This can be slow, which is why +it is disabled by default. ```xml @@ -767,9 +817,11 @@ You can ignore bucket validation by configuring `fs.s3a.bucket.probe` as follows ``` -Note: if the bucket does not exist, this issue will surface when operations are performed +If the bucket does not exist, this issue will surface when operations are performed on the filesystem; you will see `UnknownStoreException` stack traces. +Re-enabling the probe will force an early check but is generally not needed.
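As a rough illustration of how the tuning options described above can be applied from code — the property names (`fs.s3a.performance.flag`, `fs.s3a.bucket.probe`) are taken from the surrounding documentation text, while the specific values are illustrative assumptions rather than recommendations — a client might build a configuration like this before creating the filesystem:

```java
import org.apache.hadoop.conf.Configuration;

/**
 * Minimal sketch, not part of the patch: applies the tuning options discussed
 * above. Property names come from the documentation text; the values here are
 * illustrative only.
 */
public class S3ATuningExample {
  public static Configuration tunedConfiguration() {
    Configuration conf = new Configuration();
    // comma separated list of performance flags, e.g. enabling the "create" flag
    conf.set("fs.s3a.performance.flag", "create");
    // a non-zero value re-enables the bucket existence probe at initialization time
    conf.setInt("fs.s3a.bucket.probe", 1);
    return conf;
  }
}
```

A `core-site.xml` entry with the same property names has the same effect; the programmatic form is shown only to keep the example self-contained.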
+ ### Rate limiting parallel FileSystem creation operations Applications normally ask for filesystems from the shared cache, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java index 7a2a10879dd8e..a1067ddc0ecfe 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java @@ -30,6 +30,7 @@ import org.apache.hadoop.fs.s3a.S3ATestUtils; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; /** @@ -71,7 +72,8 @@ protected AbstractFSContract createContract(Configuration conf) { protected Configuration createConfiguration() { final Configuration conf = super.createConfiguration(); removeBaseAndBucketOverrides(conf, - FS_S3A_CREATE_PERFORMANCE); + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); conf.setBoolean(FS_S3A_CREATE_PERFORMANCE, createPerformance); S3ATestUtils.disableFilesystemCaching(conf); return conf; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index 0e4a8eda5b297..aa46557e9104b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -41,6 +41,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.*; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.fs.s3a.Statistic.*; import static org.apache.hadoop.fs.s3a.performance.OperationCost.*; @@ -80,7 +81,9 @@ public ITestS3AFileOperationCost( @Override public Configuration createConfiguration() { final Configuration conf = super.createConfiguration(); - removeBaseAndBucketOverrides(conf, FS_S3A_CREATE_PERFORMANCE); + removeBaseAndBucketOverrides(conf, + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); conf.setBoolean(FS_S3A_CREATE_PERFORMANCE, isKeepingMarkers()); return conf; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index caff545eadfcb..d8bb5898aa076 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -34,6 +34,7 @@ import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.s3a.api.PerformanceFlagEnum; import org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding; import org.apache.hadoop.fs.s3a.auth.MarshalledCredentials; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; @@ -102,6 +103,7 @@ import java.util.stream.Collectors; import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; +import 
static org.apache.hadoop.fs.impl.FlagSet.createFlagSet; import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.submit; import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletion; import static org.apache.hadoop.fs.s3a.impl.S3ExpressStorage.STORE_CAPABILITY_S3_EXPRESS_STORAGE; @@ -992,6 +994,9 @@ public static StoreContext createMockStoreContext( .setMultiObjectDeleteEnabled(multiDelete) .setUseListV1(false) .setContextAccessors(accessors) + .setPerformanceFlags(createFlagSet( + PerformanceFlagEnum.class, + FS_S3A_PERFORMANCE_FLAGS)) .build(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestConnectionTimeouts.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestConnectionTimeouts.java index 2da70e6ef4598..dc8270c9ffd9d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestConnectionTimeouts.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/ITestConnectionTimeouts.java @@ -44,6 +44,7 @@ import static org.apache.hadoop.fs.s3a.Constants.CONNECTION_TTL; import static org.apache.hadoop.fs.s3a.Constants.ESTABLISH_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; import static org.apache.hadoop.fs.s3a.Constants.MAXIMUM_CONNECTIONS; import static org.apache.hadoop.fs.s3a.Constants.MAX_ERROR_RETRIES; import static org.apache.hadoop.fs.s3a.Constants.PREFETCH_ENABLED_KEY; @@ -88,7 +89,8 @@ private Configuration timingOutConfiguration() { PREFETCH_ENABLED_KEY, REQUEST_TIMEOUT, SOCKET_TIMEOUT, - FS_S3A_CREATE_PERFORMANCE + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS ); // only one connection is allowed, and the establish timeout is low diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/AbstractS3ACostTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/AbstractS3ACostTest.java index b4b139ca3062e..19feb386333a8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/AbstractS3ACostTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/AbstractS3ACostTest.java @@ -108,7 +108,8 @@ public Configuration createConfiguration() { removeBaseAndBucketOverrides(bucketName, conf, DIRECTORY_MARKER_POLICY, AUTHORITATIVE_PATH, - FS_S3A_CREATE_PERFORMANCE); + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); // directory marker options conf.set(DIRECTORY_MARKER_POLICY, keepMarkers diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateFileCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateFileCost.java index 2d128cffc5af0..c9a7415c18103 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateFileCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateFileCost.java @@ -41,6 +41,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.toChar; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_HEADER; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; import static org.apache.hadoop.fs.s3a.Constants.XA_HEADER_PREFIX; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static 
org.apache.hadoop.fs.s3a.Statistic.OBJECT_BULK_DELETE_REQUEST; @@ -105,7 +106,8 @@ private OperationCost expected(OperationCost source) { public Configuration createConfiguration() { final Configuration conf = super.createConfiguration(); removeBaseAndBucketOverrides(conf, - FS_S3A_CREATE_PERFORMANCE); + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); conf.setBoolean(FS_S3A_CREATE_PERFORMANCE, createPerformance); S3ATestUtils.disableFilesystemCaching(conf); return conf; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java index e00130047434d..088e4989af4f7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java @@ -56,6 +56,7 @@ import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY_DELETE; import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_MARKER_POLICY_KEEP; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -201,7 +202,8 @@ protected Configuration createConfiguration() { // directory marker options removeBaseAndBucketOverrides(bucketName, conf, DIRECTORY_MARKER_POLICY, - FS_S3A_CREATE_PERFORMANCE); + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); conf.set(DIRECTORY_MARKER_POLICY, keepMarkers ? 
DIRECTORY_MARKER_POLICY_KEEP diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java index 97f51fe2c8dcd..9979b72e7110d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java @@ -40,6 +40,7 @@ import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.fs.s3a.Statistic.*; import static org.apache.hadoop.fs.s3a.performance.OperationCost.*; @@ -80,7 +81,9 @@ public ITestS3ADeleteCost(final String name, @Override public Configuration createConfiguration() { Configuration conf = super.createConfiguration(); - removeBaseAndBucketOverrides(conf, FS_S3A_CREATE_PERFORMANCE); + removeBaseAndBucketOverrides(conf, + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); conf.setBoolean(FS_S3A_CREATE_PERFORMANCE, false); return conf; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java index 759a3bf129eef..b72335d9a7a89 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/tools/AbstractMarkerToolTest.java @@ -74,7 +74,8 @@ protected Configuration createConfiguration() { S3A_BUCKET_PROBE, DIRECTORY_MARKER_POLICY, AUTHORITATIVE_PATH, - FS_S3A_CREATE_PERFORMANCE); + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); // base FS is legacy conf.set(DIRECTORY_MARKER_POLICY, DIRECTORY_MARKER_POLICY_DELETE); conf.setBoolean(FS_S3A_CREATE_PERFORMANCE, false); From 038636a1b5250e06622cac7ee11b12965c91111e Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Mon, 29 Jul 2024 13:45:14 -0500 Subject: [PATCH 072/113] HADOOP-19238. Fix create-release script for arm64 based MacOS (#6962) Contributed by Mukund Thakur --- dev-support/bin/create-release | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev-support/bin/create-release b/dev-support/bin/create-release index 8cdcc14acf026..148b88a9912fe 100755 --- a/dev-support/bin/create-release +++ b/dev-support/bin/create-release @@ -205,7 +205,7 @@ function set_defaults DOCKERRAN=false CPU_ARCH=$(echo "$MACHTYPE" | cut -d- -f1) - if [ "$CPU_ARCH" = "aarch64" ]; then + if [[ "$CPU_ARCH" = "aarch64" || "$CPU_ARCH" = "arm64" ]]; then DOCKERFILE="${BASEDIR}/dev-support/docker/Dockerfile_aarch64" fi @@ -514,7 +514,7 @@ function dockermode # we always force build with the OpenJDK JDK # but with the correct version - if [ "$CPU_ARCH" = "aarch64" ]; then + if [[ "$CPU_ARCH" = "aarch64" || "$CPU_ARCH" = "arm64" ]]; then echo "ENV JAVA_HOME /usr/lib/jvm/java-${JVM_VERSION}-openjdk-arm64" else echo "ENV JAVA_HOME /usr/lib/jvm/java-${JVM_VERSION}-openjdk-amd64" From 059e996c02d64716707d8dfb905dc84bab317aef Mon Sep 17 00:00:00 2001 From: Takanobu Asanuma Date: Tue, 30 Jul 2024 10:14:33 +0900 Subject: [PATCH 073/113] HDFS-17591. 
RBF: Router should follow X-FRAME-OPTIONS protection setting (#6963) --- .../federation/router/RouterHttpServer.java | 11 ++++ .../router/TestRouterHttpServerXFrame.java | 65 +++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100644 hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterHttpServerXFrame.java diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHttpServer.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHttpServer.java index 9f665644aa185..229b47d7d9e3c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHttpServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/RouterHttpServer.java @@ -20,6 +20,7 @@ import java.net.InetSocketAddress; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.hdfs.server.namenode.NameNodeHttpServer; @@ -86,6 +87,16 @@ protected void serviceStart() throws Exception { RBFConfigKeys.DFS_ROUTER_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY, RBFConfigKeys.DFS_ROUTER_KEYTAB_FILE_KEY); + final boolean xFrameEnabled = conf.getBoolean( + DFSConfigKeys.DFS_XFRAME_OPTION_ENABLED, + DFSConfigKeys.DFS_XFRAME_OPTION_ENABLED_DEFAULT); + + final String xFrameOptionValue = conf.getTrimmed( + DFSConfigKeys.DFS_XFRAME_OPTION_VALUE, + DFSConfigKeys.DFS_XFRAME_OPTION_VALUE_DEFAULT); + + builder.configureXFrame(xFrameEnabled).setXFrameOption(xFrameOptionValue); + this.httpServer = builder.build(); NameNodeHttpServer.initWebHdfs(conf, httpServer, diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterHttpServerXFrame.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterHttpServerXFrame.java new file mode 100644 index 0000000000000..58053e20ea78e --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/test/java/org/apache/hadoop/hdfs/server/federation/router/TestRouterHttpServerXFrame.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ + +package org.apache.hadoop.hdfs.server.federation.router; + +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.InetSocketAddress; +import java.net.URI; +import java.net.URL; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.HdfsConfiguration; + +import static org.apache.hadoop.http.HttpServer2.XFrameOption.SAMEORIGIN; + +/** + * A class to test the XFrame options of Router HTTP Server. + */ +public class TestRouterHttpServerXFrame { + + @Test + public void testRouterXFrame() throws IOException { + Configuration conf = new HdfsConfiguration(); + conf.setBoolean(DFSConfigKeys.DFS_XFRAME_OPTION_ENABLED, true); + conf.set(DFSConfigKeys.DFS_XFRAME_OPTION_VALUE, SAMEORIGIN.toString()); + + Router router = new Router(); + try { + router.init(conf); + router.start(); + + InetSocketAddress httpAddress = router.getHttpServerAddress(); + URL url = + URI.create("http://" + httpAddress.getHostName() + ":" + httpAddress.getPort()).toURL(); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.connect(); + + String xfoHeader = conn.getHeaderField("X-FRAME-OPTIONS"); + Assert.assertNotNull("X-FRAME-OPTIONS is absent in the header", xfoHeader); + Assert.assertTrue(xfoHeader.endsWith(SAMEORIGIN.toString())); + } finally { + router.stop(); + router.close(); + } + } +} From c593c17255c06a32b01055e2f4bb2394009bd94a Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Thu, 1 Aug 2024 20:07:36 +0100 Subject: [PATCH 074/113] HADOOP-19237. Upgrade to dnsjava 3.6.1 due to CVEs (#6961) Contributed by P J Fanning --- LICENSE-binary | 2 +- .../test/resources/ensure-jars-have-correct-contents.sh | 2 ++ hadoop-client-modules/hadoop-client-runtime/pom.xml | 3 +++ .../apache/hadoop/registry/server/dns/RegistryDNS.java | 2 +- .../hadoop/registry/server/dns/TestRegistryDNS.java | 8 ++++---- hadoop-project/pom.xml | 2 +- 6 files changed, 12 insertions(+), 7 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index ff8012096a437..c0eb82f3dabfb 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -379,7 +379,7 @@ hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/fuse-dfs/util/tree hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/compat/{fstatat|openat|unlinkat}.h com.github.luben:zstd-jni:1.5.2-1 -dnsjava:dnsjava:2.1.7 +dnsjava:dnsjava:3.6.1 org.codehaus.woodstox:stax2-api:4.2.1 diff --git a/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh b/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh index 2e927402d2542..3a7c5ce786047 100644 --- a/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh +++ b/hadoop-client-modules/hadoop-client-check-invariants/src/test/resources/ensure-jars-have-correct-contents.sh @@ -51,6 +51,8 @@ allowed_expr+="|^[^-]*-default.xml$" allowed_expr+="|^[^-]*-version-info.properties$" # * Hadoop's application classloader properties file. 
allowed_expr+="|^org.apache.hadoop.application-classloader.properties$" +# Comes from dnsjava, not sure if relocatable. +allowed_expr+="|^messages.properties$" # public suffix list used by httpcomponents allowed_expr+="|^mozilla/$" allowed_expr+="|^mozilla/public-suffix-list.txt$" diff --git a/hadoop-client-modules/hadoop-client-runtime/pom.xml b/hadoop-client-modules/hadoop-client-runtime/pom.xml index 22c8ae00a3a52..8c72f53e9189f 100644 --- a/hadoop-client-modules/hadoop-client-runtime/pom.xml +++ b/hadoop-client-modules/hadoop-client-runtime/pom.xml @@ -229,6 +229,8 @@ jnamed* lookup* update* + META-INF/versions/21/* + META-INF/versions/21/**/* @@ -243,6 +245,7 @@ META-INF/versions/9/module-info.class META-INF/versions/11/module-info.class + META-INF/versions/21/module-info.class diff --git a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNS.java b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNS.java index b6de757fc3c17..e99c49f7dc6a8 100644 --- a/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNS.java +++ b/hadoop-common-project/hadoop-registry/src/main/java/org/apache/hadoop/registry/server/dns/RegistryDNS.java @@ -1682,7 +1682,7 @@ public void exec(Zone zone, Record record) throws IOException { DNSSEC.sign(rRset, dnskeyRecord, privateKey, inception, expiration); LOG.info("Adding {}", rrsigRecord); - rRset.addRR(rrsigRecord); + zone.addRecord(rrsigRecord); //addDSRecord(zone, record.getName(), record.getDClass(), // record.getTTL(), inception, expiration); diff --git a/hadoop-common-project/hadoop-registry/src/test/java/org/apache/hadoop/registry/server/dns/TestRegistryDNS.java b/hadoop-common-project/hadoop-registry/src/test/java/org/apache/hadoop/registry/server/dns/TestRegistryDNS.java index 56e617144ad38..386cb3a196cad 100644 --- a/hadoop-common-project/hadoop-registry/src/test/java/org/apache/hadoop/registry/server/dns/TestRegistryDNS.java +++ b/hadoop-common-project/hadoop-registry/src/test/java/org/apache/hadoop/registry/server/dns/TestRegistryDNS.java @@ -350,7 +350,7 @@ public void testMissingReverseLookup() throws Exception { Name name = Name.fromString("19.1.17.172.in-addr.arpa."); Record question = Record.newRecord(name, Type.PTR, DClass.IN); Message query = Message.newQuery(question); - OPTRecord optRecord = new OPTRecord(4096, 0, 0, Flags.DO, null); + OPTRecord optRecord = new OPTRecord(4096, 0, 0, Flags.DO); query.addRecord(optRecord, Section.ADDITIONAL); byte[] responseBytes = getRegistryDNS().generateReply(query, null); Message response = new Message(responseBytes); @@ -392,7 +392,7 @@ private List assertDNSQuery(String lookup, int type, int numRecs) Name name = Name.fromString(lookup); Record question = Record.newRecord(name, type, DClass.IN); Message query = Message.newQuery(question); - OPTRecord optRecord = new OPTRecord(4096, 0, 0, Flags.DO, null); + OPTRecord optRecord = new OPTRecord(4096, 0, 0, Flags.DO); query.addRecord(optRecord, Section.ADDITIONAL); byte[] responseBytes = getRegistryDNS().generateReply(query, null); Message response = new Message(responseBytes); @@ -421,7 +421,7 @@ private List assertDNSQueryNotNull( Name name = Name.fromString(lookup); Record question = Record.newRecord(name, type, DClass.IN); Message query = Message.newQuery(question); - OPTRecord optRecord = new OPTRecord(4096, 0, 0, Flags.DO, null); + OPTRecord optRecord = new OPTRecord(4096, 0, 0, Flags.DO); 
query.addRecord(optRecord, Section.ADDITIONAL); byte[] responseBytes = getRegistryDNS().generateReply(query, null); Message response = new Message(responseBytes); @@ -592,7 +592,7 @@ public void testReadMasterFile() throws Exception { Name name = Name.fromString("5.0.17.172.in-addr.arpa."); Record question = Record.newRecord(name, Type.PTR, DClass.IN); Message query = Message.newQuery(question); - OPTRecord optRecord = new OPTRecord(4096, 0, 0, Flags.DO, null); + OPTRecord optRecord = new OPTRecord(4096, 0, 0, Flags.DO); query.addRecord(optRecord, Section.ADDITIONAL); byte[] responseBytes = getRegistryDNS().generateReply(query, null); Message response = new Message(responseBytes); diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 5129f5d99ca0a..3f0a8b3a85fa7 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -106,7 +106,7 @@ 3.8.4 5.2.0 3.0.5 - 3.4.0 + 3.6.1 27.0-jre 5.1.0 From 2cf4d638af3520d60a892c94d39cf7a3a784f8f9 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 2 Aug 2024 16:01:03 +0100 Subject: [PATCH 075/113] HADOOP-19245. S3ABlockOutputStream no longer sends progress events in close() (#6974) Contributed by Steve Loughran --- .../org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java | 3 ++- .../hadoop/fs/s3a/performance/ITestCreateFileCost.java | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java index de0f59154e995..5fe39ac6ea336 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ABlockOutputStream.java @@ -1100,7 +1100,8 @@ private static class ProgressableListener implements ProgressListener { this.progress = progress; } - public void progressChanged(ProgressListenerEvent eventType, int bytesTransferred) { + @Override + public void progressChanged(ProgressListenerEvent eventType, long bytesTransferred) { if (progress != null) { progress.progress(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateFileCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateFileCost.java index c9a7415c18103..5bd4bf412ffa5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateFileCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateFileCost.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.Arrays; import java.util.Collection; +import java.util.concurrent.atomic.AtomicLong; import org.assertj.core.api.Assertions; import org.junit.Test; @@ -213,8 +214,11 @@ public void testCreateFilePerformanceFlag() throws Throwable { S3AFileSystem fs = getFileSystem(); Path path = methodPath(); + // increment progress events + AtomicLong progressEvents = new AtomicLong(0); FSDataOutputStreamBuilder builder = fs.createFile(path) .overwrite(false) + .progress(progressEvents::incrementAndGet) .recursive(); // this has a broken return type; something to do with the return value of @@ -225,6 +229,10 @@ public void testCreateFilePerformanceFlag() throws Throwable { always(NO_HEAD_OR_LIST), with(OBJECT_BULK_DELETE_REQUEST, 0), with(OBJECT_DELETE_REQUEST, 0)); + + Assertions.assertThat(progressEvents.get()) + .describedAs("progress events") + 
.isGreaterThanOrEqualTo(1); } @Test From b08d492abd905fc3c7606f54e5c4cb7351604d20 Mon Sep 17 00:00:00 2001 From: zhengchenyu Date: Mon, 5 Aug 2024 09:57:16 +0800 Subject: [PATCH 076/113] HADOOP-19246. Update the yasm rpm download address (#6973) Reviewed-by: Shilun Fan Signed-off-by: Tao Li --- dev-support/docker/pkg-resolver/install-yasm.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-support/docker/pkg-resolver/install-yasm.sh b/dev-support/docker/pkg-resolver/install-yasm.sh index a5f6162bc38d7..2aff0133c8bd9 100644 --- a/dev-support/docker/pkg-resolver/install-yasm.sh +++ b/dev-support/docker/pkg-resolver/install-yasm.sh @@ -40,7 +40,7 @@ fi if [ "$version_to_install" == "1.2.0-4" ]; then mkdir -p /tmp/yasm && - curl -L -s -S https://download-ib01.fedoraproject.org/pub/epel/7/x86_64/Packages/y/yasm-1.2.0-4.el7.x86_64.rpm \ + curl -L -s -S https://archives.fedoraproject.org/pub/archive/epel/7/x86_64/Packages/y/yasm-1.2.0-4.el7.x86_64.rpm \ -o /tmp/yasm-1.2.0-4.el7.x86_64.rpm && rpm -Uvh /tmp/yasm-1.2.0-4.el7.x86_64.rpm else From 59d5e0bb2e7e9022057b3ad107c963cf3089a3d2 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Mon, 5 Aug 2024 15:30:36 +0800 Subject: [PATCH 077/113] HADOOP-19244. Pullout arch-agnostic maven javadoc plugin configurations in hadoop-common (#6970) Contributed by Cheng Pan. Reviewed-by: Steve Loughran Signed-off-by: Shilun Fan --- hadoop-common-project/hadoop-common/pom.xml | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 7521cec6a1db4..90d667797343e 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -708,6 +708,17 @@ + + + org.apache.maven.plugins + maven-javadoc-plugin + + + **/FSProtos.java + + *.proto:*.tracing:*.protobuf + + @@ -1279,16 +1290,6 @@ - - org.apache.maven.plugins - maven-javadoc-plugin - - - **/FSProtos.java - - *.proto:*.tracing:*.protobuf - - From b189ef8197872b2255ec6ce64db9afeaeeba6e43 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Mon, 5 Aug 2024 10:42:12 -0700 Subject: [PATCH 078/113] HDFS-17575. SaslDataTransferClient should use SaslParticipant to create messages. (#6954) --- .../sasl/SaslDataTransferClient.java | 9 +++++---- .../datatransfer/sasl/SaslParticipant.java | 16 ++++++++++++++-- .../datatransfer/sasl/TestSaslDataTransfer.java | 2 +- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java index 043439130d5dc..dd1da77af1efd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferClient.java @@ -519,25 +519,25 @@ private IOStreamPair doSaslHandshake(InetAddress addr, // In which case there will be no encrypted secret sent from NN. 
BlockTokenIdentifier blockTokenIdentifier = accessToken.decodeIdentifier(); + final byte[] first = sasl.createFirstMessage(); if (blockTokenIdentifier != null) { byte[] handshakeSecret = accessToken.decodeIdentifier().getHandshakeMsg(); if (handshakeSecret == null || handshakeSecret.length == 0) { LOG.debug("Handshake secret is null, " + "sending without handshake secret."); - sendSaslMessage(out, new byte[0]); + sendSaslMessage(out, first); } else { LOG.debug("Sending handshake secret."); BlockTokenIdentifier identifier = new BlockTokenIdentifier(); identifier.readFields(new DataInputStream( new ByteArrayInputStream(accessToken.getIdentifier()))); String bpid = identifier.getBlockPoolId(); - sendSaslMessageHandshakeSecret(out, new byte[0], - handshakeSecret, bpid); + sendSaslMessageHandshakeSecret(out, first, handshakeSecret, bpid); } } else { LOG.debug("Block token id is null, sending without handshake secret."); - sendSaslMessage(out, new byte[0]); + sendSaslMessage(out, first); } // step 1 @@ -565,6 +565,7 @@ private IOStreamPair doSaslHandshake(InetAddress addr, cipherOptions.add(option); } } + LOG.debug("{}: cipherOptions={}", sasl, cipherOptions); sendSaslMessageAndNegotiationCipherOptions(out, localResponse, cipherOptions); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslParticipant.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslParticipant.java index e32f76a8ebd7d..7abd5bd126db8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslParticipant.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslParticipant.java @@ -20,6 +20,7 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.util.Map; +import java.util.Objects; import javax.security.auth.callback.CallbackHandler; import javax.security.sasl.Sasl; import javax.security.sasl.SaslClient; @@ -52,6 +53,7 @@ class SaslParticipant { private static final String SERVER_NAME = "0"; private static final String PROTOCOL = "hdfs"; private static final String[] MECHANISM_ARRAY = {SaslConstants.SASL_MECHANISM}; + private static final byte[] EMPTY_BYTE_ARRAY = {}; // One of these will always be null. private final SaslServer saslServer; @@ -110,7 +112,7 @@ public static SaslParticipant createClientSaslParticipant(String userName, * @param saslServer to wrap */ private SaslParticipant(SaslServer saslServer) { - this.saslServer = saslServer; + this.saslServer = Objects.requireNonNull(saslServer, "saslServer == null"); this.saslClient = null; } @@ -121,7 +123,12 @@ private SaslParticipant(SaslServer saslServer) { */ private SaslParticipant(SaslClient saslClient) { this.saslServer = null; - this.saslClient = saslClient; + this.saslClient = Objects.requireNonNull(saslClient, "saslClient == null"); + } + + byte[] createFirstMessage() throws SaslException { + return MECHANISM_ARRAY[0].equals(SaslConstants.SASL_MECHANISM_DEFAULT) ? EMPTY_BYTE_ARRAY + : evaluateChallengeOrResponse(EMPTY_BYTE_ARRAY); } /** @@ -228,4 +235,9 @@ public IOStreamPair createStreamPair(DataOutputStream out, new SaslOutputStream(out, saslServer)); } } + + @Override + public String toString() { + return "Sasl" + (saslServer != null? 
"Server" : "Client"); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java index 85e43f65c37b2..cbb3c07962dc7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestSaslDataTransfer.java @@ -77,7 +77,7 @@ public class TestSaslDataTransfer extends SaslDataTransferTestCase { public ExpectedException exception = ExpectedException.none(); @Rule - public Timeout timeout = new Timeout(60000); + public Timeout timeout = new Timeout(300_000); @After public void shutdown() { From 2a509117344a6b348aa418d8d426cbc12aefb999 Mon Sep 17 00:00:00 2001 From: Masatake Iwasaki Date: Thu, 8 Aug 2024 21:03:05 +0900 Subject: [PATCH 079/113] HADOOP-17609. Make SM4 support optional for OpenSSL native code. (#3019) Reviewed-by: Steve Loughran Reviewed-by: Wei-Chiu Chuang --- .../apache/hadoop/crypto/OpensslCipher.java | 16 ++++++++++++ .../crypto/OpensslSm4CtrCryptoCodec.java | 4 +++ .../org/apache/hadoop/crypto/OpensslCipher.c | 26 ++++++++++++++++++- .../apache/hadoop/crypto/TestCryptoCodec.java | 13 +++------- ...toStreamsWithOpensslSm4CtrCryptoCodec.java | 2 ++ .../hadoop/crypto/TestOpensslCipher.java | 10 +++++++ 6 files changed, 60 insertions(+), 11 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslCipher.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslCipher.java index b166cfc8611b3..c8a10404b0f84 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslCipher.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslCipher.java @@ -177,6 +177,20 @@ private static Transform tokenizeTransformation(String transformation) } return new Transform(parts[0], parts[1], parts[2]); } + + public static boolean isSupported(CipherSuite suite) { + Transform transform; + int algMode; + int padding; + try { + transform = tokenizeTransformation(suite.getName()); + algMode = AlgMode.get(transform.alg, transform.mode); + padding = Padding.get(transform.padding); + } catch (NoSuchAlgorithmException|NoSuchPaddingException e) { + return false; + } + return isSupportedSuite(algMode, padding); + } /** * Initialize this cipher with a key and IV. 
@@ -298,5 +312,7 @@ private native int doFinal(long context, ByteBuffer output, int offset, private native void clean(long ctx, long engineNum); + private native static boolean isSupportedSuite(int alg, int padding); + public native static String getLibraryName(); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslSm4CtrCryptoCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslSm4CtrCryptoCodec.java index f6b2f6a802556..9df1bbe89efa4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslSm4CtrCryptoCodec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslSm4CtrCryptoCodec.java @@ -41,6 +41,10 @@ public OpensslSm4CtrCryptoCodec() { if (loadingFailureReason != null) { throw new RuntimeException(loadingFailureReason); } + + if (!OpensslCipher.isSupported(CipherSuite.SM4_CTR_NOPADDING)) { + throw new RuntimeException("The OpenSSL native library is built without SM4 CTR support"); + } } @Override diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/OpensslCipher.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/OpensslCipher.c index f60a19a662c4c..976bf135ce7dd 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/OpensslCipher.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/crypto/OpensslCipher.c @@ -232,7 +232,10 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_initIDs #endif loadAesCtr(env); +#if !defined(OPENSSL_NO_SM4) loadSm4Ctr(env); +#endif + #if OPENSSL_VERSION_NUMBER >= 0x10101001L int ret = dlsym_OPENSSL_init_crypto(OPENSSL_INIT_LOAD_CONFIG, NULL); if(!ret) { @@ -245,7 +248,7 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_initIDs if (jthr) { (*env)->DeleteLocalRef(env, jthr); THROW(env, "java/lang/UnsatisfiedLinkError", \ - "Cannot find AES-CTR/SM4-CTR support, is your version of Openssl new enough?"); + "Cannot find AES-CTR support, is your version of OpenSSL new enough?"); return; } } @@ -554,3 +557,24 @@ JNIEXPORT jstring JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_getLibrary } #endif } + +JNIEXPORT jboolean JNICALL Java_org_apache_hadoop_crypto_OpensslCipher_isSupportedSuite + (JNIEnv *env, jclass clazz, jint alg, jint padding) +{ + if (padding != NOPADDING) { + return JNI_FALSE; + } + + if (alg == AES_CTR && (dlsym_EVP_aes_256_ctr != NULL && dlsym_EVP_aes_128_ctr != NULL)) { + return JNI_TRUE; + } + + if (alg == SM4_CTR) { +#if OPENSSL_VERSION_NUMBER >= 0x10101001L && !defined(OPENSSL_NO_SM4) + if (dlsym_EVP_sm4_ctr != NULL) { + return JNI_TRUE; + } +#endif + } + return JNI_FALSE; +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoCodec.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoCodec.java index c0fdc51b1389b..c5b493390a968 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoCodec.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoCodec.java @@ -106,31 +106,21 @@ public void testJceAesCtrCryptoCodec() throws Exception { @Test(timeout=120000) public void testJceSm4CtrCryptoCodec() throws Exception { - GenericTestUtils.assumeInNativeProfile(); - if (!NativeCodeLoader.buildSupportsOpenssl()) { - LOG.warn("Skipping test since 
openSSL library not loaded"); - Assume.assumeTrue(false); - } conf.set(HADOOP_SECURITY_CRYPTO_CIPHER_SUITE_KEY, "SM4/CTR/NoPadding"); conf.set(HADOOP_SECURITY_CRYPTO_CODEC_CLASSES_SM4_CTR_NOPADDING_KEY, JceSm4CtrCryptoCodec.class.getName()); conf.set(HADOOP_SECURITY_CRYPTO_JCE_PROVIDER_KEY, BouncyCastleProvider.PROVIDER_NAME); - Assert.assertEquals(null, OpensslCipher.getLoadingFailureReason()); cryptoCodecTest(conf, seed, 0, jceSm4CodecClass, jceSm4CodecClass, iv); cryptoCodecTest(conf, seed, count, jceSm4CodecClass, jceSm4CodecClass, iv); - cryptoCodecTest(conf, seed, count, - jceSm4CodecClass, opensslSm4CodecClass, iv); // Overflow test, IV: xx xx xx xx xx xx xx xx ff ff ff ff ff ff ff ff for(int i = 0; i < 8; i++) { iv[8 + i] = (byte) 0xff; } cryptoCodecTest(conf, seed, count, jceSm4CodecClass, jceSm4CodecClass, iv); - cryptoCodecTest(conf, seed, count, - jceSm4CodecClass, opensslSm4CodecClass, iv); } @Test(timeout=120000) @@ -164,6 +154,7 @@ public void testOpensslSm4CtrCryptoCodec() throws Exception { LOG.warn("Skipping test since openSSL library not loaded"); Assume.assumeTrue(false); } + Assume.assumeTrue(OpensslCipher.isSupported(CipherSuite.SM4_CTR_NOPADDING)); conf.set(HADOOP_SECURITY_CRYPTO_JCE_PROVIDER_KEY, BouncyCastleProvider.PROVIDER_NAME); Assert.assertEquals(null, OpensslCipher.getLoadingFailureReason()); @@ -181,6 +172,8 @@ public void testOpensslSm4CtrCryptoCodec() throws Exception { opensslSm4CodecClass, opensslSm4CodecClass, iv); cryptoCodecTest(conf, seed, count, opensslSm4CodecClass, jceSm4CodecClass, iv); + cryptoCodecTest(conf, seed, count, + jceSm4CodecClass, opensslSm4CodecClass, iv); } private void cryptoCodecTest(Configuration conf, int seed, int count, diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoStreamsWithOpensslSm4CtrCryptoCodec.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoStreamsWithOpensslSm4CtrCryptoCodec.java index f6345557211f9..ebc91959e21e5 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoStreamsWithOpensslSm4CtrCryptoCodec.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestCryptoStreamsWithOpensslSm4CtrCryptoCodec.java @@ -21,6 +21,7 @@ import org.apache.hadoop.crypto.random.OsSecureRandom; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.test.GenericTestUtils; +import org.junit.Assume; import org.junit.BeforeClass; import org.junit.Test; @@ -40,6 +41,7 @@ public class TestCryptoStreamsWithOpensslSm4CtrCryptoCodec @BeforeClass public static void init() throws Exception { GenericTestUtils.assumeInNativeProfile(); + Assume.assumeTrue(OpensslCipher.isSupported(CipherSuite.SM4_CTR_NOPADDING)); Configuration conf = new Configuration(); conf.set(HADOOP_SECURITY_CRYPTO_CIPHER_SUITE_KEY, "SM4/CTR/NoPadding"); conf.set(HADOOP_SECURITY_CRYPTO_CODEC_CLASSES_SM4_CTR_NOPADDING_KEY, diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestOpensslCipher.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestOpensslCipher.java index 966a88723a223..ff12f3cfe3322 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestOpensslCipher.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/TestOpensslCipher.java @@ -107,4 +107,14 @@ public void testDoFinalArguments() throws Exception { "Direct buffer is required", e); } 
} + + @Test(timeout=120000) + public void testIsSupportedSuite() throws Exception { + Assume.assumeTrue("Skipping due to falilure of loading OpensslCipher.", + OpensslCipher.getLoadingFailureReason() == null); + Assert.assertFalse("Unknown suite must not be supported.", + OpensslCipher.isSupported(CipherSuite.UNKNOWN)); + Assert.assertTrue("AES/CTR/NoPadding is not an optional suite.", + OpensslCipher.isSupported(CipherSuite.AES_CTR_NOPADDING)); + } } From 321a6cc55ed2df5222bde7b5c801322e8cf68203 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Thu, 8 Aug 2024 09:48:51 -0700 Subject: [PATCH 080/113] HADOOP-19072. S3A: expand optimisations on stores with "fs.s3a.performance.flags" for mkdir (#6543) If the flag list in fs.s3a.performance.flags includes "mkdir" then the safety check of a walk up the tree to look for a parent directory, -done to verify a directory isn't being created under a file- are skipped. This saves the cost of multiple list operations. Contributed by Viraj Jasani --- .../filesystem/fsdataoutputstreambuilder.md | 4 +- .../fs/FileContextCreateMkdirBaseTest.java | 21 ++--- .../contract/AbstractContractMkdirTest.java | 7 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 7 +- .../hadoop/fs/s3a/impl/MkdirOperation.java | 77 +++++++++++++++---- .../markdown/tools/hadoop-aws/performance.md | 21 ++++- .../contract/s3a/ITestS3AContractMkdir.java | 11 +++ .../ITestS3AContractMkdirWithCreatePerf.java | 75 ++++++++++++++++++ .../ITestS3AFileContextCreateMkdir.java | 9 ++- ...stS3AFileContextCreateMkdirCreatePerf.java | 67 ++++++++++++++++ 10 files changed, 265 insertions(+), 34 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md index 5f24e75569786..7dd3170036ce9 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdataoutputstreambuilder.md @@ -200,8 +200,8 @@ Prioritize file creation performance over safety checks for filesystem consisten This: 1. Skips the `LIST` call which makes sure a file is being created over a directory. Risk: a file is created over a directory. -1. Ignores the overwrite flag. -1. Never issues a `DELETE` call to delete parent directory markers. +2. Ignores the overwrite flag. +3. Never issues a `DELETE` call to delete parent directory markers. It is possible to probe an S3A Filesystem instance for this capability through the `hasPathCapability(path, "fs.s3a.create.performance")` check. 
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java index fbd598c9deb6a..fcb1b6925a494 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextCreateMkdirBaseTest.java @@ -27,6 +27,7 @@ import static org.apache.hadoop.fs.FileContextTestHelper.*; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsDirectory; import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; import org.apache.hadoop.test.GenericTestUtils; import org.slf4j.event.Level; @@ -55,7 +56,10 @@ public abstract class FileContextCreateMkdirBaseTest { protected final FileContextTestHelper fileContextTestHelper; protected static FileContext fc; - + + public static final String MKDIR_FILE_PRESENT_ERROR = + " should have failed as a file was present"; + static { GenericTestUtils.setLogLevel(FileSystem.LOG, Level.DEBUG); } @@ -128,7 +132,7 @@ public void testMkdirsRecursiveWithExistingDir() throws IOException { } @Test - public void testMkdirRecursiveWithExistingFile() throws IOException { + public void testMkdirRecursiveWithExistingFile() throws Exception { Path f = getTestRootPath(fc, "NonExistant3/aDir"); fc.mkdir(f, FileContext.DEFAULT_PERM, true); assertIsDirectory(fc.getFileStatus(f)); @@ -141,13 +145,12 @@ public void testMkdirRecursiveWithExistingFile() throws IOException { // try creating another folder which conflicts with filePath Path dirPath = new Path(filePath, "bDir/cDir"); - try { - fc.mkdir(dirPath, FileContext.DEFAULT_PERM, true); - Assert.fail("Mkdir for " + dirPath - + " should have failed as a file was present"); - } catch(IOException e) { - // failed as expected - } + intercept( + IOException.class, + null, + "Mkdir for " + dirPath + MKDIR_FILE_PRESENT_ERROR, + () -> fc.mkdir(dirPath, FileContext.DEFAULT_PERM, true) + ); } @Test diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java index de44bc232e784..65ca0ee218fd9 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractMkdirTest.java @@ -35,6 +35,9 @@ */ public abstract class AbstractContractMkdirTest extends AbstractFSContractTestBase { + public static final String MKDIRS_NOT_FAILED_OVER_FILE = + "mkdirs did not fail over a file but returned "; + @Test public void testMkDirRmDir() throws Throwable { FileSystem fs = getFileSystem(); @@ -66,7 +69,7 @@ public void testNoMkdirOverFile() throws Throwable { createFile(getFileSystem(), path, false, dataset); try { boolean made = fs.mkdirs(path); - fail("mkdirs did not fail over a file but returned " + made + fail(MKDIRS_NOT_FAILED_OVER_FILE + made + "; " + ls(path)); } catch (ParentNotDirectoryException | FileAlreadyExistsException e) { //parent is a directory @@ -93,7 +96,7 @@ public void testMkdirOverParentFile() throws Throwable { Path child = new Path(path,"child-to-mkdir"); try { boolean made = fs.mkdirs(child); - fail("mkdirs did not fail 
over a file but returned " + made + fail(MKDIRS_NOT_FAILED_OVER_FILE + made + "; " + ls(path)); } catch (ParentNotDirectoryException | FileAlreadyExistsException e) { //parent is a directory diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index f8207696096fb..25b036b5fc7f9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3828,7 +3828,8 @@ public boolean mkdirs(Path p, FsPermission permission) throws IOException, createStoreContext(), path, createMkdirOperationCallbacks(), - isMagicCommitPath(path))); + isMagicCommitPath(path), + performanceFlags.enabled(PerformanceFlagEnum.Mkdir))); } /** @@ -4281,7 +4282,9 @@ public boolean createEmptyDir(Path path, StoreContext storeContext) new MkdirOperation( storeContext, path, - createMkdirOperationCallbacks(), false)); + createMkdirOperationCallbacks(), + false, + performanceFlags.enabled(PerformanceFlagEnum.Mkdir))); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java index 98a91b1881ba1..a027cabffd46d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/MkdirOperation.java @@ -26,6 +26,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; @@ -54,30 +56,54 @@ *

 *   <li>If needed, one PUT</li>
  • * */ +@InterfaceAudience.Private +@InterfaceStability.Evolving public class MkdirOperation extends ExecutingStoreOperation { private static final Logger LOG = LoggerFactory.getLogger( MkdirOperation.class); + /** + * Path of the directory to be created. + */ private final Path dir; + /** + * Mkdir Callbacks object to be used by the Mkdir operation. + */ private final MkdirCallbacks callbacks; /** - * Should checks for ancestors existing be skipped? - * This flag is set when working with magic directories. + * Whether to skip the validation of the parent directory. + */ + private final boolean performanceMkdir; + + /** + * Whether the path is magic commit path. */ private final boolean isMagicPath; + /** + * Initialize Mkdir Operation context for S3A. + * + * @param storeContext Store context. + * @param dir Dir path of the directory. + * @param callbacks MkdirCallbacks object used by the Mkdir operation. + * @param isMagicPath True if the path is magic commit path. + * @param performanceMkdir If true, skip validation of the parent directory + * structure. + */ public MkdirOperation( final StoreContext storeContext, final Path dir, final MkdirCallbacks callbacks, - final boolean isMagicPath) { + final boolean isMagicPath, + final boolean performanceMkdir) { super(storeContext); this.dir = dir; this.callbacks = callbacks; this.isMagicPath = isMagicPath; + this.performanceMkdir = performanceMkdir; } /** @@ -124,7 +150,32 @@ public Boolean execute() throws IOException { return true; } - // Walk path to root, ensuring closest ancestor is a directory, not file + // if performance creation mode is set, no need to check + // whether the closest ancestor is dir. + if (!performanceMkdir) { + verifyFileStatusOfClosestAncestor(); + } + + // if we get here there is no directory at the destination. + // so create one. + + // Create the marker file, delete the parent entries + // if the filesystem isn't configured to retain them + callbacks.createFakeDirectory(dir, false); + return true; + } + + /** + * Verify the file status of the closest ancestor, if it is + * dir, the mkdir operation should proceed. If it is file, + * the mkdir operation should throw error. + * + * @throws IOException If either file status could not be retrieved, + * or if the closest ancestor is a file. + */ + private void verifyFileStatusOfClosestAncestor() throws IOException { + FileStatus fileStatus; + // Walk path to root, ensuring the closest ancestor is a directory, not file Path fPart = dir.getParent(); try { while (fPart != null && !fPart.isRoot()) { @@ -140,24 +191,18 @@ public Boolean execute() throws IOException { } // there's a file at the parent entry - throw new FileAlreadyExistsException(String.format( - "Can't make directory for path '%s' since it is a file.", - fPart)); + throw new FileAlreadyExistsException( + String.format( + "Can't make directory for path '%s' since it is a file.", + fPart)); } } catch (AccessDeniedException e) { LOG.info("mkdirs({}}: Access denied when looking" + " for parent directory {}; skipping checks", - dir, fPart); + dir, + fPart); LOG.debug("{}", e, e); } - - // if we get here there is no directory at the destination. - // so create one. 
- - // Create the marker file, delete the parent entries - // if the filesystem isn't configured to retain them - callbacks.createFakeDirectory(dir, false); - return true; } /** diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md index 876072e81e8fd..b8cb3ff732b36 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md @@ -299,8 +299,11 @@ understands the risks. | *Option* | *Meaning* | Since | |----------|--------------------|:------| | `create` | Create Performance | 3.4.1 | +| `mkdir` | Mkdir Performance | 3.4.1 | -The `create` flag has the same semantics as [`fs.s3a.create.performance`](#create-performance) + +* The `create` flag has the same semantics as [`fs.s3a.create.performance`](#create-performance) +* The `mkdir` flag semantics are explained in [Mkdir Performance](#mkdir-performance) ### Create Performance `fs.s3a.create.performance` @@ -321,6 +324,22 @@ It may however result in Use with care, and, ideally, enable versioning on the S3 store. + +### Mkdir Performance + +`fs.s3a.performance.flag` flag option `mkdir`: + +* Mkdir does not check whether the parent is directory or file. + +This avoids the verification of the file status of the parent file +or the closest ancestor. Unlike the default mkdir operation, if the +parent is not a directory, the mkdir operation does not throw any +error. + +This option can help with mkdir performance improvement but must be used +only if the person setting them understands the above-mentioned risk. + + ### Thread and connection pool settings. Each S3A client interacting with a single bucket, as a single user, has its diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java index d953e7eb6aea9..bace0a79f2458 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java @@ -22,11 +22,22 @@ import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; import org.apache.hadoop.fs.contract.AbstractFSContract; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; + /** * Test dir operations on S3A. */ public class ITestS3AContractMkdir extends AbstractContractMkdirTest { + @Override + protected Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + removeBaseAndBucketOverrides(conf, + FS_S3A_CREATE_PERFORMANCE); + return conf; + } + @Override protected AbstractFSContract createContract(Configuration conf) { return new S3AContract(conf); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java new file mode 100644 index 0000000000000..cacd6945d2fa0 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.s3a; + +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; + +import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; +import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; + +/** + * Test mkdir operations on S3A with create performance mode. + */ +public class ITestS3AContractMkdirWithCreatePerf extends AbstractContractMkdirTest { + + @Override + protected Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + removeBaseAndBucketOverrides( + conf, + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); + conf.setStrings(FS_S3A_PERFORMANCE_FLAGS, + "create,mkdir"); + return conf; + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new S3AContract(conf); + } + + @Test + public void testMkdirOverParentFile() throws Throwable { + describe("try to mkdir where a parent is a file, should pass"); + FileSystem fs = getFileSystem(); + Path path = methodPath(); + byte[] dataset = dataset(1024, ' ', 'z'); + createFile(getFileSystem(), path, false, dataset); + Path child = new Path(path, "child-to-mkdir"); + boolean childCreated = fs.mkdirs(child); + assertTrue("Child dir is created", childCreated); + assertIsFile(path); + byte[] bytes = ContractTestUtils.readDataset(getFileSystem(), path, dataset.length); + ContractTestUtils.compareByteArrays(dataset, bytes, dataset.length); + assertPathExists("mkdir failed", child); + assertDeleted(child, true); + } + +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java index dcc9da933656f..e71ca2ae52ca0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java @@ -13,12 +13,14 @@ */ package org.apache.hadoop.fs.s3a.fileContext; -import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileContextCreateMkdirBaseTest; import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.junit.Before; +import static 
org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; + /** * Extends FileContextCreateMkdirBaseTest for a S3a FileContext. */ @@ -26,8 +28,11 @@ public class ITestS3AFileContextCreateMkdir extends FileContextCreateMkdirBaseTest { @Before - public void setUp() throws IOException, Exception { + public void setUp() throws Exception { Configuration conf = new Configuration(); + removeBaseAndBucketOverrides( + conf, + FS_S3A_CREATE_PERFORMANCE); fc = S3ATestUtils.createTestFileContext(conf); super.setUp(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java new file mode 100644 index 0000000000000..64039e4c5206c --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java @@ -0,0 +1,67 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs.s3a.fileContext; + +import org.junit.Before; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileContextCreateMkdirBaseTest; +import org.apache.hadoop.fs.s3a.S3ATestUtils; + +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Extends FileContextCreateMkdirBaseTest for a S3a FileContext with + * create performance mode. + */ +public class ITestS3AFileContextCreateMkdirCreatePerf + extends FileContextCreateMkdirBaseTest { + + @Before + public void setUp() throws Exception { + Configuration conf = new Configuration(); + removeBaseAndBucketOverrides( + conf, + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); + conf.setStrings(FS_S3A_PERFORMANCE_FLAGS, + "mkdir"); + fc = S3ATestUtils.createTestFileContext(conf); + super.setUp(); + } + + @Override + public void tearDown() throws Exception { + if (fc != null) { + super.tearDown(); + } + } + + @Test + public void testMkdirRecursiveWithExistingFile() throws Exception { + intercept( + AssertionError.class, + MKDIR_FILE_PRESENT_ERROR, + "Dir creation should not have failed. " + + "Creation performance mode is expected " + + "to create dir without checking file " + + "status of parent dir.", + super::testMkdirRecursiveWithExistingFile); + } + +} From 74ff00705cf67911f1ff8320c6c97354350d6952 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Mon, 12 Aug 2024 06:16:44 -0700 Subject: [PATCH 081/113] HADOOP-19072. 
S3A: Override fs.s3a.performance.flags for tests (ADDENDUM) (#6985) This is a followup to #6543 which ensures all test pass in configurations where fs.s3a.performance.flags is set to "*" or contains "mkdirs" Contributed by VJ Jasani --- .../hadoop/fs/contract/s3a/ITestS3AContractMkdir.java | 8 ++++++-- .../s3a/fileContext/ITestS3AFileContextCreateMkdir.java | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java index bace0a79f2458..bce67ed67f31d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java @@ -23,6 +23,7 @@ import org.apache.hadoop.fs.contract.AbstractFSContract; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; /** @@ -33,8 +34,11 @@ public class ITestS3AContractMkdir extends AbstractContractMkdirTest { @Override protected Configuration createConfiguration() { Configuration conf = super.createConfiguration(); - removeBaseAndBucketOverrides(conf, - FS_S3A_CREATE_PERFORMANCE); + removeBaseAndBucketOverrides( + conf, + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); + conf.set(FS_S3A_PERFORMANCE_FLAGS, ""); return conf; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java index e71ca2ae52ca0..ce7e9d4a43d80 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java @@ -19,6 +19,7 @@ import org.junit.Before; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; +import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; /** @@ -32,7 +33,8 @@ public void setUp() throws Exception { Configuration conf = new Configuration(); removeBaseAndBucketOverrides( conf, - FS_S3A_CREATE_PERFORMANCE); + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); fc = S3ATestUtils.createTestFileContext(conf); super.setUp(); } From fa83c9a805041b94b3663b773e99e8074c534770 Mon Sep 17 00:00:00 2001 From: Viraj Jasani Date: Wed, 14 Aug 2024 02:57:44 -0700 Subject: [PATCH 082/113] HADOOP-19072 S3A: Override fs.s3a.performance.flags for tests (ADDENDUM 2) (#6993) Second followup to #6543; all hadoop-aws integration tests complete correctly even when fs.s3a.performance.flags = * Contributed by Viraj Jasani --- .../contract/s3a/ITestS3AContractCreate.java | 12 ++++-------- .../contract/s3a/ITestS3AContractMkdir.java | 14 ++++---------- .../ITestS3AContractMkdirWithCreatePerf.java | 13 +++---------- .../fs/s3a/ITestS3AFSMainOperations.java | 6 +++++- .../fs/s3a/ITestS3AFileOperationCost.java | 13 ++++--------- .../fs/s3a/ITestS3AFileSystemContract.java | 5 ++++- .../apache/hadoop/fs/s3a/S3ATestUtils.java | 19 +++++++++++++++++++ .../ITestS3AFileContextCreateMkdir.java | 12 
++++-------- ...stS3AFileContextCreateMkdirCreatePerf.java | 12 +++--------- .../ITestS3AFileContextMainOperations.java | 7 ++++++- .../fileContext/ITestS3AFileContextURI.java | 6 +++++- .../s3a/performance/ITestCreateFileCost.java | 11 ++++------- .../s3a/performance/ITestS3ADeleteCost.java | 13 ++++--------- 13 files changed, 69 insertions(+), 74 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java index a1067ddc0ecfe..a6590e99e6caf 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractCreate.java @@ -29,9 +29,7 @@ import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.s3a.S3ATestUtils; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags; /** * S3A contract tests creating files. @@ -70,11 +68,9 @@ protected AbstractFSContract createContract(Configuration conf) { @Override protected Configuration createConfiguration() { - final Configuration conf = super.createConfiguration(); - removeBaseAndBucketOverrides(conf, - FS_S3A_CREATE_PERFORMANCE, - FS_S3A_PERFORMANCE_FLAGS); - conf.setBoolean(FS_S3A_CREATE_PERFORMANCE, createPerformance); + final Configuration conf = setPerformanceFlags( + super.createConfiguration(), + createPerformance ? "create" : ""); S3ATestUtils.disableFilesystemCaching(conf); return conf; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java index bce67ed67f31d..847f6980b5619 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdir.java @@ -22,9 +22,7 @@ import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; import org.apache.hadoop.fs.contract.AbstractFSContract; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags; /** * Test dir operations on S3A. 
@@ -33,13 +31,9 @@ public class ITestS3AContractMkdir extends AbstractContractMkdirTest { @Override protected Configuration createConfiguration() { - Configuration conf = super.createConfiguration(); - removeBaseAndBucketOverrides( - conf, - FS_S3A_CREATE_PERFORMANCE, - FS_S3A_PERFORMANCE_FLAGS); - conf.set(FS_S3A_PERFORMANCE_FLAGS, ""); - return conf; + return setPerformanceFlags( + super.createConfiguration(), + ""); } @Override diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java index cacd6945d2fa0..4b2468de97bb8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractMkdirWithCreatePerf.java @@ -29,9 +29,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags; /** * Test mkdir operations on S3A with create performance mode. @@ -40,14 +38,9 @@ public class ITestS3AContractMkdirWithCreatePerf extends AbstractContractMkdirTe @Override protected Configuration createConfiguration() { - Configuration conf = super.createConfiguration(); - removeBaseAndBucketOverrides( - conf, - FS_S3A_CREATE_PERFORMANCE, - FS_S3A_PERFORMANCE_FLAGS); - conf.setStrings(FS_S3A_PERFORMANCE_FLAGS, + return setPerformanceFlags( + super.createConfiguration(), "create,mkdir"); - return conf; } @Override diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFSMainOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFSMainOperations.java index 013ec901d0a77..0281c57f5cbce 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFSMainOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFSMainOperations.java @@ -31,6 +31,7 @@ import static org.apache.hadoop.fs.s3a.S3ATestUtils.createTestPath; import static org.apache.hadoop.fs.s3a.S3ATestUtils.isCreatePerformanceEnabled; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags; /** * S3A Test suite for the FSMainOperationsBaseTest tests. 
@@ -46,7 +47,10 @@ public ITestS3AFSMainOperations() { @Override protected FileSystem createFileSystem() throws Exception { - contract = new S3AContract(new Configuration()); + Configuration conf = setPerformanceFlags( + new Configuration(), + ""); + contract = new S3AContract(conf); contract.init(); return contract.getTestFileSystem(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index aa46557e9104b..585317c8daf3a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -40,9 +40,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.*; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags; import static org.apache.hadoop.fs.s3a.Statistic.*; import static org.apache.hadoop.fs.s3a.performance.OperationCost.*; import static org.apache.hadoop.test.GenericTestUtils.getTestDir; @@ -80,12 +78,9 @@ public ITestS3AFileOperationCost( @Override public Configuration createConfiguration() { - final Configuration conf = super.createConfiguration(); - removeBaseAndBucketOverrides(conf, - FS_S3A_CREATE_PERFORMANCE, - FS_S3A_PERFORMANCE_FLAGS); - conf.setBoolean(FS_S3A_CREATE_PERFORMANCE, isKeepingMarkers()); - return conf; + return setPerformanceFlags( + super.createConfiguration(), + isKeepingMarkers() ? "create" : ""); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileSystemContract.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileSystemContract.java index 56827043c9b82..4808145765822 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileSystemContract.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileSystemContract.java @@ -35,6 +35,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; import static org.apache.hadoop.fs.s3a.S3ATestUtils.isCreatePerformanceEnabled; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags; import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.junit.Assume.*; import static org.junit.Assert.*; @@ -65,7 +66,9 @@ protected int getGlobalTimeout() { @Before public void setUp() throws Exception { nameThread(); - Configuration conf = new Configuration(); + Configuration conf = setPerformanceFlags( + new Configuration(), + ""); fs = S3ATestUtils.createTestFileSystem(conf); assumeNotNull(fs); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index d8bb5898aa076..3a3f875f5f0d5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -1127,6 +1127,25 @@ public static void assumeStoreAwsHosted(final FileSystem fs) { .getTrimmed(ENDPOINT, DEFAULT_ENDPOINT))); } + /** + * Modify the config by setting the performance flags and return the modified config. 
+ * + * @param conf The configuration object. + * @param flagStr The performance flag string. + * @return The modified configuration object. + */ + public static Configuration setPerformanceFlags(final Configuration conf, + final String flagStr) { + removeBaseAndBucketOverrides( + conf, + FS_S3A_CREATE_PERFORMANCE, + FS_S3A_PERFORMANCE_FLAGS); + if (flagStr != null) { + conf.set(FS_S3A_PERFORMANCE_FLAGS, flagStr); + } + return conf; + } + /** * Helper class to do diffs of metrics. */ diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java index ce7e9d4a43d80..095d2239eed70 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdir.java @@ -18,9 +18,7 @@ import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.junit.Before; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags; /** * Extends FileContextCreateMkdirBaseTest for a S3a FileContext. @@ -30,11 +28,9 @@ public class ITestS3AFileContextCreateMkdir @Before public void setUp() throws Exception { - Configuration conf = new Configuration(); - removeBaseAndBucketOverrides( - conf, - FS_S3A_CREATE_PERFORMANCE, - FS_S3A_PERFORMANCE_FLAGS); + Configuration conf = setPerformanceFlags( + new Configuration(), + null); fc = S3ATestUtils.createTestFileContext(conf); super.setUp(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java index 64039e4c5206c..68dde70bfeb50 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextCreateMkdirCreatePerf.java @@ -20,9 +20,7 @@ import org.apache.hadoop.fs.FileContextCreateMkdirBaseTest; import org.apache.hadoop.fs.s3a.S3ATestUtils; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags; import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -34,12 +32,8 @@ public class ITestS3AFileContextCreateMkdirCreatePerf @Before public void setUp() throws Exception { - Configuration conf = new Configuration(); - removeBaseAndBucketOverrides( - conf, - FS_S3A_CREATE_PERFORMANCE, - FS_S3A_PERFORMANCE_FLAGS); - conf.setStrings(FS_S3A_PERFORMANCE_FLAGS, + Configuration conf = setPerformanceFlags( + new Configuration(), "mkdir"); fc = S3ATestUtils.createTestFileContext(conf); super.setUp(); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextMainOperations.java 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextMainOperations.java index b28f88e43b42e..cc630484a131c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextMainOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextMainOperations.java @@ -27,6 +27,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.S3ATestUtils; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags; + /** * S3A implementation of FileContextMainOperationsBaseTest. */ @@ -36,7 +38,10 @@ public class ITestS3AFileContextMainOperations @Before public void setUp() throws IOException, Exception { - Configuration conf = new Configuration(); + Configuration conf = setPerformanceFlags( + new Configuration(), + ""); + fc = S3ATestUtils.createTestFileContext(conf); super.setUp(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextURI.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextURI.java index bef359cca73c0..54161d10128e5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextURI.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/fileContext/ITestS3AFileContextURI.java @@ -21,6 +21,8 @@ import org.junit.Ignore; import org.junit.Test; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags; + /** * S3a implementation of FileContextURIBase. */ @@ -30,7 +32,9 @@ public class ITestS3AFileContextURI extends FileContextURIBase { @Before public void setUp() throws IOException, Exception { - conf = new Configuration(); + conf = setPerformanceFlags( + new Configuration(), + ""); fc1 = S3ATestUtils.createTestFileContext(conf); fc2 = S3ATestUtils.createTestFileContext(conf); //different object, same FS diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateFileCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateFileCost.java index 5bd4bf412ffa5..65786bf6d6919 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateFileCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateFileCost.java @@ -42,9 +42,8 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.toChar; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_HEADER; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; import static org.apache.hadoop.fs.s3a.Constants.XA_HEADER_PREFIX; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags; import static org.apache.hadoop.fs.s3a.Statistic.OBJECT_BULK_DELETE_REQUEST; import static org.apache.hadoop.fs.s3a.Statistic.OBJECT_DELETE_REQUEST; import static org.apache.hadoop.fs.s3a.performance.OperationCost.CREATE_FILE_NO_OVERWRITE; @@ -105,11 +104,9 @@ private OperationCost expected(OperationCost source) { @Override public Configuration createConfiguration() { - final Configuration conf = super.createConfiguration(); - removeBaseAndBucketOverrides(conf, - FS_S3A_CREATE_PERFORMANCE, - FS_S3A_PERFORMANCE_FLAGS); - conf.setBoolean(FS_S3A_CREATE_PERFORMANCE, 
createPerformance); + final Configuration conf = setPerformanceFlags( + super.createConfiguration(), + createPerformance ? "create" : ""); S3ATestUtils.disableFilesystemCaching(conf); return conf; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java index 9979b72e7110d..6bd4114f07cc3 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3ADeleteCost.java @@ -39,9 +39,7 @@ import org.apache.hadoop.fs.s3a.Tristate; import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; -import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.setPerformanceFlags; import static org.apache.hadoop.fs.s3a.Statistic.*; import static org.apache.hadoop.fs.s3a.performance.OperationCost.*; import static org.apache.hadoop.fs.s3a.performance.OperationCostValidator.probe; @@ -80,12 +78,9 @@ public ITestS3ADeleteCost(final String name, @Override public Configuration createConfiguration() { - Configuration conf = super.createConfiguration(); - removeBaseAndBucketOverrides(conf, - FS_S3A_CREATE_PERFORMANCE, - FS_S3A_PERFORMANCE_FLAGS); - conf.setBoolean(FS_S3A_CREATE_PERFORMANCE, false); - return conf; + return setPerformanceFlags( + super.createConfiguration(), + ""); } @Override From 55a576906dda00a04be1f13ed7f61e32ef73e309 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 14 Aug 2024 14:43:00 +0100 Subject: [PATCH 083/113] HADOOP-19131. Assist reflection IO with WrappedOperations class (#6686) 1. The class WrappedIO has been extended with more filesystem operations - openFile() - PathCapabilities - StreamCapabilities - ByteBufferPositionedReadable All these static methods raise UncheckedIOExceptions rather than checked ones. 2. The adjacent class org.apache.hadoop.io.wrappedio.WrappedStatistics provides similar access to IOStatistics/IOStatisticsContext classes and operations. Allows callers to: * Get a serializable IOStatisticsSnapshot from an IOStatisticsSource or IOStatistics instance * Save an IOStatisticsSnapshot to file * Convert an IOStatisticsSnapshot to JSON * Given an object which may be an IOStatisticsSource, return an object whose toString() value is a dynamically generated, human readable summary. This is for logging. * Separate getters to the different sections of IOStatistics. * Mean values are returned as a Map.Pair of (samples, sum) from which means may be calculated. There are examples of the dynamic bindings to these classes in: org.apache.hadoop.io.wrappedio.impl.DynamicWrappedIO org.apache.hadoop.io.wrappedio.impl.DynamicWrappedStatistics These use DynMethods and other classes in the package org.apache.hadoop.util.dynamic which are based on the Apache Parquet equivalents. This makes re-implementing these in that library and others which their own fork of the classes (example: Apache Iceberg) 3. 
The openFile() option "fs.option.openfile.read.policy" has added specific file format policies for the core filetypes * avro * columnar * csv * hbase * json * orc * parquet S3A chooses the appropriate sequential/random policy as a A policy `parquet, columnar, vector, random, adaptive` will use the parquet policy for any filesystem aware of it, falling back to the first entry in the list which the specific version of the filesystem recognizes 4. New Path capability fs.capability.virtual.block.locations Indicates that locations are generated client side and don't refer to real hosts. Contributed by Steve Loughran --- .../dev-support/findbugsExcludeFile.xml | 6 + .../hadoop/fs/CommonPathCapabilities.java | 16 + .../apache/hadoop/fs/FSDataInputStream.java | 8 + .../java/org/apache/hadoop/fs/Options.java | 65 +- .../apache/hadoop/fs/RawLocalFileSystem.java | 2 + .../apache/hadoop/io/wrappedio/WrappedIO.java | 149 +++- .../io/wrappedio/WrappedStatistics.java | 357 +++++++++ .../io/wrappedio/impl/DynamicWrappedIO.java | 500 +++++++++++++ .../impl/DynamicWrappedStatistics.java | 678 ++++++++++++++++++ .../io/wrappedio/impl/package-info.java | 29 + .../hadoop/io/wrappedio/package-info.java | 35 + .../hadoop/util/dynamic/BindingUtils.java | 214 ++++++ .../hadoop/util/dynamic/DynConstructors.java | 273 +++++++ .../hadoop/util/dynamic/DynMethods.java | 544 ++++++++++++++ .../hadoop/util/dynamic/package-info.java | 31 + .../util/functional/BiFunctionRaisingIOE.java | 16 + .../util/functional/CallableRaisingIOE.java | 19 + .../util/functional/FunctionRaisingIOE.java | 15 + .../hadoop/util/functional/FunctionalIO.java | 23 +- .../apache/hadoop/util/functional/Tuples.java | 17 + .../filesystem/fsdatainputstreambuilder.md | 95 ++- .../AbstractContractBulkDeleteTest.java | 28 +- .../hadoop/fs/contract/ContractTestUtils.java | 18 + .../io/wrappedio/impl/TestWrappedIO.java | 484 +++++++++++++ .../wrappedio/impl/TestWrappedStatistics.java | 496 +++++++++++++ .../hadoop/util/dynamic/Concatenator.java | 85 +++ .../util/dynamic/TestDynConstructors.java | 170 +++++ .../hadoop/util/dynamic/TestDynMethods.java | 320 +++++++++ .../util/functional/TestFunctionalIO.java | 14 + .../src/test/resources/log4j.properties | 4 +- .../fs/contract/hdfs/TestDFSWrappedIO.java | 49 ++ .../fs/aliyun/oss/AliyunOSSFileSystem.java | 17 + .../apache/hadoop/fs/s3a/S3AFileSystem.java | 4 +- .../apache/hadoop/fs/s3a/S3AInputPolicy.java | 24 +- .../s3a/ITestS3AContractVectoredRead.java | 4 +- .../fs/contract/s3a/ITestS3AWrappedIO.java | 35 + .../fs/s3a/impl/TestOpenFileSupport.java | 43 +- .../fs/s3a/performance/ITestS3AOpenCost.java | 2 + .../fs/azurebfs/AzureBlobFileSystem.java | 3 +- .../azurebfs/contract/ITestAbfsWrappedIO.java | 53 ++ 40 files changed, 4896 insertions(+), 49 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedStatistics.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/impl/DynamicWrappedIO.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/impl/DynamicWrappedStatistics.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/impl/package-info.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/package-info.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/BindingUtils.java create mode 
100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/DynConstructors.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/DynMethods.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/package-info.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/wrappedio/impl/TestWrappedIO.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/wrappedio/impl/TestWrappedStatistics.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/dynamic/Concatenator.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/dynamic/TestDynConstructors.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/dynamic/TestDynMethods.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestDFSWrappedIO.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AWrappedIO.java create mode 100644 hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsWrappedIO.java diff --git a/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml b/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml index fdc90ed3c96c0..82e31355831ca 100644 --- a/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml +++ b/hadoop-common-project/hadoop-common/dev-support/findbugsExcludeFile.xml @@ -454,4 +454,10 @@ + + + + + + diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java index 2005f0ae3be31..4211a344b6d2c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java @@ -187,4 +187,20 @@ private CommonPathCapabilities() { */ public static final String BULK_DELETE = "fs.capability.bulk.delete"; + /** + * Capability string to probe for block locations returned in {@code LocatedFileStatus} + * instances from calls such as {@code getBlockLocations()} and {@code listStatus()}l + * to be 'virtual' rather than actual values resolved against a Distributed Filesystem including + * HDFS: {@value}. + *

+   * Key implications from this path capability being true:
+   * <ol>
+   *   <li>Work can be scheduled anywhere</li>
+   *   <li>Creation of the location list is a low cost-client side operation</li>
+   * </ol>
    + * Implication #2 means there is no performance penalty from use of FileSystem operations which + * return lists or iterators of {@code LocatedFileStatus}. + */ + public static final String VIRTUAL_BLOCK_LOCATIONS = "fs.capability.virtual.block.locations"; + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java index cca6c28da11a3..fc36b5bd6d657 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java @@ -262,6 +262,14 @@ public int read(long position, ByteBuffer buf) throws IOException { "by " + in.getClass().getCanonicalName()); } + /** + * Delegate to the underlying stream. + * @param position position within file + * @param buf the ByteBuffer to receive the results of the read operation. + * @throws IOException on a failure from the nested stream. + * @throws UnsupportedOperationException if the inner stream does not + * support this operation. + */ @Override public void readFully(long position, ByteBuffer buf) throws IOException { if (in instanceof ByteBufferPositionedReadable) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Options.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Options.java index 9ef7de657dc15..b59d2f3be1526 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Options.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Options.java @@ -573,6 +573,12 @@ private OpenFileOptions() { public static final String FS_OPTION_OPENFILE_BUFFER_SIZE = FS_OPTION_OPENFILE + "buffer.size"; + /** + * OpenFile footer cache flag: {@value}. + */ + public static final String FS_OPTION_OPENFILE_FOOTER_CACHE = + FS_OPTION_OPENFILE + "footer.cache"; + /** * OpenFile option for read policies: {@value}. */ @@ -586,6 +592,7 @@ private OpenFileOptions() { public static final Set FS_OPTION_OPENFILE_STANDARD_OPTIONS = Collections.unmodifiableSet(Stream.of( FS_OPTION_OPENFILE_BUFFER_SIZE, + FS_OPTION_OPENFILE_FOOTER_CACHE, FS_OPTION_OPENFILE_READ_POLICY, FS_OPTION_OPENFILE_LENGTH, FS_OPTION_OPENFILE_SPLIT_START, @@ -599,11 +606,61 @@ private OpenFileOptions() { "adaptive"; /** - * Read policy {@value} -whateve the implementation does by default. + * We are an avro file: {@value}. + */ + public static final String FS_OPTION_OPENFILE_READ_POLICY_AVRO = "avro"; + + /** + * This is a columnar file format. + * Do whatever is needed to optimize for it: {@value}. + */ + public static final String FS_OPTION_OPENFILE_READ_POLICY_COLUMNAR = + "columnar"; + + /** + * This is a CSV file of plain or UTF-8 text + * to be read sequentially. + * Do whatever is needed to optimize for it: {@value}. + */ + public static final String FS_OPTION_OPENFILE_READ_POLICY_CSV = + "csv"; + + /** + * Read policy {@value} -whatever the implementation does by default. */ public static final String FS_OPTION_OPENFILE_READ_POLICY_DEFAULT = "default"; + /** + * This is a table file for Apache HBase. + * Do whatever is needed to optimize for it: {@value}. + */ + public static final String FS_OPTION_OPENFILE_READ_POLICY_HBASE = + "hbase"; + + /** + * This is a JSON file of UTF-8 text, including a + * JSON line file where each line is a JSON entity. + * Do whatever is needed to optimize for it: {@value}. 
+ */ + public static final String FS_OPTION_OPENFILE_READ_POLICY_JSON = + "json"; + + /** + * This is an ORC file. + * Do whatever is needed to optimize for it: {@value}. + */ + public static final String FS_OPTION_OPENFILE_READ_POLICY_ORC = + "orc"; + + /** + * This is a parquet file with a v1/v3 footer: {@value}. + * Do whatever is needed to optimize for it, such as footer + * prefetch and cache, + */ + public static final String FS_OPTION_OPENFILE_READ_POLICY_PARQUET = + "parquet"; + /** * Read policy for random IO: {@value}. */ @@ -634,7 +691,13 @@ private OpenFileOptions() { public static final Set FS_OPTION_OPENFILE_READ_POLICIES = Collections.unmodifiableSet(Stream.of( FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE, + FS_OPTION_OPENFILE_READ_POLICY_AVRO, + FS_OPTION_OPENFILE_READ_POLICY_COLUMNAR, + FS_OPTION_OPENFILE_READ_POLICY_CSV, FS_OPTION_OPENFILE_READ_POLICY_DEFAULT, + FS_OPTION_OPENFILE_READ_POLICY_JSON, + FS_OPTION_OPENFILE_READ_POLICY_ORC, + FS_OPTION_OPENFILE_READ_POLICY_PARQUET, FS_OPTION_OPENFILE_READ_POLICY_RANDOM, FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL, FS_OPTION_OPENFILE_READ_POLICY_VECTOR, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java index 083d2752b6b2a..7866c794c8d05 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java @@ -1320,6 +1320,8 @@ public boolean hasPathCapability(final Path path, final String capability) case CommonPathCapabilities.FS_PATHHANDLES: case CommonPathCapabilities.FS_PERMISSIONS: case CommonPathCapabilities.FS_TRUNCATE: + // block locations are generated locally + case CommonPathCapabilities.VIRTUAL_BLOCK_LOCATIONS: return true; case CommonPathCapabilities.FS_SYMLINKS: return FileSystem.areSymlinksEnabled(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java index d6fe311fba866..439f905355d4d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedIO.java @@ -18,17 +18,30 @@ package org.apache.hadoop.io.wrappedio; +import java.io.IOException; +import java.io.InputStream; import java.io.UncheckedIOException; +import java.nio.ByteBuffer; import java.util.Collection; import java.util.List; import java.util.Map; +import javax.annotation.Nullable; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.BulkDelete; +import org.apache.hadoop.fs.ByteBufferPositionedReadable; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FutureDataInputStreamBuilder; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathCapabilities; +import org.apache.hadoop.fs.StreamCapabilities; +import org.apache.hadoop.util.functional.FutureIO; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; import static 
org.apache.hadoop.util.functional.FunctionalIO.uncheckIOExceptions; /** @@ -82,7 +95,8 @@ public static int bulkDelete_pageSize(FileSystem fs, Path path) { * @param fs filesystem * @param base path to delete under. * @param paths list of paths which must be absolute and under the base path. - * @return a list of all the paths which couldn't be deleted for a reason other than "not found" and any associated error message. + * @return a list of all the paths which couldn't be deleted for a reason other + * than "not found" and any associated error message. * @throws UnsupportedOperationException bulk delete under that path is not supported. * @throws UncheckedIOException if an IOE was raised. * @throws IllegalArgumentException if a path argument is invalid. @@ -97,4 +111,137 @@ public static List> bulkDelete_delete(FileSystem fs, } }); } + + /** + * Does a path have a given capability? + * Calls {@link PathCapabilities#hasPathCapability(Path, String)}, + * mapping IOExceptions to false. + * @param fs filesystem + * @param path path to query the capability of. + * @param capability non-null, non-empty string to query the path for support. + * @return true if the capability is supported under that part of the FS. + * resolving paths or relaying the call. + * @throws IllegalArgumentException invalid arguments + */ + public static boolean pathCapabilities_hasPathCapability(Object fs, + Path path, + String capability) { + try { + return ((PathCapabilities) fs).hasPathCapability(path, capability); + } catch (IOException e) { + return false; + } + } + + /** + * Does an object implement {@link StreamCapabilities} and, if so, + * what is the result of the probe for the capability? + * Calls {@link StreamCapabilities#hasCapability(String)}, + * @param object object to probe + * @param capability capability string + * @return true iff the object implements StreamCapabilities and the capability is + * declared available. + */ + public static boolean streamCapabilities_hasCapability(Object object, String capability) { + if (!(object instanceof StreamCapabilities)) { + return false; + } + return ((StreamCapabilities) object).hasCapability(capability); + } + + /** + * OpenFile assistant, easy reflection-based access to + * {@link FileSystem#openFile(Path)} and blocks + * awaiting the operation completion. + * @param fs filesystem + * @param path path + * @param policy read policy + * @param status optional file status + * @param length optional file length + * @param options nullable map of other options + * @return stream of the opened file + * @throws UncheckedIOException if an IOE was raised. + */ + @InterfaceStability.Stable + public static FSDataInputStream fileSystem_openFile( + final FileSystem fs, + final Path path, + final String policy, + @Nullable final FileStatus status, + @Nullable final Long length, + @Nullable final Map options) { + final FutureDataInputStreamBuilder builder = uncheckIOExceptions(() -> + fs.openFile(path)); + if (policy != null) { + builder.opt(FS_OPTION_OPENFILE_READ_POLICY, policy); + } + if (status != null) { + builder.withFileStatus(status); + } + if (length != null) { + builder.opt(FS_OPTION_OPENFILE_LENGTH, Long.toString(length)); + } + if (options != null) { + // add all the options map entries + options.forEach(builder::opt); + } + // wait for the opening. + return uncheckIOExceptions(() -> + FutureIO.awaitFuture(builder.build())); + } + + /** + * Return path of the enclosing root for a given path. 
+ * The enclosing root path is a common ancestor that should be used for temp and staging dirs + * as well as within encryption zones and other restricted directories. + * @param fs filesystem + * @param path file path to find the enclosing root path for + * @return a path to the enclosing root + * @throws IOException early checks like failure to resolve path cause IO failures + */ + public static Path fileSystem_getEnclosingRoot(FileSystem fs, Path path) throws IOException { + return fs.getEnclosingRoot(path); + } + + /** + * Delegate to {@link ByteBufferPositionedReadable#read(long, ByteBuffer)}. + * @param in input stream + * @param position position within file + * @param buf the ByteBuffer to receive the results of the read operation. + * Note: that is the default behaviour of {@link FSDataInputStream#readFully(long, ByteBuffer)}. + */ + public static void byteBufferPositionedReadable_readFully( + InputStream in, + long position, + ByteBuffer buf) { + if (!(in instanceof ByteBufferPositionedReadable)) { + throw new UnsupportedOperationException("Not a ByteBufferPositionedReadable: " + in); + } + uncheckIOExceptions(() -> { + ((ByteBufferPositionedReadable) in).readFully(position, buf); + return null; + }); + } + + /** + * Probe to see if the input stream is an instance of ByteBufferPositionedReadable. + * If the stream is an FSDataInputStream, the wrapped stream is checked. + * @param in input stream + * @return true if the stream implements the interface (including a wrapped stream) + * and that it declares the stream capability. + */ + public static boolean byteBufferPositionedReadable_readFullyAvailable( + InputStream in) { + if (!(in instanceof ByteBufferPositionedReadable)) { + return false; + } + if (in instanceof FSDataInputStream) { + // ask the wrapped stream. + return byteBufferPositionedReadable_readFullyAvailable( + ((FSDataInputStream) in).getWrappedStream()); + } + // now rely on the input stream implementing path capabilities, which + // all the Hadoop FS implementations do. + return streamCapabilities_hasCapability(in, StreamCapabilities.PREADBYTEBUFFER); + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedStatistics.java new file mode 100644 index 0000000000000..c6243dc9f5bbe --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/WrappedStatistics.java @@ -0,0 +1,357 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
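A minimal usage sketch of the reflection-friendly WrappedIO helpers added above. The wrapper class name below is illustrative only, and the read policy string and null option arguments are assumptions; the WrappedIO method names and parameter lists are the ones introduced in this patch (generics are elided in the patch text).

// Hypothetical usage sketch of the static WrappedIO helpers.
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.wrappedio.WrappedIO;

public final class WrappedIOUsageSketch {
  public static void readHeader(FileSystem fs, Path path) throws IOException {
    // open with a read policy hint; status, length and extra options left null
    try (FSDataInputStream in =
        WrappedIO.fileSystem_openFile(fs, path, "random", null, null, null)) {
      ByteBuffer buf = ByteBuffer.allocate(1024);
      // only issue the ByteBuffer positioned read if the stream declares support for it
      if (WrappedIO.byteBufferPositionedReadable_readFullyAvailable(in)) {
        WrappedIO.byteBufferPositionedReadable_readFully(in, 0L, buf);
      } else {
        // classic positioned read as the fallback path
        in.readFully(0L, new byte[1024]);
      }
    }
  }
}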
+ */ + +package org.apache.hadoop.io.wrappedio; + +import java.io.Serializable; +import java.io.UncheckedIOException; +import java.util.HashMap; +import java.util.Map; +import javax.annotation.Nullable; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsContext; +import org.apache.hadoop.fs.statistics.IOStatisticsSnapshot; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.util.functional.FunctionRaisingIOE; +import org.apache.hadoop.util.functional.Tuples; + +import static org.apache.hadoop.fs.statistics.IOStatisticsContext.getCurrentIOStatisticsContext; +import static org.apache.hadoop.fs.statistics.IOStatisticsContext.setThreadIOStatisticsContext; +import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToPrettyString; +import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.retrieveIOStatistics; +import static org.apache.hadoop.util.Preconditions.checkArgument; +import static org.apache.hadoop.util.functional.FunctionalIO.uncheckIOExceptions; + +/** + * Reflection-friendly access to IOStatistics APIs. + * All {@code Serializable} arguments/return values are actually + * {@code IOStatisticsSource} instances; passing in the wrong value + * will raise IllegalArgumentExceptions. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public final class WrappedStatistics { + + private WrappedStatistics() { + } + + /** + * Probe for an object being an instance of {@code IOStatisticsSource}. + * @param object object to probe + * @return true if the object is the right type. + */ + public static boolean isIOStatisticsSource(Object object) { + return object instanceof IOStatisticsSource; + } + + /** + * Probe for an object being an instance of {@code IOStatistics}. + * @param object object to probe + * @return true if the object is the right type. + */ + public static boolean isIOStatistics(Object object) { + return object instanceof IOStatistics; + } + + /** + * Probe for an object being an instance of {@code IOStatisticsSnapshot}. + * @param object object to probe + * @return true if the object is the right type. + */ + public static boolean isIOStatisticsSnapshot(Serializable object) { + return object instanceof IOStatisticsSnapshot; + } + + /** + * Aggregate an existing {@link IOStatisticsSnapshot} with + * the supplied statistics. + * @param snapshot snapshot to update + * @param statistics IOStatistics to add + * @return true if the snapshot was updated. + * @throws IllegalArgumentException if the {@code statistics} argument is not + * null but not an instance of IOStatistics, or if {@code snapshot} is invalid. + */ + public static boolean iostatisticsSnapshot_aggregate( + Serializable snapshot, @Nullable Object statistics) { + + requireIOStatisticsSnapshot(snapshot); + if (statistics == null) { + return false; + } + checkArgument(statistics instanceof IOStatistics, + "Not an IOStatistics instance: %s", statistics); + + final IOStatistics sourceStats = (IOStatistics) statistics; + return applyToIOStatisticsSnapshot(snapshot, s -> + s.aggregate(sourceStats)); + } + + /** + * Create a new {@link IOStatisticsSnapshot} instance. + * @return an empty IOStatisticsSnapshot. 
+ */ + public static Serializable iostatisticsSnapshot_create() { + return iostatisticsSnapshot_create(null); + } + + /** + * Create a new {@link IOStatisticsSnapshot} instance. + * @param source optional source statistics + * @return an IOStatisticsSnapshot. + * @throws ClassCastException if the {@code source} is not null and not an IOStatistics instance + */ + public static Serializable iostatisticsSnapshot_create(@Nullable Object source) { + return new IOStatisticsSnapshot((IOStatistics) source); + } + + /** + * Load IOStatisticsSnapshot from a Hadoop filesystem. + * @param fs filesystem + * @param path path + * @return the loaded snapshot + * @throws UncheckedIOException Any IO exception. + */ + public static Serializable iostatisticsSnapshot_load( + FileSystem fs, + Path path) { + return uncheckIOExceptions(() -> + IOStatisticsSnapshot.serializer().load(fs, path)); + } + + /** + * Extract the IOStatistics from an object in a serializable form. + * @param source source object, may be null/not a statistics source/instance + * @return {@link IOStatisticsSnapshot} or null if the object is null/doesn't have statistics + */ + public static Serializable iostatisticsSnapshot_retrieve(@Nullable Object source) { + IOStatistics stats = retrieveIOStatistics(source); + if (stats == null) { + return null; + } + return iostatisticsSnapshot_create(stats); + } + + /** + * Save IOStatisticsSnapshot to a Hadoop filesystem as a JSON file. + * @param snapshot statistics + * @param fs filesystem + * @param path path + * @param overwrite should any existing file be overwritten? + * @throws UncheckedIOException Any IO exception. + */ + public static void iostatisticsSnapshot_save( + @Nullable Serializable snapshot, + FileSystem fs, + Path path, + boolean overwrite) { + applyToIOStatisticsSnapshot(snapshot, s -> { + IOStatisticsSnapshot.serializer().save(fs, path, s, overwrite); + return null; + }); + } + + /** + * Save IOStatisticsSnapshot to a JSON string. + * @param snapshot statistics; may be null or of an incompatible type + * @return JSON string value + * @throws UncheckedIOException Any IO/jackson exception. + * @throws IllegalArgumentException if the supplied class is not a snapshot + */ + public static String iostatisticsSnapshot_toJsonString(@Nullable Serializable snapshot) { + + return applyToIOStatisticsSnapshot(snapshot, + IOStatisticsSnapshot.serializer()::toJson); + } + + /** + * Load IOStatisticsSnapshot from a JSON string. + * @param json JSON string value. + * @return deserialized snapshot. + * @throws UncheckedIOException Any IO/jackson exception. + */ + public static Serializable iostatisticsSnapshot_fromJsonString( + final String json) { + return uncheckIOExceptions(() -> + IOStatisticsSnapshot.serializer().fromJson(json)); + } + + /** + * Get the counters of an IOStatisticsSnapshot. + * @param source source of statistics. + * @return the map of counters. + */ + public static Map iostatistics_counters( + Serializable source) { + return applyToIOStatisticsSnapshot(source, IOStatisticsSnapshot::counters); + } + + /** + * Get the gauges of an IOStatisticsSnapshot. + * @param source source of statistics. + * @return the map of gauges. + */ + public static Map iostatistics_gauges( + Serializable source) { + return applyToIOStatisticsSnapshot(source, IOStatisticsSnapshot::gauges); + } + + /** + * Get the minimums of an IOStatisticsSnapshot. + * @param source source of statistics. + * @return the map of minimums. 
+ */ + public static Map iostatistics_minimums( + Serializable source) { + return applyToIOStatisticsSnapshot(source, IOStatisticsSnapshot::minimums); + } + + /** + * Get the maximums of an IOStatisticsSnapshot. + * @param source source of statistics. + * @return the map of maximums. + */ + public static Map iostatistics_maximums( + Serializable source) { + return applyToIOStatisticsSnapshot(source, IOStatisticsSnapshot::maximums); + } + + /** + * Get the means of an IOStatisticsSnapshot. + * Each value in the map is the (sample, sum) tuple of the values; + * the mean is then calculated by dividing sum/sample wherever sample count is non-zero. + * @param source source of statistics. + * @return a map of mean key to (sample, sum) tuples. + */ + public static Map> iostatistics_means( + Serializable source) { + return applyToIOStatisticsSnapshot(source, stats -> { + Map> map = new HashMap<>(); + stats.meanStatistics().forEach((k, v) -> + map.put(k, Tuples.pair(v.getSamples(), v.getSum()))); + return map; + }); + } + + /** + * Get the context's {@link IOStatisticsContext} which + * implements {@link IOStatisticsSource}. + * This is either a thread-local value or a global empty context. + * @return instance of {@link IOStatisticsContext}. + */ + public static Object iostatisticsContext_getCurrent() { + return getCurrentIOStatisticsContext(); + } + + /** + * Set the IOStatisticsContext for the current thread. + * @param statisticsContext IOStatistics context instance for the + * current thread. If null, the context is reset. + */ + public static void iostatisticsContext_setThreadIOStatisticsContext( + @Nullable Object statisticsContext) { + setThreadIOStatisticsContext((IOStatisticsContext) statisticsContext); + } + + /** + * Static probe to check if the thread-level IO statistics enabled. + * @return true if the thread-level IO statistics are enabled. + */ + public static boolean iostatisticsContext_enabled() { + return IOStatisticsContext.enabled(); + } + + /** + * Reset the context's IOStatistics. + * {@link IOStatisticsContext#reset()} + */ + public static void iostatisticsContext_reset() { + getCurrentIOStatisticsContext().reset(); + } + + /** + * Take a snapshot of the context IOStatistics. + * {@link IOStatisticsContext#snapshot()} + * @return an instance of {@link IOStatisticsSnapshot}. + */ + public static Serializable iostatisticsContext_snapshot() { + return getCurrentIOStatisticsContext().snapshot(); + } + + /** + * Aggregate into the IOStatistics context the statistics passed in via + * IOStatistics/source parameter. + *

+ * <p>
    + * Returns false if the source is null or does not contain any statistics. + * @param source implementation of {@link IOStatisticsSource} or {@link IOStatistics} + * @return true if the the source object was aggregated. + */ + public static boolean iostatisticsContext_aggregate(Object source) { + IOStatistics stats = retrieveIOStatistics(source); + if (stats != null) { + getCurrentIOStatisticsContext().getAggregator().aggregate(stats); + return true; + } else { + return false; + } + } + + /** + * Convert IOStatistics to a string form, with all the metrics sorted + * and empty value stripped. + * @param statistics A statistics instance; may be null + * @return string value or the empty string if null + */ + public static String iostatistics_toPrettyString(@Nullable Object statistics) { + return statistics == null + ? "" + : ioStatisticsToPrettyString((IOStatistics) statistics); + } + + /** + * Apply a function to an object which may be an IOStatisticsSnapshot. + * @param return type + * @param source statistics snapshot + * @param fun function to invoke if {@code source} is valid. + * @return the applied value + * @throws UncheckedIOException Any IO exception. + * @throws IllegalArgumentException if the supplied class is not a snapshot + */ + public static T applyToIOStatisticsSnapshot( + Serializable source, + FunctionRaisingIOE fun) { + + return fun.unchecked(requireIOStatisticsSnapshot(source)); + } + + /** + * Require the parameter to be an instance of {@link IOStatisticsSnapshot}. + * @param snapshot object to validate + * @return cast value + * @throws IllegalArgumentException if the supplied class is not a snapshot + */ + private static IOStatisticsSnapshot requireIOStatisticsSnapshot(final Serializable snapshot) { + checkArgument(snapshot instanceof IOStatisticsSnapshot, + "Not an IOStatisticsSnapshot %s", snapshot); + return (IOStatisticsSnapshot) snapshot; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/impl/DynamicWrappedIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/impl/DynamicWrappedIO.java new file mode 100644 index 0000000000000..acd656ca2a959 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/impl/DynamicWrappedIO.java @@ -0,0 +1,500 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
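A minimal usage sketch of the WrappedStatistics helpers defined in this file. The wrapper class below is illustrative only; the map value types are assumed to be Map<String, Long>, since the patch text elides generics.

// Hypothetical usage sketch of the static WrappedStatistics helpers.
import java.io.Serializable;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.io.wrappedio.WrappedStatistics;

public final class WrappedStatisticsUsageSketch {
  public static String statsOf(FSDataInputStream in) {
    // snapshot whatever statistics the stream exposes (null if it has none)
    Serializable snapshot = WrappedStatistics.iostatisticsSnapshot_retrieve(in);
    if (snapshot == null) {
      // start from an empty snapshot instead
      snapshot = WrappedStatistics.iostatisticsSnapshot_create();
    }
    // counters come back as a map keyed by statistic name
    WrappedStatistics.iostatistics_counters(snapshot)
        .forEach((name, value) -> System.out.printf("%s=%s%n", name, value));
    // render the snapshot as JSON for logging or persistence
    return WrappedStatistics.iostatisticsSnapshot_toJsonString(snapshot);
  }
}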
+ */ + +package org.apache.hadoop.io.wrappedio.impl; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import javax.annotation.Nullable; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.dynamic.DynMethods; + +import static org.apache.hadoop.util.dynamic.BindingUtils.available; +import static org.apache.hadoop.util.dynamic.BindingUtils.checkAvailable; +import static org.apache.hadoop.util.dynamic.BindingUtils.extractIOEs; +import static org.apache.hadoop.util.dynamic.BindingUtils.loadClass; +import static org.apache.hadoop.util.dynamic.BindingUtils.loadStaticMethod; + +/** + * The wrapped IO methods in {@code org.apache.hadoop.io.wrappedio.WrappedIO}, + * dynamically loaded. + */ +public final class DynamicWrappedIO { + + private static final Logger LOG = LoggerFactory.getLogger(DynamicWrappedIO.class); + + /** + * Classname of the wrapped IO class: {@value}. + */ + private static final String WRAPPED_IO_CLASSNAME = + "org.apache.hadoop.io.wrappedio.WrappedIO"; + + /** + * Method name for openFile: {@value}. + */ + private static final String FILESYSTEM_OPEN_FILE = "fileSystem_openFile"; + + /** + * Method name for bulk delete: {@value}. + */ + private static final String BULKDELETE_DELETE = "bulkDelete_delete"; + + /** + * Method name for bulk delete: {@value}. + */ + private static final String BULKDELETE_PAGESIZE = "bulkDelete_pageSize"; + + /** + * Method name for {@code byteBufferPositionedReadable}: {@value}. + */ + private static final String BYTE_BUFFER_POSITIONED_READABLE_READ_FULLY_AVAILABLE = + "byteBufferPositionedReadable_readFullyAvailable"; + + /** + * Method name for {@code byteBufferPositionedReadable}: {@value}. + */ + private static final String BYTE_BUFFER_POSITIONED_READABLE_READ_FULLY = + "byteBufferPositionedReadable_readFully"; + + /** + * Method name for {@code PathCapabilities.hasPathCapability()}. + * {@value} + */ + private static final String PATH_CAPABILITIES_HAS_PATH_CAPABILITY = + "pathCapabilities_hasPathCapability"; + + /** + * Method name for {@code StreamCapabilities.hasCapability()}. + * {@value} + */ + private static final String STREAM_CAPABILITIES_HAS_CAPABILITY = + "streamCapabilities_hasCapability"; + + /** + * A singleton instance of the wrapper. + */ + private static final DynamicWrappedIO INSTANCE = new DynamicWrappedIO(); + + /** + * Read policy for parquet files: {@value}. + */ + public static final String PARQUET_READ_POLICIES = "parquet, columnar, vector, random"; + + /** + * Was wrapped IO loaded? + * In the hadoop codebase, this is true. + * But in other libraries it may not always be true...this + * field is used to assist copy-and-paste adoption. + */ + private final boolean loaded; + + /** + * Method binding. + * {@code WrappedIO.bulkDelete_delete(FileSystem, Path, Collection)}. + */ + private final DynMethods.UnboundMethod bulkDeleteDeleteMethod; + + /** + * Method binding. + * {@code WrappedIO.bulkDelete_pageSize(FileSystem, Path)}. + */ + private final DynMethods.UnboundMethod bulkDeletePageSizeMethod; + + /** + * Dynamic openFile() method. + * {@code WrappedIO.fileSystem_openFile(FileSystem, Path, String, FileStatus, Long, Map)}. 
+ */ + private final DynMethods.UnboundMethod fileSystemOpenFileMethod; + + private final DynMethods.UnboundMethod pathCapabilitiesHasPathCapabilityMethod; + + private final DynMethods.UnboundMethod streamCapabilitiesHasCapabilityMethod; + + private final DynMethods.UnboundMethod byteBufferPositionedReadableReadFullyAvailableMethod; + + private final DynMethods.UnboundMethod byteBufferPositionedReadableReadFullyMethod; + + public DynamicWrappedIO() { + this(WRAPPED_IO_CLASSNAME); + } + + public DynamicWrappedIO(String classname) { + + // Wrapped IO class. + Class wrappedClass = loadClass(classname); + + loaded = wrappedClass != null; + + // bulk delete APIs + bulkDeleteDeleteMethod = loadStaticMethod( + wrappedClass, + List.class, + BULKDELETE_DELETE, + FileSystem.class, + Path.class, + Collection.class); + + bulkDeletePageSizeMethod = loadStaticMethod( + wrappedClass, + Integer.class, + BULKDELETE_PAGESIZE, + FileSystem.class, + Path.class); + + // load the openFile method + fileSystemOpenFileMethod = loadStaticMethod( + wrappedClass, + FSDataInputStream.class, + FILESYSTEM_OPEN_FILE, + FileSystem.class, + Path.class, + String.class, + FileStatus.class, + Long.class, + Map.class); + + // path and stream capabilities + pathCapabilitiesHasPathCapabilityMethod = loadStaticMethod(wrappedClass, + boolean.class, + PATH_CAPABILITIES_HAS_PATH_CAPABILITY, + Object.class, + Path.class, + String.class); + + streamCapabilitiesHasCapabilityMethod = loadStaticMethod(wrappedClass, + boolean.class, + STREAM_CAPABILITIES_HAS_CAPABILITY, + Object.class, + String.class); + + // ByteBufferPositionedReadable + byteBufferPositionedReadableReadFullyAvailableMethod = loadStaticMethod(wrappedClass, + Void.class, + BYTE_BUFFER_POSITIONED_READABLE_READ_FULLY_AVAILABLE, + InputStream.class); + + byteBufferPositionedReadableReadFullyMethod = loadStaticMethod(wrappedClass, + Void.class, + BYTE_BUFFER_POSITIONED_READABLE_READ_FULLY, + InputStream.class, + long.class, + ByteBuffer.class); + + } + + /** + * Is the wrapped IO class loaded? + * @return true if the wrappedIO class was found and loaded. + */ + public boolean loaded() { + return loaded; + } + + + /** + * For testing: verify that all methods were found. + * @throws UnsupportedOperationException if the method was not found. + */ + void requireAllMethodsAvailable() throws UnsupportedOperationException { + + final DynMethods.UnboundMethod[] methods = { + bulkDeleteDeleteMethod, + bulkDeletePageSizeMethod, + fileSystemOpenFileMethod, + pathCapabilitiesHasPathCapabilityMethod, + streamCapabilitiesHasCapabilityMethod, + byteBufferPositionedReadableReadFullyAvailableMethod, + byteBufferPositionedReadableReadFullyMethod, + }; + for (DynMethods.UnboundMethod method : methods) { + LOG.info("Checking method {}", method); + if (!available(method)) { + throw new UnsupportedOperationException("Unbound " + method); + } + } + } + + + /** + * Are the bulk delete methods available? + * @return true if the methods were found. + */ + public boolean bulkDelete_available() { + return available(bulkDeleteDeleteMethod); + } + + /** + * Get the maximum number of objects/files to delete in a single request. + * @param fileSystem filesystem + * @param path path to delete under. + * @return a number greater than or equal to zero. + * @throws UnsupportedOperationException bulk delete under that path is not supported. + * @throws IllegalArgumentException path not valid. + * @throws IOException problems resolving paths + * @throws RuntimeException invocation failure. 
+ */ + public int bulkDelete_pageSize(final FileSystem fileSystem, final Path path) + throws IOException { + checkAvailable(bulkDeletePageSizeMethod); + return extractIOEs(() -> + bulkDeletePageSizeMethod.invoke(null, fileSystem, path)); + } + + /** + * Delete a list of files/objects. + *
+ * <ul>
+ *   <li>Files must be under the path provided in {@code base}.</li>
+ *   <li>The size of the list must be equal to or less than the page size.</li>
+ *   <li>Directories are not supported; the outcome of attempting to delete
+ *   directories is undefined (ignored; undetected, listed as failures...).</li>
+ *   <li>The operation is not atomic.</li>
+ *   <li>The operation is treated as idempotent: network failures may
+ *   trigger resubmission of the request -any new objects created under a
+ *   path in the list may then be deleted.</li>
+ *   <li>There is no guarantee that any parent directories exist after this call.</li>
+ * </ul>
    + * @param fs filesystem + * @param base path to delete under. + * @param paths list of paths which must be absolute and under the base path. + * @return a list of all the paths which couldn't be deleted for a reason other than + * "not found" and any associated error message. + * @throws UnsupportedOperationException bulk delete under that path is not supported. + * @throws IllegalArgumentException if a path argument is invalid. + * @throws IOException IO problems including networking, authentication and more. + */ + public List> bulkDelete_delete(FileSystem fs, + Path base, + Collection paths) throws IOException { + checkAvailable(bulkDeleteDeleteMethod); + return extractIOEs(() -> + bulkDeleteDeleteMethod.invoke(null, fs, base, paths)); + } + + /** + * Is the {@link #fileSystem_openFile(FileSystem, Path, String, FileStatus, Long, Map)} + * method available. + * @return true if the optimized open file method can be invoked. + */ + public boolean fileSystem_openFile_available() { + return available(fileSystemOpenFileMethod); + } + + /** + * OpenFile assistant, easy reflection-based access to + * {@code FileSystem#openFile(Path)} and blocks + * awaiting the operation completion. + * @param fs filesystem + * @param path path + * @param policy read policy + * @param status optional file status + * @param length optional file length + * @param options nullable map of other options + * @return stream of the opened file + * @throws IOException if the operation was attempted and failed. + */ + public FSDataInputStream fileSystem_openFile( + final FileSystem fs, + final Path path, + final String policy, + @Nullable final FileStatus status, + @Nullable final Long length, + @Nullable final Map options) + throws IOException { + checkAvailable(fileSystemOpenFileMethod); + return extractIOEs(() -> + fileSystemOpenFileMethod.invoke(null, + fs, path, policy, status, length, options)); + } + + /** + * Does a path have a given capability? + * Calls {@code PathCapabilities#hasPathCapability(Path, String)}, + * mapping IOExceptions to false. + * @param fs filesystem + * @param path path to query the capability of. + * @param capability non-null, non-empty string to query the path for support. + * @return true if the capability is supported + * under that part of the FS + * false if the method is not loaded or the path lacks the capability. + * @throws IllegalArgumentException invalid arguments + */ + public boolean pathCapabilities_hasPathCapability(Object fs, + Path path, + String capability) { + if (!available(pathCapabilitiesHasPathCapabilityMethod)) { + return false; + } + return pathCapabilitiesHasPathCapabilityMethod.invoke(null, fs, path, capability); + } + + /** + * Does an object implement {@code StreamCapabilities} and, if so, + * what is the result of the probe for the capability? + * Calls {@code StreamCapabilities#hasCapability(String)}, + * @param object object to probe + * @param capability capability string + * @return true iff the object implements StreamCapabilities and the capability is + * declared available. + */ + public boolean streamCapabilities_hasCapability(Object object, String capability) { + if (!available(streamCapabilitiesHasCapabilityMethod)) { + return false; + } + return streamCapabilitiesHasCapabilityMethod.invoke(null, object, capability); + } + + /** + * Are the ByteBufferPositionedReadable methods loaded? + * This does not check that a specific stream implements the API; + * use {@link #byteBufferPositionedReadable_readFullyAvailable(InputStream)}. 
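A minimal paged-delete sketch built on the dynamically loaded binding above. The helper class is illustrative only, and the return type List<Map.Entry<Path, String>> is an assumption (the patch text elides generics); the DynamicWrappedIO method names and availability probes are those defined in this file.

// Hypothetical usage sketch of DynamicWrappedIO bulk delete.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.wrappedio.impl.DynamicWrappedIO;

public final class DynamicBulkDeleteSketch {
  public static void deleteAll(FileSystem fs, Path base, List<Path> files)
      throws IOException {
    DynamicWrappedIO wrapped = DynamicWrappedIO.instance();
    if (!wrapped.bulkDelete_available()) {
      // older hadoop-common on the classpath: fall back to single-file deletes
      for (Path p : files) {
        fs.delete(p, false);
      }
      return;
    }
    // split the work into pages no larger than the store's page size
    int pageSize = wrapped.bulkDelete_pageSize(fs, base);
    for (int i = 0; i < files.size(); i += pageSize) {
      List<Path> page = files.subList(i, Math.min(i + pageSize, files.size()));
      // each entry in the result is a (path, error message) pair for a failure
      List<Map.Entry<Path, String>> failures =
          wrapped.bulkDelete_delete(fs, base, new ArrayList<>(page));
      failures.forEach(e ->
          System.err.println("Failed to delete " + e.getKey() + ": " + e.getValue()));
    }
  }
}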
+ * @return true if the hadoop libraries have the method. + */ + public boolean byteBufferPositionedReadable_available() { + return available(byteBufferPositionedReadableReadFullyAvailableMethod); + } + + /** + * Probe to see if the input stream is an instance of ByteBufferPositionedReadable. + * If the stream is an FSDataInputStream, the wrapped stream is checked. + * @param in input stream + * @return true if the API is available, the stream implements the interface + * (including the innermost wrapped stream) and that it declares the stream capability. + * @throws IOException if the operation was attempted and failed. + */ + public boolean byteBufferPositionedReadable_readFullyAvailable( + InputStream in) throws IOException { + if (available(byteBufferPositionedReadableReadFullyAvailableMethod)) { + return extractIOEs(() -> + byteBufferPositionedReadableReadFullyAvailableMethod.invoke(null, in)); + } else { + return false; + } + } + + /** + * Delegate to {@code ByteBufferPositionedReadable#read(long, ByteBuffer)}. + * @param in input stream + * @param position position within file + * @param buf the ByteBuffer to receive the results of the read operation. + * @throws UnsupportedOperationException if the input doesn't implement + * the interface or, if when invoked, it is raised. + * Note: that is the default behaviour of {@code FSDataInputStream#readFully(long, ByteBuffer)}. + * @throws IOException if the operation was attempted and failed. + */ + public void byteBufferPositionedReadable_readFully( + InputStream in, + long position, + ByteBuffer buf) throws IOException { + checkAvailable(byteBufferPositionedReadableReadFullyMethod); + extractIOEs(() -> + byteBufferPositionedReadableReadFullyMethod.invoke(null, in, position, buf)); + } + + /** + * Get the singleton instance. + * @return the instance + */ + public static DynamicWrappedIO instance() { + return INSTANCE; + } + + /** + * Is the wrapped IO class loaded? + * @return true if the instance is loaded. + */ + public static boolean isAvailable() { + return instance().loaded(); + } + + /** + * Open a file. + *
+ * <p>
    + * If the WrappedIO class is found, use it. + *
+ * <p>
    + * If not, falls back to the classic {@code fs.open(Path)} call. + * @param fs filesystem + * @param status file status + * @param readPolicies read policy to use + * @return the input stream + * @throws IOException any IO failure. + */ + public static FSDataInputStream openFile( + FileSystem fs, + FileStatus status, + String readPolicies) throws IOException { + return openFileOnInstance(instance(), fs, status, readPolicies); + } + + /** + * Open a file. + *
+ * <p>
    + * If the WrappedIO class is found, uses + * {@link #fileSystem_openFile(FileSystem, Path, String, FileStatus, Long, Map)} with + * {@link #PARQUET_READ_POLICIES} as the list of read policies and passing down + * the file status. + *
+ * <p>
    + * If not, falls back to the classic {@code fs.open(Path)} call. + * @param instance dynamic wrapped IO instance. + * @param fs filesystem + * @param status file status + * @param readPolicies read policy to use + * @return the input stream + * @throws IOException any IO failure. + */ + @VisibleForTesting + static FSDataInputStream openFileOnInstance( + DynamicWrappedIO instance, + FileSystem fs, + FileStatus status, + String readPolicies) throws IOException { + FSDataInputStream stream; + if (instance.fileSystem_openFile_available()) { + // use openfile for a higher performance read + // and the ability to set a read policy. + // This optimizes for cloud storage by saving on IO + // in open and choosing the range for GET requests. + // For other stores, it ultimately invokes the classic open(Path) + // call so is no more expensive than before. + LOG.debug("Opening file {} through fileSystem_openFile", status); + stream = instance.fileSystem_openFile(fs, + status.getPath(), + readPolicies, + status, + null, + null); + } else { + LOG.debug("Opening file {} through open()", status); + stream = fs.open(status.getPath()); + } + return stream; + } + +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/impl/DynamicWrappedStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/impl/DynamicWrappedStatistics.java new file mode 100644 index 0000000000000..a4a25b036bc92 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/impl/DynamicWrappedStatistics.java @@ -0,0 +1,678 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.wrappedio.impl; + +import java.io.Serializable; +import java.io.UncheckedIOException; +import java.util.Map; +import javax.annotation.Nullable; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.statistics.IOStatistics; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.apache.hadoop.util.dynamic.DynMethods; + +import static org.apache.hadoop.util.dynamic.BindingUtils.available; +import static org.apache.hadoop.util.dynamic.BindingUtils.checkAvailable; +import static org.apache.hadoop.util.dynamic.BindingUtils.loadClass; +import static org.apache.hadoop.util.dynamic.BindingUtils.loadStaticMethod; + +/** + * The wrapped IOStatistics methods in {@code WrappedStatistics}, + * dynamically loaded. + * This is suitable for copy-and-paste into other libraries which have some + * version of the Parquet DynMethods classes already present. + */ +public final class DynamicWrappedStatistics { + + /** + * Classname of the wrapped statistics class: {@value}. 
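A short sketch of the static DynamicWrappedIO.openFile() helper described above. The wrapper class is illustrative only; the method, the PARQUET_READ_POLICIES constant and the fallback behaviour are those defined in this file.

// Hypothetical usage sketch of DynamicWrappedIO.openFile().
import java.io.IOException;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.wrappedio.impl.DynamicWrappedIO;

public final class DynamicOpenFileSketch {
  public static FSDataInputStream open(FileSystem fs, Path path) throws IOException {
    // the status is passed down so object stores can skip an extra HEAD probe
    FileStatus status = fs.getFileStatus(path);
    // falls back to fs.open(path) when the WrappedIO class is not on the classpath
    return DynamicWrappedIO.openFile(fs, status, DynamicWrappedIO.PARQUET_READ_POLICIES);
  }
}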
+ */ + public static final String WRAPPED_STATISTICS_CLASSNAME = + "org.apache.hadoop.io.wrappedio.WrappedStatistics"; + + /** + * Method name: {@value}. + */ + public static final String IS_IOSTATISTICS_SOURCE = "isIOStatisticsSource"; + + /** + * Method name: {@value}. + */ + public static final String IS_IOSTATISTICS = "isIOStatistics"; + + /** + * Method name: {@value}. + */ + public static final String IS_IOSTATISTICS_SNAPSHOT = "isIOStatisticsSnapshot"; + + /** + * IOStatisticsContext method: {@value}. + */ + public static final String IOSTATISTICS_CONTEXT_AGGREGATE = "iostatisticsContext_aggregate"; + + /** + * IOStatisticsContext method: {@value}. + */ + public static final String IOSTATISTICS_CONTEXT_ENABLED = "iostatisticsContext_enabled"; + + /** + * IOStatisticsContext method: {@value}. + */ + public static final String IOSTATISTICS_CONTEXT_GET_CURRENT = "iostatisticsContext_getCurrent"; + + /** + * IOStatisticsContext method: {@value}. + */ + public static final String IOSTATISTICS_CONTEXT_SET_THREAD_CONTEXT = + "iostatisticsContext_setThreadIOStatisticsContext"; + + /** + * IOStatisticsContext method: {@value}. + */ + public static final String IOSTATISTICS_CONTEXT_RESET = "iostatisticsContext_reset"; + + /** + * IOStatisticsContext method: {@value}. + */ + public static final String IOSTATISTICS_CONTEXT_SNAPSHOT = "iostatisticsContext_snapshot"; + + + /** + * Method name: {@value}. + */ + public static final String IOSTATISTICS_SNAPSHOT_AGGREGATE = "iostatisticsSnapshot_aggregate"; + + /** + * Method name: {@value}. + */ + public static final String IOSTATISTICS_SNAPSHOT_CREATE = "iostatisticsSnapshot_create"; + + /** + * Method name: {@value}. + */ + public static final String IOSTATISTICS_SNAPSHOT_FROM_JSON_STRING = + "iostatisticsSnapshot_fromJsonString"; + + /** + * Method name: {@value}. + */ + public static final String IOSTATISTICS_SNAPSHOT_LOAD = "iostatisticsSnapshot_load"; + + /** + * Method name: {@value}. + */ + public static final String IOSTATISTICS_SNAPSHOT_RETRIEVE = "iostatisticsSnapshot_retrieve"; + + /** + * Method name: {@value}. + */ + public static final String IOSTATISTICS_SNAPSHOT_SAVE = "iostatisticsSnapshot_save"; + + /** + * Method name: {@value}. + */ + public static final String IOSTATISTICS_SNAPSHOT_TO_JSON_STRING = + "iostatisticsSnapshot_toJsonString"; + + /** + * Method name: {@value}. + */ + public static final String IOSTATISTICS_TO_PRETTY_STRING = + "iostatistics_toPrettyString"; + + /** + * Method name: {@value}. + */ + public static final String IOSTATISTICS_COUNTERS = "iostatistics_counters"; + + /** + * Method name: {@value}. + */ + public static final String IOSTATISTICS_GAUGES = "iostatistics_gauges"; + + /** + * Method name: {@value}. + */ + public static final String IOSTATISTICS_MINIMUMS = "iostatistics_minimums"; + + /** + * Method name: {@value}. + */ + public static final String IOSTATISTICS_MAXIMUMS = "iostatistics_maximums"; + + /** + * Method name: {@value}. + */ + public static final String IOSTATISTICS_MEANS = "iostatistics_means"; + + /** + * Was wrapped IO loaded? + * In the hadoop codebase, this is true. + * But in other libraries it may not always be true...this + * field is used to assist copy-and-paste adoption. + */ + private final boolean loaded; + + /* + IOStatisticsContext methods. 
+ */ + private final DynMethods.UnboundMethod iostatisticsContextAggregateMethod; + + private final DynMethods.UnboundMethod iostatisticsContextEnabledMethod; + + private final DynMethods.UnboundMethod iostatisticsContextGetCurrentMethod; + + private final DynMethods.UnboundMethod iostatisticsContextResetMethod; + + private final DynMethods.UnboundMethod iostatisticsContextSetThreadContextMethod; + + private final DynMethods.UnboundMethod iostatisticsContextSnapshotMethod; + + private final DynMethods.UnboundMethod iostatisticsSnapshotAggregateMethod; + + private final DynMethods.UnboundMethod iostatisticsSnapshotCreateMethod; + + private final DynMethods.UnboundMethod iostatisticsSnapshotCreateWithSourceMethod; + + private final DynMethods.UnboundMethod iostatisticsSnapshotLoadMethod; + + private final DynMethods.UnboundMethod iostatisticsSnapshotFromJsonStringMethod; + + private final DynMethods.UnboundMethod iostatisticsSnapshotRetrieveMethod; + + private final DynMethods.UnboundMethod iostatisticsSnapshotSaveMethod; + + private final DynMethods.UnboundMethod iostatisticsToPrettyStringMethod; + + private final DynMethods.UnboundMethod iostatisticsSnapshotToJsonStringMethod; + + private final DynMethods.UnboundMethod iostatisticsCountersMethod; + + private final DynMethods.UnboundMethod iostatisticsGaugesMethod; + + private final DynMethods.UnboundMethod iostatisticsMinimumsMethod; + + private final DynMethods.UnboundMethod iostatisticsMaximumsMethod; + + private final DynMethods.UnboundMethod iostatisticsMeansMethod; + + private final DynMethods.UnboundMethod isIOStatisticsSourceMethod; + + private final DynMethods.UnboundMethod isIOStatisticsMethod; + + private final DynMethods.UnboundMethod isIOStatisticsSnapshotMethod; + + + public DynamicWrappedStatistics() { + this(WRAPPED_STATISTICS_CLASSNAME); + } + + public DynamicWrappedStatistics(String classname) { + + // wrap the real class. 
+ Class wrappedClass = loadClass(classname); + + loaded = wrappedClass != null; + + // instanceof checks + isIOStatisticsSourceMethod = loadStaticMethod(wrappedClass, + Boolean.class, IS_IOSTATISTICS_SOURCE, Object.class); + isIOStatisticsMethod = loadStaticMethod(wrappedClass, + Boolean.class, IS_IOSTATISTICS, Object.class); + isIOStatisticsSnapshotMethod = loadStaticMethod(wrappedClass, + Boolean.class, IS_IOSTATISTICS_SNAPSHOT, Serializable.class); + + // IOStatisticsContext operations + iostatisticsContextAggregateMethod = loadStaticMethod(wrappedClass, + Boolean.class, IOSTATISTICS_CONTEXT_AGGREGATE, Object.class); + iostatisticsContextEnabledMethod = loadStaticMethod(wrappedClass, + Boolean.class, IOSTATISTICS_CONTEXT_ENABLED); + iostatisticsContextGetCurrentMethod = loadStaticMethod(wrappedClass, + Object.class, IOSTATISTICS_CONTEXT_GET_CURRENT); + iostatisticsContextResetMethod = loadStaticMethod(wrappedClass, + Void.class, IOSTATISTICS_CONTEXT_RESET); + iostatisticsContextSetThreadContextMethod = loadStaticMethod(wrappedClass, + Void.class, IOSTATISTICS_CONTEXT_SET_THREAD_CONTEXT, Object.class); + iostatisticsContextSnapshotMethod = loadStaticMethod(wrappedClass, + Serializable.class, IOSTATISTICS_CONTEXT_SNAPSHOT); + + // IOStatistics Snapshot operations + + iostatisticsSnapshotAggregateMethod = + loadStaticMethod(wrappedClass, + Boolean.class, + IOSTATISTICS_SNAPSHOT_AGGREGATE, + Serializable.class, + Object.class); + + iostatisticsSnapshotCreateMethod = + loadStaticMethod(wrappedClass, + Serializable.class, + IOSTATISTICS_SNAPSHOT_CREATE); + + iostatisticsSnapshotCreateWithSourceMethod = + loadStaticMethod(wrappedClass, + Serializable.class, + IOSTATISTICS_SNAPSHOT_CREATE, + Object.class); + + iostatisticsSnapshotFromJsonStringMethod = + loadStaticMethod(wrappedClass, + Serializable.class, + IOSTATISTICS_SNAPSHOT_FROM_JSON_STRING, + String.class); + + iostatisticsSnapshotToJsonStringMethod = + loadStaticMethod(wrappedClass, + String.class, + IOSTATISTICS_SNAPSHOT_TO_JSON_STRING, + Serializable.class); + + iostatisticsSnapshotRetrieveMethod = + loadStaticMethod(wrappedClass, + Serializable.class, + IOSTATISTICS_SNAPSHOT_RETRIEVE, + Object.class); + + iostatisticsSnapshotLoadMethod = + loadStaticMethod(wrappedClass, + Serializable.class, + IOSTATISTICS_SNAPSHOT_LOAD, + FileSystem.class, + Path.class); + + iostatisticsSnapshotSaveMethod = + loadStaticMethod(wrappedClass, + Void.class, + IOSTATISTICS_SNAPSHOT_SAVE, + Serializable.class, + FileSystem.class, + Path.class, + boolean.class); // note: not Boolean.class + + // getting contents of snapshots + iostatisticsCountersMethod = + loadStaticMethod(wrappedClass, + Map.class, + IOSTATISTICS_COUNTERS, + Serializable.class); + iostatisticsGaugesMethod = + loadStaticMethod(wrappedClass, + Map.class, + IOSTATISTICS_GAUGES, + Serializable.class); + iostatisticsMinimumsMethod = + loadStaticMethod(wrappedClass, + Map.class, + IOSTATISTICS_MINIMUMS, + Serializable.class); + iostatisticsMaximumsMethod = + loadStaticMethod(wrappedClass, + Map.class, + IOSTATISTICS_MAXIMUMS, + Serializable.class); + iostatisticsMeansMethod = + loadStaticMethod(wrappedClass, + Map.class, + IOSTATISTICS_MEANS, + Serializable.class); + + // stringification + + iostatisticsToPrettyStringMethod = + loadStaticMethod(wrappedClass, + String.class, + IOSTATISTICS_TO_PRETTY_STRING, + Object.class); + + } + + /** + * Is the wrapped statistics class loaded? + * @return true if the wrappedIO class was found and loaded. 
+ */ + public boolean loaded() { + return loaded; + } + + /** + * Are the core IOStatistics methods and classes available. + * @return true if the relevant methods are loaded. + */ + public boolean ioStatisticsAvailable() { + return available(iostatisticsSnapshotCreateMethod); + } + + /** + * Are the IOStatisticsContext methods and classes available? + * @return true if the relevant methods are loaded. + */ + public boolean ioStatisticsContextAvailable() { + return available(iostatisticsContextEnabledMethod); + } + + /** + * Require a IOStatistics to be available. + * @throws UnsupportedOperationException if the method was not found. + */ + private void checkIoStatisticsAvailable() { + checkAvailable(iostatisticsSnapshotCreateMethod); + } + + /** + * Require IOStatisticsContext methods to be available. + * @throws UnsupportedOperationException if the classes/methods were not found + */ + private void checkIoStatisticsContextAvailable() { + checkAvailable(iostatisticsContextEnabledMethod); + } + + /** + * Probe for an object being an instance of {@code IOStatisticsSource}. + * @param object object to probe + * @return true if the object is the right type, false if the classes + * were not found or the object is null/of a different type + */ + public boolean isIOStatisticsSource(Object object) { + return ioStatisticsAvailable() + && (boolean) isIOStatisticsSourceMethod.invoke(null, object); + } + + /** + * Probe for an object being an instance of {@code IOStatisticsSource}. + * @param object object to probe + * @return true if the object is the right type, false if the classes + * were not found or the object is null/of a different type + */ + public boolean isIOStatistics(Object object) { + return ioStatisticsAvailable() + && (boolean) isIOStatisticsMethod.invoke(null, object); + } + + /** + * Probe for an object being an instance of {@code IOStatisticsSnapshot}. + * @param object object to probe + * @return true if the object is the right type, false if the classes + * were not found or the object is null/of a different type + */ + public boolean isIOStatisticsSnapshot(Serializable object) { + return ioStatisticsAvailable() + && (boolean) isIOStatisticsSnapshotMethod.invoke(null, object); + } + + /** + * Probe to check if the thread-level IO statistics enabled. + * If the relevant classes and methods were not found, returns false + * @return true if the IOStatisticsContext API was found + * and is enabled. + */ + public boolean iostatisticsContext_enabled() { + return ioStatisticsAvailable() + && (boolean) iostatisticsContextEnabledMethod.invoke(null); + } + + /** + * Get the context's {@code IOStatisticsContext} which + * implements {@code IOStatisticsSource}. + * This is either a thread-local value or a global empty context. + * @return instance of {@code IOStatisticsContext}. + * @throws UnsupportedOperationException if the IOStatisticsContext API was not found + */ + public Object iostatisticsContext_getCurrent() + throws UnsupportedOperationException { + checkIoStatisticsContextAvailable(); + return iostatisticsContextGetCurrentMethod.invoke(null); + } + + /** + * Set the IOStatisticsContext for the current thread. + * @param statisticsContext IOStatistics context instance for the + * current thread. If null, the context is reset. 
+ * @throws UnsupportedOperationException if the IOStatisticsContext API was not found + */ + public void iostatisticsContext_setThreadIOStatisticsContext( + @Nullable Object statisticsContext) throws UnsupportedOperationException { + checkIoStatisticsContextAvailable(); + iostatisticsContextSetThreadContextMethod.invoke(null, statisticsContext); + } + + /** + * Reset the context's IOStatistics. + * {@code IOStatisticsContext#reset()} + * @throws UnsupportedOperationException if the IOStatisticsContext API was not found + */ + public void iostatisticsContext_reset() + throws UnsupportedOperationException { + checkIoStatisticsContextAvailable(); + iostatisticsContextResetMethod.invoke(null); + } + + /** + * Take a snapshot of the context IOStatistics. + * {@code IOStatisticsContext#snapshot()} + * @return an instance of {@code IOStatisticsSnapshot}. + * @throws UnsupportedOperationException if the IOStatisticsContext API was not found + */ + public Serializable iostatisticsContext_snapshot() + throws UnsupportedOperationException { + checkIoStatisticsContextAvailable(); + return iostatisticsContextSnapshotMethod.invoke(null); + } + /** + * Aggregate into the IOStatistics context the statistics passed in via + * IOStatistics/source parameter. + *
+ * <p>
    + * Returns false if the source is null or does not contain any statistics. + * @param source implementation of {@link IOStatisticsSource} or {@link IOStatistics} + * @return true if the the source object was aggregated. + */ + public boolean iostatisticsContext_aggregate(Object source) { + checkIoStatisticsContextAvailable(); + return iostatisticsContextAggregateMethod.invoke(null, source); + } + + /** + * Aggregate an existing {@code IOStatisticsSnapshot} with + * the supplied statistics. + * @param snapshot snapshot to update + * @param statistics IOStatistics to add + * @return true if the snapshot was updated. + * @throws IllegalArgumentException if the {@code statistics} argument is not + * null but not an instance of IOStatistics, or if {@code snapshot} is invalid. + * @throws UnsupportedOperationException if the IOStatistics classes were not found + */ + public boolean iostatisticsSnapshot_aggregate( + Serializable snapshot, @Nullable Object statistics) + throws UnsupportedOperationException { + checkIoStatisticsAvailable(); + return iostatisticsSnapshotAggregateMethod.invoke(null, snapshot, statistics); + } + + /** + * Create a new {@code IOStatisticsSnapshot} instance. + * @return an empty IOStatisticsSnapshot. + * @throws UnsupportedOperationException if the IOStatistics classes were not found + */ + public Serializable iostatisticsSnapshot_create() + throws UnsupportedOperationException { + checkIoStatisticsAvailable(); + return iostatisticsSnapshotCreateMethod.invoke(null); + } + + /** + * Create a new {@code IOStatisticsSnapshot} instance. + * @param source optional source statistics + * @return an IOStatisticsSnapshot. + * @throws ClassCastException if the {@code source} is not valid. + * @throws UnsupportedOperationException if the IOStatistics classes were not found + */ + public Serializable iostatisticsSnapshot_create( + @Nullable Object source) + throws UnsupportedOperationException, ClassCastException { + checkIoStatisticsAvailable(); + return iostatisticsSnapshotCreateWithSourceMethod.invoke(null, source); + } + + /** + * Save IOStatisticsSnapshot to a JSON string. + * @param snapshot statistics; may be null or of an incompatible type + * @return JSON string value or null if source is not an IOStatisticsSnapshot + * @throws UncheckedIOException Any IO/jackson exception. + * @throws UnsupportedOperationException if the IOStatistics classes were not found + */ + public String iostatisticsSnapshot_toJsonString(@Nullable Serializable snapshot) + throws UncheckedIOException, UnsupportedOperationException { + checkIoStatisticsAvailable(); + return iostatisticsSnapshotToJsonStringMethod.invoke(null, snapshot); + } + + /** + * Load IOStatisticsSnapshot from a JSON string. + * @param json JSON string value. + * @return deserialized snapshot. + * @throws UncheckedIOException Any IO/jackson exception. + * @throws UnsupportedOperationException if the IOStatistics classes were not found + */ + public Serializable iostatisticsSnapshot_fromJsonString( + final String json) throws UncheckedIOException, UnsupportedOperationException { + checkIoStatisticsAvailable(); + return iostatisticsSnapshotFromJsonStringMethod.invoke(null, json); + } + + /** + * Load IOStatisticsSnapshot from a Hadoop filesystem. + * @param fs filesystem + * @param path path + * @return the loaded snapshot + * @throws UncheckedIOException Any IO exception. 
+ * @throws UnsupportedOperationException if the IOStatistics classes were not found + */ + public Serializable iostatisticsSnapshot_load( + FileSystem fs, + Path path) throws UncheckedIOException, UnsupportedOperationException { + checkIoStatisticsAvailable(); + return iostatisticsSnapshotLoadMethod.invoke(null, fs, path); + } + + /** + * Extract the IOStatistics from an object in a serializable form. + * @param source source object, may be null/not a statistics source/instance + * @return {@code IOStatisticsSnapshot} or null if the object is null/doesn't have statistics + * @throws UnsupportedOperationException if the IOStatistics classes were not found + */ + public Serializable iostatisticsSnapshot_retrieve(@Nullable Object source) + throws UnsupportedOperationException { + checkIoStatisticsAvailable(); + return iostatisticsSnapshotRetrieveMethod.invoke(null, source); + } + + /** + * Save IOStatisticsSnapshot to a Hadoop filesystem as a JSON file. + * @param snapshot statistics + * @param fs filesystem + * @param path path + * @param overwrite should any existing file be overwritten? + * @throws UncheckedIOException Any IO exception. + * @throws UnsupportedOperationException if the IOStatistics classes were not found + */ + public void iostatisticsSnapshot_save( + @Nullable Serializable snapshot, + FileSystem fs, + Path path, + boolean overwrite) throws UncheckedIOException, UnsupportedOperationException { + + checkIoStatisticsAvailable(); + iostatisticsSnapshotSaveMethod.invoke(null, snapshot, fs, path, overwrite); + } + + /** + * Get the counters of an IOStatisticsSnapshot. + * @param source source of statistics. + * @return the map of counters. + */ + public Map iostatistics_counters( + Serializable source) { + return iostatisticsCountersMethod.invoke(null, source); + } + + /** + * Get the gauges of an IOStatisticsSnapshot. + * @param source source of statistics. + * @return the map of gauges. + */ + public Map iostatistics_gauges( + Serializable source) { + return iostatisticsGaugesMethod.invoke(null, source); + + } + + /** + * Get the minimums of an IOStatisticsSnapshot. + * @param source source of statistics. + * @return the map of minimums. + */ + public Map iostatistics_minimums( + Serializable source) { + return iostatisticsMinimumsMethod.invoke(null, source); + } + + /** + * Get the maximums of an IOStatisticsSnapshot. + * @param source source of statistics. + * @return the map of maximums. + */ + public Map iostatistics_maximums( + Serializable source) { + return iostatisticsMaximumsMethod.invoke(null, source); + } + + /** + * Get the means of an IOStatisticsSnapshot. + * Each value in the map is the (sample, sum) tuple of the values; + * the mean is then calculated by dividing sum/sample wherever sample is non-zero. + * @param source source of statistics. + * @return a map of mean key to (sample, sum) tuples. + */ + public Map> iostatistics_means( + Serializable source) { + return iostatisticsMeansMethod.invoke(null, source); + } + + /** + * Convert IOStatistics to a string form, with all the metrics sorted + * and empty value stripped. + * @param statistics A statistics instance. 
+ * @return string value or the empty string if null + * @throws UnsupportedOperationException if the IOStatistics classes were not found + */ + public String iostatistics_toPrettyString(Object statistics) { + checkIoStatisticsAvailable(); + return iostatisticsToPrettyStringMethod.invoke(null, statistics); + } + + @Override + public String toString() { + return "DynamicWrappedStatistics{" + + "ioStatisticsAvailable =" + ioStatisticsAvailable() + + ", ioStatisticsContextAvailable =" + ioStatisticsContextAvailable() + + '}'; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/impl/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/impl/package-info.java new file mode 100644 index 0000000000000..042d834581cae --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/impl/package-info.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Implementation/testing support for wrapped IO. + */ + +@InterfaceAudience.LimitedPrivate("testing") +@InterfaceStability.Unstable +package org.apache.hadoop.io.wrappedio.impl; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/package-info.java new file mode 100644 index 0000000000000..176c3f030f41d --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/wrappedio/package-info.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Support for dynamic access to filesystem operations which are not available + * in older hadoop releases. + *
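The snapshot methods above are intended to be usable without compile-time access to the IOStatistics classes. As an illustration only (not part of this patch), here is a minimal sketch of capturing a stream's statistics as JSON through DynamicWrappedStatistics; the no-arg constructor matches its use in TestWrappedIO later in this patch, the demo class name is invented, and the calls are restricted to the methods documented above.

```java
import java.io.Serializable;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.io.wrappedio.impl.DynamicWrappedStatistics;

public final class StatisticsJsonDemo {

  private StatisticsJsonDemo() {
  }

  /**
   * Snapshot the statistics of a stream and render them as JSON.
   * Returns the empty string if the IOStatistics classes are not on the classpath.
   */
  public static String statisticsToJson(FSDataInputStream in) {
    DynamicWrappedStatistics statistics = new DynamicWrappedStatistics();
    try {
      // create an empty snapshot, add the stream's statistics, then serialize
      Serializable snapshot = statistics.iostatisticsSnapshot_create();
      statistics.iostatisticsSnapshot_aggregate(snapshot, in.getIOStatistics());
      return statistics.iostatisticsSnapshot_toJsonString(snapshot);
    } catch (UnsupportedOperationException e) {
      // the statistics classes were not found at run time
      return "";
    }
  }
}
```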

+ * Classes in this package tagged as {@code @InterfaceAudience#Public} export
+ * methods intended to be loaded through reflection by other applications and libraries.
+ * Tests against these SHOULD themselves use reflection, so as to guarantee the
+ * stability of reflection-based access.
+ *

    + * Classes tagged as private/limited private are for support and testing. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +package org.apache.hadoop.io.wrappedio; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/BindingUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/BindingUtils.java new file mode 100644 index 0000000000000..47a2deed41dcb --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/BindingUtils.java @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.util.dynamic; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.function.Supplier; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import static org.apache.hadoop.util.Preconditions.checkState; + +/** + * Utility methods to assist binding to Hadoop APIs through reflection. + * Source: {@code org.apache.parquet.hadoop.util.wrapped.io.BindingUtils}. + */ +@InterfaceAudience.LimitedPrivate("testing") +@InterfaceStability.Unstable +public final class BindingUtils { + + private static final Logger LOG = LoggerFactory.getLogger(BindingUtils.class); + + private BindingUtils() {} + + /** + * Load a class by name. + * @param className classname + * @return the class or null if it could not be loaded. + */ + public static Class loadClass(String className) { + try { + return Class.forName(className); + } catch (ClassNotFoundException e) { + LOG.debug("No class {}", className, e); + return null; + } + } + + /** + * Load a class by name. + * @param className classname + * @return the class. + * @throws RuntimeException if the class was not found. + */ + public static Class loadClassSafely(String className) { + try { + return Class.forName(className); + } catch (ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + /** + * Load a class by name. + * @param cl classloader to use. + * @param className classname + * @return the class or null if it could not be loaded. + */ + public static Class loadClass(ClassLoader cl, String className) { + try { + return cl.loadClass(className); + } catch (ClassNotFoundException e) { + LOG.debug("No class {}", className, e); + return null; + } + } + + + /** + * Get an invocation from the source class, which will be unavailable() if + * the class is null or the method isn't found. 
+ * + * @param return type + * @param source source. If null, the method is a no-op. + * @param returnType return type class (unused) + * @param name method name + * @param parameterTypes parameters + * + * @return the method or "unavailable" + */ + public static DynMethods.UnboundMethod loadInvocation( + Class source, Class returnType, String name, Class... parameterTypes) { + + if (source != null) { + final DynMethods.UnboundMethod m = new DynMethods.Builder(name) + .impl(source, name, parameterTypes) + .orNoop() + .build(); + if (m.isNoop()) { + // this is a sign of a mismatch between this class's expected + // signatures and actual ones. + // log at debug. + LOG.debug("Failed to load method {} from {}", name, source); + } else { + LOG.debug("Found method {} from {}", name, source); + } + return m; + } else { + return noop(name); + } + } + + /** + * Load a static method from the source class, which will be a noop() if + * the class is null or the method isn't found. + * If the class and method are not found, then an {@code IllegalStateException} + * is raised on the basis that this means that the binding class is broken, + * rather than missing/out of date. + * + * @param return type + * @param source source. If null, the method is a no-op. + * @param returnType return type class (unused) + * @param name method name + * @param parameterTypes parameters + * + * @return the method or a no-op. + * @throws IllegalStateException if the method is not static. + */ + public static DynMethods.UnboundMethod loadStaticMethod( + Class source, Class returnType, String name, Class... parameterTypes) { + + final DynMethods.UnboundMethod method = + loadInvocation(source, returnType, name, parameterTypes); + if (!available(method)) { + LOG.debug("Method not found: {}", name); + } + checkState(method.isStatic(), "Method is not static %s", method); + return method; + } + + /** + * Create a no-op method. + * + * @param name method name + * + * @return a no-op method. + */ + public static DynMethods.UnboundMethod noop(final String name) { + return new DynMethods.Builder(name).orNoop().build(); + } + + /** + * Given a sequence of methods, verify that they are all available. + * + * @param methods methods + * + * @return true if they are all implemented + */ + public static boolean implemented(DynMethods.UnboundMethod... methods) { + for (DynMethods.UnboundMethod method : methods) { + if (method.isNoop()) { + return false; + } + } + return true; + } + + /** + * Require a method to be available. + * @param method method to probe + * @throws UnsupportedOperationException if the method was not found. + */ + public static void checkAvailable(DynMethods.UnboundMethod method) + throws UnsupportedOperationException { + if (!available(method)) { + throw new UnsupportedOperationException("Unbound " + method); + } + } + + /** + * Is a method available? + * @param method method to probe + * @return true iff the method is found and loaded. + */ + public static boolean available(DynMethods.UnboundMethod method) { + return !method.isNoop(); + } + + /** + * Invoke the supplier, catching any {@code UncheckedIOException} raised, + * extracting the inner IOException and rethrowing it. + * @param call call to invoke + * @return result + * @param type of result + * @throws IOException if the call raised an IOException wrapped by an UncheckedIOException. 
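To make this binding pattern concrete, the following sketch (not part of the patch) uses loadClass(), loadInvocation() and available() to bind to the WrappedIO.bulkDelete_pageSize(FileSystem, Path) method referenced elsewhere in this patch; the BulkDeleteBinding class name and the fallback page size of 1 are this example's own choices.

```java
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.dynamic.BindingUtils;
import org.apache.hadoop.util.dynamic.DynMethods;

public final class BulkDeleteBinding {

  /** Resolves to null if the class is absent from the classpath. */
  private static final Class<?> WRAPPED_IO_CLASS =
      BindingUtils.loadClass("org.apache.hadoop.io.wrappedio.WrappedIO");

  /** A no-op method if the class or the method could not be bound. */
  private static final DynMethods.UnboundMethod BULK_DELETE_PAGE_SIZE =
      BindingUtils.loadInvocation(WRAPPED_IO_CLASS, Integer.class,
          "bulkDelete_pageSize", FileSystem.class, Path.class);

  private BulkDeleteBinding() {
  }

  /** Page size reported by the store, or 1 if the binding is unavailable. */
  public static int pageSize(FileSystem fs, Path base) {
    if (!BindingUtils.available(BULK_DELETE_PAGE_SIZE)) {
      return 1;
    }
    Integer size = BULK_DELETE_PAGE_SIZE.invoke(null, fs, base);
    return size == null ? 1 : size;
  }
}
```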
+ */ + public static T extractIOEs(Supplier call) throws IOException { + try { + return call.get(); + } catch (UncheckedIOException e) { + throw e.getCause(); + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/DynConstructors.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/DynConstructors.java new file mode 100644 index 0000000000000..4c8e5e2695f33 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/DynConstructors.java @@ -0,0 +1,273 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.util.dynamic; + +import java.lang.reflect.Constructor; +import java.lang.reflect.InvocationTargetException; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +import static org.apache.hadoop.util.dynamic.DynMethods.throwIfInstance; +import static org.apache.hadoop.util.Preconditions.checkArgument; + +/** + * Dynamic constructors. + * Taken from {@code org.apache.parquet.util.DynConstructors}. + */ +@InterfaceAudience.LimitedPrivate("testing") +@InterfaceStability.Unstable +public class DynConstructors { + public static final class Ctor extends DynMethods.UnboundMethod { + private final Constructor ctor; + private final Class constructed; + + private Ctor(Constructor constructor, Class constructed) { + super(null, "newInstance"); + this.ctor = constructor; + this.constructed = constructed; + } + + public Class getConstructedClass() { + return constructed; + } + + public C newInstanceChecked(Object... args) throws Exception { + try { + return ctor.newInstance(args); + } catch (InstantiationException | IllegalAccessException e) { + throw e; + } catch (InvocationTargetException e) { + throwIfInstance(e.getCause(), Exception.class); + throwIfInstance(e.getCause(), RuntimeException.class); + throw new RuntimeException(e.getCause()); + } + } + + public C newInstance(Object... args) { + try { + return newInstanceChecked(args); + } catch (Exception e) { + throwIfInstance(e, RuntimeException.class); + throw new RuntimeException(e); + } + } + + @Override + @SuppressWarnings("unchecked") + public R invoke(Object target, Object... args) { + checkArgument(target == null, "Invalid call to constructor: target must be null"); + return (R) newInstance(args); + } + + @Override + @SuppressWarnings("unchecked") + public R invokeChecked(Object target, Object... 
args) throws Exception { + checkArgument(target == null, "Invalid call to constructor: target must be null"); + return (R) newInstanceChecked(args); + } + + @Override + public DynMethods.BoundMethod bind(Object receiver) { + throw new IllegalStateException("Cannot bind constructors"); + } + + @Override + public boolean isStatic() { + return true; + } + + @Override + public String toString() { + return getClass().getSimpleName() + "(constructor=" + ctor + ", class=" + constructed + ")"; + } + } + + public static class Builder { + private final Class baseClass; + private ClassLoader loader = Thread.currentThread().getContextClassLoader(); + private Ctor ctor = null; + private Map problems = new HashMap(); + + public Builder(Class baseClass) { + this.baseClass = baseClass; + } + + public Builder() { + this.baseClass = null; + } + + /** + * Set the {@link ClassLoader} used to lookup classes by name. + *

    + * If not set, the current thread's ClassLoader is used. + * + * @param value a ClassLoader + * @return this Builder for method chaining + */ + public Builder loader(ClassLoader value) { + this.loader = value; + return this; + } + + public Builder impl(String className, Class... types) { + // don't do any work if an implementation has been found + if (ctor != null) { + return this; + } + + try { + Class targetClass = Class.forName(className, true, loader); + impl(targetClass, types); + } catch (NoClassDefFoundError | ClassNotFoundException e) { + // cannot load this implementation + problems.put(className, e); + } + + return this; + } + + public Builder impl(Class targetClass, Class... types) { + // don't do any work if an implementation has been found + if (ctor != null) { + return this; + } + + try { + ctor = new Ctor(targetClass.getConstructor(types), targetClass); + } catch (NoSuchMethodException e) { + // not the right implementation + problems.put(methodName(targetClass, types), e); + } + return this; + } + + public Builder hiddenImpl(Class... types) { + hiddenImpl(baseClass, types); + return this; + } + + @SuppressWarnings("unchecked") + public Builder hiddenImpl(String className, Class... types) { + // don't do any work if an implementation has been found + if (ctor != null) { + return this; + } + + try { + Class targetClass = Class.forName(className, true, loader); + hiddenImpl(targetClass, types); + } catch (NoClassDefFoundError | ClassNotFoundException e) { + // cannot load this implementation + problems.put(className, e); + } + return this; + } + + public Builder hiddenImpl(Class targetClass, Class... types) { + // don't do any work if an implementation has been found + if (ctor != null) { + return this; + } + + try { + Constructor hidden = targetClass.getDeclaredConstructor(types); + AccessController.doPrivileged(new MakeAccessible(hidden)); + ctor = new Ctor(hidden, targetClass); + } catch (NoSuchMethodException | SecurityException e) { + // unusable or not the right implementation + problems.put(methodName(targetClass, types), e); + } + return this; + } + + @SuppressWarnings("unchecked") + public Ctor buildChecked() throws NoSuchMethodException { + if (ctor != null) { + return ctor; + } + throw new NoSuchMethodException( + "Cannot find constructor for " + baseClass + "\n" + formatProblems(problems)); + } + + @SuppressWarnings("unchecked") + public Ctor build() { + if (ctor != null) { + return ctor; + } + throw new RuntimeException("Cannot find constructor for " + baseClass + + "\n" + formatProblems(problems)); + } + } + + private static final class MakeAccessible implements PrivilegedAction { + private Constructor hidden; + + private MakeAccessible(Constructor hidden) { + this.hidden = hidden; + } + + @Override + public Void run() { + hidden.setAccessible(true); + return null; + } + } + + private static String formatProblems(Map problems) { + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (Map.Entry problem : problems.entrySet()) { + if (first) { + first = false; + } else { + sb.append("\n"); + } + sb.append("\tMissing ") + .append(problem.getKey()) + .append(" [") + .append(problem.getValue().getClass().getName()) + .append(": ") + .append(problem.getValue().getMessage()) + .append("]"); + } + return sb.toString(); + } + + private static String methodName(Class targetClass, Class... 
types) { + StringBuilder sb = new StringBuilder(); + sb.append(targetClass.getName()).append("("); + boolean first = true; + for (Class type : types) { + if (first) { + first = false; + } else { + sb.append(","); + } + sb.append(type.getName()); + } + sb.append(")"); + return sb.toString(); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/DynMethods.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/DynMethods.java new file mode 100644 index 0000000000000..3f703ad9c918e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/DynMethods.java @@ -0,0 +1,544 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.util.dynamic; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.lang.reflect.Modifier; +import java.security.AccessController; +import java.security.PrivilegedAction; +import java.util.Arrays; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.util.Preconditions; + +import static org.apache.hadoop.util.Preconditions.checkState; + + +/** + * Dynamic method invocation. + * Taken from {@code org.apache.parquet.util.DynMethods}. + */ +@InterfaceAudience.LimitedPrivate("testing") +@InterfaceStability.Unstable +public final class DynMethods { + + private static final Logger LOG = LoggerFactory.getLogger(DynMethods.class); + + private DynMethods() { + } + + /** + * Convenience wrapper class around {@link Method}. + *
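A short illustrative sketch (not part of the patch) of the DynConstructors builder above; the class name and int-argument constructor passed to it are arbitrary stand-ins.

```java
import org.apache.hadoop.util.dynamic.DynConstructors;

public final class CtorBindingDemo {

  private CtorBindingDemo() {
  }

  /** Instantiate a class by name, returning null if no matching constructor is found. */
  public static Object newInstanceOrNull(String className, int capacity) {
    try {
      DynConstructors.Ctor ctor = new DynConstructors.Builder()
          .impl(className, int.class)   // first implementation to match wins
          .buildChecked();              // throws if nothing matched
      return ctor.newInstance(capacity);
    } catch (NoSuchMethodException e) {
      return null;
    }
  }
}
```

For example, newInstanceOrNull("java.util.ArrayList", 16) returns a new list, while an unknown class name returns null.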

    + * Allows callers to invoke the wrapped method with all Exceptions wrapped by + * RuntimeException, or with a single Exception catch block. + */ + public static class UnboundMethod { + + private final Method method; + + private final String name; + + private final int argLength; + + UnboundMethod(Method method, String name) { + this.method = method; + this.name = name; + this.argLength = + (method == null || method.isVarArgs()) ? -1 : method.getParameterTypes().length; + } + + @SuppressWarnings("unchecked") + public R invokeChecked(Object target, Object... args) throws Exception { + try { + if (argLength < 0) { + return (R) method.invoke(target, args); + } else { + if (argLength != args.length) { + LOG.error("expected {} arguments but got {}", argLength, args.length); + } + return (R) method.invoke(target, Arrays.copyOfRange(args, 0, argLength)); + } + } catch (InvocationTargetException e) { + throwIfInstance(e.getCause(), Exception.class); + throwIfInstance(e.getCause(), RuntimeException.class); + throw new RuntimeException(e.getCause()); + } + } + + public R invoke(Object target, Object... args) { + try { + return this.invokeChecked(target, args); + } catch (Exception e) { + throwIfInstance(e, RuntimeException.class); + throw new RuntimeException(e); + } + } + + /** + * Invoke a static method. + * @param args arguments. + * @return result. + * @param type of result. + */ + public R invokeStatic(Object... args) { + checkState(isStatic(), "Method is not static %s", toString()); + return invoke(null, args); + } + + /** + * Returns this method as a BoundMethod for the given receiver. + * @param receiver an Object to receive the method invocation + * @return a {@link BoundMethod} for this method and the receiver + * @throws IllegalStateException if the method is static + * @throws IllegalArgumentException if the receiver's class is incompatible + */ + public BoundMethod bind(Object receiver) { + checkState(!isStatic(), "Cannot bind static method %s", + method.toGenericString()); + Preconditions.checkArgument(method.getDeclaringClass().isAssignableFrom(receiver.getClass()), + "Cannot bind %s to instance of %s", method.toGenericString(), receiver.getClass()); + + return new BoundMethod(this, receiver); + } + + /** + * @return whether the method is a static method + */ + public boolean isStatic() { + return Modifier.isStatic(method.getModifiers()); + } + + /** + * @return whether the method is a noop + */ + public boolean isNoop() { + return this == NOOP; + } + + /** + * Returns this method as a StaticMethod. + * @return a {@link StaticMethod} for this method + * @throws IllegalStateException if the method is not static + */ + public StaticMethod asStatic() { + checkState(isStatic(), "Method is not static"); + return new StaticMethod(this); + } + + public String toString() { + return "DynMethods.UnboundMethod(name=" + name + " method=" + method.toGenericString() + ")"; + } + + /** + * Singleton {@link UnboundMethod}, performs no operation and returns null. + */ + private static final UnboundMethod NOOP = new UnboundMethod(null, "NOOP") { + + @Override + public R invokeChecked(Object target, Object... 
args) throws Exception { + return null; + } + + @Override + public BoundMethod bind(Object receiver) { + return new BoundMethod(this, receiver); + } + + @Override + public StaticMethod asStatic() { + return new StaticMethod(this); + } + + @Override + public boolean isStatic() { + return true; + } + + @Override + public String toString() { + return "DynMethods.UnboundMethod(NOOP)"; + } + }; + } + + public static final class BoundMethod { + + private final UnboundMethod method; + + private final Object receiver; + + private BoundMethod(UnboundMethod method, Object receiver) { + this.method = method; + this.receiver = receiver; + } + + public R invokeChecked(Object... args) throws Exception { + return method.invokeChecked(receiver, args); + } + + public R invoke(Object... args) { + return method.invoke(receiver, args); + } + } + + public static final class StaticMethod { + + private final UnboundMethod method; + + private StaticMethod(UnboundMethod method) { + this.method = method; + } + + public R invokeChecked(Object... args) throws Exception { + return method.invokeChecked(null, args); + } + + public R invoke(Object... args) { + return method.invoke(null, args); + } + } + + /** + * If the given throwable is an instance of E, throw it as an E. + * @param t an exception instance + * @param excClass an exception class t may be an instance of + * @param the type of exception that will be thrown if throwable is an instance + * @throws E if t is an instance of E + */ + @SuppressWarnings("unchecked") + public static void throwIfInstance(Throwable t, Class excClass) + throws E { + if (excClass.isAssignableFrom(t.getClass())) { + // the throwable is already an exception, so throw it + throw (E)t; + } + } + + public static final class Builder { + + private final String name; + + private ClassLoader loader = Thread.currentThread().getContextClassLoader(); + + private UnboundMethod method = null; + + public Builder(String methodName) { + this.name = methodName; + } + + /** + * Set the {@link ClassLoader} used to lookup classes by name. + *

    + * If not set, the current thread's ClassLoader is used. + * @param classLoader a ClassLoader + * @return this Builder for method chaining + */ + public Builder loader(ClassLoader classLoader) { + this.loader = classLoader; + return this; + } + + /** + * If no implementation has been found, adds a NOOP method. + *

    + * Note: calls to impl will not match after this method is called! + * @return this Builder for method chaining + */ + public Builder orNoop() { + if (method == null) { + this.method = UnboundMethod.NOOP; + } + return this; + } + + /** + * Checks for an implementation, first finding the given class by name. + * @param className name of a class + * @param methodName name of a method (different from constructor) + * @param argClasses argument classes for the method + * @return this Builder for method chaining + */ + public Builder impl(String className, String methodName, Class... argClasses) { + // don't do any work if an implementation has been found + if (method != null) { + return this; + } + + try { + Class targetClass = Class.forName(className, true, loader); + impl(targetClass, methodName, argClasses); + } catch (ClassNotFoundException e) { + // class not found on supplied classloader. + LOG.debug("failed to load class {}", className, e); + } + return this; + } + + /** + * Checks for an implementation, first finding the given class by name. + *

    + * The name passed to the constructor is the method name used. + * @param className name of a class + * @param argClasses argument classes for the method + * @return this Builder for method chaining + */ + public Builder impl(String className, Class... argClasses) { + impl(className, name, argClasses); + return this; + } + + /** + * Checks for a method implementation. + * @param targetClass the class to check for an implementation + * @param methodName name of a method (different from constructor) + * @param argClasses argument classes for the method + * @return this Builder for method chaining + */ + public Builder impl(Class targetClass, String methodName, Class... argClasses) { + // don't do any work if an implementation has been found + if (method != null) { + return this; + } + + try { + this.method = new UnboundMethod(targetClass.getMethod(methodName, argClasses), name); + } catch (NoSuchMethodException e) { + // not the right implementation + LOG.debug("failed to load method {} from class {}", methodName, targetClass, e); + } + return this; + } + + /** + * Checks for a method implementation. + *

    + * The name passed to the constructor is the method name used. + * @param targetClass the class to check for an implementation + * @param argClasses argument classes for the method + * @return this Builder for method chaining + */ + public Builder impl(Class targetClass, Class... argClasses) { + impl(targetClass, name, argClasses); + return this; + } + + public Builder ctorImpl(Class targetClass, Class... argClasses) { + // don't do any work if an implementation has been found + if (method != null) { + return this; + } + + try { + this.method = new DynConstructors.Builder().impl(targetClass, argClasses).buildChecked(); + } catch (NoSuchMethodException e) { + // not the right implementation + LOG.debug("failed to load constructor arity {} from class {}", argClasses.length, + targetClass, e); + } + return this; + } + + public Builder ctorImpl(String className, Class... argClasses) { + // don't do any work if an implementation has been found + if (method != null) { + return this; + } + + try { + this.method = new DynConstructors.Builder().impl(className, argClasses).buildChecked(); + } catch (NoSuchMethodException e) { + // not the right implementation + LOG.debug("failed to load constructor arity {} from class {}", argClasses.length, className, + e); + } + return this; + } + + /** + * Checks for an implementation, first finding the given class by name. + * @param className name of a class + * @param methodName name of a method (different from constructor) + * @param argClasses argument classes for the method + * @return this Builder for method chaining + */ + public Builder hiddenImpl(String className, String methodName, Class... argClasses) { + // don't do any work if an implementation has been found + if (method != null) { + return this; + } + + try { + Class targetClass = Class.forName(className, true, loader); + hiddenImpl(targetClass, methodName, argClasses); + } catch (ClassNotFoundException e) { + // class not found on supplied classloader. + LOG.debug("failed to load class {}", className, e); + } + return this; + } + + /** + * Checks for an implementation, first finding the given class by name. + *

    + * The name passed to the constructor is the method name used. + * @param className name of a class + * @param argClasses argument classes for the method + * @return this Builder for method chaining + */ + public Builder hiddenImpl(String className, Class... argClasses) { + hiddenImpl(className, name, argClasses); + return this; + } + + /** + * Checks for a method implementation. + * @param targetClass the class to check for an implementation + * @param methodName name of a method (different from constructor) + * @param argClasses argument classes for the method + * @return this Builder for method chaining + */ + public Builder hiddenImpl(Class targetClass, String methodName, Class... argClasses) { + // don't do any work if an implementation has been found + if (method != null) { + return this; + } + + try { + Method hidden = targetClass.getDeclaredMethod(methodName, argClasses); + AccessController.doPrivileged(new MakeAccessible(hidden)); + this.method = new UnboundMethod(hidden, name); + } catch (SecurityException | NoSuchMethodException e) { + // unusable or not the right implementation + LOG.debug("failed to load method {} from class {}", methodName, targetClass, e); + } + return this; + } + + /** + * Checks for a method implementation. + *

    + * The name passed to the constructor is the method name used. + * @param targetClass the class to check for an implementation + * @param argClasses argument classes for the method + * @return this Builder for method chaining + */ + public Builder hiddenImpl(Class targetClass, Class... argClasses) { + hiddenImpl(targetClass, name, argClasses); + return this; + } + + /** + * Returns the first valid implementation as a UnboundMethod or throws a + * NoSuchMethodException if there is none. + * @return a {@link UnboundMethod} with a valid implementation + * @throws NoSuchMethodException if no implementation was found + */ + public UnboundMethod buildChecked() throws NoSuchMethodException { + if (method != null) { + return method; + } else { + throw new NoSuchMethodException("Cannot find method: " + name); + } + } + + /** + * Returns the first valid implementation as a UnboundMethod or throws a + * RuntimeError if there is none. + * @return a {@link UnboundMethod} with a valid implementation + * @throws RuntimeException if no implementation was found + */ + public UnboundMethod build() { + if (method != null) { + return method; + } else { + throw new RuntimeException("Cannot find method: " + name); + } + } + + /** + * Returns the first valid implementation as a BoundMethod or throws a + * NoSuchMethodException if there is none. + * @param receiver an Object to receive the method invocation + * @return a {@link BoundMethod} with a valid implementation and receiver + * @throws IllegalStateException if the method is static + * @throws IllegalArgumentException if the receiver's class is incompatible + * @throws NoSuchMethodException if no implementation was found + */ + public BoundMethod buildChecked(Object receiver) throws NoSuchMethodException { + return buildChecked().bind(receiver); + } + + /** + * Returns the first valid implementation as a BoundMethod or throws a + * RuntimeError if there is none. + * @param receiver an Object to receive the method invocation + * @return a {@link BoundMethod} with a valid implementation and receiver + * @throws IllegalStateException if the method is static + * @throws IllegalArgumentException if the receiver's class is incompatible + * @throws RuntimeException if no implementation was found + */ + public BoundMethod build(Object receiver) { + return build().bind(receiver); + } + + /** + * Returns the first valid implementation as a StaticMethod or throws a + * NoSuchMethodException if there is none. + * @return a {@link StaticMethod} with a valid implementation + * @throws IllegalStateException if the method is not static + * @throws NoSuchMethodException if no implementation was found + */ + public StaticMethod buildStaticChecked() throws NoSuchMethodException { + return buildChecked().asStatic(); + } + + /** + * Returns the first valid implementation as a StaticMethod or throws a + * RuntimeException if there is none. 
+ * @return a {@link StaticMethod} with a valid implementation + * @throws IllegalStateException if the method is not static + * @throws RuntimeException if no implementation was found + */ + public StaticMethod buildStatic() { + return build().asStatic(); + } + } + + private static final class MakeAccessible implements PrivilegedAction { + + private Method hidden; + + MakeAccessible(Method hidden) { + this.hidden = hidden; + } + + @Override + public Void run() { + hidden.setAccessible(true); + return null; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/package-info.java new file mode 100644 index 0000000000000..afc1a2d02af51 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/dynamic/package-info.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Dynamic class loading and instantiation. + * Taken from {@code org.apache.parquet}; + * there is also a fork of this in Apache Iceberg, + * so code using these classes should be relatively + * easily portable between the projects. + */ +@InterfaceAudience.LimitedPrivate("testing") +@InterfaceStability.Unstable +package org.apache.hadoop.util.dynamic; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/BiFunctionRaisingIOE.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/BiFunctionRaisingIOE.java index ea17c16d01e87..c5b3ee19689c5 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/BiFunctionRaisingIOE.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/BiFunctionRaisingIOE.java @@ -19,6 +19,7 @@ package org.apache.hadoop.util.functional; import java.io.IOException; +import java.io.UncheckedIOException; /** * Function of arity 2 which may raise an IOException. @@ -37,4 +38,19 @@ public interface BiFunctionRaisingIOE { * @throws IOException Any IO failure */ R apply(T t, U u) throws IOException; + + /** + * Apply unchecked. + * @param t argument + * @param u argument 2 + * @return the evaluated function + * @throws UncheckedIOException IOE raised. 
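The builder above mirrors DynConstructors. As a sketch (not from the patch), binding a method with a graceful no-op fallback might look like this, with String.isEmpty() standing in for a possibly-absent API; the demo class name is invented.

```java
import org.apache.hadoop.util.dynamic.DynMethods;

public final class MethodBindingDemo {

  private MethodBindingDemo() {
  }

  public static boolean isEmptyString(String value) {
    // the builder name doubles as the method name when impl() is given no explicit name
    DynMethods.UnboundMethod isEmpty = new DynMethods.Builder("isEmpty")
        .impl(String.class)   // bind String.isEmpty()
        .orNoop()             // degrade to a no-op rather than fail at load time
        .build();
    if (isEmpty.isNoop()) {
      return value.length() == 0;   // fallback when the binding is unavailable
    }
    return isEmpty.invoke(value);
  }
}
```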
+ */ + default R unchecked(T t, U u) { + try { + return apply(t, u); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CallableRaisingIOE.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CallableRaisingIOE.java index 65b3a63b2b9a0..7b61c0e1866b8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CallableRaisingIOE.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/CallableRaisingIOE.java @@ -19,9 +19,14 @@ package org.apache.hadoop.util.functional; import java.io.IOException; +import java.io.UncheckedIOException; /** * This is a callable which only raises an IOException. + * Its method {@link #unchecked()} invokes the {@link #apply()} + * method and wraps all IOEs in UncheckedIOException; + * call this if you need to pass this through java streaming + * APIs * @param return type */ @FunctionalInterface @@ -33,4 +38,18 @@ public interface CallableRaisingIOE { * @throws IOException Any IO failure */ R apply() throws IOException; + + /** + * Apply unchecked. + * @return the evaluated call + * @throws UncheckedIOException IOE raised. + */ + default R unchecked() { + try { + return apply(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionRaisingIOE.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionRaisingIOE.java index 83e041e2b3160..c48ad82720849 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionRaisingIOE.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionRaisingIOE.java @@ -19,6 +19,7 @@ package org.apache.hadoop.util.functional; import java.io.IOException; +import java.io.UncheckedIOException; /** * Function of arity 1 which may raise an IOException. @@ -35,4 +36,18 @@ public interface FunctionRaisingIOE { * @throws IOException Any IO failure */ R apply(T t) throws IOException; + + /** + * Apply unchecked. + * @param t argument + * @return the evaluated function + * @throws UncheckedIOException IOE raised. + */ + default R unchecked(T t) { + try { + return apply(t); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionalIO.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionalIO.java index bc9e2ea729b97..485242f4af25b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionalIO.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/FunctionalIO.java @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.util.function.Function; import java.util.function.Supplier; import org.apache.hadoop.classification.InterfaceAudience; @@ -42,11 +43,7 @@ private FunctionalIO() { * @throws UncheckedIOException if an IOE was raised. 
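A hedged sketch (not part of the patch) of why these unchecked() defaults exist: they let IOException-raising lambdas flow through java.util.stream pipelines, with the checked exception restored afterwards. The helper class and method names are invented; the calls are limited to FunctionRaisingIOE.unchecked() and standard JDK/FileSystem APIs.

```java
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.List;
import java.util.stream.Collectors;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.functional.FunctionRaisingIOE;

public final class UncheckedStreamDemo {

  private UncheckedStreamDemo() {
  }

  /** Map paths to their status inside a stream, rethrowing the original IOException. */
  public static List<FileStatus> statusOf(FileSystem fs, List<Path> paths) throws IOException {
    FunctionRaisingIOE<Path, FileStatus> getStatus = fs::getFileStatus;
    try {
      return paths.stream()
          .map(getStatus::unchecked)   // IOExceptions surface as UncheckedIOException
          .collect(Collectors.toList());
    } catch (UncheckedIOException e) {
      throw e.getCause();              // unwrap back to the checked form
    }
  }
}
```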
*/ public static T uncheckIOExceptions(CallableRaisingIOE call) { - try { - return call.apply(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } + return call.unchecked(); } /** @@ -56,7 +53,7 @@ public static T uncheckIOExceptions(CallableRaisingIOE call) { * @return a supplier which invokes the call. */ public static Supplier toUncheckedIOExceptionSupplier(CallableRaisingIOE call) { - return () -> uncheckIOExceptions(call); + return call::unchecked; } /** @@ -75,4 +72,18 @@ public static T extractIOExceptions(Supplier call) throws IOException { } } + + /** + * Convert a {@link FunctionRaisingIOE} as a {@link Supplier}. + * @param fun function to wrap + * @param type of input + * @param type of return value. + * @return a new function which invokes the inner function and wraps + * exceptions. + */ + public static Function toUncheckedFunction(FunctionRaisingIOE fun) { + return fun::unchecked; + } + + } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Tuples.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Tuples.java index ed80c1daca726..e53f404228235 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Tuples.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/functional/Tuples.java @@ -19,6 +19,7 @@ package org.apache.hadoop.util.functional; import java.util.Map; +import java.util.Objects; import org.apache.hadoop.classification.InterfaceStability; @@ -83,5 +84,21 @@ public String toString() { return "(" + key + ", " + value + ')'; } + @Override + public boolean equals(final Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + Tuple tuple = (Tuple) o; + return Objects.equals(key, tuple.key) && Objects.equals(value, tuple.value); + } + + @Override + public int hashCode() { + return Objects.hash(key, value); + } } } diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstreambuilder.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstreambuilder.md index 7bf6b16052b2f..c318a6a479b73 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstreambuilder.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/fsdatainputstreambuilder.md @@ -77,7 +77,7 @@ new `optLong()`, `optDouble()`, `mustLong()` and `mustDouble()` builder methods. ## Invariants The `FutureDataInputStreamBuilder` interface does not require parameters or -or the state of `FileSystem` until [`build()`](#build) is +or the state of `FileSystem` until `build()` is invoked and/or during the asynchronous open operation itself. Some aspects of the state of the filesystem, MAY be checked in the initial @@ -377,20 +377,30 @@ performance -and vice versa. subsystems. 1. If a policy is not recognized, the filesystem client MUST ignore it. -| Policy | Meaning | -|--------------|----------------------------------------------------------| -| `adaptive` | Any adaptive policy implemented by the store. | -| `default` | The default policy for this store. Generally "adaptive". | -| `random` | Optimize for random access. | -| `sequential` | Optimize for sequential access. | -| `vector` | The Vectored IO API is intended to be used. | -| `whole-file` | The whole file will be read. | - -Choosing the wrong read policy for an input source may be inefficient. 
+| Policy | Meaning | +|--------------|------------------------------------------------------------------------| +| `adaptive` | Any adaptive policy implemented by the store. | +| `avro` | This is an avro format which will be read sequentially | +| `csv` | This is CSV data which will be read sequentially | +| `default` | The default policy for this store. Generally "adaptive". | +| `columnar` | This is any columnar format other than ORC/parquet. | +| `hbase` | This is an HBase Table | +| `json` | This is a UTF-8 JSON/JSON lines format which will be read sequentially | +| `orc` | This is an ORC file. Optimize for it. | +| `parquet` | This is a Parquet file. Optimize for it. | +| `random` | Optimize for random access. | +| `sequential` | Optimize for sequential access. | +| `vector` | The Vectored IO API is intended to be used. | +| `whole-file` | The whole file will be read. | + +Choosing the wrong read policy for an input source may be inefficient but never fatal. A list of read policies MAY be supplied; the first one recognized/supported by -the filesystem SHALL be the one used. This allows for custom policies to be -supported, for example an `hbase-hfile` policy optimized for HBase HFiles. +the filesystem SHALL be the one used. This allows for configurations which are compatible +across versions. A policy `parquet, columnar, vector, random, adaptive` will use the parquet policy for +any filesystem aware of it, falling back to `columnar`, `vector`, `random` and finally `adaptive`. +The S3A connector will recognize the `random` since Hadoop 3.3.5 (i.e. since the `openFile()` API +was added), and `vector` from Hadoop 3.4.0. The S3A and ABFS input streams both implement the [IOStatisticsSource](iostatistics.html) API, and can be queried for their IO @@ -425,7 +435,7 @@ sequential to random seek policies may be exensive. When applications explicitly set the `fs.option.openfile.read.policy` option, if they know their read plan, they SHOULD declare which policy is most appropriate. -#### Read Policy `` +#### Read Policy `default` The default policy for the filesystem instance. Implementation/installation-specific. @@ -473,7 +483,45 @@ Strategies can include: Applications which know that the entire file is to be read from an opened stream SHOULD declare this read policy. -### Option: `fs.option.openfile.length` +#### Read Policy `columnar` + +Declare that the data is some (unspecific) columnar format and that read sequencies +should be expected to be random IO of whole column stripes/rowgroups, possibly fetching associated +column statistics first, to determine whether a scan of a stripe/rowgroup can +be skipped entirely. + +#### File Format Read Policies `parquet`, and `orc` + +These are read policies which declare that the file is of a specific columnar format +and that the input stream MAY be optimized for reading from these. + +In particular +* File footers may be fetched and cached. +* Vector IO and random IO SHOULD be expected. + +These read policies are a Hadoop 3.4.x addition, so applications and +libraries targeting multiple versions, SHOULD list their fallback +policies if these are not recognized, e.g. request a policy such as `parquet, vector, random`. + + +#### File format Read Policies `avro`, `json` and `csv` + +These are read policies which declare that the file is of a specific sequential format +and that the input stream MAY be optimized for reading from these. 
+ +These read policies are a Hadoop 3.4.x addition, so applications and +libraries targeting multiple versions, SHOULD list their fallback +policies if these are not recognized, e.g. request a policy such as `avro, sequential`. + + +#### File Format Read Policy `hbase` + +The file is an HBase table. +Use whatever policy is appropriate for these files, where `random` is +what should be used unless there are specific optimizations related to HBase. + + +### Option: `fs.option.openfile.length`: `Long` Declare the length of a file. @@ -499,7 +547,7 @@ If this option is used by the FileSystem implementation * If a file status is supplied along with a value in `fs.opt.openfile.length`; the file status values take precedence. -### Options: `fs.option.openfile.split.start` and `fs.option.openfile.split.end` +### Options: `fs.option.openfile.split.start` and `fs.option.openfile.split.end`: `Long` Declare the start and end of the split when a file has been split for processing in pieces. @@ -528,6 +576,21 @@ Therefore clients MUST be allowed to `seek()`/`read()` past the length set in `fs.option.openfile.split.end` if the file is actually longer than that value. +### Option: `fs.option.openfile.footer.cache`: `Boolean` + +Should a footer be cached? + +* This is a hint for clients which cache footers. +* If a format with known footers are is declared in the read policy, the + default footer cache policy of that file type SHALL be used. + +This option allows for that default policy to be overridden. +This is recommended if an application wishes to explicitly declare that Parquet/ORC files +are being read -but does not want or need the filesystem stream to cache any footer +because the application itself does such caching. +Duplicating footer caching is inefficient and if there is memory/memory cache conflict, +potentially counter-efficient. + ## S3A-specific options The S3A Connector supports custom options for readahead and seek policy. diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractBulkDeleteTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractBulkDeleteTest.java index daeb9d4808895..199790338b2df 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractBulkDeleteTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractBulkDeleteTest.java @@ -25,7 +25,6 @@ import java.util.Map; import org.assertj.core.api.Assertions; -import org.junit.Before; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -35,6 +34,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.wrappedio.WrappedIO; +import org.apache.hadoop.io.wrappedio.impl.DynamicWrappedIO; import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; @@ -43,6 +43,9 @@ /** * Contract tests for bulk delete operation. + * Many of these tests use {@link WrappedIO} wrappers through reflection, + * to validate the codepath we expect libraries designed to work with + * multiple versions to use. 
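As an illustration of that reflection-friendly codepath (not part of the patch), a library might page its deletes as follows. The element type of the returned failure list is not spelled out here; it is treated only as a collection whose emptiness signals success, matching the assertion style used elsewhere in this patch, and the demo class name is invented.

```java
import java.io.IOException;
import java.util.List;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.wrappedio.impl.DynamicWrappedIO;

public final class PagedDeleteDemo {

  private PagedDeleteDemo() {
  }

  /** Delete the given files under {@code base}, one page at a time. */
  public static void deleteAll(FileSystem fs, Path base, List<Path> files)
      throws IOException {
    DynamicWrappedIO io = new DynamicWrappedIO();
    int pageSize = io.bulkDelete_pageSize(fs, base);
    for (int i = 0; i < files.size(); i += pageSize) {
      List<Path> page = files.subList(i, Math.min(i + pageSize, files.size()));
      // the returned entries identify any paths which could not be deleted
      List<?> failures = io.bulkDelete_delete(fs, base, page);
      if (!failures.isEmpty()) {
        throw new IOException("Failed to delete " + failures);
      }
    }
  }
}
```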
*/ public abstract class AbstractContractBulkDeleteTest extends AbstractFSContractTestBase { @@ -66,11 +69,18 @@ public abstract class AbstractContractBulkDeleteTest extends AbstractFSContractT */ protected FileSystem fs; - @Before - public void setUp() throws Exception { + /** + * Reflection support. + */ + private DynamicWrappedIO dynamicWrappedIO; + + @Override + public void setup() throws Exception { + super.setup(); fs = getFileSystem(); basePath = path(getClass().getName()); - pageSize = WrappedIO.bulkDelete_pageSize(getFileSystem(), basePath); + dynamicWrappedIO = new DynamicWrappedIO(); + pageSize = dynamicWrappedIO.bulkDelete_pageSize(fs, basePath); fs.mkdirs(basePath); } @@ -103,15 +113,15 @@ public void testPathsSizeEqualsPageSizePrecondition() throws Exception { @Test public void testPathsSizeGreaterThanPageSizePrecondition() throws Exception { List listOfPaths = createListOfPaths(pageSize + 1, basePath); - intercept(IllegalArgumentException.class, - () -> bulkDelete_delete(getFileSystem(), basePath, listOfPaths)); + intercept(IllegalArgumentException.class, () -> + dynamicWrappedIO.bulkDelete_delete(getFileSystem(), basePath, listOfPaths)); } @Test public void testPathsSizeLessThanPageSizePrecondition() throws Exception { List listOfPaths = createListOfPaths(pageSize - 1, basePath); // Bulk delete call should pass with no exception. - bulkDelete_delete(getFileSystem(), basePath, listOfPaths); + dynamicWrappedIO.bulkDelete_delete(getFileSystem(), basePath, listOfPaths); } @Test @@ -285,7 +295,9 @@ public void testDeleteSamePathsMoreThanOnce() throws Exception { */ protected void pageSizePreconditionForTest(int size) { if (size > pageSize) { - skip("Test requires paths size less than or equal to page size: " + pageSize); + skip("Test requires paths size less than or equal to page size: " + + pageSize + + "; actual size is " + size); } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java index 66b1057f7bddf..739640aa34b86 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/ContractTestUtils.java @@ -30,6 +30,7 @@ import org.apache.hadoop.fs.PathCapabilities; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.StreamCapabilities; +import org.apache.hadoop.fs.statistics.IOStatistics; import org.apache.hadoop.io.ByteBufferPool; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.functional.RemoteIterators; @@ -651,6 +652,22 @@ public static void createFile(FileSystem fs, Path path, boolean overwrite, byte[] data) throws IOException { + file(fs, path, overwrite, data); + } + + /** + * Create a file, returning IOStatistics. + * @param fs filesystem + * @param path path to write + * @param overwrite overwrite flag + * @param data source dataset. 
Can be null + * @return any IOStatistics from the stream + * @throws IOException on any problem + */ + public static IOStatistics file(FileSystem fs, + Path path, + boolean overwrite, + byte[] data) throws IOException { FSDataOutputStream stream = fs.create(path, overwrite); try { if (data != null && data.length > 0) { @@ -660,6 +677,7 @@ public static void createFile(FileSystem fs, } finally { IOUtils.closeStream(stream); } + return stream.getIOStatistics(); } /** diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/wrappedio/impl/TestWrappedIO.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/wrappedio/impl/TestWrappedIO.java new file mode 100644 index 0000000000000..edbe06b8fe031 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/wrappedio/impl/TestWrappedIO.java @@ -0,0 +1,484 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.wrappedio.impl; + +import java.io.EOFException; +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.io.Serializable; +import java.nio.ByteBuffer; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.AbstractFSContractTestBase; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.contract.localfs.LocalFSContract; +import org.apache.hadoop.io.wrappedio.WrappedIO; +import org.apache.hadoop.util.Lists; + +import static java.nio.ByteBuffer.allocate; +import static org.apache.hadoop.fs.CommonPathCapabilities.BULK_DELETE; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; +import static org.apache.hadoop.fs.StreamCapabilities.IOSTATISTICS_CONTEXT; +import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; +import static org.apache.hadoop.fs.contract.ContractTestUtils.file; +import static org.apache.hadoop.util.dynamic.BindingUtils.loadClass; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.util.functional.Tuples.pair; + +/** + * Test WrappedIO operations. + *
<p>
    + * This is a contract test; the base class is bonded to the local fs; + * it is possible for other stores to implement themselves. + * All classes/constants are referenced here because they are part of the reflected + * API. If anything changes, application code breaks. + */ +public class TestWrappedIO extends AbstractFSContractTestBase { + + private static final Logger LOG = LoggerFactory.getLogger(TestWrappedIO.class); + + /** + * Dynamic wrapped IO. + */ + private DynamicWrappedIO io; + + /** + * Dynamically Wrapped IO statistics. + */ + private DynamicWrappedStatistics statistics; + + @Before + public void setup() throws Exception { + super.setup(); + + io = new DynamicWrappedIO(); + statistics = new DynamicWrappedStatistics(); + statistics.iostatisticsContext_reset(); + } + + @Override + public void teardown() throws Exception { + super.teardown(); + logIOStatisticsContext(); + } + + @Override + protected AbstractFSContract createContract(final Configuration conf) { + return new LocalFSContract(conf); + } + + /** + * Verify the {@link #clazz(String)} method raises an assertion + * if the class isn't found. + */ + @Test + public void testClassResolution() throws Throwable { + intercept(AssertionError.class, () -> clazz("no.such.class")); + } + + @Test + public void testAllMethodsFound() throws Throwable { + io.requireAllMethodsAvailable(); + } + + /** + * Test the openFile operation. + * Lots of calls are made to read the same file to save on setup/teardown + * overhead and to allow for some statistics collection. + */ + @Test + public void testOpenFileOperations() throws Throwable { + Path path = path("testOpenFileOperations"); + final int len = 100; + final byte[] data = dataset(len, 'a', 26); + final FileSystem fs = getFileSystem(); + // create the file and any statistics from it. 
+ final Serializable iostats = statistics.iostatisticsSnapshot_create( + file(fs, path, true, data)); + final FileStatus st = fs.getFileStatus(path); + final boolean ioStatisticsContextCapability; + + describe("reading file " + path); + try (FSDataInputStream in = DynamicWrappedIO.openFile(fs, + fs.getFileStatus(path), + DynamicWrappedIO.PARQUET_READ_POLICIES)) { + Assertions.assertThat(in.read()) + .describedAs("first byte") + .isEqualTo('a'); + ioStatisticsContextCapability = supportsIOStatisticsContext(in); + if (ioStatisticsContextCapability) { + LOG.info("Stream has IOStatisticsContext support: {}", in); + } else { + LOG.info("Stream has no IOStatisticsContext support: {}", in); + } + Assertions.assertThat(ioStatisticsContextCapability) + .describedAs("Retrieved stream capability %s from %s", + IOSTATISTICS_CONTEXT, in) + .isEqualTo(WrappedIO.streamCapabilities_hasCapability(in, IOSTATISTICS_CONTEXT)); + Assertions.assertThat(ioStatisticsContextCapability) + .describedAs("Actual stream capability %s from %s", + IOSTATISTICS_CONTEXT, in) + .isEqualTo(in.hasCapability(IOSTATISTICS_CONTEXT)); + retrieveAndAggregate(iostats, in); + } + + // open with a status + try (FSDataInputStream s = openFile(path, null, st, null, null)) { + s.seek(1); + s.read(); + + // and do a small amount of statistics collection + retrieveAndAggregate(iostats, s); + } + + // open with a length and random IO passed in the map + try (FSDataInputStream s = openFile(path, null, null, + (long) len, + map(pair(FS_OPTION_OPENFILE_READ_POLICY, "random")))) { + s.seek(len - 10); + s.read(); + retrieveAndAggregate(iostats, s); + } + + // now open a file with a length option greater than the file length + + // this string is used in exception logging to report where in the + // sequence an IOE was raised. + String validationPoint = "openfile call"; + + // open with a length and random IO passed in via the map + try (FSDataInputStream s = openFile(path, null, null, + null, + map(pair(FS_OPTION_OPENFILE_LENGTH, len * 2), + pair(FS_OPTION_OPENFILE_READ_POLICY, "random")))) { + + // fails if the file length was determined and fixed in open, + // and the stream doesn't permit seek() beyond the file length. + validationPoint = "seek()"; + s.seek(len + 10); + + validationPoint = "readFully()"; + + // readFully must fail. + s.readFully(len + 10, new byte[10], 0, 10); + Assertions.fail("Expected an EOFException but readFully from %s", s); + } catch (EOFException expected) { + // expected + LOG.info("EOF successfully raised, validation point: {}", validationPoint); + LOG.debug("stack", expected); + } + + // if we get this far, do a bulk delete + Assertions.assertThat(io.pathCapabilities_hasPathCapability(fs, path, BULK_DELETE)) + .describedAs("Path capability %s", BULK_DELETE) + .isTrue(); + + // first assert page size was picked up + Assertions.assertThat(io.bulkDelete_pageSize(fs, path)) + .describedAs("bulkDelete_pageSize for %s", path) + .isGreaterThanOrEqualTo(1); + + // then do the delete. + // pass in the parent path for the bulk delete to avoid HADOOP-19196 + Assertions + .assertThat(io.bulkDelete_delete(fs, path.getParent(), Lists.newArrayList(path))) + .describedAs("outcome of bulk delete") + .isEmpty(); + } + + @Test + public void testOpenFileNotFound() throws Throwable { + Path path = path("testOpenFileNotFound"); + + intercept(FileNotFoundException.class, () -> + io.fileSystem_openFile(getFileSystem(), path, null, null, null, null)); + } + + /** + * Test ByteBufferPositionedReadable. 
+ * This is implemented by HDFS but not much else; this test skips if the stream + * doesn't support it. + */ + @Test + public void testByteBufferPositionedReadable() throws Throwable { + Path path = path("testByteBufferPositionedReadable"); + final int len = 100; + final byte[] data = dataset(len, 'a', 26); + final FileSystem fs = getFileSystem(); + file(fs, path, true, data); + + describe("reading file " + path); + try (FSDataInputStream in = openFile(path, "random", null, (long) len, null)) { + // skip rest of test if API is not found. + if (io.byteBufferPositionedReadable_readFullyAvailable(in)) { + + LOG.info("ByteBufferPositionedReadable is available in {}", in); + ByteBuffer buffer = allocate(len); + io.byteBufferPositionedReadable_readFully(in, 0, buffer); + Assertions.assertThat(buffer.array()) + .describedAs("Full buffer read of %s", in) + .isEqualTo(data); + + + // read from offset (verifies the offset is passed in) + final int offset = 10; + final int range = len - offset; + buffer = allocate(range); + io.byteBufferPositionedReadable_readFully(in, offset, buffer); + byte[] byteArray = new byte[range]; + in.readFully(offset, byteArray); + Assertions.assertThat(buffer.array()) + .describedAs("Offset buffer read of %s", in) + .isEqualTo(byteArray); + + // now try to read past the EOF + // first verify the stream rejects this call directly + intercept(EOFException.class, () -> + in.readFully(len + 1, allocate(len))); + + // then do the same through the wrapped API + intercept(EOFException.class, () -> + io.byteBufferPositionedReadable_readFully(in, len + 1, allocate(len))); + } else { + LOG.info("ByteBufferPositionedReadable is not available in {}", in); + + // expect failures here + intercept(UnsupportedOperationException.class, () -> + io.byteBufferPositionedReadable_readFully(in, 0, allocate(len))); + } + } + } + + @Test + public void testFilesystemIOStatistics() throws Throwable { + + final FileSystem fs = getFileSystem(); + final Serializable iostats = statistics.iostatisticsSnapshot_retrieve(fs); + if (iostats != null) { + final String status = statistics.iostatisticsSnapshot_toJsonString(iostats); + final Serializable roundTripped = statistics.iostatisticsSnapshot_fromJsonString( + status); + + final Path path = methodPath(); + statistics.iostatisticsSnapshot_save(roundTripped, fs, path, true); + final Serializable loaded = statistics.iostatisticsSnapshot_load(fs, path); + + Assertions.assertThat(loaded) + .describedAs("loaded statistics from %s", path) + .isNotNull() + .satisfies(statistics::isIOStatisticsSnapshot); + LOG.info("loaded statistics {}", + statistics.iostatistics_toPrettyString(loaded)); + } + + } + + /** + * Retrieve any IOStatistics from a class, and aggregate it to the + * existing IOStatistics. + * @param iostats statistics to update + * @param object statistics source + */ + private void retrieveAndAggregate(final Serializable iostats, final Object object) { + statistics.iostatisticsSnapshot_aggregate(iostats, + statistics.iostatisticsSnapshot_retrieve(object)); + } + + /** + * Log IOStatisticsContext if enabled. 
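Before the logging helper below, a compact sketch of the thread-level statistics pattern these wrapper methods support. It uses only DynamicWrappedStatistics calls that appear in this test (iostatisticsContext_enabled/reset/snapshot and iostatistics_toPrettyString); the sketch class, its method and the Runnable parameter are invented for illustration.

    // Illustrative sketch, not part of the patch: capture per-thread IO statistics
    // around one unit of work using only the reflection-friendly wrapper methods.
    import java.io.Serializable;

    import org.apache.hadoop.io.wrappedio.impl.DynamicWrappedStatistics;

    public class ContextStatsSketch {

      /** Run some work and return a pretty-printed snapshot of its thread IO statistics. */
      public static String captureThreadStats(Runnable work) {
        DynamicWrappedStatistics stats = new DynamicWrappedStatistics();
        if (!stats.iostatisticsContext_enabled()) {
          // binding absent or context collection disabled: just run the work
          work.run();
          return "(IOStatisticsContext unavailable)";
        }
        stats.iostatisticsContext_reset();                  // start from a clean context
        work.run();                                         // filesystem IO happens here
        Serializable snapshot = stats.iostatisticsContext_snapshot();
        return stats.iostatistics_toPrettyString(snapshot);
      }
    }
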
+ */ + private void logIOStatisticsContext() { + // context IOStats + if (statistics.iostatisticsContext_enabled()) { + final Serializable iostats = statistics.iostatisticsContext_snapshot(); + LOG.info("Context: {}", + toPrettyString(iostats)); + } else { + LOG.info("IOStatisticsContext disabled"); + } + } + + private String toPrettyString(final Object iostats) { + return statistics.iostatistics_toPrettyString(iostats); + } + + /** + * Does the object update the thread-local IOStatisticsContext? + * @param o object to cast to StreamCapabilities and probe for the capability. + * @return true if the methods were found, the interface implemented and the probe successful. + */ + private boolean supportsIOStatisticsContext(final Object o) { + return io.streamCapabilities_hasCapability(o, IOSTATISTICS_CONTEXT); + } + + /** + * Open a file through dynamic invocation of {@link FileSystem#openFile(Path)}. + * @param path path + * @param policy read policy + * @param status optional file status + * @param length file length or null + * @param options nullable map of other options + * @return stream of the opened file + */ + private FSDataInputStream openFile( + final Path path, + final String policy, + final FileStatus status, + final Long length, + final Map options) throws Throwable { + + final FSDataInputStream stream = io.fileSystem_openFile( + getFileSystem(), path, policy, status, length, options); + Assertions.assertThat(stream) + .describedAs("null stream from openFile(%s)", path) + .isNotNull(); + return stream; + } + + /** + * Build a map from the tuples, which all have the value of + * their toString() method used. + * @param tuples object list (must be even) + * @return a map. + */ + private Map map(Map.Entry... tuples) { + Map map = new HashMap<>(); + for (Map.Entry tuple : tuples) { + map.put(tuple.getKey(), tuple.getValue().toString()); + } + return map; + } + + /** + * Load a class by name; includes an assertion that the class was loaded. + * @param className classname + * @return the class. + */ + private static Class clazz(final String className) { + final Class clazz = loadClass(className); + Assertions.assertThat(clazz) + .describedAs("Class %s not found", className) + .isNotNull(); + return clazz; + } + + /** + * Simulate a no binding and verify that everything downgrades as expected. 
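As a sketch of the downgrade path which the next test simulates: probe the availability flag before calling the bulk delete wrapper, and fall back to classic per-path deletes when the binding is missing. Only bulkDelete_available(), bulkDelete_delete() and FileSystem.delete() are calls taken from the patch; the class, method and parameter names are illustrative.

    // Illustrative sketch, not part of the patch: degrade gracefully when the
    // wrapped bulk delete binding cannot be loaded at runtime.
    import java.io.IOException;
    import java.util.Collection;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.wrappedio.impl.DynamicWrappedIO;

    public class BulkDeleteFallbackSketch {

      /** Delete one page of paths, falling back to per-path deletes if needed. */
      public static void deletePage(FileSystem fs, Path base, Collection<Path> paths)
          throws IOException {
        DynamicWrappedIO io = new DynamicWrappedIO();
        if (io.bulkDelete_available()) {
          // binding resolved: issue a single paged delete under the base path
          io.bulkDelete_delete(fs, base, paths);
        } else {
          // older hadoop on the classpath: delete entries one by one
          for (Path p : paths) {
            fs.delete(p, false);
          }
        }
      }
    }
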
+ */ + @Test + public void testNoWrappedClass() throws Throwable { + final DynamicWrappedIO broken = new DynamicWrappedIO(this.getClass().getName()); + + Assertions.assertThat(broken) + .describedAs("broken dynamic io %s", broken) + .matches(d -> !d.bulkDelete_available()) + .matches(d -> !d.byteBufferPositionedReadable_available()) + .matches(d -> !d.fileSystem_openFile_available()); + + final Path path = methodPath(); + final FileSystem fs = getFileSystem(); + // bulk deletes fail + intercept(UnsupportedOperationException.class, () -> + broken.bulkDelete_pageSize(fs, path)); + intercept(UnsupportedOperationException.class, () -> + broken.bulkDelete_delete(fs, path, Lists.newArrayList())); + + // openfile + intercept(UnsupportedOperationException.class, () -> + broken.fileSystem_openFile(fs, path, "", null, null, null)); + + // hasPathCapability downgrades + Assertions.assertThat(broken.pathCapabilities_hasPathCapability(fs, path, "anything")) + .describedAs("hasPathCapability(anything) via %s", broken) + .isFalse(); + + // byte buffer positioned readable + ContractTestUtils.touch(fs, path); + try (InputStream in = fs.open(path)) { + Assertions.assertThat(broken.byteBufferPositionedReadable_readFullyAvailable(in)) + .describedAs("byteBufferPositionedReadable_readFullyAvailable on %s", in) + .isFalse(); + intercept(UnsupportedOperationException.class, () -> + broken.byteBufferPositionedReadable_readFully(in, 0, allocate(1))); + } + + } + + /** + * Simulate a missing binding and verify that static methods fallback as required. + */ + @Test + public void testMissingClassFallbacks() throws Throwable { + Path path = path("testMissingClassFallbacks"); + final FileSystem fs = getFileSystem(); + file(fs, path, true, dataset(100, 'a', 26)); + final DynamicWrappedIO broken = new DynamicWrappedIO(this.getClass().getName()); + try (FSDataInputStream in = DynamicWrappedIO.openFileOnInstance(broken, + fs, fs.getFileStatus(path), DynamicWrappedIO.PARQUET_READ_POLICIES)) { + Assertions.assertThat(in.read()) + .describedAs("first byte") + .isEqualTo('a'); + } + } + + /** + * Verify that if an attempt is made to bond to a class where the methods + * exist but are not static, that this fails during the object construction rather + * than on invocation. + */ + @Test + public void testNonStaticMethods() throws Throwable { + intercept(IllegalStateException.class, () -> + new DynamicWrappedIO(NonStaticBulkDeleteMethods.class.getName())); + } + + /** + * This class declares the bulk delete methods, but as non-static; the expectation + * is that class loading will raise an {@link IllegalStateException}. + */ + private static final class NonStaticBulkDeleteMethods { + + public int bulkDelete_pageSize(FileSystem ignoredFs, Path ignoredPath) { + return 0; + } + + public List> bulkDelete_delete( + FileSystem ignoredFs, + Path ignoredBase, + Collection ignoredPaths) { + return null; + } + } +} \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/wrappedio/impl/TestWrappedStatistics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/wrappedio/impl/TestWrappedStatistics.java new file mode 100644 index 0000000000000..02486f9137fd7 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/wrappedio/impl/TestWrappedStatistics.java @@ -0,0 +1,496 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.wrappedio.impl; + +import java.io.File; +import java.io.IOException; +import java.io.Serializable; +import java.io.UncheckedIOException; +import java.util.Map; + +import org.assertj.core.api.Assertions; +import org.junit.Before; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileSystemTestHelper; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.statistics.IOStatisticsContext; +import org.apache.hadoop.fs.statistics.IOStatisticsSnapshot; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding; +import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore; +import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.apache.hadoop.util.functional.Tuples; + +import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticCounter; +import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfInvocation; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Unit tests for IOStatistics wrapping. + *
<p>
    + * This mixes direct use of the API to generate statistics data for + * the reflection accessors to retrieve and manipulate. + */ +public class TestWrappedStatistics extends AbstractHadoopTestBase { + + private static final Logger LOG = LoggerFactory.getLogger(TestWrappedIO.class); + + /** + * Stub Serializable. + */ + private static final Serializable SERIALIZABLE = new Serializable() {}; + + /** + * Dynamically Wrapped IO statistics. + */ + private final DynamicWrappedStatistics statistics = new DynamicWrappedStatistics(); + + /** + * Local FS. + */ + private LocalFileSystem local; + + /** + * Path to temporary file. + */ + private Path jsonPath; + + @Before + public void setUp() throws Exception { + String testDataDir = new FileSystemTestHelper().getTestRootDir(); + File tempDir = new File(testDataDir); + local = FileSystem.getLocal(new Configuration()); + // Temporary file. + File jsonFile = new File(tempDir, "snapshot.json"); + jsonPath = new Path(jsonFile.toURI()); + } + + /** + * The class must load, with all method groups available. + */ + @Test + public void testLoaded() throws Throwable { + Assertions.assertThat(statistics.ioStatisticsAvailable()) + .describedAs("IOStatistics class must be available") + .isTrue(); + Assertions.assertThat(statistics.ioStatisticsContextAvailable()) + .describedAs("IOStatisticsContext must be available") + .isTrue(); + } + + @Test + public void testCreateEmptySnapshot() throws Throwable { + Assertions.assertThat(statistics.iostatisticsSnapshot_create()) + .describedAs("iostatisticsSnapshot_create()") + .isInstanceOf(IOStatisticsSnapshot.class) + .satisfies(statistics::isIOStatisticsSnapshot) + .satisfies(statistics::isIOStatistics); + } + + @Test + public void testCreateNullSource() throws Throwable { + Assertions.assertThat(statistics.iostatisticsSnapshot_create(null)) + .describedAs("iostatisticsSnapshot_create(null)") + .isInstanceOf(IOStatisticsSnapshot.class); + } + + @Test + public void testCreateOther() throws Throwable { + Assertions.assertThat(statistics.iostatisticsSnapshot_create(null)) + .describedAs("iostatisticsSnapshot_create(null)") + .isInstanceOf(IOStatisticsSnapshot.class); + } + + @Test + public void testCreateNonIOStatsSource() throws Throwable { + intercept(ClassCastException.class, () -> + statistics.iostatisticsSnapshot_create("hello")); + } + + @Test + public void testRetrieveNullSource() throws Throwable { + Assertions.assertThat(statistics.iostatisticsSnapshot_retrieve(null)) + .describedAs("iostatisticsSnapshot_retrieve(null)") + .isNull(); + } + + @Test + public void testRetrieveNonIOStatsSource() throws Throwable { + Assertions.assertThat(statistics.iostatisticsSnapshot_retrieve(this)) + .describedAs("iostatisticsSnapshot_retrieve(this)") + .isNull(); + } + + /** + * Assert handling of json serialization for null value. + */ + @Test + public void testNullInstanceToJson() throws Throwable { + intercept(IllegalArgumentException.class, () -> toJsonString(null)); + } + + /** + * Assert handling of json serialization for wrong value. + */ + @Test + public void testWrongSerializableTypeToJson() throws Throwable { + intercept(IllegalArgumentException.class, () -> toJsonString(SERIALIZABLE)); + } + + /** + * Try to aggregate into the wrong type. + */ + @Test + public void testAggregateWrongSerializable() throws Throwable { + intercept(IllegalArgumentException.class, () -> + statistics.iostatisticsSnapshot_aggregate(SERIALIZABLE, + statistics.iostatisticsContext_getCurrent())); + } + + /** + * Try to save the wrong type. 
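A small sketch of the snapshot lifecycle which the following tests pick apart: retrieve a filesystem's statistics, save them as JSON and read them back. Every wrapper method used appears in this class; the sketch's own class and method names and the jsonPath parameter are assumptions for illustration.

    // Illustrative sketch, not part of the patch: round-trip a statistics snapshot
    // through the reflection-friendly save/load calls.
    import java.io.IOException;
    import java.io.Serializable;

    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.wrappedio.impl.DynamicWrappedStatistics;

    public class SnapshotRoundTripSketch {

      /** Snapshot a filesystem's statistics, persist them as JSON, then reload and print. */
      public static String saveAndReload(FileSystem fs, Path jsonPath) throws IOException {
        DynamicWrappedStatistics stats = new DynamicWrappedStatistics();
        Serializable snapshot = stats.iostatisticsSnapshot_retrieve(fs);
        if (snapshot == null) {
          return "(filesystem does not publish IOStatistics)";
        }
        stats.iostatisticsSnapshot_save(snapshot, fs, jsonPath, true);   // overwrite if present
        Serializable loaded = stats.iostatisticsSnapshot_load(fs, jsonPath);
        return stats.iostatistics_toPrettyString(loaded);
      }
    }
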
+ */ + @Test + public void testSaveWrongSerializable() throws Throwable { + intercept(IllegalArgumentException.class, () -> + statistics.iostatisticsSnapshot_save(SERIALIZABLE, local, jsonPath, true)); + } + + /** + * Test all the IOStatisticsContext operations, including + * JSON round trip of the statistics. + */ + @Test + public void testIOStatisticsContextMethods() { + + Assertions.assertThat(statistics.ioStatisticsContextAvailable()) + .describedAs("ioStatisticsContextAvailable() of %s", statistics) + .isTrue(); + Assertions.assertThat(statistics.iostatisticsContext_enabled()) + .describedAs("iostatisticsContext_enabled() of %s", statistics) + .isTrue(); + + // get the current context, validate it + final Object current = statistics.iostatisticsContext_getCurrent(); + Assertions.assertThat(current) + .describedAs("IOStatisticsContext") + .isInstanceOf(IOStatisticsContext.class) + .satisfies(statistics::isIOStatisticsSource); + + // take a snapshot + final Serializable snapshot = statistics.iostatisticsContext_snapshot(); + Assertions.assertThat(snapshot) + .satisfies(statistics::isIOStatisticsSnapshot); + + // use the retrieve API to create a snapshot from the IOStatisticsSource interface + final Serializable retrieved = statistics.iostatisticsSnapshot_retrieve(current); + assertJsonEqual(retrieved, snapshot); + + // to/from JSON + final String json = toJsonString(snapshot); + LOG.info("Serialized to json {}", json); + final Serializable snap2 = statistics.iostatisticsSnapshot_fromJsonString(json); + assertJsonEqual(snap2, snapshot); + + // get the values + statistics.iostatistics_counters(snapshot); + statistics.iostatistics_gauges(snapshot); + statistics.iostatistics_minimums(snapshot); + statistics.iostatistics_maximums(snapshot); + statistics.iostatistics_means(snapshot); + + // set to null + statistics.iostatisticsContext_setThreadIOStatisticsContext(null); + + Assertions.assertThat(statistics.iostatisticsContext_getCurrent()) + .describedAs("current IOStatisticsContext after resetting") + .isNotSameAs(current); + + // then set to the "current" value + statistics.iostatisticsContext_setThreadIOStatisticsContext(current); + + Assertions.assertThat(statistics.iostatisticsContext_getCurrent()) + .describedAs("current IOStatisticsContext after resetting") + .isSameAs(current); + + // and reset + statistics.iostatisticsContext_reset(); + + // now aggregate the retrieved stats into it. + Assertions.assertThat(statistics.iostatisticsContext_aggregate(retrieved)) + .describedAs("iostatisticsContext_aggregate of %s", retrieved) + .isTrue(); + } + + + /** + * Perform some real IOStatisticsContext operations. + */ + @Test + public void testIOStatisticsContextInteraction() { + statistics.iostatisticsContext_reset(); + + // create a snapshot with a counter + final IOStatisticsSnapshot snapshot = + (IOStatisticsSnapshot) statistics.iostatisticsSnapshot_create(); + snapshot.setCounter("c1", 10); + + // aggregate twice + statistics.iostatisticsContext_aggregate(snapshot); + statistics.iostatisticsContext_aggregate(snapshot); + + // take a snapshot + final IOStatisticsSnapshot snap2 = + (IOStatisticsSnapshot) statistics.iostatisticsContext_snapshot(); + + // assert the valuue + assertThatStatisticCounter(snap2, "c1") + .isEqualTo(20); + } + + /** + * Expect that two IOStatisticsInstances serialized to exactly the same JSON. + * @param actual actual value. 
+ * @param expected expected value + */ + private void assertJsonEqual(Serializable actual, Serializable expected) { + Assertions.assertThat(toJsonString(actual)) + .describedAs("JSON format string of %s", actual) + .isEqualTo(toJsonString(expected)); + } + + /** + * Convert a snapshot to a JSON string. + * @param snapshot IOStatisticsSnapshot + * @return a JSON serialization. + */ + private String toJsonString(final Serializable snapshot) { + return statistics.iostatisticsSnapshot_toJsonString(snapshot); + } + + /** + * Create an empty snapshot, save it then load back. + */ + @Test + public void testLocalSaveOfEmptySnapshot() throws Throwable { + final Serializable snapshot = statistics.iostatisticsSnapshot_create(); + statistics.iostatisticsSnapshot_save(snapshot, local, jsonPath, true); + final Serializable loaded = statistics.iostatisticsSnapshot_load(local, jsonPath); + LOG.info("loaded statistics {}", + statistics.iostatistics_toPrettyString(loaded)); + + // now try to save over the same path with overwrite false + intercept(UncheckedIOException.class, () -> + statistics.iostatisticsSnapshot_save(snapshot, local, jsonPath, false)); + + // after delete the load fails + local.delete(jsonPath, false); + intercept(UncheckedIOException.class, () -> + statistics.iostatisticsSnapshot_load(local, jsonPath)); + } + + /** + * Build up a complex statistic and assert extraction on it. + */ + @Test + public void testStatisticExtraction() throws Throwable { + + final IOStatisticsStore store = IOStatisticsBinding.iostatisticsStore() + .withCounters("c1", "c2") + .withGauges("g1") + .withDurationTracking("d1", "d2") + .build(); + + store.incrementCounter("c1"); + store.setGauge("g1", 10); + trackDurationOfInvocation(store, "d1", () -> + sleep(20)); + store.trackDuration("d1").close(); + + intercept(IOException.class, () -> + trackDurationOfInvocation(store, "d2", () -> { + sleep(10); + throw new IOException("generated"); + })); + + final Serializable snapshot = statistics.iostatisticsSnapshot_create(store); + + + // complex round trip + statistics.iostatisticsSnapshot_save(snapshot, local, jsonPath, true); + final Serializable loaded = statistics.iostatisticsSnapshot_load(local, jsonPath); + LOG.info("loaded statistics {}", + statistics.iostatistics_toPrettyString(loaded)); + assertJsonEqual(loaded, snapshot); + + + // get the values + Assertions.assertThat(statistics.iostatistics_counters(loaded)) + .containsOnlyKeys("c1", "c2", + "d1", "d1.failures", + "d2", "d2.failures") + .containsEntry("c1", 1L) + .containsEntry("d1", 2L) + .containsEntry("d2", 1L); + Assertions.assertThat(statistics.iostatistics_gauges(loaded)) + .containsOnlyKeys("g1") + .containsEntry("g1", 10L); + + final Map minimums = statistics.iostatistics_minimums(snapshot); + Assertions.assertThat(minimums) + .containsEntry("d1.min", 0L); + final long d2FailuresMin = minimums.get("d2.failures.min"); + Assertions.assertThat(d2FailuresMin) + .describedAs("min d2.failures") + .isGreaterThan(0); + final Map maximums = statistics.iostatistics_maximums(snapshot); + Assertions.assertThat(maximums) + .containsEntry("d2.failures.max", d2FailuresMin); + final long d1Max = maximums.get("d1.max"); + + + final Map> means = + statistics.iostatistics_means(snapshot); + + Assertions.assertThat(means) + .containsEntry("d1.mean", Tuples.pair(2L, d1Max)) + .containsEntry("d2.failures.mean", Tuples.pair(1L, d2FailuresMin)); + + } + + /** + * Sleep for some milliseconds; interruptions are swallowed. 
+ * @param millis time in milliseconds + */ + private static void sleep(final int millis) { + try { + Thread.sleep(millis); + } catch (InterruptedException ignored) { + + } + } + + /** + * Bind to an empty class to simulate a runtime where none of the methods were found + * through reflection, and verify the expected failure semantics. + */ + @Test + public void testMissingIOStatisticsMethods() throws Throwable { + final DynamicWrappedStatistics missing = + new DynamicWrappedStatistics(StubClass.class.getName()); + + // probes which just return false + Assertions.assertThat(missing.ioStatisticsAvailable()) + .describedAs("ioStatisticsAvailable() of %s", missing) + .isFalse(); + + // probes of type of argument which return false if the + // methods are missing + Assertions.assertThat(missing.isIOStatistics(SERIALIZABLE)) + .describedAs("isIOStatistics() of %s", missing) + .isFalse(); + Assertions.assertThat(missing.isIOStatisticsSource(SERIALIZABLE)) + .describedAs("isIOStatisticsSource() of %s", missing) + .isFalse(); + Assertions.assertThat(missing.isIOStatisticsSnapshot(SERIALIZABLE)) + .describedAs("isIOStatisticsSnapshot() of %s", missing) + .isFalse(); + + // operations which raise exceptions + intercept(UnsupportedOperationException.class, () -> + missing.iostatisticsSnapshot_create()); + + intercept(UnsupportedOperationException.class, () -> + missing.iostatisticsSnapshot_create(this)); + + intercept(UnsupportedOperationException.class, () -> + missing.iostatisticsSnapshot_aggregate(SERIALIZABLE, this)); + + intercept(UnsupportedOperationException.class, () -> + missing.iostatisticsSnapshot_fromJsonString("{}")); + intercept(UnsupportedOperationException.class, () -> + missing.iostatisticsSnapshot_toJsonString(SERIALIZABLE)); + + final Path path = new Path("/"); + + intercept(UnsupportedOperationException.class, () -> + missing.iostatisticsSnapshot_load(local, path)); + + intercept(UnsupportedOperationException.class, () -> + missing.iostatisticsSnapshot_save(SERIALIZABLE, local, path, true)); + + intercept(UnsupportedOperationException.class, () -> + missing.iostatisticsSnapshot_retrieve(this)); + + intercept(UnsupportedOperationException.class, () -> + missing.iostatistics_toPrettyString(this)); + + } + + + /** + * Empty class to bind against and ensure all methods fail to bind. + */ + private static final class StubClass { } + + /** + * Bind to {@link StubClass} to simulate a runtime where none of the methods were found + * through reflection, and verify the expected failure semantics. + */ + @Test + public void testMissingContextMethods() throws Throwable { + final DynamicWrappedStatistics missing = + new DynamicWrappedStatistics(StubClass.class.getName()); + + // probes which just return false + Assertions.assertThat(missing.ioStatisticsContextAvailable()) + .describedAs("ioStatisticsContextAvailable() of %s", missing) + .isFalse(); + Assertions.assertThat(missing.iostatisticsContext_enabled()) + .describedAs("iostatisticsContext_enabled() of %s", missing) + .isFalse(); + + // operations which raise exceptions + intercept(UnsupportedOperationException.class, missing::iostatisticsContext_reset); + intercept(UnsupportedOperationException.class, missing::iostatisticsContext_getCurrent); + intercept(UnsupportedOperationException.class, missing::iostatisticsContext_snapshot); + intercept(UnsupportedOperationException.class, () -> + missing.iostatisticsContext_setThreadIOStatisticsContext(null)); + } + + + /** + * Validate class checks in {@code iostatisticsSnapshot_aggregate()}. 
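Before the casting check below, a sketch of the aggregation pattern these assertions protect: pull whatever statistics a stream exposes and merge them into a running snapshot. The wrapper calls mirror retrieveAndAggregate() in TestWrappedIO; everything else in the sketch (class, method and variable names) is illustrative.

    // Illustrative sketch, not part of the patch: merge a stream's statistics
    // into an aggregate snapshot after reading from it.
    import java.io.IOException;
    import java.io.Serializable;

    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.wrappedio.impl.DynamicWrappedStatistics;

    public class AggregationSketch {

      /** Read a file and fold any statistics its stream publishes into a new snapshot. */
      public static Serializable readAndAggregate(FileSystem fs, Path path) throws IOException {
        DynamicWrappedStatistics stats = new DynamicWrappedStatistics();
        Serializable totals = stats.iostatisticsSnapshot_create();
        try (FSDataInputStream in = fs.open(path)) {
          in.read();                       // real read work would go here
          Serializable streamStats = stats.iostatisticsSnapshot_retrieve(in);
          if (streamStats != null) {       // null when the stream has no statistics
            stats.iostatisticsSnapshot_aggregate(totals, streamStats);
          }
        }
        return totals;
      }
    }
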
+ */ + @Test + public void testStatisticCasting() throws Throwable { + Serializable iostats = statistics.iostatisticsSnapshot_create(null); + final String wrongType = "wrong type"; + intercept(IllegalArgumentException.class, () -> + statistics.iostatisticsSnapshot_aggregate(iostats, wrongType)); + } + +} + + diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/dynamic/Concatenator.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/dynamic/Concatenator.java new file mode 100644 index 0000000000000..1cf7daef9a0be --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/dynamic/Concatenator.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.util.dynamic; + +/** + * This is a class for testing {@link DynMethods} and {@code DynConstructors}. + *
<p>
    + * Derived from {@code org.apache.parquet.util} test suites. + */ +public class Concatenator { + + public static class SomeCheckedException extends Exception { + } + + private String sep = ""; + + public Concatenator() { + } + + public Concatenator(String sep) { + this.sep = sep; + } + + private Concatenator(char sep) { + this.sep = String.valueOf(sep); + } + + public Concatenator(Exception e) throws Exception { + throw e; + } + + public static Concatenator newConcatenator(String sep) { + return new Concatenator(sep); + } + + private void setSeparator(String value) { + this.sep = value; + } + + public String concat(String left, String right) { + return left + sep + right; + } + + public String concat(String left, String middle, String right) { + return left + sep + middle + sep + right; + } + + public String concat(Exception e) throws Exception { + throw e; + } + + public String concat(String... strings) { + if (strings.length >= 1) { + StringBuilder sb = new StringBuilder(); + sb.append(strings[0]); + for (int i = 1; i < strings.length; i += 1) { + sb.append(sep); + sb.append(strings[i]); + } + return sb.toString(); + } + return null; + } + + public static String cat(String... strings) { + return new Concatenator().concat(strings); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/dynamic/TestDynConstructors.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/dynamic/TestDynConstructors.java new file mode 100644 index 0000000000000..4d7a2db641703 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/dynamic/TestDynConstructors.java @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.util.dynamic; + +import java.util.concurrent.Callable; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Derived from {@code org.apache.parquet.util} test suites. 
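For readers new to this utility, a sketch of the constructor-binding idiom the following tests cover: impl() entries which cannot be resolved are skipped and the first match wins. The builder calls mirror those in the tests; the sketch class and method names are invented, and the raw Ctor type is used only to keep the example short.

    // Illustrative sketch, not part of the patch: bind to the first resolvable
    // constructor and instantiate through it.
    import org.apache.hadoop.util.dynamic.Concatenator;
    import org.apache.hadoop.util.dynamic.DynConstructors;

    public class DynConstructorsSketch {

      /** Build a Concatenator("-") via the first constructor implementation found. */
      public static Concatenator newDashConcatenator() throws Exception {
        DynConstructors.Ctor sepCtor = new DynConstructors.Builder()
            .impl("not.a.RealClass", String.class)    // skipped: class not on the classpath
            .impl(Concatenator.class, String.class)   // resolved: first match wins
            .buildChecked();
        // concat("a", "b") on the result returns "a-b"
        return (Concatenator) sepCtor.newInstanceChecked("-");
      }
    }
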
+ */ +public class TestDynConstructors extends AbstractHadoopTestBase { + + @Test + public void testNoImplCall() throws Exception { + final DynConstructors.Builder builder = new DynConstructors.Builder(); + + intercept(NoSuchMethodException.class, + (Callable) builder::buildChecked); + + intercept(RuntimeException.class, () -> + builder.build()); + } + + @Test + public void testMissingClass() throws Exception { + final DynConstructors.Builder builder = new DynConstructors.Builder() + .impl("not.a.RealClass"); + + intercept(NoSuchMethodException.class, + (Callable) builder::buildChecked); + + intercept(RuntimeException.class, (Callable) builder::build); + } + + @Test + public void testMissingConstructor() throws Exception { + final DynConstructors.Builder builder = new DynConstructors.Builder() + .impl(Concatenator.class, String.class, String.class); + + intercept(NoSuchMethodException.class, + (Callable) builder::buildChecked); + + intercept(RuntimeException.class, + (Callable) builder::build); + } + + @Test + public void testFirstImplReturned() throws Exception { + final DynConstructors.Ctor sepCtor = new DynConstructors.Builder() + .impl("not.a.RealClass", String.class) + .impl(Concatenator.class, String.class) + .impl(Concatenator.class) + .buildChecked(); + + Concatenator dashCat = sepCtor.newInstanceChecked("-"); + Assert.assertEquals("Should construct with the 1-arg version", + "a-b", dashCat.concat("a", "b")); + + intercept(IllegalArgumentException.class, () -> + sepCtor.newInstanceChecked("/", "-")); + + intercept(IllegalArgumentException.class, () -> + sepCtor.newInstance("/", "-")); + + DynConstructors.Ctor defaultCtor = new DynConstructors.Builder() + .impl("not.a.RealClass", String.class) + .impl(Concatenator.class) + .impl(Concatenator.class, String.class) + .buildChecked(); + + Concatenator cat = defaultCtor.newInstanceChecked(); + Assert.assertEquals("Should construct with the no-arg version", + "ab", cat.concat("a", "b")); + } + + @Test + public void testExceptionThrown() throws Exception { + final Concatenator.SomeCheckedException exc = new Concatenator.SomeCheckedException(); + final DynConstructors.Ctor sepCtor = new DynConstructors.Builder() + .impl("not.a.RealClass", String.class) + .impl(Concatenator.class, Exception.class) + .buildChecked(); + + intercept(Concatenator.SomeCheckedException.class, () -> + sepCtor.newInstanceChecked(exc)); + + intercept(RuntimeException.class, () -> sepCtor.newInstance(exc)); + } + + @Test + public void testStringClassname() throws Exception { + final DynConstructors.Ctor sepCtor = new DynConstructors.Builder() + .impl(Concatenator.class.getName(), String.class) + .buildChecked(); + + Assert.assertNotNull("Should find 1-arg constructor", sepCtor.newInstance("-")); + } + + @Test + public void testHiddenMethod() throws Exception { + intercept(NoSuchMethodException.class, () -> + new DynMethods.Builder("setSeparator") + .impl(Concatenator.class, char.class) + .buildChecked()); + + final DynConstructors.Ctor sepCtor = new DynConstructors.Builder() + .hiddenImpl(Concatenator.class.getName(), char.class) + .buildChecked(); + + Assert.assertNotNull("Should find hidden ctor with hiddenImpl", sepCtor); + + Concatenator slashCat = sepCtor.newInstanceChecked('/'); + + Assert.assertEquals("Should use separator /", + "a/b", slashCat.concat("a", "b")); + } + + @Test + public void testBind() throws Exception { + final DynConstructors.Ctor ctor = new DynConstructors.Builder() + .impl(Concatenator.class.getName()) + .buildChecked(); + + 
Assert.assertTrue("Should always be static", ctor.isStatic()); + + intercept(IllegalStateException.class, () -> + ctor.bind(null)); + } + + @Test + public void testInvoke() throws Exception { + final DynMethods.UnboundMethod ctor = new DynConstructors.Builder() + .impl(Concatenator.class.getName()) + .buildChecked(); + + intercept(IllegalArgumentException.class, () -> + ctor.invokeChecked("a")); + + intercept(IllegalArgumentException.class, () -> + ctor.invoke("a")); + + Assert.assertNotNull("Should allow invokeChecked(null, ...)", + ctor.invokeChecked(null)); + Assert.assertNotNull("Should allow invoke(null, ...)", + ctor.invoke(null)); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/dynamic/TestDynMethods.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/dynamic/TestDynMethods.java new file mode 100644 index 0000000000000..b774a95f8563b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/dynamic/TestDynMethods.java @@ -0,0 +1,320 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hadoop.util.dynamic; + +import java.util.concurrent.Callable; + +import org.junit.Assert; +import org.junit.Test; + +import org.apache.hadoop.test.AbstractHadoopTestBase; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Copied from {@code org.apache.parquet.util} test suites. 
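A companion sketch for the method-binding side, mirroring the invoke/bind calls exercised below; apart from the DynMethods and Concatenator classes from this patch, all names in the sketch are illustrative.

    // Illustrative sketch, not part of the patch: resolve concat(String, String)
    // dynamically, then invoke it both unbound and bound to a receiver.
    import org.apache.hadoop.util.dynamic.Concatenator;
    import org.apache.hadoop.util.dynamic.DynMethods;

    public class DynMethodsSketch {

      public static void demo() throws NoSuchMethodException {
        DynMethods.UnboundMethod concat = new DynMethods.Builder("concat")
            .impl("not.a.RealClass", String.class, String.class)   // skipped: not found
            .impl(Concatenator.class, String.class, String.class)  // resolved
            .buildChecked();

        Concatenator dashed = new Concatenator("-");
        concat.invoke(dashed, "a", "b");                 // "a-b"

        // the same unbound method can be bound to a receiver up front
        DynMethods.BoundMethod underscored = concat.bind(new Concatenator("_"));
        underscored.invoke("a", "b");                    // "a_b"
      }
    }
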
+ */ +public class TestDynMethods extends AbstractHadoopTestBase { + + @Test + public void testNoImplCall() throws Exception { + final DynMethods.Builder builder = new DynMethods.Builder("concat"); + + intercept(NoSuchMethodException.class, + (Callable) builder::buildChecked); + + intercept(RuntimeException.class, + (Callable) builder::build); + } + + @Test + public void testMissingClass() throws Exception { + final DynMethods.Builder builder = new DynMethods.Builder("concat") + .impl("not.a.RealClass", String.class, String.class); + + intercept(NoSuchMethodException.class, + (Callable) builder::buildChecked); + + intercept(RuntimeException.class, () -> + builder.build()); + } + + @Test + public void testMissingMethod() throws Exception { + final DynMethods.Builder builder = new DynMethods.Builder("concat") + .impl(Concatenator.class, "cat2strings", String.class, String.class); + + intercept(NoSuchMethodException.class, + (Callable) builder::buildChecked); + + intercept(RuntimeException.class, () -> + builder.build()); + + } + + @Test + public void testFirstImplReturned() throws Exception { + Concatenator obj = new Concatenator("-"); + DynMethods.UnboundMethod cat2 = new DynMethods.Builder("concat") + .impl("not.a.RealClass", String.class, String.class) + .impl(Concatenator.class, String.class, String.class) + .impl(Concatenator.class, String.class, String.class, String.class) + .buildChecked(); + + Assert.assertEquals("Should call the 2-arg version successfully", + "a-b", cat2.invoke(obj, "a", "b")); + + Assert.assertEquals("Should ignore extra arguments", + "a-b", cat2.invoke(obj, "a", "b", "c")); + + DynMethods.UnboundMethod cat3 = new DynMethods.Builder("concat") + .impl("not.a.RealClass", String.class, String.class) + .impl(Concatenator.class, String.class, String.class, String.class) + .impl(Concatenator.class, String.class, String.class) + .build(); + + Assert.assertEquals("Should call the 3-arg version successfully", + "a-b-c", cat3.invoke(obj, "a", "b", "c")); + + Assert.assertEquals("Should call the 3-arg version null padding", + "a-b-null", cat3.invoke(obj, "a", "b")); + } + + @Test + public void testVarArgs() throws Exception { + DynMethods.UnboundMethod cat = new DynMethods.Builder("concat") + .impl(Concatenator.class, String[].class) + .buildChecked(); + + Assert.assertEquals("Should use the varargs version", "abcde", + cat.invokeChecked( + new Concatenator(), + (Object) new String[]{"a", "b", "c", "d", "e"})); + + Assert.assertEquals("Should use the varargs version", "abcde", + cat.bind(new Concatenator()) + .invokeChecked((Object) new String[]{"a", "b", "c", "d", "e"})); + } + + @Test + public void testIncorrectArguments() throws Exception { + final Concatenator obj = new Concatenator("-"); + final DynMethods.UnboundMethod cat = new DynMethods.Builder("concat") + .impl("not.a.RealClass", String.class, String.class) + .impl(Concatenator.class, String.class, String.class) + .buildChecked(); + + intercept(IllegalArgumentException.class, () -> + cat.invoke(obj, 3, 4)); + + intercept(IllegalArgumentException.class, () -> + cat.invokeChecked(obj, 3, 4)); + } + + @Test + public void testExceptionThrown() throws Exception { + final Concatenator.SomeCheckedException exc = new Concatenator.SomeCheckedException(); + final Concatenator obj = new Concatenator("-"); + final DynMethods.UnboundMethod cat = new DynMethods.Builder("concat") + .impl("not.a.RealClass", String.class, String.class) + .impl(Concatenator.class, Exception.class) + .buildChecked(); + + 
intercept(Concatenator.SomeCheckedException.class, () -> + cat.invokeChecked(obj, exc)); + + intercept(RuntimeException.class, () -> + cat.invoke(obj, exc)); + } + + @Test + public void testNameChange() throws Exception { + Concatenator obj = new Concatenator("-"); + DynMethods.UnboundMethod cat = new DynMethods.Builder("cat") + .impl(Concatenator.class, "concat", String.class, String.class) + .buildChecked(); + + Assert.assertEquals("Should find 2-arg concat method", + "a-b", cat.invoke(obj, "a", "b")); + } + + @Test + public void testStringClassname() throws Exception { + Concatenator obj = new Concatenator("-"); + DynMethods.UnboundMethod cat = new DynMethods.Builder("concat") + .impl(Concatenator.class.getName(), String.class, String.class) + .buildChecked(); + + Assert.assertEquals("Should find 2-arg concat method", + "a-b", cat.invoke(obj, "a", "b")); + } + + @Test + public void testHiddenMethod() throws Exception { + Concatenator obj = new Concatenator("-"); + + intercept(NoSuchMethodException.class, () -> + new DynMethods.Builder("setSeparator") + .impl(Concatenator.class, String.class) + .buildChecked()); + + DynMethods.UnboundMethod changeSep = new DynMethods.Builder("setSeparator") + .hiddenImpl(Concatenator.class, String.class) + .buildChecked(); + + Assert.assertNotNull("Should find hidden method with hiddenImpl", + changeSep); + + changeSep.invokeChecked(obj, "/"); + + Assert.assertEquals("Should use separator / instead of -", + "a/b", obj.concat("a", "b")); + } + + @Test + public void testBoundMethod() throws Exception { + DynMethods.UnboundMethod cat = new DynMethods.Builder("concat") + .impl(Concatenator.class, String.class, String.class) + .buildChecked(); + + // Unbound methods can be bound multiple times + DynMethods.BoundMethod dashCat = cat.bind(new Concatenator("-")); + DynMethods.BoundMethod underCat = cat.bind(new Concatenator("_")); + + Assert.assertEquals("Should use '-' object without passing", + "a-b", dashCat.invoke("a", "b")); + Assert.assertEquals("Should use '_' object without passing", + "a_b", underCat.invoke("a", "b")); + + DynMethods.BoundMethod slashCat = new DynMethods.Builder("concat") + .impl(Concatenator.class, String.class, String.class) + .buildChecked(new Concatenator("/")); + + Assert.assertEquals("Should use bound object from builder without passing", + "a/b", slashCat.invoke("a", "b")); + } + + @Test + public void testBindStaticMethod() throws Exception { + final DynMethods.Builder builder = new DynMethods.Builder("cat") + .impl(Concatenator.class, String[].class); + + intercept(IllegalStateException.class, () -> + builder.buildChecked(new Concatenator())); + + intercept(IllegalStateException.class, () -> + builder.build(new Concatenator())); + + final DynMethods.UnboundMethod staticCat = builder.buildChecked(); + Assert.assertTrue("Should be static", staticCat.isStatic()); + + intercept(IllegalStateException.class, () -> + staticCat.bind(new Concatenator())); + } + + @Test + public void testStaticMethod() throws Exception { + DynMethods.StaticMethod staticCat = new DynMethods.Builder("cat") + .impl(Concatenator.class, String[].class) + .buildStaticChecked(); + + Assert.assertEquals("Should call varargs static method cat(String...)", + "abcde", staticCat.invokeChecked( + (Object) new String[]{"a", "b", "c", "d", "e"})); + } + + @Test + public void testNonStaticMethod() throws Exception { + final DynMethods.Builder builder = new DynMethods.Builder("concat") + .impl(Concatenator.class, String.class, String.class); + + 
intercept(IllegalStateException.class, builder::buildStatic); + + intercept(IllegalStateException.class, builder::buildStaticChecked); + + final DynMethods.UnboundMethod cat2 = builder.buildChecked(); + Assert.assertFalse("concat(String,String) should not be static", + cat2.isStatic()); + + intercept(IllegalStateException.class, cat2::asStatic); + } + + @Test + public void testConstructorImpl() throws Exception { + final DynMethods.Builder builder = new DynMethods.Builder("newConcatenator") + .ctorImpl(Concatenator.class, String.class) + .impl(Concatenator.class, String.class); + + DynMethods.UnboundMethod newConcatenator = builder.buildChecked(); + Assert.assertTrue("Should find constructor implementation", + newConcatenator instanceof DynConstructors.Ctor); + Assert.assertTrue("Constructor should be a static method", + newConcatenator.isStatic()); + Assert.assertFalse("Constructor should not be NOOP", + newConcatenator.isNoop()); + + // constructors cannot be bound + intercept(IllegalStateException.class, () -> + builder.buildChecked(new Concatenator())); + intercept(IllegalStateException.class, () -> + builder.build(new Concatenator())); + + Concatenator concatenator = newConcatenator.asStatic().invoke("*"); + Assert.assertEquals("Should function as a concatenator", + "a*b", concatenator.concat("a", "b")); + + concatenator = newConcatenator.asStatic().invokeChecked("@"); + Assert.assertEquals("Should function as a concatenator", + "a@b", concatenator.concat("a", "b")); + } + + @Test + public void testConstructorImplAfterFactoryMethod() throws Exception { + DynMethods.UnboundMethod newConcatenator = new DynMethods.Builder("newConcatenator") + .impl(Concatenator.class, String.class) + .ctorImpl(Concatenator.class, String.class) + .buildChecked(); + + Assert.assertFalse("Should find factory method before constructor method", + newConcatenator instanceof DynConstructors.Ctor); + } + + @Test + public void testNoop() throws Exception { + // noop can be unbound, bound, or static + DynMethods.UnboundMethod noop = new DynMethods.Builder("concat") + .impl("not.a.RealClass", String.class, String.class) + .orNoop() + .buildChecked(); + + Assert.assertTrue("No implementation found, should return NOOP", + noop.isNoop()); + Assert.assertNull("NOOP should always return null", + noop.invoke(new Concatenator(), "a")); + Assert.assertNull("NOOP can be called with null", + noop.invoke(null, "a")); + Assert.assertNull("NOOP can be bound", + noop.bind(new Concatenator()).invoke("a")); + Assert.assertNull("NOOP can be bound to null", + noop.bind(null).invoke("a")); + Assert.assertNull("NOOP can be static", + noop.asStatic().invoke("a")); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestFunctionalIO.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestFunctionalIO.java index 25bdab8ea3203..186483ed106e4 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestFunctionalIO.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestFunctionalIO.java @@ -18,8 +18,10 @@ package org.apache.hadoop.util.functional; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.UncheckedIOException; +import java.util.function.Function; import org.assertj.core.api.Assertions; import org.junit.Test; @@ -28,6 +30,7 @@ import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static 
org.apache.hadoop.util.functional.FunctionalIO.extractIOExceptions; +import static org.apache.hadoop.util.functional.FunctionalIO.toUncheckedFunction; import static org.apache.hadoop.util.functional.FunctionalIO.toUncheckedIOExceptionSupplier; import static org.apache.hadoop.util.functional.FunctionalIO.uncheckIOExceptions; @@ -94,4 +97,15 @@ public void testUncheckAndExtract() throws Throwable { .isSameAs(raised); } + @Test + public void testUncheckedFunction() throws Throwable { + // java function which should raise a FileNotFoundException + // wrapped into an unchecked exeption + final Function fn = + toUncheckedFunction((String a) -> { + throw new FileNotFoundException(a); + }); + intercept(UncheckedIOException.class, "missing", () -> + fn.apply("missing")); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/resources/log4j.properties b/hadoop-common-project/hadoop-common/src/test/resources/log4j.properties index ced0687caad45..9a1ff99a6e77a 100644 --- a/hadoop-common-project/hadoop-common/src/test/resources/log4j.properties +++ b/hadoop-common-project/hadoop-common/src/test/resources/log4j.properties @@ -15,4 +15,6 @@ log4j.rootLogger=info,stdout log4j.threshold=ALL log4j.appender.stdout=org.apache.log4j.ConsoleAppender log4j.appender.stdout.layout=org.apache.log4j.PatternLayout -log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n +log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} [%t] %-5p %c{2} (%F:%M(%L)) - %m%n + +log4j.logger.org.apache.hadoop.util.dynamic.BindingUtils=DEBUG diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestDFSWrappedIO.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestDFSWrappedIO.java new file mode 100644 index 0000000000000..2b874fd532034 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/contract/hdfs/TestDFSWrappedIO.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.hdfs; + +import java.io.IOException; + +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.io.wrappedio.impl.TestWrappedIO; + +/** + * Test WrappedIO access to HDFS, especially ByteBufferPositionedReadable. 
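Ahead of the HDFS binding of this test, a sketch of the probe-then-read pattern for ByteBufferPositionedReadable through the wrapper, falling back to the classic positioned readFully when the stream lacks the capability. The byteBufferPositionedReadable_* calls are those used in TestWrappedIO; the sketch's class, method and parameter names are assumptions.

    // Illustrative sketch, not part of the patch: prefer the ByteBuffer positioned
    // read when the stream offers it, otherwise fall back to byte-array readFully.
    import java.io.IOException;
    import java.nio.ByteBuffer;

    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.wrappedio.impl.DynamicWrappedIO;

    public class PositionedReadSketch {

      /** Read {@code length} bytes at {@code offset}, choosing the best available API. */
      public static ByteBuffer readAt(FileSystem fs, Path path, long offset, int length)
          throws IOException {
        DynamicWrappedIO io = new DynamicWrappedIO();
        ByteBuffer buffer = ByteBuffer.allocate(length);
        try (FSDataInputStream in = fs.open(path)) {
          if (io.byteBufferPositionedReadable_readFullyAvailable(in)) {
            io.byteBufferPositionedReadable_readFully(in, offset, buffer);
          } else {
            byte[] bytes = new byte[length];
            in.readFully(offset, bytes);
            buffer.put(bytes).flip();
          }
        }
        return buffer;
      }
    }
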
+ */ +public class TestDFSWrappedIO extends TestWrappedIO { + + @BeforeClass + public static void createCluster() throws IOException { + HDFSContract.createCluster(); + } + + @AfterClass + public static void teardownCluster() throws IOException { + HDFSContract.destroyCluster(); + } + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new HDFSContract(conf); + } +} diff --git a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystem.java b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystem.java index c41940fde9d24..65d822b2ade39 100644 --- a/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystem.java +++ b/hadoop-tools/hadoop-aliyun/src/main/java/org/apache/hadoop/fs/aliyun/oss/AliyunOSSFileSystem.java @@ -28,6 +28,7 @@ import java.util.concurrent.TimeUnit; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.CommonPathCapabilities; import org.apache.hadoop.fs.aliyun.oss.statistics.BlockOutputStreamStatistics; import org.apache.hadoop.fs.aliyun.oss.statistics.impl.OutputStreamStatistics; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; @@ -62,6 +63,7 @@ import static org.apache.hadoop.fs.aliyun.oss.AliyunOSSUtils.longOption; import static org.apache.hadoop.fs.aliyun.oss.AliyunOSSUtils.objectRepresentsDirectory; import static org.apache.hadoop.fs.aliyun.oss.Constants.*; +import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; /** * Implementation of {@link FileSystem} for @@ -782,4 +784,19 @@ OSSDataBlocks.BlockFactory getBlockFactory() { BlockOutputStreamStatistics getBlockOutputStreamStatistics() { return blockOutputStreamStatistics; } + + @Override + public boolean hasPathCapability(final Path path, final String capability) + throws IOException { + final Path p = makeQualified(path); + String cap = validatePathCapabilityArgs(p, capability); + switch (cap) { + // block locations are generated locally + case CommonPathCapabilities.VIRTUAL_BLOCK_LOCATIONS: + return true; + + default: + return super.hasPathCapability(p, cap); + } + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 25b036b5fc7f9..3c70004714459 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -5560,10 +5560,12 @@ public boolean hasPathCapability(final Path path, final String capability) case DIRECTORY_LISTING_INCONSISTENT: return s3ExpressStore; - // etags are avaialable in listings, but they + // etags are available in listings, but they // are not consistent across renames. 
// therefore, only availability is declared case CommonPathCapabilities.ETAGS_AVAILABLE: + // block locations are generated locally + case CommonPathCapabilities.VIRTUAL_BLOCK_LOCATIONS: return true; /* diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputPolicy.java index b90d0f2a61605..1bfe604a6335c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputPolicy.java @@ -26,7 +26,14 @@ import org.apache.hadoop.classification.InterfaceStability; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_AVRO; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_COLUMNAR; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_CSV; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_DEFAULT; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_HBASE; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_JSON; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_ORC; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_PARQUET; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_RANDOM; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_VECTOR; @@ -81,7 +88,8 @@ boolean isAdaptive() { * Choose an access policy. * @param name strategy name from a configuration option, etc. * @param defaultPolicy default policy to fall back to. - * @return the chosen strategy + * @return the chosen strategy or null if there was no match and + * the value of {@code defaultPolicy} was "null". */ public static S3AInputPolicy getPolicy( String name, @@ -93,11 +101,23 @@ public static S3AInputPolicy getPolicy( case Constants.INPUT_FADV_NORMAL: return Normal; - // all these options currently map to random IO. + // all these options currently map to random IO. + case FS_OPTION_OPENFILE_READ_POLICY_HBASE: case FS_OPTION_OPENFILE_READ_POLICY_RANDOM: case FS_OPTION_OPENFILE_READ_POLICY_VECTOR: return Random; + // columnar formats currently map to random IO, + // though in future this may be enhanced. + case FS_OPTION_OPENFILE_READ_POLICY_COLUMNAR: + case FS_OPTION_OPENFILE_READ_POLICY_ORC: + case FS_OPTION_OPENFILE_READ_POLICY_PARQUET: + return Random; + + // handle the sequential formats. 
+ case FS_OPTION_OPENFILE_READ_POLICY_AVRO: + case FS_OPTION_OPENFILE_READ_POLICY_CSV: + case FS_OPTION_OPENFILE_READ_POLICY_JSON: case FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL: case FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE: return Sequential; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractVectoredRead.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractVectoredRead.java index 1f6d3e953dc01..8096f55bcd54c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractVectoredRead.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractVectoredRead.java @@ -56,6 +56,7 @@ import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_PARQUET; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_VECTOR; import static org.apache.hadoop.fs.contract.ContractTestUtils.range; import static org.apache.hadoop.fs.contract.ContractTestUtils.returnBuffersToPoolPostRead; @@ -233,7 +234,8 @@ public void testNormalReadVsVectoredReadStatsCollection() throws Exception { fs.openFile(path(VECTORED_READ_FILE_NAME)) .withFileStatus(fileStatus) .opt(FS_OPTION_OPENFILE_READ_POLICY, - FS_OPTION_OPENFILE_READ_POLICY_VECTOR) + FS_OPTION_OPENFILE_READ_POLICY_PARQUET + + ", " + FS_OPTION_OPENFILE_READ_POLICY_VECTOR) .build(); try (FSDataInputStream in = builder.get()) { in.readVectored(fileRanges, getAllocate()); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AWrappedIO.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AWrappedIO.java new file mode 100644 index 0000000000000..3a82441faef48 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AWrappedIO.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.contract.s3a; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.io.wrappedio.impl.TestWrappedIO; + +/** + * Test S3A access through the wrapped operations class. 
+ */ +public class ITestS3AWrappedIO extends TestWrappedIO { + + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new S3AContract(conf); + } + +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestOpenFileSupport.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestOpenFileSupport.java index 17f210dd586e8..cf427c10e826a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestOpenFileSupport.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestOpenFileSupport.java @@ -43,14 +43,26 @@ import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_AVRO; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_COLUMNAR; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_CSV; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_DEFAULT; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_HBASE; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_JSON; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_ORC; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_PARQUET; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_RANDOM; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_VECTOR; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_SPLIT_END; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_SPLIT_START; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_ASYNC_DRAIN_THRESHOLD; import static org.apache.hadoop.fs.s3a.Constants.INPUT_FADVISE; import static org.apache.hadoop.fs.s3a.Constants.READAHEAD_RANGE; +import static org.apache.hadoop.fs.s3a.S3AInputPolicy.Normal; +import static org.apache.hadoop.fs.s3a.S3AInputPolicy.Random; +import static org.apache.hadoop.fs.s3a.S3AInputPolicy.Sequential; import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -69,7 +81,7 @@ public class TestOpenFileSupport extends HadoopTestBase { private static final String USERNAME = "hadoop"; - public static final S3AInputPolicy INPUT_POLICY = S3AInputPolicy.Sequential; + public static final S3AInputPolicy INPUT_POLICY = Sequential; public static final String TESTFILE = "s3a://bucket/name"; @@ -142,7 +154,7 @@ public void testSeekRandomIOPolicy() throws Throwable { // is picked up assertOpenFile(INPUT_FADVISE, option) .extracting(f -> f.getInputPolicy()) - .isEqualTo(S3AInputPolicy.Random); + .isEqualTo(Random); // and as neither status nor length was set: no file status assertOpenFile(INPUT_FADVISE, option) .extracting(f -> f.getStatus()) @@ -161,7 +173,7 @@ public void testSeekPolicyAdaptive() throws Throwable { 
assertOpenFile(FS_OPTION_OPENFILE_READ_POLICY, FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE) .extracting(f -> f.getInputPolicy()) - .isEqualTo(S3AInputPolicy.Normal); + .isEqualTo(Normal); } /** @@ -184,7 +196,7 @@ public void testSeekPolicyListS3AOption() throws Throwable { // fall back to the second seek policy if the first is unknown assertOpenFile(INPUT_FADVISE, "hbase, random") .extracting(f -> f.getInputPolicy()) - .isEqualTo(S3AInputPolicy.Random); + .isEqualTo(Random); } /** @@ -199,14 +211,14 @@ public void testSeekPolicyExtractionFromList() throws Throwable { FS_OPTION_OPENFILE_READ_POLICY); Assertions.assertThat(S3AInputPolicy.getFirstSupportedPolicy(options, null)) .describedAs("Policy from " + plist) - .isEqualTo(S3AInputPolicy.Random); + .isEqualTo(Random); } @Test public void testAdaptiveSeekPolicyRecognized() throws Throwable { Assertions.assertThat(S3AInputPolicy.getPolicy("adaptive", null)) .describedAs("adaptive") - .isEqualTo(S3AInputPolicy.Normal); + .isEqualTo(Normal); } @Test @@ -222,11 +234,20 @@ public void testUnknownSeekPolicyFallback() throws Throwable { @Test public void testInputPolicyMapping() throws Throwable { Object[][] policyMapping = { - {"normal", S3AInputPolicy.Normal}, - {FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE, S3AInputPolicy.Normal}, - {FS_OPTION_OPENFILE_READ_POLICY_DEFAULT, S3AInputPolicy.Normal}, - {FS_OPTION_OPENFILE_READ_POLICY_RANDOM, S3AInputPolicy.Random}, - {FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL, S3AInputPolicy.Sequential}, + {"normal", Normal}, + {FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE, Normal}, + {FS_OPTION_OPENFILE_READ_POLICY_AVRO, Sequential}, + {FS_OPTION_OPENFILE_READ_POLICY_COLUMNAR, Random}, + {FS_OPTION_OPENFILE_READ_POLICY_CSV, Sequential}, + {FS_OPTION_OPENFILE_READ_POLICY_DEFAULT, Normal}, + {FS_OPTION_OPENFILE_READ_POLICY_HBASE, Random}, + {FS_OPTION_OPENFILE_READ_POLICY_JSON, Sequential}, + {FS_OPTION_OPENFILE_READ_POLICY_ORC, Random}, + {FS_OPTION_OPENFILE_READ_POLICY_PARQUET, Random}, + {FS_OPTION_OPENFILE_READ_POLICY_RANDOM, Random}, + {FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL, Sequential}, + {FS_OPTION_OPENFILE_READ_POLICY_VECTOR, Random}, + {FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE, Sequential}, }; for (Object[] mapping : policyMapping) { String name = (String) mapping[0]; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java index 482a963b92ab4..8e61225e17ef5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java @@ -42,6 +42,7 @@ import org.apache.hadoop.fs.s3a.Statistic; import org.apache.hadoop.fs.statistics.IOStatistics; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_FOOTER_CACHE; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_RANDOM; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL; @@ -180,6 +181,7 @@ public void testStreamIsNotChecksummed() throws Throwable { fs.openFile(testFile) .must(FS_OPTION_OPENFILE_READ_POLICY, FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE) + .must(FS_OPTION_OPENFILE_FOOTER_CACHE, false) .mustLong(FS_OPTION_OPENFILE_LENGTH, fileLength) .build() .get(), diff --git 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java index 1cff03fb2c72f..457aef3ed38e0 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystem.java @@ -1692,7 +1692,8 @@ public boolean hasPathCapability(final Path path, final String capability) switch (validatePathCapabilityArgs(p, capability)) { case CommonPathCapabilities.FS_PERMISSIONS: case CommonPathCapabilities.FS_APPEND: - case CommonPathCapabilities.ETAGS_AVAILABLE: + // block locations are generated locally + case CommonPathCapabilities.VIRTUAL_BLOCK_LOCATIONS: return true; case CommonPathCapabilities.ETAGS_PRESERVED_IN_RENAME: diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsWrappedIO.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsWrappedIO.java new file mode 100644 index 0000000000000..28750fd6dc633 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsWrappedIO.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.contract; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.io.wrappedio.impl.TestWrappedIO; + +/** + * Test WrappedIO access to ABFS. + */ +public class ITestAbfsWrappedIO extends TestWrappedIO { + + private final boolean isSecure; + private final ABFSContractTestBinding binding; + + public ITestAbfsWrappedIO() throws Exception { + binding = new ABFSContractTestBinding(); + this.isSecure = binding.isSecureMode(); + } + + @Override + public void setup() throws Exception { + binding.setup(); + super.setup(); + } + + @Override + protected Configuration createConfiguration() { + return binding.getRawConfiguration(); + } + + @Override + protected AbstractFSContract createContract(final Configuration conf) { + return new AbfsFileSystemContract(conf, isSecure); + } +} From fa9bb0d1ac4b27a37ba9df0ee3e1104f1cd85e64 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Thu, 15 Aug 2024 16:44:54 +0100 Subject: [PATCH 084/113] HADOOP-19231. Add JacksonUtil to manage Jackson classes (#6953) New class org.apache.hadoop.util.JacksonUtil centralizes construction of Jackson ObjectMappers and JsonFactories. 
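As a minimal illustrative sketch (not taken from the patch itself), calling code is expected to move from per-call ObjectMapper construction to these helpers roughly as follows. The JacksonUtil methods are the ones this patch introduces; the sketch class and its method names are hypothetical.

    import java.io.IOException;
    import java.util.Map;

    import com.fasterxml.jackson.databind.ObjectReader;
    import com.fasterxml.jackson.databind.ObjectWriter;

    import org.apache.hadoop.util.JacksonUtil;

    /** Hypothetical caller, for illustration only. */
    public class JacksonUtilUsageSketch {

      // Cached, shared reader/writer instances replace per-call
      // "new ObjectMapper()" construction.
      private static final ObjectReader MAP_READER =
          JacksonUtil.createBasicReaderFor(Map.class);
      private static final ObjectWriter WRITER = JacksonUtil.getSharedWriter();

      /** Parse a JSON document into a Map. */
      static Map<String, Object> parse(String json) throws IOException {
        return MAP_READER.readValue(json);
      }

      /** Render a value as a JSON string. */
      static String render(Object value) throws IOException {
        return WRITER.writeValueAsString(value);
      }
    }
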
Contributed by PJ Fanning --- .../org/apache/hadoop/conf/Configuration.java | 8 +- .../crypto/key/kms/KMSClientProvider.java | 5 +- .../apache/hadoop/ipc/DecayRpcScheduler.java | 4 +- .../java/org/apache/hadoop/ipc/Server.java | 5 +- .../org/apache/hadoop/jmx/JMXJsonServlet.java | 10 +- .../hadoop/metrics2/MetricsJsonBuilder.java | 5 +- .../DelegationTokenAuthenticationHandler.java | 5 +- .../org/apache/hadoop/util/JacksonUtil.java | 123 ++++++++++++++++++ .../apache/hadoop/util/JsonSerialization.java | 9 +- .../crypto/key/kms/server/KMSJSONReader.java | 6 +- .../server/datanode/DiskBalancerWorkItem.java | 7 +- .../datanode/DiskBalancerWorkStatus.java | 12 +- .../hdfs/util/CombinedHostsFileReader.java | 14 +- .../hdfs/util/CombinedHostsFileWriter.java | 6 +- .../hadoop/hdfs/web/JsonUtilClient.java | 4 +- .../fs/http/client/HttpFSFileSystem.java | 3 +- .../blockmanagement/SlowDiskTracker.java | 4 +- .../blockmanagement/SlowPeerTracker.java | 5 +- .../datanode/fsdataset/impl/FsVolumeImpl.java | 9 +- .../fsdataset/impl/ProvidedVolumeImpl.java | 4 +- .../server/diskbalancer/command/Command.java | 5 +- .../connectors/JsonNodeConnector.java | 7 +- .../datamodel/DiskBalancerCluster.java | 6 +- .../datamodel/DiskBalancerVolume.java | 4 +- .../server/diskbalancer/planner/NodePlan.java | 8 +- .../namenode/NetworkTopologyServlet.java | 5 +- .../namenode/StartupProgressServlet.java | 4 +- .../org/apache/hadoop/hdfs/web/JsonUtil.java | 24 ++-- .../apache/hadoop/mapred/QueueManager.java | 5 +- .../mapreduce/util/JobHistoryEventUtils.java | 3 +- .../hadoop/fs/s3a/impl/S3AEncryption.java | 6 +- .../fs/azure/NativeAzureFileSystem.java | 4 +- .../fs/azure/RemoteSASKeyGeneratorImpl.java | 8 +- .../fs/azure/RemoteWasbAuthorizerImpl.java | 10 +- .../azurebfs/oauth2/AzureADAuthenticator.java | 8 +- .../azurebfs/services/AbfsHttpOperation.java | 7 +- .../tools/dynamometer/DynoInfraUtils.java | 5 +- .../apache/hadoop/tools/rumen/Anonymizer.java | 11 +- .../tools/rumen/JsonObjectMapperParser.java | 9 +- .../tools/rumen/JsonObjectMapperWriter.java | 5 +- .../hadoop/tools/rumen/state/StatePool.java | 11 +- .../hadoop/tools/rumen/TestHistograms.java | 4 +- .../org/apache/hadoop/yarn/sls/AMRunner.java | 10 +- .../hadoop/yarn/sls/RumenToSLSConverter.java | 8 +- .../sls/synthetic/SynthTraceJobProducer.java | 4 +- .../hadoop/yarn/sls/utils/SLSUtils.java | 12 +- .../yarn/sls/TestSynthJobGeneration.java | 5 +- .../application/AppCatalogSolrClient.java | 54 ++++---- .../application/YarnServiceClient.java | 34 ++--- .../component/instance/ComponentInstance.java | 3 +- .../yarn/service/utils/JsonSerDeser.java | 5 +- .../service/utils/PublishedConfiguration.java | 17 ++- .../api/impl/FileSystemTimelineWriter.java | 8 +- .../client/api/impl/TimelineClientImpl.java | 3 +- .../yarn/util/DockerClientConfigHandler.java | 9 +- .../yarn/util/timeline/TimelineUtils.java | 10 +- .../apache/hadoop/yarn/webapp/Controller.java | 5 +- .../server/timeline/GenericObjectMapper.java | 12 +- .../containermanager/AuxServices.java | 3 +- .../NetworkTagMappingJsonManager.java | 5 +- .../linux/runtime/RuncContainerRuntime.java | 4 +- .../runc/ImageTagToManifestPlugin.java | 8 +- .../resource/ResourceProfilesManagerImpl.java | 5 +- .../placement/MappingRuleCreator.java | 8 +- .../converter/LegacyMappingRuleToJson.java | 15 ++- .../FSConfigToCSConfigConverter.java | 6 +- .../timeline/EntityGroupFSTimelineStore.java | 3 +- .../timeline/LevelDBCacheTimelineStore.java | 5 +- .../server/timeline/PluginStoreTestUtils.java | 9 +- 
.../documentstore/JsonUtils.java | 3 +- .../storage/FileSystemTimelineReaderImpl.java | 8 +- 71 files changed, 392 insertions(+), 296 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JacksonUtil.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 94285a4dfb7e5..4f372374abe1b 100755 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -22,7 +22,6 @@ import com.ctc.wstx.io.StreamBootstrapper; import com.ctc.wstx.io.SystemId; import com.ctc.wstx.stax.WstxInputFactory; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import java.io.BufferedInputStream; @@ -101,6 +100,7 @@ import org.apache.hadoop.security.alias.CredentialProviderFactory; import org.apache.hadoop.thirdparty.com.google.common.base.Strings; import org.apache.hadoop.util.ConfigurationHelper; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringInterner; @@ -3792,8 +3792,7 @@ public static void dumpConfiguration(Configuration config, throw new IllegalArgumentException("Property " + propertyName + " not found"); } else { - JsonFactory dumpFactory = new JsonFactory(); - JsonGenerator dumpGenerator = dumpFactory.createGenerator(out); + JsonGenerator dumpGenerator = JacksonUtil.getSharedWriter().createGenerator(out); dumpGenerator.writeStartObject(); dumpGenerator.writeFieldName("property"); appendJSONProperty(dumpGenerator, config, propertyName, @@ -3831,8 +3830,7 @@ public static void dumpConfiguration(Configuration config, */ public static void dumpConfiguration(Configuration config, Writer out) throws IOException { - JsonFactory dumpFactory = new JsonFactory(); - JsonGenerator dumpGenerator = dumpFactory.createGenerator(out); + JsonGenerator dumpGenerator = JacksonUtil.getSharedWriter().createGenerator(out); dumpGenerator.writeStartObject(); dumpGenerator.writeFieldName("properties"); dumpGenerator.writeStartArray(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java index 6ee9068ea3458..b5a6d882334d9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java @@ -41,6 +41,7 @@ import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSelector; import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticatedURL; import org.apache.hadoop.util.HttpExceptionUtils; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.JsonSerialization; import org.apache.hadoop.util.KMSUtil; import org.apache.http.client.utils.URIBuilder; @@ -78,7 +79,6 @@ import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.CryptoExtension; -import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.VisibleForTesting; import 
org.apache.hadoop.util.Preconditions; import org.apache.hadoop.thirdparty.com.google.common.base.Strings; @@ -592,11 +592,10 @@ private T call(HttpURLConnection conn, Object jsonOutput, && conn.getContentType().trim().toLowerCase() .startsWith(APPLICATION_JSON_MIME) && klass != null) { - ObjectMapper mapper = new ObjectMapper(); InputStream is = null; try { is = conn.getInputStream(); - ret = mapper.readValue(is, klass); + ret = JacksonUtil.getSharedReader().readValue(is, klass); } finally { IOUtils.closeStream(is); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java index 63274bb01e72d..4d7cd023b5afa 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java @@ -38,10 +38,10 @@ import javax.management.ObjectName; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.AtomicDoubleArray; import org.apache.commons.lang3.exception.ExceptionUtils; @@ -146,7 +146,7 @@ public class DecayRpcScheduler implements RpcScheduler, public static final Logger LOG = LoggerFactory.getLogger(DecayRpcScheduler.class); - private static final ObjectWriter WRITER = new ObjectMapper().writer(); + private static final ObjectWriter WRITER = JacksonUtil.getSharedWriter(); // Track the decayed and raw (no decay) number of calls for each schedulable // identity from all previous decay windows: idx 0 for decayed call cost and diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 0d9e7296d2a4c..a808f07b0c0aa 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -121,6 +121,7 @@ import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.util.ExitUtil; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.ProtoUtil; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; @@ -130,7 +131,6 @@ import org.apache.hadoop.tracing.TraceScope; import org.apache.hadoop.tracing.Tracer; import org.apache.hadoop.tracing.TraceUtils; -import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; @@ -3843,9 +3843,8 @@ public int getNumOpenConnections() { * @return Get the NumOpenConnections/User. 
*/ public String getNumOpenConnectionsPerUser() { - ObjectMapper mapper = new ObjectMapper(); try { - return mapper + return JacksonUtil.getSharedWriter() .writeValueAsString(connectionManager.getUserToConnectionsMap()); } catch (IOException ignored) { } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java index f089db502783e..6f54364fff4e0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java @@ -43,13 +43,13 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.commons.lang3.NotImplementedException; import org.apache.hadoop.http.HttpServer2; +import org.apache.hadoop.util.JacksonUtil; /* * This servlet is based off of the JMXProxyServlet from Tomcat 7.0.14. It has @@ -134,11 +134,6 @@ public class JMXJsonServlet extends HttpServlet { */ protected transient MBeanServer mBeanServer; - /** - * Json Factory to create Json generators for write objects in json format - */ - protected transient JsonFactory jsonFactory; - /** * Initialize this servlet. */ @@ -146,7 +141,6 @@ public class JMXJsonServlet extends HttpServlet { public void init() throws ServletException { // Retrieve the MBean server mBeanServer = ManagementFactory.getPlatformMBeanServer(); - jsonFactory = new JsonFactory(); } protected boolean isInstrumentationAccessAllowed(HttpServletRequest request, @@ -187,7 +181,7 @@ public void doGet(HttpServletRequest request, HttpServletResponse response) { response.setHeader(ACCESS_CONTROL_ALLOW_METHODS, "GET"); response.setHeader(ACCESS_CONTROL_ALLOW_ORIGIN, "*"); - jg = jsonFactory.createGenerator(writer); + jg = JacksonUtil.getSharedWriter().createGenerator(writer); jg.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET); jg.useDefaultPrettyPrinter(); jg.writeStartObject(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsJsonBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsJsonBuilder.java index 3a9be12803143..3534adfd6903e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsJsonBuilder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsJsonBuilder.java @@ -21,8 +21,8 @@ import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.util.JacksonUtil; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,8 +46,7 @@ public class MetricsJsonBuilder extends MetricsRecordBuilder { private final MetricsCollector parent; private Map innerMetrics = new LinkedHashMap<>(); - private static final ObjectWriter WRITER = - new ObjectMapper().writer(); + private static final ObjectWriter WRITER = JacksonUtil.getSharedWriter(); /** * Build an instance. 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java index f4ede6f35edb0..62c7c4ba6e024 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java @@ -46,6 +46,7 @@ import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; import org.apache.hadoop.util.HttpExceptionUtils; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -165,7 +166,7 @@ public void initTokenManager(Properties config) { @VisibleForTesting public void initJsonFactory(Properties config) { boolean hasFeature = false; - JsonFactory tmpJsonFactory = new JsonFactory(); + JsonFactory tmpJsonFactory = JacksonUtil.createBasicJsonFactory(); for (Map.Entry entry : config.entrySet()) { String key = (String)entry.getKey(); @@ -335,7 +336,7 @@ public boolean managementOperation(AuthenticationToken token, if (map != null) { response.setContentType(MediaType.APPLICATION_JSON); Writer writer = response.getWriter(); - ObjectMapper jsonMapper = new ObjectMapper(jsonFactory); + ObjectMapper jsonMapper = JacksonUtil.createObjectMapper(jsonFactory); jsonMapper.writeValue(writer, map); writer.write(ENTER); writer.flush(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JacksonUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JacksonUtil.java new file mode 100644 index 0000000000000..7d90555c8780b --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JacksonUtil.java @@ -0,0 +1,123 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.util; + +import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; +import com.fasterxml.jackson.databind.ObjectWriter; +import com.fasterxml.jackson.databind.json.JsonMapper; + +import org.apache.hadoop.classification.InterfaceAudience.Private; + +/** + * Utility for sharing code related to Jackson usage in Hadoop. 
+ */ +@Private +public final class JacksonUtil { + + private static final ObjectMapper SHARED_BASIC_OBJECT_MAPPER = createBasicObjectMapper(); + private static final ObjectReader SHARED_BASIC_OBJECT_READER = + SHARED_BASIC_OBJECT_MAPPER.reader(); + private static final ObjectWriter SHARED_BASIC_OBJECT_WRITER = + SHARED_BASIC_OBJECT_MAPPER.writer(); + private static final ObjectWriter SHARED_BASIC_OBJECT_WRITER_PRETTY = + SHARED_BASIC_OBJECT_MAPPER.writerWithDefaultPrettyPrinter(); + + /** + * Creates a new {@link JsonFactory} instance with basic configuration. + * + * @return an {@link JsonFactory} with basic configuration + */ + public static JsonFactory createBasicJsonFactory() { + // deliberately return a new instance instead of sharing one because we can't trust + // that users won't modify this instance + return new JsonFactory(); + } + + /** + * Creates a new {@link ObjectMapper} instance with basic configuration. + * + * @return an {@link ObjectMapper} with basic configuration + */ + public static ObjectMapper createBasicObjectMapper() { + // deliberately return a new instance instead of sharing one because we can't trust + // that users won't modify this instance + return JsonMapper.builder(createBasicJsonFactory()).build(); + } + + /** + * Creates a new {@link ObjectMapper} instance based on the configuration + * in the input {@link JsonFactory}. + * + * @param jsonFactory a pre-configured {@link JsonFactory} + * @return an {@link ObjectMapper} with configuration set by the input {@link JsonFactory}. + */ + public static ObjectMapper createObjectMapper(final JsonFactory jsonFactory) { + return JsonMapper.builder(jsonFactory).build(); + } + + /** + * Returns a shared {@link ObjectReader} instance with basic configuration. + * + * @return a shared {@link ObjectReader} instance with basic configuration + */ + public static ObjectReader getSharedReader() { + return SHARED_BASIC_OBJECT_READER; + } + + /** + * Returns an {@link ObjectReader} for the given type instance with basic configuration. + * + * @param type the class that the reader has to support + * @return an {@link ObjectReader} instance with basic configuration + */ + public static ObjectReader createBasicReaderFor(Class type) { + return SHARED_BASIC_OBJECT_MAPPER.readerFor(type); + } + + /** + * Returns a shared {@link ObjectWriter} instance with basic configuration. + * + * @return a shared {@link ObjectWriter} instance with basic configuration + */ + public static ObjectWriter getSharedWriter() { + return SHARED_BASIC_OBJECT_WRITER; + } + + /** + * Returns a shared {@link ObjectWriter} instance with pretty print and basic configuration. + * + * @return a shared {@link ObjectWriter} instance with pretty print and basic configuration + */ + public static ObjectWriter getSharedWriterWithPrettyPrint() { + return SHARED_BASIC_OBJECT_WRITER_PRETTY; + } + + /** + * Returns an {@link ObjectWriter} for the given type instance with basic configuration. 
+ * + * @param type the class that the writer has to support + * @return an {@link ObjectWriter} instance with basic configuration + */ + public static ObjectWriter createBasicWriterFor(Class type) { + return SHARED_BASIC_OBJECT_MAPPER.writerFor(type); + } + + private JacksonUtil() {} +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java index 52c6c4505226a..05b069c3ad9b8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java @@ -76,11 +76,8 @@ public class JsonSerialization { private final Class classType; private final ObjectMapper mapper; - private static final ObjectWriter WRITER = - new ObjectMapper().writerWithDefaultPrettyPrinter(); - - private static final ObjectReader MAP_READER = - new ObjectMapper().readerFor(Map.class); + private static final ObjectWriter WRITER = JacksonUtil.getSharedWriterWithPrettyPrint(); + private static final ObjectReader MAP_READER = JacksonUtil.createBasicReaderFor(Map.class); /** * @return an ObjectWriter which pretty-prints its output @@ -106,7 +103,7 @@ public JsonSerialization(Class classType, boolean failOnUnknownProperties, boolean pretty) { Preconditions.checkArgument(classType != null, "null classType"); this.classType = classType; - this.mapper = new ObjectMapper(); + this.mapper = JacksonUtil.createBasicObjectMapper(); mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, failOnUnknownProperties); mapper.configure(SerializationFeature.INDENT_OUTPUT, pretty); diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSJSONReader.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSJSONReader.java index af781f5277850..2f7a6d8557731 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSJSONReader.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSJSONReader.java @@ -17,9 +17,8 @@ */ package org.apache.hadoop.crypto.key.kms.server; -import com.fasterxml.jackson.databind.ObjectMapper; - import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.util.JacksonUtil; import javax.ws.rs.Consumes; import javax.ws.rs.WebApplicationException; @@ -38,7 +37,6 @@ @Consumes(MediaType.APPLICATION_JSON) @InterfaceAudience.Private public class KMSJSONReader implements MessageBodyReader { - private static final ObjectMapper MAPPER = new ObjectMapper(); @Override public boolean isReadable(Class type, Type genericType, @@ -52,6 +50,6 @@ public Object readFrom(Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, InputStream entityStream) throws IOException, WebApplicationException { - return MAPPER.readValue(entityStream, type); + return JacksonUtil.getSharedReader().readValue(entityStream, type); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkItem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkItem.java index d1ad5a2079f5f..041eb2912be50 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkItem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkItem.java @@ -20,8 +20,8 @@ package org.apache.hadoop.hdfs.server.datanode; import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -35,9 +35,8 @@ @InterfaceStability.Unstable @JsonInclude(JsonInclude.Include.NON_DEFAULT) public class DiskBalancerWorkItem { - private static final ObjectMapper MAPPER = new ObjectMapper(); private static final ObjectReader READER = - new ObjectMapper().readerFor(DiskBalancerWorkItem.class); + JacksonUtil.createBasicReaderFor(DiskBalancerWorkItem.class); private long startTime; private long secondsElapsed; @@ -173,7 +172,7 @@ public void incBlocksCopied() { * @throws IOException */ public String toJson() throws IOException { - return MAPPER.writeValueAsString(this); + return JacksonUtil.getSharedWriter().writeValueAsString(this); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkStatus.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkStatus.java index 5a5da7326a4e0..7ea6e9d885e9e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkStatus.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkStatus.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.SerializationFeature; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -39,14 +40,13 @@ @InterfaceAudience.Private @InterfaceStability.Unstable public class DiskBalancerWorkStatus { - private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final ObjectMapper MAPPER = JacksonUtil.createBasicObjectMapper(); private static final ObjectMapper MAPPER_WITH_INDENT_OUTPUT = - new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT); + JacksonUtil.createBasicObjectMapper().enable(SerializationFeature.INDENT_OUTPUT); private static final ObjectReader READER_WORKSTATUS = - new ObjectMapper().readerFor(DiskBalancerWorkStatus.class); - private static final ObjectReader READER_WORKENTRY = new ObjectMapper() - .readerFor(defaultInstance().constructCollectionType(List.class, - DiskBalancerWorkEntry.class)); + MAPPER.readerFor(DiskBalancerWorkStatus.class); + private static final ObjectReader READER_WORKENTRY = MAPPER.readerFor( + defaultInstance().constructCollectionType(List.class, DiskBalancerWorkEntry.class)); private final List currentState; private Result result; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java index 
33f4934e5489d..a41b727ab2d20 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java @@ -18,9 +18,7 @@ package org.apache.hadoop.hdfs.util; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.databind.JsonMappingException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import java.io.File; @@ -42,6 +40,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.protocol.DatanodeAdminProperties; +import org.apache.hadoop.util.JacksonUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -83,7 +82,6 @@ private CombinedHostsFileReader() { public static DatanodeAdminProperties[] readFile(final String hostsFilePath) throws IOException { DatanodeAdminProperties[] allDNs = new DatanodeAdminProperties[0]; - ObjectMapper objectMapper = new ObjectMapper(); File hostFile = new File(hostsFilePath); boolean tryOldFormat = false; @@ -91,7 +89,8 @@ private CombinedHostsFileReader() { try (Reader input = new InputStreamReader( Files.newInputStream(hostFile.toPath()), StandardCharsets.UTF_8)) { - allDNs = objectMapper.readValue(input, DatanodeAdminProperties[].class); + allDNs = JacksonUtil.getSharedReader() + .readValue(input, DatanodeAdminProperties[].class); } catch (JsonMappingException jme) { // The old format doesn't have json top-level token to enclose // the array. @@ -103,15 +102,12 @@ private CombinedHostsFileReader() { } if (tryOldFormat) { - ObjectReader objectReader = - objectMapper.readerFor(DatanodeAdminProperties.class); - JsonFactory jsonFactory = new JsonFactory(); + ObjectReader objectReader = JacksonUtil.createBasicReaderFor(DatanodeAdminProperties.class); List all = new ArrayList<>(); try (Reader input = new InputStreamReader(Files.newInputStream(Paths.get(hostsFilePath)), StandardCharsets.UTF_8)) { - Iterator iterator = - objectReader.readValues(jsonFactory.createParser(input)); + Iterator iterator = objectReader.readValues(input); while (iterator.hasNext()) { DatanodeAdminProperties properties = iterator.next(); all.add(properties); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileWriter.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileWriter.java index de4c12d556cc7..dcd08cfc7010f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileWriter.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileWriter.java @@ -26,11 +26,11 @@ import java.nio.file.Paths; import java.util.Set; -import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.protocol.DatanodeAdminProperties; +import org.apache.hadoop.util.JacksonUtil; /** * Writer support for JSON-based datanode configuration, an alternative format @@ -59,12 +59,10 @@ private CombinedHostsFileWriter() { */ public static void writeFile(final String hostsFile, final Set allDNs) throws IOException { - final ObjectMapper objectMapper = new ObjectMapper(); - try (Writer output = new 
OutputStreamWriter(Files.newOutputStream(Paths.get(hostsFile)), StandardCharsets.UTF_8)) { - objectMapper.writeValue(output, allDNs); + JacksonUtil.getSharedWriter().writeValue(output, allDNs); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java index 108f74997a63e..54a44b33b17b7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java @@ -17,12 +17,12 @@ */ package org.apache.hadoop.hdfs.web; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.hadoop.fs.ContentSummary; @@ -654,7 +654,7 @@ static List toXAttrNames(final Map json) } final String namesInJson = (String) json.get("XAttrNames"); - ObjectReader reader = new ObjectMapper().readerFor(List.class); + ObjectReader reader = JacksonUtil.createBasicReaderFor(List.class); final List xattrs = reader.readValue(namesInJson); final List names = Lists.newArrayListWithCapacity(json.keySet().size()); diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java index dab4776575bff..1ec907004bd26 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java @@ -71,6 +71,7 @@ import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticator; import org.apache.hadoop.security.token.delegation.web.KerberosDelegationTokenAuthenticator; import org.apache.hadoop.util.HttpExceptionUtils; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Lists; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.ReflectionUtils; @@ -1818,7 +1819,7 @@ public Collection getTrashRoots(boolean allUsers) { @VisibleForTesting static BlockLocation[] toBlockLocations(JSONObject json) throws IOException { - ObjectMapper mapper = new ObjectMapper(); + ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); MapType subType = mapper.getTypeFactory().constructMapType(Map.class, String.class, BlockLocation[].class); MapType rootType = mapper.getTypeFactory().constructMapType(Map.class, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java index 798b5fb5966f7..312d63daed4e1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import 
com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; @@ -32,6 +31,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports.DiskOp; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Lists; import org.apache.hadoop.util.Timer; import org.slf4j.Logger; @@ -71,7 +71,7 @@ public class SlowDiskTracker { /** * ObjectWriter to convert JSON reports to String. */ - private static final ObjectWriter WRITER = new ObjectMapper().writer(); + private static final ObjectWriter WRITER = JacksonUtil.getSharedWriter(); /** * Number of disks to include in JSON report per operation. We will return diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerTracker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerTracker.java index e4feb4815eee4..3774a9dbdff21 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerTracker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerTracker.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hdfs.server.blockmanagement; import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; @@ -30,6 +29,7 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.server.protocol.OutlierMetrics; import org.apache.hadoop.hdfs.server.protocol.SlowPeerReports; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Timer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -75,7 +75,8 @@ public class SlowPeerTracker { /** * ObjectWriter to convert JSON reports to String. */ - private static final ObjectWriter WRITER = new ObjectMapper().writer(); + private static final ObjectWriter WRITER = JacksonUtil.getSharedWriter(); + /** * Number of nodes to include in JSON report. We will return nodes with * the highest number of votes from peers. 
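The shared reader/writer instances cover the plain read/write cases converted above; classes that need to change Jackson features (see JsonSerialization and DiskBalancerWorkStatus earlier in this patch) instead build a private mapper from createBasicObjectMapper() and configure it locally. A minimal sketch of that pattern, with a hypothetical helper name:

    import com.fasterxml.jackson.databind.DeserializationFeature;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.fasterxml.jackson.databind.SerializationFeature;

    import org.apache.hadoop.util.JacksonUtil;

    /** Hypothetical helper, for illustration only. */
    public final class ConfiguredMapperSketch {

      private ConfiguredMapperSketch() {
      }

      // A mapper whose features are changed is built and kept privately;
      // the shared JacksonUtil instances are never reconfigured by callers.
      static ObjectMapper lenientPrettyMapper() {
        ObjectMapper mapper = JacksonUtil.createBasicObjectMapper();
        mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
        mapper.configure(SerializationFeature.INDENT_OUTPUT, true);
        return mapper;
      }
    }
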
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java index 6b026823f19f9..080418db08afa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java @@ -79,18 +79,18 @@ import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.DiskChecker.DiskErrorException; import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; +import org.apache.hadoop.util.Preconditions; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Timer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; -import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; /** @@ -103,10 +103,9 @@ public class FsVolumeImpl implements FsVolumeSpi { public static final Logger LOG = LoggerFactory.getLogger(FsVolumeImpl.class); - private static final ObjectWriter WRITER = - new ObjectMapper().writerWithDefaultPrettyPrinter(); + private static final ObjectWriter WRITER = JacksonUtil.getSharedWriterWithPrettyPrint(); private static final ObjectReader READER = - new ObjectMapper().readerFor(BlockIteratorState.class); + JacksonUtil.createBasicReaderFor(BlockIteratorState.class); private final FsDatasetImpl dataset; private final String storageID; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ProvidedVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ProvidedVolumeImpl.java index 69a46257317bf..816a765c52907 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ProvidedVolumeImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ProvidedVolumeImpl.java @@ -32,7 +32,6 @@ import java.util.concurrent.atomic.AtomicLong; import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.InterfaceAudience; @@ -60,6 +59,7 @@ import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; import org.apache.hadoop.util.DiskChecker.DiskErrorException; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Timer; @@ -369,7 +369,7 @@ public void releaseReservedSpace(long bytesToRelease) { } private static final ObjectWriter WRITER = - new ObjectMapper().writerWithDefaultPrettyPrinter(); + JacksonUtil.getSharedWriterWithPrettyPrint(); private static class ProvidedBlockIteratorState { ProvidedBlockIteratorState() { diff 
--git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java index c90b77e98d2e8..e9ba658ecdc91 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java @@ -18,7 +18,6 @@ package org.apache.hadoop.hdfs.server.diskbalancer.command; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import org.apache.commons.cli.CommandLine; @@ -47,6 +46,7 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.HostsFileReader; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -77,8 +77,7 @@ * Common interface for command handling. */ public abstract class Command extends Configured implements Closeable { - private static final ObjectReader READER = - new ObjectMapper().readerFor(HashMap.class); + private static final ObjectReader READER = JacksonUtil.createBasicReaderFor(HashMap.class); static final Logger LOG = LoggerFactory.getLogger(Command.class); private Map validArgs = new HashMap<>(); private URI clusterURI; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/JsonNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/JsonNodeConnector.java index 1cc82253f9885..4e76c7e45e999 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/JsonNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/JsonNodeConnector.java @@ -17,15 +17,14 @@ package org.apache.hadoop.hdfs.server.diskbalancer.connectors; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.server.diskbalancer.datamodel.DiskBalancerCluster; -import org.apache.hadoop.hdfs.server.diskbalancer.datamodel - .DiskBalancerDataNode; +import org.apache.hadoop.hdfs.server.diskbalancer.datamodel.DiskBalancerDataNode; import java.io.File; import java.net.URL; @@ -38,7 +37,7 @@ public class JsonNodeConnector implements ClusterConnector { private static final Logger LOG = LoggerFactory.getLogger(JsonNodeConnector.class); private static final ObjectReader READER = - new ObjectMapper().readerFor(DiskBalancerCluster.class); + JacksonUtil.createBasicReaderFor(DiskBalancerCluster.class); private final URL clusterURI; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerCluster.java index 7e935a3f82058..f24f92ff1392d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerCluster.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerCluster.java @@ -19,9 +19,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; -import org.apache.hadoop.util.Preconditions; import org.apache.commons.io.FileUtils; import org.slf4j.Logger; @@ -31,6 +29,8 @@ import org.apache.hadoop.hdfs.server.diskbalancer.planner.Planner; import org.apache.hadoop.hdfs.server.diskbalancer.planner.PlannerFactory; import org.apache.hadoop.hdfs.web.JsonUtil; +import org.apache.hadoop.util.JacksonUtil; +import org.apache.hadoop.util.Preconditions; import java.io.File; import java.io.IOException; @@ -73,7 +73,7 @@ public class DiskBalancerCluster { private static final Logger LOG = LoggerFactory.getLogger(DiskBalancerCluster.class); private static final ObjectReader READER = - new ObjectMapper().readerFor(DiskBalancerCluster.class); + JacksonUtil.createBasicReaderFor(DiskBalancerCluster.class); private final Set exclusionList; private final Set inclusionList; private ClusterConnector clusterConnector; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolume.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolume.java index e43b83e39ce3a..e354a23519ff2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolume.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolume.java @@ -19,10 +19,10 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import org.apache.hadoop.hdfs.web.JsonUtil; +import org.apache.hadoop.util.JacksonUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,7 +34,7 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class DiskBalancerVolume { private static final ObjectReader READER = - new ObjectMapper().readerFor(DiskBalancerVolume.class); + JacksonUtil.createBasicReaderFor(DiskBalancerVolume.class); private static final Logger LOG = LoggerFactory.getLogger(DiskBalancerVolume.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/NodePlan.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/NodePlan.java index 39a7c57bca2cd..3dfd27dde4d2d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/NodePlan.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/NodePlan.java @@ -18,9 +18,9 @@ package org.apache.hadoop.hdfs.server.diskbalancer.planner; import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.ObjectWriter; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import java.io.IOException; @@ -39,10 +39,8 @@ public class NodePlan { private int port; private long timeStamp; - private static final 
ObjectMapper MAPPER = new ObjectMapper(); - private static final ObjectReader READER = MAPPER.readerFor(NodePlan.class); - private static final ObjectWriter WRITER = MAPPER.writerFor( - MAPPER.constructType(NodePlan.class)); + private static final ObjectReader READER = JacksonUtil.createBasicReaderFor(NodePlan.class); + private static final ObjectWriter WRITER = JacksonUtil.createBasicWriterFor(NodePlan.class); /** * returns timestamp when this plan was created. * diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NetworkTopologyServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NetworkTopologyServlet.java index a6460280835d3..16d9e203d3143 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NetworkTopologyServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NetworkTopologyServlet.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; @@ -26,6 +25,7 @@ import org.apache.hadoop.net.NodeBase; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.net.HttpHeaders; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.StringUtils; import javax.servlet.ServletContext; @@ -123,8 +123,7 @@ protected void printTopology(PrintStream stream, List leaves, protected void printJsonFormat(PrintStream stream, Map> tree, ArrayList racks) throws IOException { - JsonFactory dumpFactory = new JsonFactory(); - JsonGenerator dumpGenerator = dumpFactory.createGenerator(stream); + JsonGenerator dumpGenerator = JacksonUtil.getSharedWriter().createGenerator(stream); dumpGenerator.writeStartArray(); for(String r : racks) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java index 449a1aa62ab46..17cd49c2d5708 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java @@ -21,7 +21,6 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; @@ -29,6 +28,7 @@ import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.classification.InterfaceAudience; @@ -61,7 +61,7 @@ protected void doGet(HttpServletRequest req, HttpServletResponse resp) StartupProgress prog = NameNodeHttpServer.getStartupProgressFromContext( getServletContext()); StartupProgressView view = prog.createView(); - JsonGenerator json = new JsonFactory().createGenerator(resp.getWriter()); + JsonGenerator json = 
JacksonUtil.getSharedWriter().createGenerator(resp.getWriter()); try { json.writeStartObject(); json.writeNumberField(ELAPSED_TIME, view.getElapsedTime()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java index 5f90404ebee25..1ec6730bb87d2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.web; +import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.ContentSummary; @@ -38,13 +39,12 @@ import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Lists; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; -import com.fasterxml.jackson.databind.ObjectMapper; - import java.io.IOException; import java.util.*; @@ -52,11 +52,11 @@ public class JsonUtil { private static final Object[] EMPTY_OBJECT_ARRAY = {}; - // Reuse ObjectMapper instance for improving performance. - // ObjectMapper is thread safe as long as we always configure instance + // Reuse ObjectWriter instance for improving performance. + // ObjectWriter is thread safe as long as we always configure instance // before use. We don't have a re-entrant call pattern in WebHDFS, // so we just need to worry about thread-safety. - private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final ObjectWriter SHARED_WRITER = JacksonUtil.getSharedWriter(); /** Convert a token object to a Json string. */ public static String toJsonString(final Token token @@ -93,7 +93,7 @@ public static String toJsonString(final String key, final Object value) { final Map m = new TreeMap(); m.put(key, value); try { - return MAPPER.writeValueAsString(m); + return SHARED_WRITER.writeValueAsString(m); } catch (IOException ignored) { } return null; @@ -113,7 +113,7 @@ public static String toJsonString(final HdfsFileStatus status, final Map m = toJsonMap(status); try { return includeType ? 
- toJsonString(FileStatus.class, m) : MAPPER.writeValueAsString(m); + toJsonString(FileStatus.class, m) : SHARED_WRITER.writeValueAsString(m); } catch (IOException ignored) { } return null; @@ -453,7 +453,7 @@ public static String toJsonString(final AclStatus status) { finalMap.put(AclStatus.class.getSimpleName(), m); try { - return MAPPER.writeValueAsString(finalMap); + return SHARED_WRITER.writeValueAsString(finalMap); } catch (IOException ignored) { } return null; @@ -491,7 +491,7 @@ public static String toJsonString(final List xAttrs, final XAttrCodec encoding) throws IOException { final Map finalMap = new TreeMap(); finalMap.put("XAttrs", toJsonArray(xAttrs, encoding)); - return MAPPER.writeValueAsString(finalMap); + return SHARED_WRITER.writeValueAsString(finalMap); } public static String toJsonString(final List xAttrs) @@ -500,14 +500,14 @@ public static String toJsonString(final List xAttrs) for (XAttr xAttr : xAttrs) { names.add(XAttrHelper.getPrefixedName(xAttr)); } - String ret = MAPPER.writeValueAsString(names); + String ret = SHARED_WRITER.writeValueAsString(names); final Map finalMap = new TreeMap(); finalMap.put("XAttrNames", ret); - return MAPPER.writeValueAsString(finalMap); + return SHARED_WRITER.writeValueAsString(finalMap); } public static String toJsonString(Object obj) throws IOException { - return MAPPER.writeValueAsString(obj); + return SHARED_WRITER.writeValueAsString(obj); } public static String toJsonString(BlockStoragePolicy[] storagePolicies) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/QueueManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/QueueManager.java index ec43bce678b26..3a44b427928d8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/QueueManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/QueueManager.java @@ -18,7 +18,6 @@ package org.apache.hadoop.mapred; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerationException; import com.fasterxml.jackson.core.JsonGenerator; import org.apache.hadoop.classification.InterfaceAudience; @@ -28,6 +27,7 @@ import org.apache.hadoop.mapreduce.QueueState; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -531,8 +531,7 @@ static void dumpConfiguration(Writer out, String configFile, return; } - JsonFactory dumpFactory = new JsonFactory(); - JsonGenerator dumpGenerator = dumpFactory.createGenerator(out); + JsonGenerator dumpGenerator = JacksonUtil.getSharedWriter().createGenerator(out); QueueConfigurationParser parser; boolean aclsEnabled = false; if (conf != null) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/JobHistoryEventUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/JobHistoryEventUtils.java index b5c8b1178d1dd..c7cd7a63a8692 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/JobHistoryEventUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/JobHistoryEventUtils.java @@ -28,6 +28,7 @@ import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.CounterGroup; import org.apache.hadoop.mapreduce.Counters; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.timelineservice.TimelineMetric; /** @@ -41,7 +42,7 @@ private JobHistoryEventUtils() { public static final int ATS_CONFIG_PUBLISH_SIZE_BYTES = 10 * 1024; public static JsonNode countersToJSON(Counters counters) { - ObjectMapper mapper = new ObjectMapper(); + ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); ArrayNode nodes = mapper.createArrayNode(); if (counters != null) { for (CounterGroup counterGroup : counters) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AEncryption.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AEncryption.java index a720d2ca10000..00692abcf182f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AEncryption.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AEncryption.java @@ -22,7 +22,6 @@ import java.nio.charset.StandardCharsets; import java.util.Map; -import com.fasterxml.jackson.databind.ObjectMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,6 +29,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.S3AUtils; +import org.apache.hadoop.util.JacksonUtil; import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_CONTEXT; @@ -91,8 +91,8 @@ public static String getS3EncryptionContextBase64Encoded( if (encryptionContextMap.isEmpty()) { return ""; } - final String encryptionContextJson = new ObjectMapper().writeValueAsString( - encryptionContextMap); + final String encryptionContextJson = JacksonUtil.getSharedWriter() + .writeValueAsString(encryptionContextMap); return Base64.encodeBase64String(encryptionContextJson.getBytes(StandardCharsets.UTF_8)); } catch (IOException e) { if (propagateExceptions) { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java index 4e777da8b409f..2b59452a32d86 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java @@ -84,6 +84,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticatedURL; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.LambdaUtils; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Time; @@ -96,7 +97,6 @@ import static org.apache.hadoop.fs.azure.NativeAzureFileSystemHelper.*; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; -import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.VisibleForTesting; import com.microsoft.azure.storage.StorageException; @@ -127,7 +127,7 @@ public static class 
FolderRenamePending { private static final int FORMATTING_BUFFER = 10000; private boolean committed; public static final String SUFFIX = "-RenamePending.json"; - private static final ObjectReader READER = new ObjectMapper() + private static final ObjectReader READER = JacksonUtil.createBasicObjectMapper() .configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true) .readerFor(JsonNode.class); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteSASKeyGeneratorImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteSASKeyGeneratorImpl.java index 473fa54f97c83..3f8862e6d1def 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteSASKeyGeneratorImpl.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteSASKeyGeneratorImpl.java @@ -24,11 +24,11 @@ import java.util.List; import java.util.concurrent.TimeUnit; -import com.fasterxml.jackson.databind.ObjectReader; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.azure.security.Constants; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryUtils; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.http.NameValuePair; @@ -40,7 +40,7 @@ import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.databind.JsonMappingException; -import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectReader; import static org.apache.hadoop.fs.azure.WasbRemoteCallHelper.REMOTE_CALL_SUCCESS_CODE; @@ -53,8 +53,8 @@ public class RemoteSASKeyGeneratorImpl extends SASKeyGeneratorImpl { public static final Logger LOG = LoggerFactory.getLogger(AzureNativeFileSystemStore.class); - private static final ObjectReader RESPONSE_READER = new ObjectMapper() - .readerFor(RemoteSASKeyGenerationResponse.class); + private static final ObjectReader RESPONSE_READER = JacksonUtil + .createBasicReaderFor(RemoteSASKeyGenerationResponse.class); /** * Configuration parameter name expected in the Configuration diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java index eca8443b6c587..7bcaecdba5b0b 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java @@ -20,7 +20,6 @@ import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.databind.JsonMappingException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.commons.lang3.StringUtils; @@ -29,13 +28,14 @@ import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryUtils; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.JacksonUtil; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.utils.URIBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.util.concurrent.TimeUnit; import java.io.IOException; +import java.util.concurrent.TimeUnit; import static org.apache.hadoop.fs.azure.WasbRemoteCallHelper.REMOTE_CALL_SUCCESS_CODE; @@ -49,8 +49,8 @@ public class 
RemoteWasbAuthorizerImpl implements WasbAuthorizerInterface { public static final Logger LOG = LoggerFactory .getLogger(RemoteWasbAuthorizerImpl.class); - private static final ObjectReader RESPONSE_READER = new ObjectMapper() - .readerFor(RemoteWasbAuthorizerResponse.class); + private static final ObjectReader RESPONSE_READER = JacksonUtil + .createBasicReaderFor(RemoteWasbAuthorizerResponse.class); /** * Configuration parameter name expected in the Configuration object to @@ -176,7 +176,7 @@ private boolean authorizeInternal(String wasbAbsolutePath, String accessType, St uriBuilder .addParameter(WASB_ABSOLUTE_PATH_QUERY_PARAM_NAME, wasbAbsolutePath); uriBuilder.addParameter(ACCESS_OPERATION_QUERY_PARAM_NAME, accessType); - if (resourceOwner != null && StringUtils.isNotEmpty(resourceOwner)) { + if (StringUtils.isNotEmpty(resourceOwner)) { uriBuilder.addParameter(WASB_RESOURCE_OWNER_QUERY_PARAM_NAME, resourceOwner); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java index dab4d79658451..ab0282e19fc1d 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java @@ -29,9 +29,6 @@ import java.util.Hashtable; import java.util.Map; -import org.apache.hadoop.util.Preconditions; - -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import org.slf4j.Logger; @@ -42,6 +39,8 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.azurebfs.services.AbfsIoUtils; import org.apache.hadoop.fs.azurebfs.services.ExponentialRetryPolicy; +import org.apache.hadoop.util.JacksonUtil; +import org.apache.hadoop.util.Preconditions; /** * This class provides convenience methods to obtain AAD tokens. @@ -493,8 +492,7 @@ private static AzureADToken parseTokenFromStream( int expiryPeriodInSecs = 0; long expiresOnInSecs = -1; - JsonFactory jf = new JsonFactory(); - JsonParser jp = jf.createParser(httpResponseStream); + JsonParser jp = JacksonUtil.createBasicJsonFactory().createParser(httpResponseStream); String fieldName, fieldValue; jp.nextToken(); while (jp.hasCurrentToken()) { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index e2ce5c628a4b6..658f2cfe65167 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -30,7 +30,6 @@ import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; -import com.fasterxml.jackson.databind.ObjectMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -40,6 +39,7 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.AbfsPerfLoggable; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; import org.apache.hadoop.fs.azurebfs.utils.UriUtils; +import org.apache.hadoop.util.JacksonUtil; /** * Base Http operation class for orchestrating server IO calls. 
Child classes would @@ -447,7 +447,7 @@ private void processStorageErrorResponse() { if (stream == null) { return; } - JsonFactory jf = new JsonFactory(); + JsonFactory jf = JacksonUtil.createBasicJsonFactory(); try (JsonParser jp = jf.createParser(stream)) { String fieldName, fieldValue; jp.nextToken(); // START_OBJECT - { @@ -509,8 +509,7 @@ private void parseListFilesResponse(final InputStream stream) } try { - final ObjectMapper objectMapper = new ObjectMapper(); - this.listResultSchema = objectMapper.readValue(stream, + this.listResultSchema = JacksonUtil.getSharedReader().readValue(stream, ListResultSchema.class); } catch (IOException ex) { log.error("Unable to deserialize list results", ex); diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java index f6c8a6ac4d58b..04e98754ca837 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java @@ -51,6 +51,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Time; import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; @@ -484,7 +485,7 @@ static Set parseStaleDataNodeList(String liveNodeJsonString, final int blockThreshold, final Logger log) throws IOException { final Set dataNodesToReport = new HashSet<>(); - JsonFactory fac = new JsonFactory(); + JsonFactory fac = JacksonUtil.createBasicJsonFactory(); JsonParser parser = fac.createParser(IOUtils .toInputStream(liveNodeJsonString, StandardCharsets.UTF_8.name())); @@ -554,7 +555,7 @@ static String fetchNameNodeJMXValue(Properties nameNodeProperties, "Unable to retrieve JMX: " + conn.getResponseMessage()); } InputStream in = conn.getInputStream(); - JsonFactory fac = new JsonFactory(); + JsonFactory fac = JacksonUtil.createBasicJsonFactory(); JsonParser parser = fac.createParser(in); if (parser.nextToken() != JsonToken.START_OBJECT || parser.nextToken() != JsonToken.FIELD_NAME diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Anonymizer.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Anonymizer.java index 3c85a93ddbfc9..dc0856cd58a09 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Anonymizer.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Anonymizer.java @@ -22,7 +22,6 @@ import java.io.OutputStream; import com.fasterxml.jackson.core.JsonEncoding; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.Version; import com.fasterxml.jackson.databind.ObjectMapper; @@ -36,6 +35,7 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.io.compress.Compressor; import org.apache.hadoop.mapreduce.ID; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.tools.rumen.datatypes.*; @@ -55,8 +55,7 @@ public class 
Anonymizer extends Configured implements Tool { private StatePool statePool; private ObjectMapper outMapper = null; - private JsonFactory outFactory = null; - + private void initialize(String[] args) throws Exception { try { for (int i = 0; i < args.length; ++i) { @@ -85,7 +84,7 @@ private void initialize(String[] args) throws Exception { // initialize the state manager after the anonymizers are registered statePool.initialize(getConf()); - outMapper = new ObjectMapper(); + outMapper = JacksonUtil.createBasicObjectMapper(); // define a module SimpleModule module = new SimpleModule( "Anonymization Serializer", new Version(0, 1, 1, "FINAL", "", "")); @@ -104,8 +103,6 @@ private void initialize(String[] args) throws Exception { // register the module with the object-mapper outMapper.registerModule(module); - - outFactory = outMapper.getFactory(); } // anonymize the job trace file @@ -191,7 +188,7 @@ private JsonGenerator createJsonGenerator(Configuration conf, Path path) } JsonGenerator outGen = - outFactory.createGenerator(output, JsonEncoding.UTF8); + outMapper.createGenerator(output, JsonEncoding.UTF8); outGen.useDefaultPrettyPrinter(); return outGen; diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperParser.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperParser.java index f95878dde95e3..3d644b5ad2272 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperParser.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperParser.java @@ -26,6 +26,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.JacksonUtil; /** * A simple wrapper for parsing JSON-encoded data using ObjectMapper. @@ -48,10 +49,10 @@ class JsonObjectMapperParser implements Closeable { */ public JsonObjectMapperParser(Path path, Class clazz, Configuration conf) throws IOException { - mapper = new ObjectMapper(); + mapper = JacksonUtil.createBasicObjectMapper(); this.clazz = clazz; InputStream input = new PossiblyDecompressedInputStream(path, conf); - jsonParser = mapper.getFactory().createParser(input); + jsonParser = mapper.createParser(input); } /** @@ -62,9 +63,9 @@ public JsonObjectMapperParser(Path path, Class clazz, */ public JsonObjectMapperParser(InputStream input, Class clazz) throws IOException { - mapper = new ObjectMapper(); + mapper = JacksonUtil.createBasicObjectMapper(); this.clazz = clazz; - jsonParser = mapper.getFactory().createParser(input); + jsonParser = mapper.createParser(input); } /** diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperWriter.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperWriter.java index 747b141fd98be..e0caa18fff792 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperWriter.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperWriter.java @@ -30,6 +30,7 @@ import org.apache.hadoop.tools.rumen.datatypes.DataType; import org.apache.hadoop.tools.rumen.serializers.DefaultRumenSerializer; import org.apache.hadoop.tools.rumen.serializers.ObjectStringSerializer; +import org.apache.hadoop.util.JacksonUtil; /** * Simple wrapper around {@link JsonGenerator} to write objects in JSON format. 
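The rumen changes above follow the two-step pattern used throughout this patch: obtain a long-lived mapper from JacksonUtil instead of allocating a new ObjectMapper on every call, then create parsers and generators directly from that mapper rather than going through mapper.getFactory(). A minimal sketch of the pattern, assuming the org.apache.hadoop.util.JacksonUtil factory methods behave as they are used in this patch and a Jackson 2.11+ ObjectMapper (which exposes createParser/createGenerator directly); the sketch class and method names are illustrative only:

    import java.io.IOException;
    import java.io.InputStream;
    import java.io.OutputStream;

    import com.fasterxml.jackson.core.JsonEncoding;
    import com.fasterxml.jackson.core.JsonGenerator;
    import com.fasterxml.jackson.core.JsonParser;
    import com.fasterxml.jackson.databind.ObjectMapper;

    import org.apache.hadoop.util.JacksonUtil;

    public final class JacksonUtilUsageSketch {

      // One mapper per class, created once and reused; this is the reuse the
      // patch's comments describe as cheaper than new ObjectMapper() per call.
      private static final ObjectMapper MAPPER = JacksonUtil.createBasicObjectMapper();

      private JacksonUtilUsageSketch() {
      }

      // Parser comes straight from the mapper; no mapper.getFactory() hop,
      // as in the JsonObjectMapperParser hunks above.
      static JsonParser openParser(InputStream in) throws IOException {
        return MAPPER.createParser(in);
      }

      // Same for generators, matching the Anonymizer hunk above.
      static JsonGenerator openGenerator(OutputStream out) throws IOException {
        return MAPPER.createGenerator(out, JsonEncoding.UTF8);
      }
    }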
@@ -39,7 +40,7 @@ public class JsonObjectMapperWriter implements Closeable { private JsonGenerator writer; public JsonObjectMapperWriter(OutputStream output, boolean prettyPrint) throws IOException { - ObjectMapper mapper = new ObjectMapper(); + ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); // define a module SimpleModule module = new SimpleModule( @@ -53,7 +54,7 @@ public JsonObjectMapperWriter(OutputStream output, boolean prettyPrint) throws I // register the module with the object-mapper mapper.registerModule(module); - writer = mapper.getFactory().createGenerator(output, JsonEncoding.UTF8); + writer = mapper.createGenerator(output, JsonEncoding.UTF8); if (prettyPrint) { writer.useDefaultPrettyPrinter(); } diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/state/StatePool.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/state/StatePool.java index ab6f8942e7cfb..0c594afc3b72c 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/state/StatePool.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/state/StatePool.java @@ -30,7 +30,6 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.core.JsonEncoding; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.Version; @@ -44,6 +43,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.tools.rumen.Anonymizer; import org.apache.hadoop.tools.rumen.datatypes.DataType; +import org.apache.hadoop.util.JacksonUtil; /** * A pool of states. States used by {@link DataType}'s can be managed the @@ -206,7 +206,7 @@ private boolean reloadState(Path stateFile, Configuration configuration) } private void read(DataInput in) throws IOException { - ObjectMapper mapper = new ObjectMapper(); + ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); // define a module SimpleModule module = new SimpleModule("State Serializer", new Version(0, 1, 1, "FINAL", "", "")); @@ -216,7 +216,7 @@ private void read(DataInput in) throws IOException { // register the module with the object-mapper mapper.registerModule(module); - JsonParser parser = mapper.getFactory().createParser((InputStream)in); + JsonParser parser = mapper.createParser((InputStream)in); StatePool statePool = mapper.readValue(parser, StatePool.class); this.setStates(statePool.getStates()); parser.close(); @@ -273,7 +273,7 @@ public void persist() throws IOException { private void write(DataOutput out) throws IOException { // This is just a JSON experiment System.out.println("Dumping the StatePool's in JSON format."); - ObjectMapper outMapper = new ObjectMapper(); + ObjectMapper outMapper = JacksonUtil.createBasicObjectMapper(); // define a module SimpleModule module = new SimpleModule("State Serializer", new Version(0, 1, 1, "FINAL", "", "")); @@ -283,9 +283,8 @@ private void write(DataOutput out) throws IOException { // register the module with the object-mapper outMapper.registerModule(module); - JsonFactory outFactory = outMapper.getFactory(); JsonGenerator jGen = - outFactory.createGenerator((OutputStream)out, JsonEncoding.UTF8); + outMapper.createGenerator((OutputStream)out, JsonEncoding.UTF8); jGen.useDefaultPrettyPrinter(); jGen.writeObject(this); diff --git a/hadoop-tools/hadoop-rumen/src/test/java/org/apache/hadoop/tools/rumen/TestHistograms.java 
b/hadoop-tools/hadoop-rumen/src/test/java/org/apache/hadoop/tools/rumen/TestHistograms.java index 187251900b75d..db6d47cf0726e 100644 --- a/hadoop-tools/hadoop-rumen/src/test/java/org/apache/hadoop/tools/rumen/TestHistograms.java +++ b/hadoop-tools/hadoop-rumen/src/test/java/org/apache/hadoop/tools/rumen/TestHistograms.java @@ -23,7 +23,6 @@ import java.util.List; import com.fasterxml.jackson.core.JsonEncoding; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.ObjectMapper; @@ -141,9 +140,8 @@ public static void main(String[] args) throws IOException { Path goldFilePath = new Path(filePath.getParent(), "gold"+testName); ObjectMapper mapper = new ObjectMapper(); - JsonFactory factory = mapper.getFactory(); FSDataOutputStream ostream = lfs.create(goldFilePath, true); - JsonGenerator gen = factory.createGenerator((OutputStream)ostream, + JsonGenerator gen = mapper.createGenerator((OutputStream)ostream, JsonEncoding.UTF8); gen.useDefaultPrettyPrinter(); diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/AMRunner.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/AMRunner.java index 2dc09de665368..0d943471c6f9c 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/AMRunner.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/AMRunner.java @@ -16,13 +16,13 @@ package org.apache.hadoop.yarn.sls; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.databind.JavaType; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.tools.rumen.JobTraceReader; import org.apache.hadoop.tools.rumen.LoggedJob; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ReservationId; @@ -44,11 +44,8 @@ import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Collections; import java.util.HashMap; import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -122,15 +119,14 @@ public void startAM() throws YarnException, IOException { * Parse workload from a SLS trace file. */ private void startAMFromSLSTrace(String inputTrace) throws IOException { - JsonFactory jsonF = new JsonFactory(); - ObjectMapper mapper = new ObjectMapper(); + ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); try (Reader input = new InputStreamReader( new FileInputStream(inputTrace), StandardCharsets.UTF_8)) { JavaType type = mapper.getTypeFactory(). 
constructMapType(Map.class, String.class, String.class); Iterator> jobIter = mapper.readValues( - jsonF.createParser(input), type); + mapper.createParser(input), type); while (jobIter.hasNext()) { try { diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/RumenToSLSConverter.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/RumenToSLSConverter.java index 2cdfe236c410d..9b25275912377 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/RumenToSLSConverter.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/RumenToSLSConverter.java @@ -35,7 +35,6 @@ import java.util.TreeMap; import java.util.TreeSet; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.commons.cli.CommandLine; @@ -44,6 +43,7 @@ import org.apache.commons.cli.Options; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.sls.utils.SLSUtils; @Private @@ -126,10 +126,10 @@ private static void generateSLSLoadFile(String inputFile, String outputFile) StandardCharsets.UTF_8)) { try (Writer output = new OutputStreamWriter(new FileOutputStream(outputFile), StandardCharsets.UTF_8)) { - ObjectMapper mapper = new ObjectMapper(); + ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); ObjectWriter writer = mapper.writerWithDefaultPrettyPrinter(); Iterator i = mapper.readValues( - new JsonFactory().createParser(input), Map.class); + mapper.createParser(input), Map.class); while (i.hasNext()) { Map m = i.next(); output.write(writer.writeValueAsString(createSLSJob(m)) + EOL); @@ -143,7 +143,7 @@ private static void generateSLSNodeFile(String outputFile) throws IOException { try (Writer output = new OutputStreamWriter(new FileOutputStream(outputFile), StandardCharsets.UTF_8)) { - ObjectMapper mapper = new ObjectMapper(); + ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); ObjectWriter writer = mapper.writerWithDefaultPrettyPrinter(); for (Map.Entry> entry : rackNodeMap.entrySet()) { Map rack = new LinkedHashMap(); diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthTraceJobProducer.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthTraceJobProducer.java index 18b1c034bdf3a..58f8b59ba65e9 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthTraceJobProducer.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthTraceJobProducer.java @@ -34,6 +34,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.tools.rumen.JobStory; import org.apache.hadoop.tools.rumen.JobStoryProducer; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.sls.appmaster.MRAMSimulator; @@ -88,7 +89,8 @@ public SynthTraceJobProducer(Configuration conf, Path path) JsonFactoryBuilder jsonFactoryBuilder = new JsonFactoryBuilder(); jsonFactoryBuilder.configure(JsonFactory.Feature.INTERN_FIELD_NAMES, true); - ObjectMapper mapper = new ObjectMapper(jsonFactoryBuilder.build()); + + ObjectMapper mapper = JacksonUtil.createObjectMapper(jsonFactoryBuilder.build()); 
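// Aside (illustrative comment, hedged from the hunks in this patch): the SLS and
// synthetic-trace code builds its own JsonFactory via JsonFactoryBuilder, with
// INTERN_FIELD_NAMES enabled, and wraps it with JacksonUtil.createObjectMapper(factory)
// so that the custom factory configuration is preserved; call sites that need no
// custom factory use JacksonUtil.createBasicObjectMapper() instead.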
mapper.configure(FAIL_ON_UNKNOWN_PROPERTIES, false); FileSystem ifs = path.getFileSystem(conf); diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java index af0b4f6caf3ab..676ef13b5a8e4 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java @@ -34,7 +34,6 @@ import java.util.Map; import java.util.Set; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; @@ -45,6 +44,7 @@ import org.apache.hadoop.tools.rumen.LoggedJob; import org.apache.hadoop.tools.rumen.LoggedTask; import org.apache.hadoop.tools.rumen.LoggedTaskAttempt; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceInformation; @@ -120,12 +120,11 @@ public static Set parseNodesFromRumenTrace( public static Set parseNodesFromSLSTrace( String jobTrace) throws IOException { Set nodeSet = new HashSet<>(); - JsonFactory jsonF = new JsonFactory(); - ObjectMapper mapper = new ObjectMapper(); + ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); Reader input = new InputStreamReader(new FileInputStream(jobTrace), StandardCharsets.UTF_8); try { - Iterator i = mapper.readValues(jsonF.createParser(input), Map.class); + Iterator i = mapper.readValues(mapper.createParser(input), Map.class); while (i.hasNext()) { addNodes(nodeSet, i.next()); } @@ -167,12 +166,11 @@ private static void addNodes(Set nodeSet, public static Set parseNodesFromNodeFile( String nodeFile, Resource nmDefaultResource) throws IOException { Set nodeSet = new HashSet<>(); - JsonFactory jsonF = new JsonFactory(); - ObjectMapper mapper = new ObjectMapper(); + ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); Reader input = new InputStreamReader(new FileInputStream(nodeFile), StandardCharsets.UTF_8); try { - Iterator i = mapper.readValues(jsonF.createParser(input), Map.class); + Iterator i = mapper.readValues(mapper.createParser(input), Map.class); while (i.hasNext()) { Map jsonE = i.next(); String rack = "/" + jsonE.get("rack"); diff --git a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSynthJobGeneration.java b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSynthJobGeneration.java index dd12a10f94612..f690808f8e143 100644 --- a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSynthJobGeneration.java +++ b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSynthJobGeneration.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.sls; import org.apache.commons.math3.random.JDKRandomGenerator; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.sls.synthetic.SynthJob; @@ -60,7 +61,7 @@ public void testWorkloadGenerateTime() JsonFactoryBuilder jsonFactoryBuilder = new JsonFactoryBuilder(); jsonFactoryBuilder.configure(JsonFactory.Feature.INTERN_FIELD_NAMES, true); - ObjectMapper mapper = new ObjectMapper(jsonFactoryBuilder.build()); + ObjectMapper mapper = 
JacksonUtil.createObjectMapper(jsonFactoryBuilder.build()); mapper.configure(FAIL_ON_UNKNOWN_PROPERTIES, false); SynthTraceJobProducer.Workload wl = mapper.readValue(workloadJson, SynthTraceJobProducer.Workload.class); @@ -181,7 +182,7 @@ public void testSample() throws IOException { JsonFactoryBuilder jsonFactoryBuilder = new JsonFactoryBuilder(); jsonFactoryBuilder.configure(JsonFactory.Feature.INTERN_FIELD_NAMES, true); - ObjectMapper mapper = new ObjectMapper(jsonFactoryBuilder.build()); + ObjectMapper mapper = JacksonUtil.createObjectMapper(jsonFactoryBuilder.build()); mapper.configure(FAIL_ON_UNKNOWN_PROPERTIES, false); JDKRandomGenerator rand = new JDKRandomGenerator(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/AppCatalogSolrClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/AppCatalogSolrClient.java index ac8dbbac61d35..ea7a0ecdef669 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/AppCatalogSolrClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/AppCatalogSolrClient.java @@ -28,6 +28,7 @@ import java.util.Properties; import java.util.Random; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.appcatalog.model.AppEntry; import org.apache.hadoop.yarn.appcatalog.model.AppStoreEntry; import org.apache.hadoop.yarn.appcatalog.model.Application; @@ -57,6 +58,18 @@ public class AppCatalogSolrClient { private static final Logger LOG = LoggerFactory.getLogger(AppCatalogSolrClient.class); private static String urlString; + /** + * It is more performant to reuse ObjectMapper instances but keeping the instance + * private makes it harder for someone to reconfigure it which might have unwanted + * side effects. 
+ */ + private static final ObjectMapper OBJECT_MAPPER; + + static { + OBJECT_MAPPER = JacksonUtil.createBasicObjectMapper(); + OBJECT_MAPPER.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + } + public AppCatalogSolrClient() { // Locate Solr URL ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); @@ -146,8 +159,6 @@ public List search(String keyword) { public List listAppEntries() { List list = new ArrayList(); - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); SolrClient solr = getSolrClient(); SolrQuery query = new SolrQuery(); @@ -164,7 +175,7 @@ public List listAppEntries() { entry.setId(d.get("id").toString()); entry.setName(d.get("name_s").toString()); entry.setApp(d.get("app_s").toString()); - entry.setYarnfile(mapper.readValue(d.get("yarnfile_s").toString(), + entry.setYarnfile(OBJECT_MAPPER.readValue(d.get("yarnfile_s").toString(), Service.class)); list.add(entry); } @@ -176,8 +187,6 @@ public List listAppEntries() { public AppStoreEntry findAppStoreEntry(String id) { AppStoreEntry entry = new AppStoreEntry(); - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); SolrClient solr = getSolrClient(); SolrQuery query = new SolrQuery(); @@ -197,7 +206,7 @@ public AppStoreEntry findAppStoreEntry(String id) { entry.setDesc(d.get("desc_s").toString()); entry.setLike(Integer.parseInt(d.get("like_i").toString())); entry.setDownload(Integer.parseInt(d.get("download_i").toString())); - Service yarnApp = mapper.readValue(d.get("yarnfile_s").toString(), + Service yarnApp = OBJECT_MAPPER.readValue(d.get("yarnfile_s").toString(), Service.class); String name; try { @@ -222,9 +231,6 @@ public AppStoreEntry findAppStoreEntry(String id) { public AppEntry findAppEntry(String id) { AppEntry entry = new AppEntry(); - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - SolrClient solr = getSolrClient(); SolrQuery query = new SolrQuery(); query.setQuery("id:" + id); @@ -240,7 +246,7 @@ public AppEntry findAppEntry(String id) { entry.setId(d.get("id").toString()); entry.setApp(d.get("app_s").toString()); entry.setName(d.get("name_s").toString()); - entry.setYarnfile(mapper.readValue(d.get("yarnfile_s").toString(), + entry.setYarnfile(OBJECT_MAPPER.readValue(d.get("yarnfile_s").toString(), Service.class)); } } catch (SolrServerException | IOException e) { @@ -252,8 +258,6 @@ public AppEntry findAppEntry(String id) { public void deployApp(String id, Service service) throws SolrServerException, IOException { long download = 0; - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); Collection docs = new HashSet(); SolrClient solr = getSolrClient(); // Find application information from AppStore @@ -287,7 +291,7 @@ public void deployApp(String id, Service service) throws SolrServerException, request.addField("id", name); request.addField("name_s", name); request.addField("app_s", entry.getOrg()+"/"+entry.getName()); - request.addField("yarnfile_s", mapper.writeValueAsString(service)); + request.addField("yarnfile_s", OBJECT_MAPPER.writeValueAsString(service)); docs.add(request); } @@ -326,8 +330,6 @@ public void deleteApp(String id) { public void register(Application app) throws IOException { Collection docs = new HashSet(); SolrClient solr = getSolrClient(); - ObjectMapper mapper = new 
ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); try { SolrInputDocument buffer = new SolrInputDocument(); buffer.setField("id", java.util.UUID.randomUUID().toString() @@ -343,10 +345,10 @@ public void register(Application app) throws IOException { buffer.setField("download_i", 0); // Keep only YARN data model for yarnfile field - String yarnFile = mapper.writeValueAsString(app); - LOG.info("app:"+yarnFile); - Service yarnApp = mapper.readValue(yarnFile, Service.class); - buffer.setField("yarnfile_s", mapper.writeValueAsString(yarnApp)); + String yarnFile = OBJECT_MAPPER.writeValueAsString(app); + LOG.info("app:{}", yarnFile); + Service yarnApp = OBJECT_MAPPER.readValue(yarnFile, Service.class); + buffer.setField("yarnfile_s", OBJECT_MAPPER.writeValueAsString(yarnApp)); docs.add(buffer); commitSolrChanges(solr, docs); @@ -359,8 +361,6 @@ public void register(Application app) throws IOException { protected void register(AppStoreEntry app) throws IOException { Collection docs = new HashSet(); SolrClient solr = getSolrClient(); - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); try { SolrInputDocument buffer = new SolrInputDocument(); buffer.setField("id", java.util.UUID.randomUUID().toString() @@ -376,10 +376,10 @@ protected void register(AppStoreEntry app) throws IOException { buffer.setField("download_i", app.getDownload()); // Keep only YARN data model for yarnfile field - String yarnFile = mapper.writeValueAsString(app); - LOG.info("app:"+yarnFile); - Service yarnApp = mapper.readValue(yarnFile, Service.class); - buffer.setField("yarnfile_s", mapper.writeValueAsString(yarnApp)); + String yarnFile = OBJECT_MAPPER.writeValueAsString(app); + LOG.info("app:{}", yarnFile); + Service yarnApp = OBJECT_MAPPER.readValue(yarnFile, Service.class); + buffer.setField("yarnfile_s", OBJECT_MAPPER.writeValueAsString(yarnApp)); docs.add(buffer); commitSolrChanges(solr, docs); @@ -391,8 +391,6 @@ protected void register(AppStoreEntry app) throws IOException { public void upgradeApp(Service service) throws IOException, SolrServerException { - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); Collection docs = new HashSet(); SolrClient solr = getSolrClient(); if (service!=null) { @@ -420,7 +418,7 @@ public void upgradeApp(Service service) throws IOException, request.addField("id", name); request.addField("name_s", name); request.addField("app_s", app); - request.addField("yarnfile_s", mapper.writeValueAsString(service)); + request.addField("yarnfile_s", OBJECT_MAPPER.writeValueAsString(service)); docs.add(request); } try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/YarnServiceClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/YarnServiceClient.java index 185b1c8ddebd5..57c4b353d099c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/YarnServiceClient.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/YarnServiceClient.java @@ -23,6 +23,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.appcatalog.model.AppEntry; import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.api.records.ServiceState; @@ -46,6 +47,19 @@ public class YarnServiceClient { private static final Logger LOG = LoggerFactory.getLogger(YarnServiceClient.class); + + /** + * It is more performant to reuse ObjectMapper instances but keeping the instance + * private makes it harder for someone to reconfigure it which might have unwanted + * side effects. + */ + private static final ObjectMapper OBJECT_MAPPER; + + static { + OBJECT_MAPPER = JacksonUtil.createBasicObjectMapper(); + OBJECT_MAPPER.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + } + private static Configuration conf = new Configuration(); private static ClientConfig getClientConfig() { ClientConfig config = new DefaultClientConfig(); @@ -66,8 +80,6 @@ public YarnServiceClient() { } public void createApp(Service app) { - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); ClientResponse response; try { boolean useKerberos = UserGroupInformation.isSecurityEnabled(); @@ -90,7 +102,7 @@ public void createApp(Service app) { app.setKerberosPrincipal(kerberos); } response = asc.getApiClient().post(ClientResponse.class, - mapper.writeValueAsString(app)); + OBJECT_MAPPER.writeValueAsString(app)); if (response.getStatus() >= 299) { String message = response.getEntity(String.class); throw new RuntimeException("Failed : HTTP error code : " @@ -119,10 +131,8 @@ public void deleteApp(String appInstanceId) { } public void restartApp(Service app) throws JsonProcessingException { - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); String appInstanceId = app.getName(); - String yarnFile = mapper.writeValueAsString(app); + String yarnFile = OBJECT_MAPPER.writeValueAsString(app); ClientResponse response; try { response = asc.getApiClient(asc.getServicePath(appInstanceId)) @@ -139,10 +149,8 @@ public void restartApp(Service app) throws JsonProcessingException { } public void stopApp(Service app) throws JsonProcessingException { - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); String appInstanceId = app.getName(); - String yarnFile = mapper.writeValueAsString(app); + String yarnFile = OBJECT_MAPPER.writeValueAsString(app); ClientResponse response; try { response = asc.getApiClient(asc.getServicePath(appInstanceId)) @@ -159,14 +167,12 @@ public void stopApp(Service app) throws JsonProcessingException { } public void getStatus(AppEntry entry) { - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); String appInstanceId = entry.getName(); Service app = null; try { String yarnFile = asc.getApiClient(asc.getServicePath(appInstanceId)) .get(String.class); - app = mapper.readValue(yarnFile, Service.class); + app = OBJECT_MAPPER.readValue(yarnFile, Service.class); entry.setYarnfile(app); } catch (UniformInterfaceException | IOException e) { 
LOG.error("Error in fetching application status: ", e); @@ -174,11 +180,9 @@ public void getStatus(AppEntry entry) { } public void upgradeApp(Service app) throws JsonProcessingException { - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); String appInstanceId = app.getName(); app.setState(ServiceState.EXPRESS_UPGRADING); - String yarnFile = mapper.writeValueAsString(app); + String yarnFile = OBJECT_MAPPER.writeValueAsString(app); ClientResponse response; try { response = asc.getApiClient(asc.getServicePath(appInstanceId)) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java index cab4870493561..1e30fbd5ba1ec 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java @@ -26,6 +26,7 @@ import org.apache.hadoop.registry.client.binding.RegistryPathUtils; import org.apache.hadoop.registry.client.types.ServiceRecord; import org.apache.hadoop.registry.client.types.yarn.PersistencePolicies; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; @@ -875,7 +876,7 @@ public void updateContainerStatus(ContainerStatus status) { doRegistryUpdate = false; } } - ObjectMapper mapper = new ObjectMapper(); + final ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); try { Map>> ports = null; ports = mapper.readValue(status.getExposedPorts(), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/JsonSerDeser.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/JsonSerDeser.java index 254d6c5d37954..cf3d785a22ea6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/JsonSerDeser.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/JsonSerDeser.java @@ -30,6 +30,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.JacksonUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -61,9 +62,10 @@ public class JsonSerDeser { @SuppressWarnings("deprecation") public JsonSerDeser(Class classType) { this.classType = classType; - this.mapper = new ObjectMapper(); + this.mapper = JacksonUtil.createBasicObjectMapper(); mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); mapper.configure(SerializationFeature.WRITE_NULL_MAP_VALUES, false); + 
mapper.configure(SerializationFeature.INDENT_OUTPUT, true); } public JsonSerDeser(Class classType, PropertyNamingStrategy namingStrategy) { @@ -231,7 +233,6 @@ private void writeJsonAsBytes(T instance, * @throws JsonProcessingException parse problems */ public String toJson(T instance) throws JsonProcessingException { - mapper.configure(SerializationFeature.INDENT_OUTPUT, true); return mapper.writeValueAsString(instance); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/PublishedConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/PublishedConfiguration.java index e7ec2d6f5e7c2..ac30480fd8856 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/PublishedConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/PublishedConfiguration.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializationFeature; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.service.exceptions.BadConfigException; import java.io.IOException; @@ -41,6 +42,18 @@ @JsonInclude(value = JsonInclude.Include.NON_NULL) public class PublishedConfiguration { + /** + * It is more performant to reuse ObjectMapper instances but keeping the instance + * private makes it harder for someone to reconfigure it which might have unwanted + * side effects. 
+ */ + private static final ObjectMapper OBJECT_MAPPER; + + static { + OBJECT_MAPPER = JacksonUtil.createBasicObjectMapper(); + OBJECT_MAPPER.configure(SerializationFeature.INDENT_OUTPUT, true); + } + public String description; public long updated; @@ -154,9 +167,7 @@ public Properties asProperties() { * @throws IOException marshalling failure */ public String asJson() throws IOException { - ObjectMapper mapper = new ObjectMapper(); - mapper.configure(SerializationFeature.INDENT_OUTPUT, true); - String json = mapper.writeValueAsString(entries); + String json = OBJECT_MAPPER.writeValueAsString(entries); return json; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/FileSystemTimelineWriter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/FileSystemTimelineWriter.java index b92f4e412347c..dc60f9b274ede 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/FileSystemTimelineWriter.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/FileSystemTimelineWriter.java @@ -49,6 +49,7 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Time; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -60,7 +61,6 @@ import org.apache.hadoop.yarn.exceptions.YarnException; import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.util.MinimalPrettyPrinter; import com.fasterxml.jackson.databind.ObjectMapper; @@ -274,7 +274,7 @@ public void flush() throws IOException { } private ObjectMapper createObjectMapper() { - ObjectMapper mapper = new ObjectMapper(); + ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); mapper.setAnnotationIntrospector( new JaxbAnnotationIntrospector(TypeFactory.defaultInstance())); mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); @@ -365,8 +365,8 @@ public long getLastModifiedTime() { protected void prepareForWrite() throws IOException{ this.stream = createLogFileStream(fs, logPath); - this.jsonGenerator = new JsonFactory().createGenerator( - (OutputStream)stream); + this.jsonGenerator = JacksonUtil.getSharedWriter() + .createGenerator((OutputStream)stream); this.jsonGenerator.setPrettyPrinter(new MinimalPrettyPrinter("\n")); this.lastModifiedTime = Time.monotonicNow(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java index 45da0f444ba0d..0264e40c7be28 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java @@ -30,6 +30,7 @@ import org.apache.commons.cli.Options; import org.apache.hadoop.security.authentication.server.KerberosAuthenticationHandler; import 
org.apache.hadoop.security.authentication.server.PseudoAuthenticationHandler; +import org.apache.hadoop.util.JacksonUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -62,7 +63,7 @@ public class TimelineClientImpl extends TimelineClient { private static final Logger LOG = LoggerFactory.getLogger(TimelineClientImpl.class); - private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final ObjectMapper MAPPER = JacksonUtil.createBasicObjectMapper(); private static final String RESOURCE_URI_STR_V1 = "/ws/v1/timeline/"; private static Options opts; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java index 6351cb69c82e7..83b6a09607512 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java @@ -27,9 +27,9 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.security.DockerCredentialTokenIdentifier; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -96,9 +96,8 @@ public static Credentials readCredentialsFromConfigFile(Path configFile, } // Parse the JSON and create the Tokens/Credentials. 
- ObjectMapper mapper = new ObjectMapper(); - JsonFactory factory = mapper.getFactory(); - JsonParser parser = factory.createParser(contents); + ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + JsonParser parser = mapper.createParser(contents); JsonNode rootNode = mapper.readTree(parser); Credentials credentials = new Credentials(); @@ -161,7 +160,7 @@ public static boolean writeDockerCredentialsToPath(File outConfigFile, Credentials credentials) throws IOException { boolean foundDockerCred = false; if (credentials.numberOfTokens() > 0) { - ObjectMapper mapper = new ObjectMapper(); + ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); ObjectNode rootNode = mapper.createObjectNode(); ObjectNode registryUrlNode = mapper.createObjectNode(); for (Token tk : credentials.getAllTokens()) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/timeline/TimelineUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/timeline/TimelineUtils.java index 14b9b0ceb7d12..a36b96dca205a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/timeline/TimelineUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/timeline/TimelineUtils.java @@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.SecurityUtil; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout; @@ -53,11 +54,10 @@ public class TimelineUtils { "TIMELINE_FLOW_RUN_ID_TAG"; public final static String DEFAULT_FLOW_VERSION = "1"; - private static ObjectMapper mapper; + private static final ObjectMapper OBJECT_MAPPER = JacksonUtil.createBasicObjectMapper(); static { - mapper = new ObjectMapper(); - YarnJacksonJaxbJsonProvider.configObjectMapper(mapper); + YarnJacksonJaxbJsonProvider.configObjectMapper(OBJECT_MAPPER); } /** @@ -90,9 +90,9 @@ public static String dumpTimelineRecordtoJSON(Object o) public static String dumpTimelineRecordtoJSON(Object o, boolean pretty) throws JsonGenerationException, JsonMappingException, IOException { if (pretty) { - return mapper.writerWithDefaultPrettyPrinter().writeValueAsString(o); + return OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(o); } else { - return mapper.writeValueAsString(o); + return OBJECT_MAPPER.writeValueAsString(o); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Controller.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Controller.java index ad80a2eefe5bd..bf5500892de14 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Controller.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Controller.java @@ -28,8 +28,8 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.webapp.view.DefaultPage; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,7 +42,6 @@ 
@InterfaceAudience.LimitedPrivate({"YARN", "MapReduce"}) public abstract class Controller implements Params { public static final Logger LOG = LoggerFactory.getLogger(Controller.class); - static final ObjectMapper jsonMapper = new ObjectMapper(); @RequestScoped public static class RequestContext{ @@ -225,7 +224,7 @@ protected void renderJSON(Object object) { context().rendered = true; context().response.setContentType(MimeType.JSON); try { - jsonMapper.writeValue(writer(), object); + JacksonUtil.getSharedWriter().writeValue(writer(), object); } catch (Exception e) { throw new WebAppException(e); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/GenericObjectMapper.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/GenericObjectMapper.java index fdafcf0cd1c9d..440c5d6f0600c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/GenericObjectMapper.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/GenericObjectMapper.java @@ -19,11 +19,11 @@ import java.io.IOException; -import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.util.JacksonUtil; /** * A utility class providing methods for serializing and deserializing @@ -38,14 +38,8 @@ public class GenericObjectMapper { private static final byte[] EMPTY_BYTES = new byte[0]; - public static final ObjectReader OBJECT_READER; - public static final ObjectWriter OBJECT_WRITER; - - static { - ObjectMapper mapper = new ObjectMapper(); - OBJECT_READER = mapper.reader(Object.class); - OBJECT_WRITER = mapper.writer(); - } + public static final ObjectReader OBJECT_READER = JacksonUtil.createBasicReaderFor(Object.class); + public static final ObjectWriter OBJECT_WRITER = JacksonUtil.getSharedWriter(); /** * Serializes an Object into a byte array. 
Along with {@link #read(byte[])}, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java index 794ef9d9a4326..cbbc33706db34 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java @@ -43,6 +43,7 @@ import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.security.authorize.AccessControlList; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.server.nodemanager.containermanager.records.AuxServiceConfiguration; import org.apache.hadoop.yarn.server.nodemanager.containermanager.records.AuxServiceFile; import org.apache.hadoop.yarn.server.nodemanager.containermanager.records.AuxServiceRecord; @@ -135,7 +136,7 @@ public class AuxServices extends AbstractService this.dirsHandler = nmContext.getLocalDirsHandler(); this.delService = deletionService; this.userUGI = getRemoteUgi(); - this.mapper = new ObjectMapper(); + this.mapper = JacksonUtil.createBasicObjectMapper(); mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); // Obtain services from configuration in init() } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/NetworkTagMappingJsonManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/NetworkTagMappingJsonManager.java index cc2ded4422b71..3b4e26eda1ff3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/NetworkTagMappingJsonManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/NetworkTagMappingJsonManager.java @@ -28,11 +28,11 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; @@ -58,9 +58,8 @@ public void initialize(Configuration conf) { + " we have to set the configuration:" + YarnConfiguration.NM_NETWORK_TAG_MAPPING_FILE_PATH); } - ObjectMapper mapper = new ObjectMapper(); try { - networkTagMapping = mapper.readValue(new File(mappingJsonFile), + networkTagMapping = JacksonUtil.getSharedReader().readValue(new File(mappingJsonFile), 
NetworkTagMapping.class); } catch (Exception e) { throw new YarnRuntimeException(e); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/RuncContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/RuncContainerRuntime.java index 2c327c04ebaf1..86bb5113dd26b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/RuncContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/RuncContainerRuntime.java @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; import org.apache.hadoop.security.authorize.AccessControlList; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.concurrent.HadoopExecutors; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -91,6 +92,7 @@ import static org.apache.hadoop.yarn.conf.YarnConfiguration.NM_RUNC_MANIFEST_TO_RESOURCES_PLUGIN; import static org.apache.hadoop.yarn.conf.YarnConfiguration.NM_REAP_RUNC_LAYER_MOUNTS_INTERVAL; import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.*; + /** *

    This class is an extension of {@link OCIContainerRuntime} that uses the * native {@code container-executor} binary via a @@ -206,7 +208,7 @@ public void initialize(Configuration configuration, Context nmCtx) imageTagToManifestPlugin.init(conf); manifestToResourcesPlugin = chooseManifestToResourcesPlugin(); manifestToResourcesPlugin.init(conf); - mapper = new ObjectMapper(); + mapper = JacksonUtil.createBasicObjectMapper(); defaultRuncImage = conf.get(YarnConfiguration.NM_RUNC_IMAGE_NAME); allowedNetworks.clear(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/runc/ImageTagToManifestPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/runc/ImageTagToManifestPlugin.java index 457939c9a1740..bb21c45f735a0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/runc/ImageTagToManifestPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/runc/ImageTagToManifestPlugin.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.concurrent.HadoopExecutors; import java.io.BufferedReader; @@ -42,7 +43,6 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; -import com.fasterxml.jackson.databind.ObjectMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,7 +65,6 @@ public class ImageTagToManifestPlugin extends AbstractService implements RuncImageTagToManifestPlugin { private Map manifestCache; - private ObjectMapper objMapper; private AtomicReference> localImageToHashCache = new AtomicReference<>(new HashMap<>()); private AtomicReference> hdfsImageToHashCache = @@ -107,7 +106,7 @@ public ImageManifest getManifestFromImageTag(String imageTag) } byte[] bytes = IOUtils.toByteArray(input); - manifest = objMapper.readValue(bytes, ImageManifest.class); + manifest = JacksonUtil.getSharedReader().readValue(bytes, ImageManifest.class); manifestCache.put(hash, manifest); return manifest; @@ -279,7 +278,6 @@ protected void serviceInit(Configuration configuration) throws Exception { DEFAULT_NM_RUNC_IMAGE_TOPLEVEL_DIR) + "/manifests/"; int numManifestsToCache = conf.getInt(NM_RUNC_NUM_MANIFESTS_TO_CACHE, DEFAULT_NUM_MANIFESTS_TO_CACHE); - this.objMapper = new ObjectMapper(); this.manifestCache = Collections.synchronizedMap( new LRUCache(numManifestsToCache, 0.75f)); @@ -315,7 +313,7 @@ protected void serviceStop() throws Exception { } private static class LRUCache extends LinkedHashMap { - private int cacheSize; + private final int cacheSize; LRUCache(int initialCapacity, float loadFactor) { super(initialCapacity, loadFactor, true); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManagerImpl.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManagerImpl.java index 24cb34327b745..8910ab48ddaaa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManagerImpl.java @@ -20,10 +20,10 @@ import org.apache.hadoop.classification.VisibleForTesting; -import com.fasterxml.jackson.databind.ObjectMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -105,8 +105,7 @@ private void loadProfiles() throws IOException { resourcesFile = tmp.getPath(); } } - ObjectMapper mapper = new ObjectMapper(); - Map data = mapper.readValue(new File(resourcesFile), Map.class); + Map data = JacksonUtil.getSharedReader().readValue(new File(resourcesFile), Map.class); Iterator iterator = data.entrySet().iterator(); while (iterator.hasNext()) { Map.Entry entry = (Map.Entry) iterator.next(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/MappingRuleCreator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/MappingRuleCreator.java index 174577099e48c..0fa10570d030a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/MappingRuleCreator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/MappingRuleCreator.java @@ -27,6 +27,7 @@ import java.util.List; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.server.resourcemanager.placement.csmappingrule.MappingRule; import org.apache.hadoop.yarn.server.resourcemanager.placement.csmappingrule.MappingRuleAction; import org.apache.hadoop.yarn.server.resourcemanager.placement.csmappingrule.MappingRuleActions; @@ -43,7 +44,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.VisibleForTesting; public class MappingRuleCreator { @@ -58,14 +58,12 @@ public MappingRulesDescription getMappingRulesFromJsonFile(String filePath) MappingRulesDescription getMappingRulesFromJson(byte[] contents) throws IOException { - ObjectMapper objectMapper = new ObjectMapper(); - return objectMapper.readValue(contents, MappingRulesDescription.class); + return JacksonUtil.getSharedReader().readValue(contents, MappingRulesDescription.class); } MappingRulesDescription getMappingRulesFromJson(String contents) throws IOException { - ObjectMapper objectMapper = new ObjectMapper(); - return 
objectMapper.readValue(contents, MappingRulesDescription.class); + return JacksonUtil.getSharedReader().readValue(contents, MappingRulesDescription.class); } public List getMappingRulesFromFile(String jsonPath) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/converter/LegacyMappingRuleToJson.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/converter/LegacyMappingRuleToJson.java index 108d52bc40c36..6c963775be770 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/converter/LegacyMappingRuleToJson.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/converter/LegacyMappingRuleToJson.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueuePath; @@ -52,9 +53,11 @@ public class LegacyMappingRuleToJson { public static final String JSON_NODE_MATCHES = "matches"; /** - * Our internal object mapper, used to create JSON nodes. + * It is more performant to reuse ObjectMapper instances but keeping the instance + * private makes it harder for someone to reconfigure it which might have unwanted + * side effects. */ - private ObjectMapper objectMapper = new ObjectMapper(); + private static final ObjectMapper OBJECT_MAPPER = JacksonUtil.createBasicObjectMapper(); /** * Collection to store the legacy group mapping rule strings. 
@@ -138,8 +141,8 @@ public LegacyMappingRuleToJson setAppNameMappingRules( */ public String convert() { //creating the basic JSON config structure - ObjectNode rootNode = objectMapper.createObjectNode(); - ArrayNode rulesNode = objectMapper.createArrayNode(); + ObjectNode rootNode = OBJECT_MAPPER.createObjectNode(); + ArrayNode rulesNode = OBJECT_MAPPER.createArrayNode(); rootNode.set("rules", rulesNode); //Processing and adding all the user group mapping rules @@ -158,7 +161,7 @@ public String convert() { } try { - return objectMapper + return OBJECT_MAPPER .writerWithDefaultPrettyPrinter() .writeValueAsString(rootNode); } catch (JsonProcessingException e) { @@ -246,7 +249,7 @@ private String[] splitRule(String rule, int expectedParts) { * @return The object node with the preset fields */ private ObjectNode createDefaultRuleNode(String type) { - return objectMapper + return OBJECT_MAPPER .createObjectNode() .put("type", type) //All legacy rule fallback to place to default diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/FSConfigToCSConfigConverter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/FSConfigToCSConfigConverter.java index d801652377983..6a16aac686d6a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/FSConfigToCSConfigConverter.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/FSConfigToCSConfigConverter.java @@ -32,6 +32,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.authorize.AccessControlList; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -55,7 +56,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import com.fasterxml.jackson.core.util.DefaultPrettyPrinter; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.VisibleForTesting; @@ -327,14 +327,14 @@ private void performRuleConversion(FairScheduler fs) placementConverter.convertPlacementPolicy(placementManager, ruleHandler, capacitySchedulerConfig, usePercentages); - ObjectMapper mapper = new ObjectMapper(); + final ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); // close output stream if we write to a file, leave it open otherwise if (!consoleMode && rulesToFile) { mapper.configure(JsonGenerator.Feature.AUTO_CLOSE_TARGET, true); } else { mapper.configure(JsonGenerator.Feature.AUTO_CLOSE_TARGET, false); } - ObjectWriter writer = mapper.writer(new DefaultPrettyPrinter()); + ObjectWriter writer = mapper.writerWithDefaultPrettyPrinter(); if (consoleMode && rulesToFile) { System.out.println("======= " + MAPPING_RULES_JSON + " ======="); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStore.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStore.java index 1f4a9f42a9f8c..7e49bd19aef73 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStore.java @@ -42,6 +42,7 @@ import org.apache.hadoop.service.ServiceOperations; import org.apache.hadoop.ipc.CallerContext; import org.apache.hadoop.util.ApplicationClassLoader; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -320,7 +321,7 @@ protected void serviceStart() throws Exception { } } - objMapper = new ObjectMapper(); + objMapper = JacksonUtil.createBasicObjectMapper(); objMapper.setAnnotationIntrospector( new JaxbAnnotationIntrospector(TypeFactory.defaultInstance())); jsonFactory = new MappingJsonFactory(objMapper); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/LevelDBCacheTimelineStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/LevelDBCacheTimelineStore.java index f84eeebbf0c8e..8ee6d1864c694 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/LevelDBCacheTimelineStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/LevelDBCacheTimelineStore.java @@ -18,13 +18,13 @@ package org.apache.hadoop.yarn.server.timeline; -import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.timeline.util.LeveldbUtils; @@ -298,7 +298,6 @@ public void close() throws IOException { } }; } - static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); @SuppressWarnings("unchecked") private V getEntityForKey(byte[] key) throws IOException { @@ -306,7 +305,7 @@ private V getEntityForKey(byte[] key) throws IOException { if (resultRaw == null) { return null; } - return (V) OBJECT_MAPPER.readValue(resultRaw, TimelineEntity.class); + return (V) JacksonUtil.getSharedReader().readValue(resultRaw, TimelineEntity.class); } private byte[] getStartTimeKey(K entityId) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/test/java/org/apache/hadoop/yarn/server/timeline/PluginStoreTestUtils.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/test/java/org/apache/hadoop/yarn/server/timeline/PluginStoreTestUtils.java index cb887fe264fab..d3885c5bc8fb4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/test/java/org/apache/hadoop/yarn/server/timeline/PluginStoreTestUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/test/java/org/apache/hadoop/yarn/server/timeline/PluginStoreTestUtils.java @@ -18,7 +18,6 @@ package org.apache.hadoop.yarn.server.timeline; import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.util.MinimalPrettyPrinter; import com.fasterxml.jackson.databind.ObjectMapper; @@ -31,6 +30,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity; import org.apache.hadoop.yarn.api.records.timeline.TimelineEvent; @@ -108,7 +108,7 @@ static FSDataOutputStream createLogFile(Path logPath, FileSystem fs) } static ObjectMapper createObjectMapper() { - ObjectMapper mapper = new ObjectMapper(); + ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); mapper.setAnnotationIntrospector( new JaxbAnnotationIntrospector(TypeFactory.defaultInstance())); mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); @@ -230,10 +230,9 @@ static TimelineEvent createEvent(long timestamp, String type, Map T getTimelineRecordFromJSON( String jsonString, Class clazz) throws JsonGenerationException, JsonMappingException, IOException { - return mapper.readValue(jsonString, clazz); + return OBJECT_MAPPER.readValue(jsonString, clazz); } private static void fillFields(TimelineEntity finalEntity, From 5f93edfd70784aa4f3ff392ef065c78a6fc532ea Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 16 Aug 2024 13:41:35 +0100 Subject: [PATCH 085/113] HADOOP-19153. hadoop-common exports logback as a transitive dependency (#6999) - Critical: remove the obsolete exclusion list from hadoop-common. - Diligence: expand the hadoop-project exclusion list to exclude all ch.qos.logback artifacts Contributed by Steve Loughran --- hadoop-common-project/hadoop-common/pom.xml | 19 ------------------- hadoop-project/pom.xml | 6 +----- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 90d667797343e..06c6b06ec6a3c 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -330,25 +330,6 @@ org.apache.zookeeper zookeeper - - - org.jboss.netty - netty - - - - junit - junit - - - com.sun.jdmk - jmxtools - - - com.sun.jmx - jmxri - - io.netty diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 3f0a8b3a85fa7..8c8f675f98bc5 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -1484,11 +1484,7 @@ ch.qos.logback - logback-core - - - ch.qos.logback - logback-classic + * From f00094203bf40a8c3f2216cf22eaa5599e3b9b4d Mon Sep 17 00:00:00 2001 From: Ferenc Erdelyi <55103964+ferdelyi@users.noreply.github.com> Date: Fri, 16 Aug 2024 16:33:10 +0200 Subject: [PATCH 086/113] YARN-11709. 
NodeManager should be shut down or blacklisted when it cannot run program /var/lib/yarn-ce/bin/container-executor (#6960) --- .../nodemanager/LinuxContainerExecutor.java | 6 ++-- .../TestLinuxContainerExecutorWithMocks.java | 35 +++++++++++++++++-- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index 19335045c865b..19c06736035e9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -451,8 +451,10 @@ public void startLocalizer(LocalizerStartContext ctx) } catch (PrivilegedOperationException e) { int exitCode = e.getExitCode(); - LOG.warn("Exit code from container {} startLocalizer is : {}", - locId, exitCode, e); + LOG.error("Unrecoverable issue occurred. Marking the node as unhealthy to prevent " + + "further containers to get scheduled on the node and cause application failures. " + + "Exit code from the container " + locId + "startLocalizer is : " + exitCode, e); + nmContext.getNodeStatusUpdater().reportException(e); throw new IOException("Application " + appId + " initialization failed" + " (exitCode=" + exitCode + ") with output: " + e.getOutput(), e); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java index 3d9d33c5a10dd..7d49cab4a86d2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java @@ -26,6 +26,7 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; @@ -37,6 +38,7 @@ import java.io.FileReader; import java.io.IOException; import java.io.LineNumberReader; +import java.lang.reflect.Field; import java.net.InetSocketAddress; import java.net.URI; import java.net.URISyntaxException; @@ -345,7 +347,8 @@ public void testStartLocalizer() throws IOException { @Test public void testContainerLaunchError() - throws IOException, ContainerExecutionException, URISyntaxException { + throws IOException, ContainerExecutionException, URISyntaxException, IllegalAccessException, + NoSuchFieldException { final String[] expecetedMessage = {"badcommand", "Exit code: 24"}; final String[] executor = { @@ -387,6 +390,14 @@ public Object answer(InvocationOnMock 
invocationOnMock) dirsHandler.init(conf); mockExec.setConf(conf); + //set the private nmContext field without initing the LinuxContainerExecutor + NodeManager nodeManager = new NodeManager(); + NodeManager.NMContext nmContext = + nodeManager.createNMContext(null, null, null, false, conf); + Field lceNmContext = LinuxContainerExecutor.class.getDeclaredField("nmContext"); + lceNmContext.setAccessible(true); + lceNmContext.set(mockExec, nmContext); + String appSubmitter = "nobody"; String cmd = String .valueOf(PrivilegedOperation.RunAsUserCommand.LAUNCH_CONTAINER. @@ -601,8 +612,6 @@ public void testNoExitCodeFromPrivilegedOperation() throws Exception { LinuxContainerRuntime runtime = new DefaultLinuxContainerRuntime( spyPrivilegedExecutor); runtime.initialize(conf, null); - mockExec = new LinuxContainerExecutor(runtime); - mockExec.setConf(conf); LinuxContainerExecutor lce = new LinuxContainerExecutor(runtime) { @Override protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() { @@ -610,6 +619,23 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() { } }; lce.setConf(conf); + + //set the private nmContext field without initing the LinuxContainerExecutor + NodeManager nodeManager = new NodeManager(); + NodeManager.NMContext nmContext = + nodeManager.createNMContext(null, null, null, false, conf); + NodeManager.NMContext spyNmContext = spy(nmContext); + + //initialize a mock NodeStatusUpdater + NodeStatusUpdaterImpl nodeStatusUpdater = mock(NodeStatusUpdaterImpl.class); + nmContext.setNodeStatusUpdater(nodeStatusUpdater); + //imitate a void method call on the NodeStatusUpdater when setting NM unhealthy. + doNothing().when(nodeStatusUpdater).reportException(any()); + + Field lceNmContext = LinuxContainerExecutor.class.getDeclaredField("nmContext"); + lceNmContext.setAccessible(true); + lceNmContext.set(lce, nmContext); + InetSocketAddress address = InetSocketAddress.createUnresolved( "localhost", 8040); Path nmPrivateCTokensPath= new Path("file:///bin/nmPrivateCTokensPath"); @@ -672,6 +698,9 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() { assertTrue("Unexpected exception " + e, e.getMessage().contains("exit code")); } + + //verify that the NM was set unhealthy on PrivilegedOperationException + verify(nodeStatusUpdater, times(1)).reportException(any()); } @Test From bf804cb64be7b15b1ff69242095729f1228aca33 Mon Sep 17 00:00:00 2001 From: zhengchenyu Date: Sat, 17 Aug 2024 00:16:28 +0800 Subject: [PATCH 087/113] HADOOP-19250. 
Fix test TestServiceInterruptHandling.testRegisterAndRaise (#6987) Contributed by Chenyu Zheng --- .../service/launcher/TestServiceInterruptHandling.java | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/service/launcher/TestServiceInterruptHandling.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/service/launcher/TestServiceInterruptHandling.java index bd779e4a0ce3a..c21fa8b73073f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/service/launcher/TestServiceInterruptHandling.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/service/launcher/TestServiceInterruptHandling.java @@ -20,6 +20,7 @@ import org.apache.hadoop.service.BreakableService; import org.apache.hadoop.service.launcher.testservices.FailureTestService; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.ExitUtil; import org.junit.Test; import org.slf4j.Logger; @@ -43,10 +44,8 @@ public void testRegisterAndRaise() throws Throwable { assertEquals(0, irqHandler.getSignalCount()); irqHandler.raise(); // allow for an async event - Thread.sleep(500); - IrqHandler.InterruptData data = catcher.interruptData; - assertNotNull("interrupt data", data); - assertEquals(name, data.getName()); + GenericTestUtils.waitFor(() -> catcher.interruptData != null, 100, 10000); + assertEquals(name, catcher.interruptData.getName()); assertEquals(1, irqHandler.getSignalCount()); } From b5f88990b721aebe84d4bbf97ed2fdfbd0f42854 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Sat, 17 Aug 2024 02:42:26 +0800 Subject: [PATCH 088/113] HADOOP-19136. Upgrade commons-io to 2.16.1. (#6704) Contributed by Shilun Fan. --- LICENSE-binary | 2 +- hadoop-project/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index c0eb82f3dabfb..cc018ed265bbf 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -248,7 +248,7 @@ commons-cli:commons-cli:1.5.0 commons-codec:commons-codec:1.11 commons-collections:commons-collections:3.2.2 commons-daemon:commons-daemon:1.0.13 -commons-io:commons-io:2.14.0 +commons-io:commons-io:2.16.1 commons-net:commons-net:3.9.0 de.ruedigermoeller:fst:2.50 io.grpc:grpc-api:1.53.0 diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 8c8f675f98bc5..4c69012f08d18 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -124,7 +124,7 @@ 3.2.2 1.26.1 1.9.0 - 2.14.0 + 2.16.1 3.12.0 1.2 3.6.1 From 59dba6e1bd3f5f62fba8e64040dfe20301518846 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Sun, 18 Aug 2024 16:59:12 +0100 Subject: [PATCH 089/113] HADOOP-19134. Use StringBuilder instead of StringBuffer. (#6692). 
Contributed by PJ Fanning --- .../apache/hadoop/crypto/key/KeyShell.java | 2 +- .../main/java/org/apache/hadoop/fs/DF.java | 2 +- .../java/org/apache/hadoop/fs/FileUtil.java | 2 +- .../apache/hadoop/oncrpc/RpcDeniedReply.java | 2 +- .../apache/hadoop/security/ProviderUtils.java | 2 +- .../security/alias/CredentialShell.java | 2 +- .../security/ssl/SSLHostnameVerifier.java | 8 +-- .../java/org/apache/hadoop/util/Shell.java | 6 +- .../org/apache/hadoop/util/StringUtils.java | 2 +- .../org/apache/hadoop/fs/shell/TestCount.java | 2 +- .../alias/TestCredentialProviderFactory.java | 2 +- .../org/apache/hadoop/util/TestShell.java | 2 +- .../util/functional/TestRemoteIterators.java | 2 +- .../hadoop/crypto/key/kms/server/TestKMS.java | 2 +- .../org/apache/hadoop/hdfs/tools/DFSck.java | 2 +- .../OfflineEditsXmlLoader.java | 7 +-- .../offlineImageViewer/PBImageCorruption.java | 2 +- .../org/apache/hadoop/hdfs/DFSTestUtil.java | 2 +- .../org/apache/hadoop/hdfs/TestHDFSTrash.java | 2 +- .../blockmanagement/TestBlockManager.java | 8 +-- .../resources/TestWebHdfsDataLocality.java | 2 +- .../mapreduce/v2/app/webapp/ConfBlock.java | 2 +- .../hadoop/mapreduce/v2/app/TestRecovery.java | 2 +- .../app/webapp/TestAMWebServicesAttempts.java | 2 +- .../v2/app/webapp/TestAMWebServicesJobs.java | 2 +- .../apache/hadoop/mapred/LocalJobRunner.java | 4 +- .../mapreduce/v2/util/MRWebAppUtil.java | 2 +- .../mapred/TestLocalModeWithNewApis.java | 2 +- .../apache/hadoop/mapred/FileInputFormat.java | 2 +- .../hadoop/mapred/InvalidInputException.java | 2 +- .../apache/hadoop/mapred/MultiFileSplit.java | 2 +- .../apache/hadoop/mapred/SortedRanges.java | 2 +- .../org/apache/hadoop/mapred/TaskLog.java | 6 +- .../mapred/lib/FieldSelectionMapReduce.java | 2 +- .../java/org/apache/hadoop/mapreduce/Job.java | 2 +- .../apache/hadoop/mapreduce/JobStatus.java | 2 +- .../hadoop/mapreduce/TaskCompletionEvent.java | 2 +- .../lib/aggregate/ValueHistogram.java | 4 +- .../lib/fieldsel/FieldSelectionHelper.java | 8 +-- .../lib/input/CombineFileInputFormat.java | 2 +- .../mapreduce/lib/input/CombineFileSplit.java | 4 +- .../mapreduce/lib/input/FileInputFormat.java | 2 +- .../lib/input/InvalidInputException.java | 2 +- .../lib/jobcontrol/ControlledJob.java | 2 +- .../mapreduce/lib/join/TupleWritable.java | 2 +- .../hadoop/mapreduce/split/JobSplit.java | 2 +- .../hadoop/mapreduce/task/reduce/Fetcher.java | 8 ++- .../task/reduce/ShuffleSchedulerImpl.java | 2 +- .../apache/hadoop/mapreduce/tools/CLI.java | 2 +- .../mapred/TestFileOutputCommitter.java | 2 +- .../lib/output/TestFileOutputCommitter.java | 2 +- .../mapreduce/v2/hs/webapp/HsJobBlock.java | 2 +- .../mapreduce/v2/hs/webapp/dao/JobInfo.java | 2 +- .../hs/webapp/TestHsWebServicesAttempts.java | 2 +- .../v2/hs/webapp/VerifyJobsUtils.java | 2 +- .../apache/hadoop/RandomTextWriterJob.java | 2 +- .../apache/hadoop/fs/AccumulatingReducer.java | 2 +- .../org/apache/hadoop/fs/JHLogAnalyzer.java | 2 +- .../org/apache/hadoop/mapred/MRBench.java | 2 +- .../TestConcatenatedCompressedInput.java | 2 +- .../mapred/TestFixedLengthInputFormat.java | 2 +- .../mapred/TestMRCJCFileOutputCommitter.java | 2 +- .../apache/hadoop/mapred/TestMapProgress.java | 2 +- .../org/apache/hadoop/mapred/TestMapRed.java | 2 +- .../hadoop/mapred/TestMiniMRClasspath.java | 4 +- .../mapred/TestMultipleTextOutputFormat.java | 8 +-- .../hadoop/mapred/TestTextInputFormat.java | 2 +- .../apache/hadoop/mapred/UtilsForTests.java | 6 +- .../jobcontrol/JobControlTestUtils.java | 2 +- .../hadoop/mapreduce/MapReduceTestUtil.java | 6 
+- .../hadoop/mapreduce/RandomTextWriter.java | 2 +- .../lib/input/TestFixedLengthInputFormat.java | 2 +- .../output/TestMRCJCFileOutputCommitter.java | 2 +- .../mapreduce/v2/MiniMRYarnCluster.java | 2 +- .../hadoop/examples/RandomTextWriter.java | 2 +- .../hadoop/examples/dancing/Pentomino.java | 2 +- .../hadoop/examples/dancing/Sudoku.java | 2 +- .../examples/terasort/TeraScheduler.java | 4 +- .../s3a/commit/AbstractITCommitProtocol.java | 2 +- .../hadoop/contrib/utils/join/JobBase.java | 2 +- .../org/apache/hadoop/tools/DistCpSync.java | 2 +- .../apache/hadoop/tools/util/DistCpUtils.java | 2 +- .../tools/rumen/datatypes/NodeName.java | 2 +- .../apache/hadoop/streaming/PipeMapRed.java | 2 +- .../apache/hadoop/streaming/StreamJob.java | 2 +- .../streaming/StreamXmlRecordReader.java | 2 - .../streaming/TestMultipleArchiveFiles.java | 2 +- .../org/apache/hadoop/streaming/UtilTest.java | 2 +- .../timeline/TimelineEntityGroupId.java | 2 +- .../api/resource/PlacementConstraint.java | 10 +-- .../hadoop/yarn/client/cli/TestLogsCLI.java | 2 +- .../yarn/util/ProcfsBasedProcessTree.java | 2 +- .../hadoop/yarn/webapp/view/JQueryUI.java | 4 +- .../TestAggregatedLogFormat.java | 2 +- .../store/sql/FederationQueryRunner.java | 2 +- .../task/DockerContainerDeletionTask.java | 2 +- .../PrivilegedOperationExecutor.java | 2 +- .../NetworkPacketTaggingHandlerImpl.java | 2 +- .../TrafficControlBandwidthHandlerImpl.java | 4 +- .../linux/resources/TrafficController.java | 2 +- .../linux/runtime/docker/DockerCommand.java | 2 +- .../localizer/LocalCacheDirectoryManager.java | 2 +- .../nvidia/NvidiaGPUPluginForRuntimeV2.java | 2 +- .../recovery/NMStateStoreService.java | 2 +- .../TestPrivilegedOperationExecutor.java | 2 +- .../TestNetworkPacketTaggingHandlerImpl.java | 2 +- ...estTrafficControlBandwidthHandlerImpl.java | 2 +- .../resources/TestTrafficController.java | 2 +- .../runtime/TestDockerContainerRuntime.java | 2 +- .../TestHdfsManifestToResourcesPlugin.java | 2 +- .../runtime/TestImageTagToManifestPlugin.java | 2 +- .../runtime/TestRuncContainerRuntime.java | 2 +- .../docker/TestDockerCommandExecutor.java | 4 +- .../TestLocalCacheDirectoryManager.java | 2 +- .../TestDevicePluginAdapter.java | 2 +- .../scheduler/capacity/PlanQueue.java | 2 +- .../resourcemanager/webapp/RMWebServices.java | 2 +- .../webapp/dao/AllocationTagsInfo.java | 2 +- .../constraint/TestPlacementProcessor.java | 2 +- .../algorithm/TestCircularIterator.java | 8 +-- .../fair/TestQueuePlacementPolicy.java | 62 +++++++++---------- .../webapp/FederationInterceptorREST.java | 8 +-- 122 files changed, 195 insertions(+), 196 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java index c18d0d41bc08a..cd774479d72d4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java @@ -169,7 +169,7 @@ protected int init(String[] args) throws IOException { @Override public String getCommandUsage() { - StringBuffer sbuf = new StringBuffer(USAGE_PREFIX + COMMANDS); + StringBuilder sbuf = new StringBuilder(USAGE_PREFIX + COMMANDS); String banner = StringUtils.repeat("=", 66); sbuf.append(banner + "\n"); sbuf.append(CreateCommand.USAGE + ":\n\n" + CreateCommand.DESC + "\n"); diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java index c5a052f3de4be..d88a1eca45c03 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java @@ -163,7 +163,7 @@ protected void parseExecResult(BufferedReader lines) throws IOException { @VisibleForTesting protected void parseOutput() throws IOException { if (output.size() < 2) { - StringBuffer sb = new StringBuffer("Fewer lines of output than expected"); + StringBuilder sb = new StringBuilder("Fewer lines of output than expected"); if (output.size() > 0) { sb.append(": " + output.get(0)); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java index fa87bb48aaa69..56b97bf08681e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java @@ -1052,7 +1052,7 @@ private static void unTarUsingTar(InputStream inputStream, File untarDir, private static void unTarUsingTar(File inFile, File untarDir, boolean gzipped) throws IOException { - StringBuffer untarCommand = new StringBuffer(); + StringBuilder untarCommand = new StringBuilder(); // not using canonical path here; this postpones relative path // resolution until bash is executed. final String source = "'" + FileUtil.makeSecureShellPath(inFile) + "'"; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/oncrpc/RpcDeniedReply.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/oncrpc/RpcDeniedReply.java index 62bbd933425bd..62b6c51bcb523 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/oncrpc/RpcDeniedReply.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/oncrpc/RpcDeniedReply.java @@ -58,7 +58,7 @@ public RejectState getRejectState() { @Override public String toString() { - return new StringBuffer().append("xid:").append(xid) + return new StringBuilder().append("xid:").append(xid) .append(",messageType:").append(messageType).append("verifier_flavor:") .append(verifier.getFlavor()).append("rejectState:") .append(rejectState).toString(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ProviderUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ProviderUtils.java index 9cd85499f5803..ae17cde11ce3c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ProviderUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ProviderUtils.java @@ -148,7 +148,7 @@ public static Configuration excludeIncompatibleCredentialProviders( if (providerPath == null) { return config; } - StringBuffer newProviderPath = new StringBuffer(); + StringBuilder newProviderPath = new StringBuilder(); String[] providers = providerPath.split(","); Path path = null; for (String provider: providers) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialShell.java index 45b5af36bbbf1..41f56715d18d7 100644 --- 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialShell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/alias/CredentialShell.java @@ -127,7 +127,7 @@ protected int init(String[] args) throws IOException { @Override public String getCommandUsage() { - StringBuffer sbuf = new StringBuffer(USAGE_PREFIX + COMMANDS); + StringBuilder sbuf = new StringBuilder(USAGE_PREFIX + COMMANDS); String banner = StringUtils.repeat("=", 66); sbuf.append(banner + "\n") .append(CreateCommand.USAGE + ":\n\n" + CreateCommand.DESC + "\n") diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/SSLHostnameVerifier.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/SSLHostnameVerifier.java index 6a7c9d48e4f8f..86c92ab1147b8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/SSLHostnameVerifier.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/ssl/SSLHostnameVerifier.java @@ -370,7 +370,7 @@ public void check(final String[] hosts, final String[] cns, strictWithSubDomains); } // Build up lists of allowed hosts For logging/debugging purposes. - StringBuffer buf = new StringBuffer(32); + StringBuilder buf = new StringBuilder(32); buf.append('<'); for (int i = 0; i < hosts.length; i++) { String h = hosts[i]; @@ -408,15 +408,15 @@ public void check(final String[] hosts, final String[] cns, throw new SSLException(msg); } - // StringBuffer for building the error message. - buf = new StringBuffer(); + // StringBuilder for building the error message. + buf = new StringBuilder(); boolean match = false; out: for (Iterator it = names.iterator(); it.hasNext();) { // Don't trim the CN, though! final String cn = StringUtils.toLowerCase(it.next()); - // Store CN in StringBuffer in case we need to report an error. + // Store CN in StringBuilder in case we need to report an error. 
buf.append(" <") .append(cn) .append('>'); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java index 91868365b1346..e0d199ea86372 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/Shell.java @@ -1014,7 +1014,7 @@ private void runCommand() throws IOException { BufferedReader inReader = new BufferedReader(new InputStreamReader(process.getInputStream(), StandardCharsets.UTF_8)); - final StringBuffer errMsg = new StringBuffer(); + final StringBuilder errMsg = new StringBuilder(); // read error and input streams as this would free up the buffers // free the error stream buffer @@ -1208,7 +1208,7 @@ public static class ShellCommandExecutor extends Shell implements CommandExecutor { private String[] command; - private StringBuffer output; + private StringBuilder output; public ShellCommandExecutor(String[] execString) { @@ -1289,7 +1289,7 @@ public String[] getExecString() { @Override protected void parseExecResult(BufferedReader lines) throws IOException { - output = new StringBuffer(); + output = new StringBuilder(); char[] buf = new char[512]; int nRead; while ( (nRead = lines.read(buf, 0, buf.length)) > 0 ) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java index b109d8bacb0cd..14a745815750a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java @@ -1334,7 +1334,7 @@ public static String wrap(String str, int wrapLength, String newLineStr, int inputLineLength = str.length(); int offset = 0; - StringBuffer wrappedLine = new StringBuffer(inputLineLength + 32); + StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32); while(inputLineLength - offset > wrapLength) { if(str.charAt(offset) == 32) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCount.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCount.java index c86a4c89dfb9f..a2af500c30c9b 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCount.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/shell/TestCount.java @@ -580,7 +580,7 @@ public MockQuotaUsage() { public String toString(boolean hOption, boolean tOption, List types) { if (tOption) { - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); result.append(hOption ? 
HUMAN : BYTES); for (StorageType type : types) { diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/alias/TestCredentialProviderFactory.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/alias/TestCredentialProviderFactory.java index fb17977aa2e70..37da798e804fd 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/alias/TestCredentialProviderFactory.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/security/alias/TestCredentialProviderFactory.java @@ -114,7 +114,7 @@ public void testUriErrors() throws Exception { } private static char[] generatePassword(int length) { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); Random r = new Random(); for (int i = 0; i < length; i++) { sb.append(chars[r.nextInt(chars.length)]); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java index 9ae52ff95cb91..2dafe81696e33 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestShell.java @@ -480,7 +480,7 @@ public void testBashQuote() { @Test(timeout=120000) public void testDestroyAllShellProcesses() throws Throwable { Assume.assumeFalse(WINDOWS); - StringBuffer sleepCommand = new StringBuffer(); + StringBuilder sleepCommand = new StringBuilder(); sleepCommand.append("sleep 200"); String[] shellCmd = {"bash", "-c", sleepCommand.toString()}; final ShellCommandExecutor shexc1 = new ShellCommandExecutor(shellCmd); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestRemoteIterators.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestRemoteIterators.java index 373e1003ef728..4f83b510c37d9 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestRemoteIterators.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/functional/TestRemoteIterators.java @@ -86,7 +86,7 @@ public void log(Object o) { */ @Test public void testSingleton() throws Throwable { - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); String name = "singleton"; RemoteIterator it = remoteIteratorFromSingleton(name); assertStringValueContains(it, "SingletonIterator"); diff --git a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java index f4c7fbe0b3c3c..282ae36f86ed1 100644 --- a/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java +++ b/hadoop-common-project/hadoop-kms/src/test/java/org/apache/hadoop/crypto/key/kms/server/TestKMS.java @@ -167,7 +167,7 @@ protected String generateLoadBalancingKeyProviderUriString() { if (kmsUrl == null || kmsUrl.size() == 0) { return null; } - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); for (int i = 0; i < kmsUrl.size(); i++) { sb.append(KMSClientProvider.SCHEME_NAME + "://" + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java 
index a0da4eaf80538..e279ea349b235 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSck.java @@ -195,7 +195,7 @@ private Integer listCorruptFileBlocks(String dir, String baseUrl) final String cookiePrefix = "Cookie:"; boolean allDone = false; while (!allDone) { - final StringBuffer url = new StringBuffer(baseUrl); + final StringBuilder url = new StringBuilder(baseUrl); if (cookie > 0) { url.append("&startblockafter=").append(String.valueOf(cookie)); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java index ac43b21d840ab..7169668edab65 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineEditsViewer/OfflineEditsXmlLoader.java @@ -32,7 +32,6 @@ import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOpCodes; import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp.OpInstanceCache; -import org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer; import org.apache.hadoop.hdfs.util.XMLUtils.Stanza; import org.xml.sax.Attributes; import org.xml.sax.InputSource; @@ -57,7 +56,7 @@ class OfflineEditsXmlLoader private Stanza stanza; private Stack stanzaStack; private FSEditLogOpCodes opCode; - private StringBuffer cbuf; + private StringBuilder cbuf; private long nextTxId; private final OpInstanceCache opCache = new OpInstanceCache(); @@ -119,7 +118,7 @@ public void startDocument() { stanza = null; stanzaStack = new Stack(); opCode = null; - cbuf = new StringBuffer(); + cbuf = new StringBuilder(); nextTxId = -1; } @@ -182,7 +181,7 @@ public void startElement (String uri, String name, @Override public void endElement (String uri, String name, String qName) { String str = XMLUtils.unmangleXmlString(cbuf.toString(), false).trim(); - cbuf = new StringBuffer(); + cbuf = new StringBuilder(); switch (state) { case EXPECT_EDITS_TAG: throw new InvalidXmlException("expected "); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageCorruption.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageCorruption.java index d510dfc3b9c95..a2991f03fdd86 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageCorruption.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageCorruption.java @@ -85,7 +85,7 @@ long getId() { } String getType() { - StringBuffer s = new StringBuffer(); + StringBuilder s = new StringBuilder(); if (type.contains(PBImageCorruptionType.CORRUPT_NODE)) { s.append(PBImageCorruptionType.CORRUPT_NODE); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java index 791f6529da364..c35edd1f6d48c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java @@ -340,7 +340,7 @@ private class MyFile { for (int idx = 0; idx < nLevels; idx++) { levels[idx] = gen.nextInt(10); } - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); for (int idx = 0; idx < nLevels; idx++) { sb.append(dirNames[levels[idx]]); sb.append("/"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSTrash.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSTrash.java index 5dbb124882d43..ea73a63f385f1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSTrash.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestHDFSTrash.java @@ -180,7 +180,7 @@ private Trash getPerUserTrash(UserGroupInformation ugi, FileSystem fileSystem, Configuration config) throws IOException { // generate an unique path per instance UUID trashId = UUID.randomUUID(); - StringBuffer sb = new StringBuffer() + StringBuilder sb = new StringBuilder() .append(ugi.getUserName()) .append("-") .append(trashId.toString()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java index d9d236b66468d..a456041d1f950 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java @@ -1833,7 +1833,7 @@ public void testMetaSavePostponedMisreplicatedBlocks() throws IOException { DataInputStream in = new DataInputStream(fstream); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); String line; try { while ((line = reader.readLine()) != null) { @@ -1861,7 +1861,7 @@ public void testMetaSaveMissingReplicas() throws Exception { FileInputStream fstream = new FileInputStream(file); DataInputStream in = new DataInputStream(fstream); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); String line; try { while ((line = reader.readLine()) != null) { @@ -1933,7 +1933,7 @@ public void testMetaSaveInMaintenanceReplicas() throws Exception { FileInputStream fstream = new FileInputStream(file); DataInputStream in = new DataInputStream(fstream); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); String line; try { while ((line = reader.readLine()) != null) { @@ -1989,7 +1989,7 @@ public void testMetaSaveDecommissioningReplicas() throws Exception { FileInputStream fstream = new FileInputStream(file); DataInputStream in = new DataInputStream(fstream); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); String line; try { while ((line = reader.readLine()) != null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsDataLocality.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsDataLocality.java index 640994562e669..e56b03ee18434 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsDataLocality.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/web/resources/TestWebHdfsDataLocality.java @@ -196,7 +196,7 @@ public void testExcludeDataNodes() throws Exception { //For GETFILECHECKSUM, OPEN and APPEND, //the chosen datanode must be different with exclude nodes. - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); for (int i = 0; i < 2; i++) { sb.append(locations[i].getXferAddr()); { // test GETFILECHECKSUM diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/ConfBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/ConfBlock.java index 4c92ca0950f83..f080da46dc8a7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/ConfBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/ConfBlock.java @@ -83,7 +83,7 @@ public class ConfBlock extends HtmlBlock { __(). tbody(); for (ConfEntryInfo entry : info.getProperties()) { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); String[] sources = entry.getSource(); //Skip the last entry, because it is always the same HDFS file, and // output them in reverse order so most recent is output first diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java index d0b9acee8ec80..6c69dde2e92b0 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRecovery.java @@ -2080,7 +2080,7 @@ private void writeOutput(TaskAttempt attempt, Configuration conf) private void validateOutput() throws IOException { File expectedFile = new File(new Path(outputDir, partFile).toString()); - StringBuffer expectedOutput = new StringBuffer(); + StringBuilder expectedOutput = new StringBuilder(); expectedOutput.append(key1).append('\t').append(val1).append("\n"); expectedOutput.append(val1).append("\n"); expectedOutput.append(val2).append("\n"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebServicesAttempts.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebServicesAttempts.java index aad41966e8f3d..cdc868d945d8d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebServicesAttempts.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebServicesAttempts.java @@ -516,7 +516,7 @@ public void verifyTaskAttemptGeneric(TaskAttempt ta, TaskType ttype, String expectDiag = ""; List diagnosticsList = ta.getDiagnostics(); if (diagnosticsList != null && !diagnostics.isEmpty()) { - StringBuffer b = new StringBuffer(); + StringBuilder b = new StringBuilder(); for (String diag : diagnosticsList) { b.append(diag); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebServicesJobs.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebServicesJobs.java index 5e4e9f70b35a8..756a6b2e08e4e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebServicesJobs.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestAMWebServicesJobs.java @@ -600,7 +600,7 @@ public void verifyAMJobGenericSecure(Job job, int mapsPending, String diagString = ""; List diagList = job.getDiagnostics(); if (diagList != null && !diagList.isEmpty()) { - StringBuffer b = new StringBuffer(); + StringBuilder b = new StringBuilder(); for (String diag : diagList) { b.append(diag); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalJobRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalJobRunner.java index 88a10e2a8d143..aae1fd0b673f6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalJobRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapred/LocalJobRunner.java @@ -1027,8 +1027,8 @@ static void setupChildMapredLocalDirs(Task t, JobConf conf) { String taskId = t.getTaskID().toString(); boolean isCleanup = t.isTaskCleanupTask(); String user = t.getUser(); - StringBuffer childMapredLocalDir = - new StringBuffer(localDirs[0] + Path.SEPARATOR + StringBuilder childMapredLocalDir = + new StringBuilder(localDirs[0] + Path.SEPARATOR + getLocalTaskDir(user, jobId, taskId, isCleanup)); for (int i = 1; i < localDirs.length; i++) { childMapredLocalDir.append("," + localDirs[i] + Path.SEPARATOR diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRWebAppUtil.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRWebAppUtil.java index acda0f43d9186..5944d6cb32d12 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRWebAppUtil.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/util/MRWebAppUtil.java @@ -145,7 +145,7 @@ public static String getApplicationWebURLOnJHSWithoutScheme(Configuration conf, InetSocketAddress address = NetUtils.createSocketAddr( hsAddress, getDefaultJHSWebappPort(), 
getDefaultJHSWebappURLWithoutScheme()); - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); if (address.getAddress() != null && (address.getAddress().isAnyLocalAddress() || address.getAddress().isLoopbackAddress())) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalModeWithNewApis.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalModeWithNewApis.java index 628ff15095cc8..d87015f5cfa11 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalModeWithNewApis.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapred/TestLocalModeWithNewApis.java @@ -102,7 +102,7 @@ public void testNewApis() throws Exception { static String readOutput(Path outDir, Configuration conf) throws IOException { FileSystem fs = outDir.getFileSystem(conf); - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter())); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java index 91151f0d8ef25..46bb4b629c816 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileInputFormat.java @@ -470,7 +470,7 @@ public static void addInputPaths(JobConf conf, String commaSeparatedPaths) { */ public static void setInputPaths(JobConf conf, Path... inputPaths) { Path path = new Path(conf.getWorkingDirectory(), inputPaths[0]); - StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString())); + StringBuilder str = new StringBuilder(StringUtils.escapeString(path.toString())); for(int i = 1; i < inputPaths.length;i++) { str.append(StringUtils.COMMA_STR); path = new Path(conf.getWorkingDirectory(), inputPaths[i]); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/InvalidInputException.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/InvalidInputException.java index faf1a3877c16c..809721f84daa7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/InvalidInputException.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/InvalidInputException.java @@ -61,7 +61,7 @@ public List getProblems() { * @return the concatenated messages from all of the problems. 
*/ public String getMessage() { - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); Iterator itr = problems.iterator(); while(itr.hasNext()) { result.append(itr.next().getMessage()); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MultiFileSplit.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MultiFileSplit.java index 8ea4f093ec60b..cd811bde9bc42 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MultiFileSplit.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MultiFileSplit.java @@ -70,7 +70,7 @@ private void addToSet(Set set, String[] array) { @Override public String toString() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); for(int i=0; i < getPaths().length; i++) { sb.append(getPath(i).toUri().getPath() + ":0+" + getLength(i)); if (i < getPaths().length -1) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SortedRanges.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SortedRanges.java index 9d73e2be76366..9d4e7354f0d02 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SortedRanges.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/SortedRanges.java @@ -207,7 +207,7 @@ public synchronized void write(DataOutput out) throws IOException { } public String toString() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); Iterator it = ranges.iterator(); while(it.hasNext()) { Range range = it.next(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java index 36405328f5fae..4e5c21ea45b6e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/TaskLog.java @@ -518,8 +518,8 @@ static String buildCommandLine(List setup, List cmd, throws IOException { String stdout = FileUtil.makeShellPath(stdoutFilename); - String stderr = FileUtil.makeShellPath(stderrFilename); - StringBuffer mergedCmd = new StringBuffer(); + String stderr = FileUtil.makeShellPath(stderrFilename); + StringBuilder mergedCmd = new StringBuilder(); // Export the pid of taskJvm to env variable JVM_PID. 
// Currently pid is not used on Windows @@ -606,7 +606,7 @@ static String buildDebugScriptCommandLine(List cmd, String debugout) */ public static String addCommand(List cmd, boolean isExecutable) throws IOException { - StringBuffer command = new StringBuffer(); + StringBuilder command = new StringBuilder(); for(String s: cmd) { command.append('\''); if (isExecutable) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/FieldSelectionMapReduce.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/FieldSelectionMapReduce.java index 0105559e8f338..6c7413139ded7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/FieldSelectionMapReduce.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/FieldSelectionMapReduce.java @@ -96,7 +96,7 @@ public class FieldSelectionMapReduce LoggerFactory.getLogger("FieldSelectionMapReduce"); private String specToString() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append("fieldSeparator: ").append(fieldSeparator).append("\n"); sb.append("mapOutputKeyValueSpec: ").append(mapOutputKeyValueSpec).append( diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java index c73c5bdd839f2..dada6595b6b83 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/Job.java @@ -476,7 +476,7 @@ public String toString() { } catch (IOException e) { } catch (InterruptedException ie) { } - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append("Job: ").append(status.getJobID()).append("\n"); sb.append("Job File: ").append(status.getJobFile()).append("\n"); sb.append("Job Tracking URL : ").append(status.getTrackingUrl()); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobStatus.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobStatus.java index d8b2321aca44f..03aeef33cb4f7 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobStatus.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobStatus.java @@ -636,7 +636,7 @@ public synchronized void setUber(boolean isUber) { } public String toString() { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); buffer.append("job-id : " + jobid); buffer.append("uber-mode : " + isUber); buffer.append("map-progress : " + mapProgress); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskCompletionEvent.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskCompletionEvent.java index 21c3823012f3c..ae4c6e62fe49b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskCompletionEvent.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/TaskCompletionEvent.java @@ -188,7 +188,7 @@ protected void setTaskTrackerHttp(String taskTrackerHttp) { @Override public String toString(){ - StringBuffer buf = new StringBuffer(); + StringBuilder buf = new StringBuilder(); buf.append("Task Id : "); buf.append(taskId); buf.append(", Status : "); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueHistogram.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueHistogram.java index b41a5bd3029a5..0438f38078007 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueHistogram.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/aggregate/ValueHistogram.java @@ -83,7 +83,7 @@ public void addNextValue(Object val) { public String getReport() { long[] counts = new long[items.size()]; - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); Iterator iter = items.values().iterator(); int i = 0; while (iter.hasNext()) { @@ -133,7 +133,7 @@ public String getReport() { * the histogram */ public String getReportDetails() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); Iterator> iter = items.entrySet().iterator(); while (iter.hasNext()) { Entry en = iter.next(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/fieldsel/FieldSelectionHelper.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/fieldsel/FieldSelectionHelper.java index 5ee7e0f78b602..1755935188baf 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/fieldsel/FieldSelectionHelper.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/fieldsel/FieldSelectionHelper.java @@ -121,10 +121,10 @@ private static String selectFields(String[] fields, List fieldList, int allFieldsFrom, String separator) { String retv = null; int i = 0; - StringBuffer sb = null; + StringBuilder sb = null; if (fieldList != null && fieldList.size() > 0) { if (sb == null) { - sb = new StringBuffer(); + sb = new StringBuilder(); } for (Integer index : fieldList) { if (index < fields.length) { @@ -135,7 +135,7 @@ private static String selectFields(String[] fields, List fieldList, } if (allFieldsFrom >= 0) { if (sb == null) { - sb = new StringBuffer(); + sb = new StringBuilder(); } for (i = allFieldsFrom; i < fields.length; i++) { sb.append(fields[i]).append(separator); @@ -168,7 +168,7 @@ public static int parseOutputKeyValueSpec(String keyValueSpec, public static String specToString(String fieldSeparator, String 
keyValueSpec, int allValueFieldsFrom, List keyFieldList, List valueFieldList) { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append("fieldSeparator: ").append(fieldSeparator).append("\n"); sb.append("keyValueSpec: ").append(keyValueSpec).append("\n"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java index caebe6c823da4..8ed756367e9ca 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileInputFormat.java @@ -803,7 +803,7 @@ public boolean accept(Path path) { } public String toString() { - StringBuffer buf = new StringBuffer(); + StringBuilder buf = new StringBuilder(); buf.append("["); for (PathFilter f: filters) { buf.append(f); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileSplit.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileSplit.java index 3c00689381803..96c1e360a3511 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileSplit.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/CombineFileSplit.java @@ -175,7 +175,7 @@ public void write(DataOutput out) throws IOException { @Override public String toString() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); for (int i = 0; i < paths.length; i++) { if (i == 0 ) { sb.append("Paths:"); @@ -188,7 +188,7 @@ public String toString() { } if (locations != null) { String locs = ""; - StringBuffer locsb = new StringBuffer(); + StringBuilder locsb = new StringBuilder(); for (int i = 0; i < locations.length; i++) { locsb.append(locations[i] + ":"); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java index e74c3fa81376a..fd7d27d3bc24b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/FileInputFormat.java @@ -569,7 +569,7 @@ public static void setInputPaths(Job job, Path... 
inputPaths) throws IOException { Configuration conf = job.getConfiguration(); Path path = inputPaths[0].getFileSystem(conf).makeQualified(inputPaths[0]); - StringBuffer str = new StringBuffer(StringUtils.escapeString(path.toString())); + StringBuilder str = new StringBuilder(StringUtils.escapeString(path.toString())); for(int i = 1; i < inputPaths.length;i++) { str.append(StringUtils.COMMA_STR); path = inputPaths[i].getFileSystem(conf).makeQualified(inputPaths[i]); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/InvalidInputException.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/InvalidInputException.java index 1113bec18889a..e0dcdf91d8110 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/InvalidInputException.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/InvalidInputException.java @@ -60,7 +60,7 @@ public List getProblems() { * @return the concatenated messages from all of the problems. */ public String getMessage() { - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); Iterator itr = problems.iterator(); while(itr.hasNext()) { result.append(itr.next().getMessage()); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/jobcontrol/ControlledJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/jobcontrol/ControlledJob.java index 16ba22bfb604e..48cde0e5f0040 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/jobcontrol/ControlledJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/jobcontrol/ControlledJob.java @@ -90,7 +90,7 @@ public ControlledJob(Configuration conf) throws IOException { @Override public String toString() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append("job name:\t").append(this.job.getJobName()).append("\n"); sb.append("job id:\t").append(this.controlID).append("\n"); sb.append("job state:\t").append(this.state).append("\n"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/TupleWritable.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/TupleWritable.java index 2990ca99d36c7..aa541f3640d3a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/TupleWritable.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/TupleWritable.java @@ -147,7 +147,7 @@ public void remove() { * [<child1>,<child2>,...,<childn>] */ public String toString() { - StringBuffer buf = new StringBuffer("["); + StringBuilder buf = new StringBuilder("["); for (int i = 0; i < values.length; ++i) { buf.append(has(i) ? 
values[i].toString() : ""); buf.append(","); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplit.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplit.java index d725196b9b884..c2366d0d9613f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplit.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/split/JobSplit.java @@ -123,7 +123,7 @@ public void write(DataOutput out) throws IOException { @Override public String toString() { - StringBuffer buf = new StringBuffer(); + StringBuilder buf = new StringBuilder(); buf.append("data-size : " + inputDataLength + "\n"); buf.append("start-offset : " + startOffset + "\n"); buf.append("locations : " + "\n"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java index df871ca32ae92..59ef95bdd462b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Fetcher.java @@ -678,7 +678,7 @@ private boolean verifySanity(long compressedLength, long decompressedLength, private URL getMapOutputURL(MapHost host, Collection maps ) throws MalformedURLException { // Get the base url - StringBuffer url = new StringBuffer(host.getBaseUrl()); + StringBuilder url = new StringBuilder(host.getBaseUrl()); boolean first = true; for (TaskAttemptID mapId : maps) { @@ -688,8 +688,10 @@ private URL getMapOutputURL(MapHost host, Collection maps url.append(mapId); first = false; } - - LOG.debug("MapOutput URL for " + host + " -> " + url.toString()); + + if (LOG.isDebugEnabled()) { + LOG.debug("MapOutput URL for " + host + " -> " + url.toString()); + } return new URL(url.toString()); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleSchedulerImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleSchedulerImpl.java index 83f9669bfefb1..173cd093e9f6e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleSchedulerImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/ShuffleSchedulerImpl.java @@ -171,7 +171,7 @@ public void resolve(TaskCompletionEvent event) { } static URI getBaseURI(TaskAttemptID reduceId, String url) { - StringBuffer baseUrl = new StringBuffer(url); + StringBuilder baseUrl = new StringBuilder(url); if (!url.endsWith("/")) { baseUrl.append("/"); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java index 0f65a29b13489..396cf585dbc07 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/tools/CLI.java @@ -520,7 +520,7 @@ Cluster createCluster() throws IOException { } private String getJobPriorityNames() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); for (JobPriority p : JobPriority.values()) { // UNDEFINED_PRIORITY need not to be displayed in usage if (JobPriority.UNDEFINED_PRIORITY == p) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestFileOutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestFileOutputCommitter.java index b646b04b74034..4b457458210c8 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestFileOutputCommitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestFileOutputCommitter.java @@ -175,7 +175,7 @@ public void testRecoveryUpgradeV1V2() throws Exception { private void validateContent(Path dir) throws IOException { File fdir = new File(dir.toUri().getPath()); File expectedFile = new File(fdir, partFile); - StringBuffer expectedOutput = new StringBuffer(); + StringBuilder expectedOutput = new StringBuilder(); expectedOutput.append(key1).append('\t').append(val1).append("\n"); expectedOutput.append(val1).append("\n"); expectedOutput.append(val2).append("\n"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java index 2aa7b34a007c8..7c53833842a67 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestFileOutputCommitter.java @@ -227,7 +227,7 @@ private void validateContent(Path dir) throws IOException { private void validateContent(File dir) throws IOException { File expectedFile = new File(dir, partFile); assertTrue("Could not find "+expectedFile, expectedFile.exists()); - StringBuffer expectedOutput = new StringBuffer(); + StringBuilder expectedOutput = new StringBuilder(); expectedOutput.append(key1).append('\t').append(val1).append("\n"); expectedOutput.append(val1).append("\n"); expectedOutput.append(val2).append("\n"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobBlock.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobBlock.java index 18040f00440b1..1d22b2bdfcf43 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobBlock.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobBlock.java @@ -109,7 +109,7 @@ public class HsJobBlock extends HtmlBlock { // todo - switch to use JobInfo List diagnostics = j.getDiagnostics(); if(diagnostics != null && !diagnostics.isEmpty()) { - StringBuffer b = new StringBuffer(); + StringBuilder b = new StringBuilder(); for(String diag: diagnostics) { b.append(addTaskLinks(diag)); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/JobInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/JobInfo.java index 08044c6ff1648..7d9d0eecb1aaa 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/JobInfo.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/dao/JobInfo.java @@ -117,7 +117,7 @@ public JobInfo(Job job) { this.diagnostics = ""; List diagnostics = job.getDiagnostics(); if (diagnostics != null && !diagnostics.isEmpty()) { - StringBuffer b = new StringBuffer(); + StringBuilder b = new StringBuilder(); for (String diag : diagnostics) { b.append(diag); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesAttempts.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesAttempts.java index 3ca6db3ab4a21..a4426e6b8529e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesAttempts.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesAttempts.java @@ -534,7 +534,7 @@ public void verifyTaskAttemptGeneric(TaskAttempt ta, TaskType ttype, String expectDiag = ""; List diagnosticsList = ta.getDiagnostics(); if (diagnosticsList != null && !diagnostics.isEmpty()) { - StringBuffer b = new StringBuffer(); + StringBuilder b = new StringBuilder(); for (String diag : diagnosticsList) { b.append(diag); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/VerifyJobsUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/VerifyJobsUtils.java index f636e46c022d7..2d46f80178b54 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/VerifyJobsUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/VerifyJobsUtils.java @@ -108,7 +108,7 @@ public static void verifyHsJobGenericSecure(Job job, Boolean uberized, String diagString = ""; List diagList = job.getDiagnostics(); if (diagList != null && !diagList.isEmpty()) { - StringBuffer b = 
new StringBuffer(); + StringBuilder b = new StringBuilder(); for (String diag : diagList) { b.append(diag); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/RandomTextWriterJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/RandomTextWriterJob.java index 1e3ee8c3579c9..398b5a6015171 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/RandomTextWriterJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/RandomTextWriterJob.java @@ -204,7 +204,7 @@ public void map(Text key, Text value, } private Text generateSentence(int noWords) { - StringBuffer sentence = new StringBuffer(); + StringBuilder sentence = new StringBuilder(); String space = " "; for (int i=0; i < noWords; ++i) { sentence.append(words[random.nextInt(words.length)]); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/AccumulatingReducer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/AccumulatingReducer.java index f6c2a06bfbbb3..b6313494e4542 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/AccumulatingReducer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/AccumulatingReducer.java @@ -73,7 +73,7 @@ public void reduce(Text key, // concatenate strings if (field.startsWith(VALUE_TYPE_STRING)) { - StringBuffer sSum = new StringBuffer(); + StringBuilder sSum = new StringBuilder(); while (values.hasNext()) sSum.append(values.next().toString()).append(";"); output.collect(key, new Text(sSum.toString())); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/JHLogAnalyzer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/JHLogAnalyzer.java index 5e3e745f0229c..8937bdafe35cd 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/JHLogAnalyzer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/JHLogAnalyzer.java @@ -773,7 +773,7 @@ public void parseLogFile(FileSystem fs, /** * Read lines until one ends with a " ." 
or "\" " */ - private StringBuffer resBuffer = new StringBuffer(); + private StringBuilder resBuffer = new StringBuilder(); private String readLine(BufferedReader reader) throws IOException { resBuffer.setLength(0); reader.mark(maxJobDelimiterLineLength); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MRBench.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MRBench.java index 36f469385e080..4d3352f219e93 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MRBench.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/MRBench.java @@ -132,7 +132,7 @@ public void generateTextFile(FileSystem fs, Path inputFile, */ private static String pad(long number, int length) { String str = String.valueOf(number); - StringBuffer value = new StringBuffer(); + StringBuilder value = new StringBuilder(); for (int i = str.length(); i < length; i++) { value.append("0"); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java index ec44dd77efbc5..80a9502774eac 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java @@ -677,7 +677,7 @@ private static void doSingleBzip2BufferSize(JobConf jConf) } private static String unquote(String in) { - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); for(int i=0; i < in.length(); ++i) { char ch = in.charAt(i); if (ch == '\\') { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFixedLengthInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFixedLengthInputFormat.java index 5fec24a1b1317..1ae17584a6d8b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFixedLengthInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestFixedLengthInputFormat.java @@ -236,7 +236,7 @@ private ArrayList createFile(Path targetFile, CompressionCodec codec, } Writer writer = new OutputStreamWriter(ostream); try { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); for (int i = 0; i < numRecords; i++) { for (int j = 0; j < recordLen; j++) { sb.append(chars[charRand.nextInt(chars.length)]); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMRCJCFileOutputCommitter.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMRCJCFileOutputCommitter.java index be7dcc5ec2bc2..18d7010a4966a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMRCJCFileOutputCommitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMRCJCFileOutputCommitter.java @@ -105,7 +105,7 @@ public void testCommitter() throws Exception { // validate output File expectedFile = new File(new Path(outDir, file).toString()); - StringBuffer expectedOutput = new StringBuffer(); + StringBuilder expectedOutput = new StringBuilder(); expectedOutput.append(key1).append('\t').append(val1).append("\n"); expectedOutput.append(val1).append("\n"); expectedOutput.append(val2).append("\n"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapProgress.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapProgress.java index 9b6ebda593b42..f70a552327f45 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapProgress.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapProgress.java @@ -119,7 +119,7 @@ public boolean canCommit(TaskAttemptID taskid) throws IOException { public AMFeedback statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus) throws IOException, InterruptedException { - StringBuffer buf = new StringBuffer("Task "); + StringBuilder buf = new StringBuilder("Task "); buf.append(taskId); if (taskStatus != null) { buf.append(" making progress to "); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapRed.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapRed.java index af09e09535ea7..a63dbec6d9f78 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapRed.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMapRed.java @@ -763,7 +763,7 @@ public void runJob(int items) { SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, inFile, Text.class, Text.class); - StringBuffer content = new StringBuffer(); + StringBuilder content = new StringBuilder(); for (int i = 0; i < 1000; i++) { content.append(i).append(": This is one more line of content\n"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRClasspath.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRClasspath.java index 60e32683c3a75..71f04078b6c12 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRClasspath.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMiniMRClasspath.java @@ -80,7 +80,7 @@ static String launchWordCount(URI fileSys, JobConf conf, String input, FileSystem fs = FileSystem.get(fileSys, conf); configureWordCount(fs, conf, input, numMaps, numReduces, inDir, outDir); JobClient.runJob(conf); - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); { Path[] parents = FileUtil.stat2Paths(fs.listStatus(outDir.getParent())); Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir, @@ -137,7 +137,7 @@ static String launchExternal(URI uri, JobConf conf, String input, // set the tests jar file conf.setJarByClass(TestMiniMRClasspath.class); JobClient.runJob(conf); - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir, new Utils.OutputFileUtils .OutputFilesFilter())); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMultipleTextOutputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMultipleTextOutputFormat.java index 43ead04b26996..114b6054d3fe4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMultipleTextOutputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestMultipleTextOutputFormat.java @@ -106,7 +106,7 @@ public void testFormat() throws Exception { File expectedFile_11 = new File(new Path(workDir, file_11).toString()); //System.out.printf("expectedFile_11: %s\n", new Path(workDir, file_11).toString()); - StringBuffer expectedOutput = new StringBuffer(); + StringBuilder expectedOutput = new StringBuilder(); for (int i = 10; i < 20; i++) { expectedOutput.append(""+i).append('\t').append(""+i).append("\n"); } @@ -118,7 +118,7 @@ public void testFormat() throws Exception { File expectedFile_12 = new File(new Path(workDir, file_12).toString()); //System.out.printf("expectedFile_12: %s\n", new Path(workDir, file_12).toString()); - expectedOutput = new StringBuffer(); + expectedOutput = new StringBuilder(); for (int i = 20; i < 30; i++) { expectedOutput.append(""+i).append('\t').append(""+i).append("\n"); } @@ -130,7 +130,7 @@ public void testFormat() throws Exception { File expectedFile_13 = new File(new Path(workDir, file_13).toString()); //System.out.printf("expectedFile_13: %s\n", new Path(workDir, file_13).toString()); - expectedOutput = new StringBuffer(); + expectedOutput = new StringBuilder(); for (int i = 30; i < 40; i++) { expectedOutput.append(""+i).append('\t').append(""+i).append("\n"); } @@ -142,7 +142,7 @@ public void testFormat() throws Exception { File expectedFile_2 = new File(new Path(workDir, file_2).toString()); //System.out.printf("expectedFile_2: %s\n", new Path(workDir, file_2).toString()); - expectedOutput = new StringBuffer(); + expectedOutput = new StringBuilder(); for (int i = 10; i < 40; i++) { expectedOutput.append(""+i).append('\t').append(""+i).append("\n"); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestTextInputFormat.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestTextInputFormat.java index 9a2576ec66496..29a370de7c333 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestTextInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestTextInputFormat.java @@ -548,7 +548,7 @@ public void testGzipEmpty() throws IOException { } private static String unquote(String in) { - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); for(int i=0; i < in.length(); ++i) { char ch = in.charAt(i); if (ch == '\\') { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/UtilsForTests.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/UtilsForTests.java index fd73410918d85..bfd8849ef1d8d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/UtilsForTests.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/UtilsForTests.java @@ -91,7 +91,7 @@ public static String ifmt(double d) { } public static String formatBytes(long numBytes) { - StringBuffer buf = new StringBuffer(); + StringBuilder buf = new StringBuilder(); boolean bDetails = true; double num = numBytes; @@ -116,7 +116,7 @@ public static String formatBytes(long numBytes) { } public static String formatBytes2(long numBytes) { - StringBuffer buf = new StringBuffer(); + StringBuilder buf = new StringBuilder(); long u = 0; if (numBytes >= TB) { u = numBytes / TB; @@ -145,7 +145,7 @@ public static String formatBytes2(long numBytes) { static final String regexpSpecials = "[]()?*+|.!^-\\~@"; public static String regexpEscape(String plain) { - StringBuffer buf = new StringBuffer(); + StringBuilder buf = new StringBuilder(); char[] ch = plain.toCharArray(); int csup = ch.length; for (int c = 0; c < csup; c++) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/jobcontrol/JobControlTestUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/jobcontrol/JobControlTestUtils.java index c971ccc6c02be..68bd399dc4209 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/jobcontrol/JobControlTestUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/jobcontrol/JobControlTestUtils.java @@ -82,7 +82,7 @@ private static String generateRandomWord() { private static String generateRandomLine() { long r = rand.nextLong() % 7; long n = r + 20; - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); for (int i = 0; i < n; i++) { sb.append(generateRandomWord()).append(" "); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/MapReduceTestUtil.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/MapReduceTestUtil.java index 4141d26933f82..9dd49350ebb07 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/MapReduceTestUtil.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/MapReduceTestUtil.java @@ -97,7 +97,7 @@ public static String generateRandomWord() { public static String generateRandomLine() { long r = rand.nextLong() % 7; long n = r + 20; - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); for (int i = 0; i < n; i++) { sb.append(generateRandomWord()).append(" "); } @@ -401,7 +401,7 @@ public Counter getCounter(String group, String name) { public static String readOutput(Path outDir, Configuration conf) throws IOException { FileSystem fs = outDir.getFileSystem(conf); - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); Path[] fileList = FileUtil.stat2Paths(fs.listStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter())); @@ -436,7 +436,7 @@ public static String readTaskLog(TaskLog.LogName filter, org.apache.hadoop.mapred.TaskAttemptID taskId, boolean isCleanup) throws IOException { // string buffer to store task log - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); int res; // reads the whole tasklog into inputstream diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java index dca39dfd71f99..6f1adcb9e1375 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/RandomTextWriter.java @@ -100,7 +100,7 @@ enum Counters { RECORDS_WRITTEN, BYTES_WRITTEN } public static String generateSentenceWithRand(ThreadLocalRandom rand, int noWords) { - StringBuffer sentence = new StringBuffer(words[rand.nextInt(words.length)]); + StringBuilder sentence = new StringBuilder(words[rand.nextInt(words.length)]); for (int i = 1; i < noWords; i++) { sentence.append(" ").append(words[rand.nextInt(words.length)]); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestFixedLengthInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestFixedLengthInputFormat.java index be9e6deff3fc8..cf5e74004caaf 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestFixedLengthInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestFixedLengthInputFormat.java @@ -262,7 +262,7 @@ private ArrayList createFile(Path targetFile, CompressionCodec codec, } Writer writer = new OutputStreamWriter(ostream); try { - StringBuffer sb = new StringBuffer(); 
+ StringBuilder sb = new StringBuilder(); for (int i = 0; i < numRecords; i++) { for (int j = 0; j < recordLen; j++) { sb.append(chars[charRand.nextInt(chars.length)]); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestMRCJCFileOutputCommitter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestMRCJCFileOutputCommitter.java index 855bb2225ca64..ef1a512af7ed5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestMRCJCFileOutputCommitter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/lib/output/TestMRCJCFileOutputCommitter.java @@ -119,7 +119,7 @@ public void testCommitter() throws Exception { // validate output File expectedFile = new File(new Path(outDir, partFile).toString()); - StringBuffer expectedOutput = new StringBuffer(); + StringBuilder expectedOutput = new StringBuilder(); expectedOutput.append(key1).append('\t').append(val1).append("\n"); expectedOutput.append(val1).append("\n"); expectedOutput.append(val2).append("\n"); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java index e41c95c490358..b3533482b525d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/v2/MiniMRYarnCluster.java @@ -106,7 +106,7 @@ public static String getResolvedMRHistoryWebAppURLWithoutScheme( JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_ADDRESS, JHAdminConfig.DEFAULT_MR_HISTORY_WEBAPP_PORT); } address = NetUtils.getConnectAddress(address); - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); InetAddress resolved = address.getAddress(); if (resolved == null || resolved.isAnyLocalAddress() || resolved.isLoopbackAddress()) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/RandomTextWriter.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/RandomTextWriter.java index 7e6c099a06f83..2340811bdd011 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/RandomTextWriter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/RandomTextWriter.java @@ -154,7 +154,7 @@ public void map(Text key, Text value, } private Text generateSentence(int noWords) { - StringBuffer sentence = new StringBuffer(); + StringBuilder sentence = new StringBuilder(); String space = " "; for (int i=0; i < noWords; ++i) { sentence.append(words[random.nextInt(words.length)]); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/Pentomino.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/Pentomino.java index 
a30d62c33cc42..1f2243722872e 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/Pentomino.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/Pentomino.java @@ -142,7 +142,7 @@ static class Point implements ColumnName { public static String stringifySolution(int width, int height, List> solution) { String[][] picture = new String[height][width]; - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); // for each piece placement... for(List row: solution) { // go through to find which piece was placed diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/Sudoku.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/Sudoku.java index 402ff028dfb42..632ab536952b5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/Sudoku.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/dancing/Sudoku.java @@ -66,7 +66,7 @@ protected static interface ColumnName { */ static String stringifySolution(int size, List> solution) { int[][] picture = new int[size][size]; - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); // go through the rows selected in the model and build a picture of the // solution. for(List row: solution) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java index 6df1f1e497783..53fc52d342d13 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/terasort/TeraScheduler.java @@ -47,7 +47,7 @@ static class Split { this.filename = filename; } public String toString() { - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); result.append(filename); result.append(" on "); for(Host host: locations) { @@ -64,7 +64,7 @@ static class Host { this.hostname = hostname; } public String toString() { - StringBuffer result = new StringBuffer(); + StringBuilder result = new StringBuilder(); result.append(splits.size()); result.append(" "); result.append(hostname); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java index 3a7cceb2369ee..165379d1dc0c8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java @@ -722,7 +722,7 @@ private void validateContent(Path dir, } Path expectedFile = getPart0000(dir); log().debug("Validating content in {}", expectedFile); - StringBuffer expectedOutput = new StringBuffer(); + StringBuilder expectedOutput = new StringBuilder(); expectedOutput.append(KEY_1).append('\t').append(VAL_1).append("\n"); expectedOutput.append(VAL_1).append("\n"); expectedOutput.append(VAL_2).append("\n"); diff --git 
a/hadoop-tools/hadoop-datajoin/src/main/java/org/apache/hadoop/contrib/utils/join/JobBase.java b/hadoop-tools/hadoop-datajoin/src/main/java/org/apache/hadoop/contrib/utils/join/JobBase.java index 7267fdecac434..2bea8957d3380 100644 --- a/hadoop-tools/hadoop-datajoin/src/main/java/org/apache/hadoop/contrib/utils/join/JobBase.java +++ b/hadoop-tools/hadoop-datajoin/src/main/java/org/apache/hadoop/contrib/utils/join/JobBase.java @@ -143,7 +143,7 @@ protected void report() { * */ protected String getReport() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); Iterator iter = this.longCounters.entrySet().iterator(); while (iter.hasNext()) { diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java index dbc86fd0b4722..a6f01261ca604 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/DistCpSync.java @@ -614,7 +614,7 @@ private Path translateRenamedPath(Path sourcePath, if (sourcePath.equals(renameItem.getSource())) { return renameItem.getTarget(); } - StringBuffer sb = new StringBuffer(sourcePath.toString()); + StringBuilder sb = new StringBuilder(sourcePath.toString()); String remain = sb.substring(renameItem.getSource().toString().length() + 1); return new Path(renameItem.getTarget(), remain); diff --git a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java index e77b2031a76db..6752329b4861d 100644 --- a/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java +++ b/hadoop-tools/hadoop-distcp/src/main/java/org/apache/hadoop/tools/util/DistCpUtils.java @@ -155,7 +155,7 @@ public static String getRelativePath(Path sourceRootPath, Path childPath) { * @return - String containing first letters of each attribute to preserve */ public static String packAttributes(EnumSet attributes) { - StringBuffer buffer = new StringBuffer(FileAttribute.values().length); + StringBuilder buffer = new StringBuilder(FileAttribute.values().length); int len = 0; for (FileAttribute attribute : attributes) { buffer.append(attribute.name().charAt(0)); diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/datatypes/NodeName.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/datatypes/NodeName.java index 20eb535d0cb0d..603b1fa6ee101 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/datatypes/NodeName.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/datatypes/NodeName.java @@ -140,7 +140,7 @@ public String getAnonymizedValue(StatePool statePool, Configuration conf) { } private void anonymize(StatePool pool) { - StringBuffer buf = new StringBuffer(); + StringBuilder buf = new StringBuilder(); NodeNameState state = (NodeNameState) pool.getState(getClass()); if (state == null) { state = new NodeNameState(); diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/PipeMapRed.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/PipeMapRed.java index ef62505c4c632..3d6541565cb44 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/PipeMapRed.java +++ 
b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/PipeMapRed.java @@ -254,7 +254,7 @@ void addJobConfToEnvironment(JobConf jobconf, Properties env) { } String safeEnvVarName(String var) { - StringBuffer safe = new StringBuffer(); + StringBuilder safe = new StringBuilder(); int len = var.length(); for (int i = 0; i < len; i++) { char c = var.charAt(i); diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java index 023371ce99443..ceac3fe6230af 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamJob.java @@ -291,7 +291,7 @@ void parseArgv() { LOG.warn("-file option is deprecated, please use generic option" + " -files instead."); - StringBuffer fileList = new StringBuffer(); + StringBuilder fileList = new StringBuilder(); for (String file : values) { packageFiles_.add(file); try { diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamXmlRecordReader.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamXmlRecordReader.java index 974cdc7c8d001..416aa07b9947c 100644 --- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamXmlRecordReader.java +++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/StreamXmlRecordReader.java @@ -23,9 +23,7 @@ import java.util.regex.*; import org.apache.hadoop.io.DataOutputBuffer; -import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.mapred.Reporter; diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java index 041d527ab173a..0d330b7c25d7f 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/TestMultipleArchiveFiles.java @@ -128,7 +128,7 @@ protected String[] genArgs() { } protected void checkOutput() throws IOException { - StringBuffer output = new StringBuffer(256); + StringBuilder output = new StringBuilder(256); Path[] fileList = FileUtil.stat2Paths(fileSys.listStatus( new Path(OUTPUT_DIR))); for (int i = 0; i < fileList.length; i++){ diff --git a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java index 2378c7b414944..af9056c92397c 100644 --- a/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java +++ b/hadoop-tools/hadoop-streaming/src/test/java/org/apache/hadoop/streaming/UtilTest.java @@ -86,7 +86,7 @@ void redirectIfAntJunit() throws IOException } public static String collate(List args, String sep) { - StringBuffer buf = new StringBuffer(); + StringBuilder buf = new StringBuilder(); Iterator it = args.iterator(); while (it.hasNext()) { if (buf.length() > 0) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/timeline/TimelineEntityGroupId.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/timeline/TimelineEntityGroupId.java index 710a1345dcde8..7acc66605350a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/timeline/TimelineEntityGroupId.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/timeline/TimelineEntityGroupId.java @@ -144,7 +144,7 @@ public String toString() { public static TimelineEntityGroupId fromString(String timelineEntityGroupIdStr) { - StringBuffer buf = new StringBuffer(); + StringBuilder buf = new StringBuilder(); Iterator it = SPLITTER.split(timelineEntityGroupIdStr).iterator(); if (!it.next().equals(TIMELINE_ENTITY_GROUPID_STR_PREFIX)) { throw new IllegalArgumentException( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/resource/PlacementConstraint.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/resource/PlacementConstraint.java index 79196fbf851da..bb87215a46621 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/resource/PlacementConstraint.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/resource/PlacementConstraint.java @@ -413,7 +413,7 @@ public boolean equals(Object o) { @Override public String toString() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); if (TargetType.ALLOCATION_TAG == this.targetType) { // following by a comma separated tags sb.append(String.join(",", getTargetValues())); @@ -643,7 +643,7 @@ public int hashCode() { @Override public String toString() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append("cardinality").append(",").append(getScope()).append(","); for (String tag : getAllocationTags()) { sb.append(tag).append(","); @@ -717,7 +717,7 @@ public T accept(Visitor visitor) { @Override public String toString() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append("and("); Iterator it = getChildren().iterator(); while (it.hasNext()) { @@ -759,7 +759,7 @@ public T accept(Visitor visitor) { @Override public String toString() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append("or("); Iterator it = getChildren().iterator(); while (it.hasNext()) { @@ -805,7 +805,7 @@ public T accept(Visitor visitor) { @Override public String toString() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append("DelayedOr("); Iterator it = getChildren().iterator(); while (it.hasNext()) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java index 6ec8549be9d98..d9b16cfa2166c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/cli/TestLogsCLI.java @@ -1491,7 +1491,7 @@ public void testSaveContainerLogsLocally() throws Exception { private String readContainerContent(Path containerPath, FileSystem fs) throws IOException { assertTrue(fs.exists(containerPath)); - StringBuffer inputLine = 
new StringBuffer(); + StringBuilder inputLine = new StringBuilder(); try (BufferedReader reader = new BufferedReader(new InputStreamReader( fs.open(containerPath)))) { String tmp; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java index ed48a92a04b48..f347e928fa839 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/ProcfsBasedProcessTree.java @@ -568,7 +568,7 @@ private static ProcessInfo constructProcessInfo(ProcessInfo pinfo, */ @Override public String toString() { - StringBuffer pTree = new StringBuffer("[ "); + StringBuilder pTree = new StringBuilder("[ "); for (String p : processTree.keySet()) { pTree.append(p); pTree.append(" "); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/JQueryUI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/JQueryUI.java index 56d9f25710eec..349c3e05180ff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/JQueryUI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/JQueryUI.java @@ -130,7 +130,7 @@ protected void initDataTables(List list) { } // for inserting stateSaveInit int pos = init.indexOf('{') + 1; - init = new StringBuffer(init).insert(pos, stateSaveInit).toString(); + init = new StringBuilder(init).insert(pos, stateSaveInit).toString(); list.add(join(id, "DataTable = $('#", id, "').dataTable(", init, ").fnSetFilteringDelay(188);")); String postInit = $(postInitID(DATATABLES, id)); @@ -146,7 +146,7 @@ protected void initDataTables(List list) { init = defaultInit; } int pos = init.indexOf('{') + 1; - init = new StringBuffer(init).insert(pos, stateSaveInit).toString(); + init = new StringBuilder(init).insert(pos, stateSaveInit).toString(); list.add(join(" $('", escapeEcmaScript(selector), "').dataTable(", init, ").fnSetFilteringDelay(288);")); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestAggregatedLogFormat.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestAggregatedLogFormat.java index 007721f2eca6e..054e751ff6436 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestAggregatedLogFormat.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/logaggregation/TestAggregatedLogFormat.java @@ -401,7 +401,7 @@ void testContainerLogsFileAccess() throws IOException { new BufferedReader(new FileReader(new File(remoteAppLogFile .toUri().getRawPath()))); String line; - StringBuffer sb = new StringBuffer(""); + StringBuilder sb = new StringBuilder(""); while ((line = in.readLine()) != null) { LOG.info(line); sb.append(line); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/sql/FederationQueryRunner.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/sql/FederationQueryRunner.java index 4ff56eef01eba..7db32701e61bc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/sql/FederationQueryRunner.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/federation/store/sql/FederationQueryRunner.java @@ -181,7 +181,7 @@ protected void rethrow(SQLException cause, String sql, Object... params) causeMessage = ""; } - StringBuffer msg = new StringBuffer(causeMessage); + StringBuilder msg = new StringBuilder(causeMessage); msg.append(" Query: "); msg.append(sql); msg.append(" Parameters: "); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionTask.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionTask.java index 8882b455446f7..9de365a128c83 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionTask.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/deletion/task/DockerContainerDeletionTask.java @@ -65,7 +65,7 @@ public void run() { */ @Override public String toString() { - StringBuffer sb = new StringBuffer("DockerContainerDeletionTask : "); + StringBuilder sb = new StringBuilder("DockerContainerDeletionTask : "); sb.append(" id : ").append(this.getTaskId()); sb.append(" containerId : ").append(this.containerId); return sb.toString().trim(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java index d3e96d8d5a1c4..c648808efa96b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/PrivilegedOperationExecutor.java @@ -281,7 +281,7 @@ public IOStreamPair executePrivilegedInteractiveOperation( return null; } - StringBuffer finalOpArg = new StringBuffer(PrivilegedOperation + StringBuilder finalOpArg = new StringBuilder(PrivilegedOperation .CGROUP_ARG_PREFIX); boolean noTasks = true; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/NetworkPacketTaggingHandlerImpl.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/NetworkPacketTaggingHandlerImpl.java index efe07e0228856..e861a9c5424f0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/NetworkPacketTaggingHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/NetworkPacketTaggingHandlerImpl.java @@ -104,7 +104,7 @@ public List preStart(Container container) //executable. String tasksFile = cGroupsHandler.getPathForCGroupTasks( CGroupsHandler.CGroupController.NET_CLS, containerIdStr); - String opArg = new StringBuffer(PrivilegedOperation.CGROUP_ARG_PREFIX) + String opArg = new StringBuilder(PrivilegedOperation.CGROUP_ARG_PREFIX) .append(tasksFile).toString(); List ops = new ArrayList<>(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficControlBandwidthHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficControlBandwidthHandlerImpl.java index efe9db35d80f7..67c32fd085287 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficControlBandwidthHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficControlBandwidthHandlerImpl.java @@ -101,7 +101,7 @@ public List bootstrap(Configuration configuration) containerBandwidthMbit = (int) Math.ceil((double) yarnBandwidthMbit / MAX_CONTAINER_COUNT); - StringBuffer logLine = new StringBuffer("strict mode is set to :") + StringBuilder logLine = new StringBuilder("strict mode is set to :") .append(strictMode).append(System.lineSeparator()); if (strictMode) { @@ -152,7 +152,7 @@ public List preStart(Container container) //executable. 
String tasksFile = cGroupsHandler.getPathForCGroupTasks( CGroupsHandler.CGroupController.NET_CLS, containerIdStr); - String opArg = new StringBuffer(PrivilegedOperation.CGROUP_ARG_PREFIX) + String opArg = new StringBuilder(PrivilegedOperation.CGROUP_ARG_PREFIX) .append(tasksFile).toString(); List ops = new ArrayList<>(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java index e5abca2826699..51667786ad619 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TrafficController.java @@ -225,7 +225,7 @@ private boolean checkIfAlreadyBootstrapped(String state) if (pattern.matcher(state).find()) { LOG.debug("Matched regex: {}", regex); } else { - String logLine = new StringBuffer("Failed to match regex: ") + String logLine = new StringBuilder("Failed to match regex: ") .append(regex).append(" Current state: ").append(state).toString(); LOG.warn(logLine); return false; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerCommand.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerCommand.java index 260c5b53f554e..7859d5a89d452 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerCommand.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/DockerCommand.java @@ -85,7 +85,7 @@ public Map> getDockerCommandWithArguments() { @Override public String toString() { - StringBuffer ret = new StringBuffer(this.command); + StringBuilder ret = new StringBuilder(this.command); for (Map.Entry> entry : commandArguments.entrySet()) { ret.append(" ").append(entry.getKey()); ret.append("=").append(StringUtils.join(",", entry.getValue())); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalCacheDirectoryManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalCacheDirectoryManager.java index 45746e786fbb1..55f7df7c7a1a3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalCacheDirectoryManager.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/LocalCacheDirectoryManager.java @@ -165,7 +165,7 @@ static String getRelativePath(int directoryNo) { String relativePath = ""; if (directoryNo > 0) { String tPath = Integer.toString(directoryNo - 1, DIRECTORIES_PER_LEVEL); - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); if (tPath.length() == 1) { sb.append(tPath.charAt(0)); } else { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/com/nvidia/NvidiaGPUPluginForRuntimeV2.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/com/nvidia/NvidiaGPUPluginForRuntimeV2.java index bf7958c0f741e..5a0f801449375 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/com/nvidia/NvidiaGPUPluginForRuntimeV2.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/com/nvidia/NvidiaGPUPluginForRuntimeV2.java @@ -172,7 +172,7 @@ public DeviceRuntimeSpec onDevicesAllocated(Set allocatedDevices, if (yarnRuntime == YarnRuntimeType.RUNTIME_DOCKER) { String nvidiaRuntime = "nvidia"; String nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"; - StringBuffer gpuMinorNumbersSB = new StringBuffer(); + StringBuilder gpuMinorNumbersSB = new StringBuilder(); for (Device device : allocatedDevices) { gpuMinorNumbersSB.append(device.getMinorNumber() + ","); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java index aa56c6f247475..ee41eab77ed83 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/recovery/NMStateStoreService.java @@ -186,7 +186,7 @@ public void setLogDir(String logDir) { @Override public String toString() { - return new StringBuffer("Status: ").append(getStatus()) + return new StringBuilder("Status: ").append(getStatus()) .append(", Exit code: ").append(exitCode) .append(", Version: ").append(version) .append(", Start Time: ").append(startTime) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java index c5b2e9711c429..0823801f7f612 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/privileged/TestPrivilegedOperationExecutor.java @@ -213,7 +213,7 @@ public void testSquashCGroupOperationsWithValidOperations() { try { PrivilegedOperation op = PrivilegedOperationExecutor .squashCGroupOperations(ops); - String expected = new StringBuffer + String expected = new StringBuilder (PrivilegedOperation.CGROUP_ARG_PREFIX) .append(cGroupTasks1).append(PrivilegedOperation .LINUX_FILE_PATH_SEPARATOR) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestNetworkPacketTaggingHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestNetworkPacketTaggingHandlerImpl.java index 74f6bff6af6ae..e2d107d068730 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestNetworkPacketTaggingHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestNetworkPacketTaggingHandlerImpl.java @@ -70,7 +70,7 @@ public void setup() { privilegedOperationExecutorMock = mock(PrivilegedOperationExecutor.class); cGroupsHandlerMock = mock(CGroupsHandler.class); conf = new YarnConfiguration(); - tmpPath = new StringBuffer(System.getProperty("test.build.data")) + tmpPath = new StringBuilder(System.getProperty("test.build.data")) .append('/').append("hadoop.tmp.dir").toString(); containerIdMock = mock(ContainerId.class); containerMock = mock(Container.class); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestTrafficControlBandwidthHandlerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestTrafficControlBandwidthHandlerImpl.java index ee6b41e7588b9..08f5ee390a13f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestTrafficControlBandwidthHandlerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestTrafficControlBandwidthHandlerImpl.java @@ -73,7 +73,7 @@ public void setup() { cGroupsHandlerMock = mock(CGroupsHandler.class); trafficControllerMock = mock(TrafficController.class); conf = new YarnConfiguration(); - tmpPath = new StringBuffer(System.getProperty("test.build.data")).append + tmpPath = new StringBuilder(System.getProperty("test.build.data")).append ('/').append("hadoop.tmp.dir").toString(); device = YarnConfiguration.DEFAULT_NM_NETWORK_RESOURCE_INTERFACE; containerIdMock = 
mock(ContainerId.class); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestTrafficController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestTrafficController.java index 33b8434c9a8cb..fd77a953d1726 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestTrafficController.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/TestTrafficController.java @@ -96,7 +96,7 @@ public class TestTrafficController { public void setup() { privilegedOperationExecutorMock = mock(PrivilegedOperationExecutor.class); conf = new YarnConfiguration(); - tmpPath = new StringBuffer(System.getProperty("test.build.data")).append + tmpPath = new StringBuilder(System.getProperty("test.build.data")).append ('/').append("hadoop.tmp.dir").toString(); conf.set("hadoop.tmp.dir", tmpPath); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java index c5f508778f01d..5950a137d20e4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestDockerContainerRuntime.java @@ -173,7 +173,7 @@ public class TestDockerContainerRuntime { private String[] testCapabilities; private final String signalPid = "1234"; private final String tmpPath = - new StringBuffer(System.getProperty("test.build.data")) + new StringBuilder(System.getProperty("test.build.data")) .append('/').append("hadoop.tmp.dir").toString(); private static final String RUNTIME_TYPE = "DOCKER"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestHdfsManifestToResourcesPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestHdfsManifestToResourcesPlugin.java index aa4005e4dbf1a..735f19fbab2eb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestHdfsManifestToResourcesPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestHdfsManifestToResourcesPlugin.java @@ -54,7 +54,7 @@ public class TestHdfsManifestToResourcesPlugin { private static final 
Logger LOG = LoggerFactory.getLogger(TestHdfsManifestToResourcesPlugin.class); private Configuration conf; - private String tmpPath = new StringBuffer( + private String tmpPath = new StringBuilder( System.getProperty("test.build.data")) .append('/').append("hadoop.tmp.dir").toString(); private static final String LAYER_MEDIA_TYPE = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestImageTagToManifestPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestImageTagToManifestPlugin.java index 3c2a951597ec9..9164b746b948c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestImageTagToManifestPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestImageTagToManifestPlugin.java @@ -54,7 +54,7 @@ public class TestImageTagToManifestPlugin { private MockImageTagToManifestPlugin mockImageTagToManifestPlugin; private Configuration conf; private String tmpPath = - new StringBuffer(System.getProperty("test.build.data")) + new StringBuilder(System.getProperty("test.build.data")) .append('/').append("hadoop.tmp.dir").toString(); private ObjectMapper mapper; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestRuncContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestRuncContainerRuntime.java index 8a541bbe1ae32..66ed95e740f07 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestRuncContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/TestRuncContainerRuntime.java @@ -168,7 +168,7 @@ public void setup() throws ContainerExecutionException { mockExecutor = Mockito .mock(PrivilegedOperationExecutor.class); mockCGroupsHandler = Mockito.mock(CGroupsHandler.class); - tmpPath = new StringBuffer(System.getProperty("test.build.data")) + tmpPath = new StringBuilder(System.getProperty("test.build.data")) .append('/').append("hadoop.tmp.dir").toString(); containerId = "container_e11_1518975676334_14532816_01_000001"; container = mock(Container.class); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java index e5737d9246cec..93bedbf366b66 100644 --- 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/docker/TestDockerCommandExecutor.java @@ -85,7 +85,7 @@ public void setUp() throws Exception { mockExecutor = mock(PrivilegedOperationExecutor.class); mockCGroupsHandler = mock(CGroupsHandler.class); configuration = new Configuration(); - String tmpPath = new StringBuffer(System.getProperty("test.build.data")) + String tmpPath = new StringBuilder(System.getProperty("test.build.data")) .append('/').append("hadoop.tmp.dir").toString(); configuration.set("hadoop.tmp.dir", tmpPath); runtime = new DockerLinuxContainerRuntime(mockExecutor, mockCGroupsHandler); @@ -115,7 +115,7 @@ public Context createMockNMContext() { LocalDirsHandlerService localDirsHandler = mock(LocalDirsHandlerService.class); - String tmpPath = new StringBuffer(System.getProperty("test.build.data")) + String tmpPath = new StringBuilder(System.getProperty("test.build.data")) .append('/').append("hadoop.tmp.dir").toString(); ConcurrentMap containerMap = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java index 95cca2ca3b9b1..e13ada2ac7238 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/localizer/TestLocalCacheDirectoryManager.java @@ -47,7 +47,7 @@ public void testHierarchicalSubDirectoryCreation() { // Testing path generation from "0" to "0/0/z/z" for (int i = 1; i <= 37 * 36 * 36; i++) { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); String num = Integer.toString(i - 1, 36); if (num.length() == 1) { sb.append(num.charAt(0)); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/deviceframework/TestDevicePluginAdapter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/deviceframework/TestDevicePluginAdapter.java index 78d794e53782b..fc6bd3714a6ac 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/deviceframework/TestDevicePluginAdapter.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/resourceplugin/deviceframework/TestDevicePluginAdapter.java @@ -977,7 +977,7 @@ private DeviceRuntimeSpec generateSpec(String version, if (version.equals("v2")) 
{ String nvidiaRuntime = "nvidia"; String nvidiaVisibleDevices = "NVIDIA_VISIBLE_DEVICES"; - StringBuffer gpuMinorNumbersSB = new StringBuffer(); + StringBuilder gpuMinorNumbersSB = new StringBuilder(); for (Device device : allocatedDevices) { gpuMinorNumbersSB.append(device.getMinorNumber() + ","); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PlanQueue.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PlanQueue.java index 847c90e54589a..fc7b2a6aa169a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PlanQueue.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/PlanQueue.java @@ -72,7 +72,7 @@ public PlanQueue(CapacitySchedulerQueueContext queueContext, String queueName, updateQuotas(configuredUserLimit, configuredUserLimitFactor, maxAppsForReservation, configuredMaxAppsPerUserForReservation); - StringBuffer queueInfo = new StringBuffer(); + StringBuilder queueInfo = new StringBuilder(); queueInfo.append("Created Plan Queue: ").append(queueName) .append("\nwith capacity: [").append(super.getCapacity()) .append("]\nwith max capacity: [").append(super.getMaximumCapacity()) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java index a65775104ec70..62b6703567f23 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebServices.java @@ -600,7 +600,7 @@ private RMNode getRMNode(final String nodeId) { * @return The str String after escaping invalid xml characters. 
*/ public static String escapeInvalidXMLCharacters(String str) { - StringBuffer out = new StringBuffer(); + StringBuilder out = new StringBuilder(); final int strlen = str.length(); final String substitute = "\uFFFD"; int idx = 0; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AllocationTagsInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AllocationTagsInfo.java index ee09aa2f03f17..c60f167318c18 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AllocationTagsInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/AllocationTagsInfo.java @@ -45,7 +45,7 @@ public void addAllocationTag(AllocationTagInfo info) { @Override public String toString() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); Iterator it = allocationTagInfo.iterator(); while (it.hasNext()) { AllocationTagInfo current = it.next(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestPlacementProcessor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestPlacementProcessor.java index 837d78df8da00..e7c1d0ca6d7e9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestPlacementProcessor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/TestPlacementProcessor.java @@ -896,7 +896,7 @@ private static void printTags(Collection nodes, for (MockNM nm : nodes) { Map nmTags = atm .getAllocationTagsWithCount(nm.getNodeId()); - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); if (nmTags != null) { nmTags.forEach((tag, count) -> sb.append(tag + "(" + count + "),")); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/algorithm/TestCircularIterator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/algorithm/TestCircularIterator.java index 5ce76b08948b2..bb005a04ee826 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/algorithm/TestCircularIterator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/constraint/algorithm/TestCircularIterator.java @@ -35,7 +35,7 @@ public void testIteration() throws Exception { List list = Arrays.asList("a", "b", "c", "d"); CircularIterator ci = new CircularIterator<>(null, 
list.iterator(), list); - StringBuffer sb = new StringBuffer(""); + StringBuilder sb = new StringBuilder(""); while (ci.hasNext()) { sb.append(ci.next()); } @@ -44,7 +44,7 @@ public void testIteration() throws Exception { Iterator lIter = list.iterator(); lIter.next(); lIter.next(); - sb = new StringBuffer(""); + sb = new StringBuilder(""); ci = new CircularIterator<>(null, lIter, list); while (ci.hasNext()) { sb.append(ci.next()); @@ -55,7 +55,7 @@ public void testIteration() throws Exception { lIter.next(); lIter.next(); lIter.next(); - sb = new StringBuffer(""); + sb = new StringBuilder(""); ci = new CircularIterator<>("x", lIter, list); while (ci.hasNext()) { sb.append(ci.next()); @@ -65,7 +65,7 @@ public void testIteration() throws Exception { list = Arrays.asList("a"); lIter = list.iterator(); lIter.next(); - sb = new StringBuffer(""); + sb = new StringBuilder(""); ci = new CircularIterator<>("y", lIter, list); while (ci.hasNext()) { sb.append(ci.next()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestQueuePlacementPolicy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestQueuePlacementPolicy.java index c6148cd2f30f4..5e8f60253cba5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestQueuePlacementPolicy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/TestQueuePlacementPolicy.java @@ -99,7 +99,7 @@ public void cleanTest() { @Test public void testSpecifiedUserPolicy() throws Exception { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -118,7 +118,7 @@ public void testSpecifiedUserPolicy() throws Exception { @Test public void testNoCreate() throws Exception { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -144,7 +144,7 @@ public void testNoCreate() throws Exception { @Test public void testSpecifiedThenReject() throws Exception { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -160,7 +160,7 @@ public void testSpecifiedThenReject() throws Exception { @Test public void testOmittedTerminalRule() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -170,7 +170,7 @@ public void testOmittedTerminalRule() { @Test public void testTerminalRuleInMiddle() { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -184,7 +184,7 @@ public void testTerminals() { // The default rule is no longer considered terminal when the create flag // is false. 
The throw now happens when configuring not when assigning the // application - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -197,7 +197,7 @@ public void testDefaultRuleWithQueueAttribute() throws Exception { // This test covers the use case where we would like default rule // to point to a different queue by default rather than root.default createQueue(FSQueueType.LEAF, "root.someDefaultQueue"); - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -212,7 +212,7 @@ public void testDefaultRuleWithQueueAttribute() throws Exception { @Test public void testNestedUserQueueParsingErrors() { // No nested rule specified in hierarchical user queue - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(""); @@ -220,7 +220,7 @@ public void testNestedUserQueueParsingErrors() { assertIfExceptionThrown(sb); // Specified nested rule is not a FSPlacementRule - sb = new StringBuffer(); + sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -230,7 +230,7 @@ public void testNestedUserQueueParsingErrors() { assertIfExceptionThrown(sb); // Parent rule is rule that cannot be one: reject or nestedUserQueue - sb = new StringBuffer(); + sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -241,7 +241,7 @@ public void testNestedUserQueueParsingErrors() { // If the parent rule does not have the create flag the nested rule is not // terminal - sb = new StringBuffer(); + sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -253,7 +253,7 @@ public void testNestedUserQueueParsingErrors() { @Test public void testMultipleParentRules() throws Exception { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -275,7 +275,7 @@ public void testMultipleParentRules() throws Exception { @Test public void testBrokenRules() throws Exception { // broken rule should fail configuring - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(""); @@ -283,7 +283,7 @@ public void testBrokenRules() throws Exception { assertIfExceptionThrown(sb); // policy without rules ignoring policy - sb = new StringBuffer(); + sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(""); @@ -291,7 +291,7 @@ public void testBrokenRules() throws Exception { createPolicy(sb.toString()); // broken rule should fail configuring - sb = new StringBuffer(); + sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -302,7 +302,7 @@ public void testBrokenRules() throws Exception { // parent rule not set to something known: no parent rule is required // required case is only for nestedUserQueue tested earlier - sb = new StringBuffer(); + sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -312,7 +312,7 @@ public void testBrokenRules() throws Exception { createPolicy(sb.toString()); } - private void assertIfExceptionThrown(StringBuffer sb) { + private void assertIfExceptionThrown(StringBuilder sb) { Throwable th = null; try { createPolicy(sb.toString()); @@ -336,7 +336,7 @@ private void assertIfExceptionThrown(String user) { @Test public void testNestedUserQueueParsing() throws Exception { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); 
sb.append(""); sb.append(" "); sb.append(" "); @@ -349,7 +349,7 @@ public void testNestedUserQueueParsing() throws Exception { @Test public void testNestedUserQueuePrimaryGroup() throws Exception { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -380,7 +380,7 @@ public void testNestedUserQueuePrimaryGroup() throws Exception { @Test public void testNestedUserQueuePrimaryGroupNoCreate() throws Exception { // Primary group rule has create='false' - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -402,7 +402,7 @@ public void testNestedUserQueuePrimaryGroupNoCreate() throws Exception { assertEquals("root.user1group.user1", context.getQueue()); // Both Primary group and nestedUserQueue rule has create='false' - sb = new StringBuffer(); + sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -426,7 +426,7 @@ public void testNestedUserQueuePrimaryGroupNoCreate() throws Exception { @Test public void testNestedUserQueueSecondaryGroup() throws Exception { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -452,7 +452,7 @@ public void testNestedUserQueueSecondaryGroup() throws Exception { public void testNestedUserQueueSpecificRule() throws Exception { // This test covers the use case where users can specify different parent // queues and want user queues under those. - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -477,7 +477,7 @@ public void testNestedUserQueueSpecificRule() throws Exception { public void testNestedUserQueueDefaultRule() throws Exception { // This test covers the use case where we would like user queues to be // created under a default parent queue - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -492,7 +492,7 @@ public void testNestedUserQueueDefaultRule() throws Exception { // Same as above but now with the create flag false for the parent createQueue(FSQueueType.PARENT, "root.parent"); - sb = new StringBuffer(); + sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -509,7 +509,7 @@ public void testNestedUserQueueDefaultRule() throws Exception { // Parent queue returned is already a configured LEAF, should fail and the // context is null. createQueue(FSQueueType.LEAF, "root.parent"); - sb = new StringBuffer(); + sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -526,7 +526,7 @@ public void testNestedUserQueueDefaultRule() throws Exception { @Test public void testUserContainsPeriod() throws Exception { // This test covers the user case where the username contains periods. 
- StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(""); @@ -535,7 +535,7 @@ public void testUserContainsPeriod() throws Exception { context = placementManager.placeApplication(asc, "first.last"); assertEquals("root.first_dot_last", context.getQueue()); - sb = new StringBuffer(); + sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -553,7 +553,7 @@ public void testUserContainsPeriod() throws Exception { @Test public void testGroupContainsPeriod() throws Exception { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -580,7 +580,7 @@ public void testGroupContainsPeriod() throws Exception { @Test public void testEmptyGroupsPrimaryGroupRule() throws Exception { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); @@ -596,7 +596,7 @@ public void testEmptyGroupsPrimaryGroupRule() throws Exception { @Test public void testSpecifiedQueueWithSpaces() throws Exception { - StringBuffer sb = new StringBuffer(); + StringBuilder sb = new StringBuilder(); sb.append(""); sb.append(" "); sb.append(" "); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java index 725211ef1f48c..f0b06fc73681b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/src/main/java/org/apache/hadoop/yarn/server/router/webapp/FederationInterceptorREST.java @@ -1906,7 +1906,7 @@ public Response addToClusterNodeLabels(NodeLabelsInfo newNodeLabels, ClientMethod remoteMethod = new ClientMethod("addToClusterNodeLabels", argsClasses, args); Map responseInfoMap = invokeConcurrent(subClustersActives, remoteMethod, Response.class); - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); // SubCluster-0:SUCCESS,SubCluster-1:SUCCESS responseInfoMap.forEach((subClusterInfo, response) -> buildAppendMsg(subClusterInfo, buffer, response)); @@ -1964,7 +1964,7 @@ public Response removeFromClusterNodeLabels(Set oldNodeLabels, new ClientMethod("removeFromClusterNodeLabels", argsClasses, args); Map responseInfoMap = invokeConcurrent(subClustersActives, remoteMethod, Response.class); - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); // SubCluster-0:SUCCESS,SubCluster-1:SUCCESS responseInfoMap.forEach((subClusterInfo, response) -> buildAppendMsg(subClusterInfo, buffer, response)); @@ -1993,10 +1993,10 @@ public Response removeFromClusterNodeLabels(Set oldNodeLabels, * Build Append information. * * @param subClusterInfo subCluster information. - * @param buffer StringBuffer. + * @param buffer StringBuilder. * @param response response message. 
*/ - private void buildAppendMsg(SubClusterInfo subClusterInfo, StringBuffer buffer, + private void buildAppendMsg(SubClusterInfo subClusterInfo, StringBuilder buffer, Response response) { SubClusterId subClusterId = subClusterInfo.getSubClusterId(); String state = response != null && From e5b76dc99fdc7c9a3fc2132873eb4ef3e545bb4f Mon Sep 17 00:00:00 2001 From: zhengchenyu Date: Mon, 19 Aug 2024 12:40:45 +0800 Subject: [PATCH 090/113] HADOOP-19180. EC: Fix calculation errors caused by special index order (#6813). Contributed by zhengchenyu. Reviewed-by: He Xiaoqiao Signed-off-by: Shuyan Zhang --- .../io/erasurecode/rawcoder/RSRawDecoder.java | 32 +++--- .../hadoop/io/erasurecode/erasure_coder.c | 36 +++--- .../hadoop/io/erasurecode/erasure_coder.h | 1 - .../hadoop/io/erasurecode/erasure_code_test.c | 80 +++++++++---- .../TestErasureCodingEncodeAndDecode.java | 108 ++++++++++++++++++ 5 files changed, 195 insertions(+), 62 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestErasureCodingEncodeAndDecode.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder.java index d7f78abc05056..824e701c71fe6 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/erasurecode/rawcoder/RSRawDecoder.java @@ -51,7 +51,6 @@ public class RSRawDecoder extends RawErasureDecoder { private byte[] gfTables; private int[] cachedErasedIndexes; private int[] validIndexes; - private int numErasedDataUnits; private boolean[] erasureFlags; public RSRawDecoder(ErasureCoderOptions coderOptions) { @@ -120,14 +119,10 @@ private void processErasures(int[] erasedIndexes) { this.gfTables = new byte[getNumAllUnits() * getNumDataUnits() * 32]; this.erasureFlags = new boolean[getNumAllUnits()]; - this.numErasedDataUnits = 0; for (int i = 0; i < erasedIndexes.length; i++) { int index = erasedIndexes[i]; erasureFlags[index] = true; - if (index < getNumDataUnits()) { - numErasedDataUnits++; - } } generateDecodeMatrix(erasedIndexes); @@ -156,21 +151,22 @@ private void generateDecodeMatrix(int[] erasedIndexes) { GF256.gfInvertMatrix(tmpMatrix, invertMatrix, getNumDataUnits()); - for (i = 0; i < numErasedDataUnits; i++) { - for (j = 0; j < getNumDataUnits(); j++) { - decodeMatrix[getNumDataUnits() * i + j] = - invertMatrix[getNumDataUnits() * erasedIndexes[i] + j]; - } - } - - for (p = numErasedDataUnits; p < erasedIndexes.length; p++) { - for (i = 0; i < getNumDataUnits(); i++) { - s = 0; + for (p = 0; p < erasedIndexes.length; p++) { + int erasedIndex = erasedIndexes[p]; + if (erasedIndex < getNumDataUnits()) { for (j = 0; j < getNumDataUnits(); j++) { - s ^= GF256.gfMul(invertMatrix[j * getNumDataUnits() + i], - encodeMatrix[getNumDataUnits() * erasedIndexes[p] + j]); + decodeMatrix[getNumDataUnits() * p + j] = + invertMatrix[getNumDataUnits() * erasedIndexes[p] + j]; + } + } else { + for (i = 0; i < getNumDataUnits(); i++) { + s = 0; + for (j = 0; j < getNumDataUnits(); j++) { + s ^= GF256.gfMul(invertMatrix[j * getNumDataUnits() + i], + encodeMatrix[getNumDataUnits() * erasedIndexes[p] + j]); + } + decodeMatrix[getNumDataUnits() * p + i] = s; } - decodeMatrix[getNumDataUnits() * p + i] = s; } } } diff --git 
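For readability, a condensed sketch of the corrected RSRawDecoder logic above. The removed code assumed every erased data unit appears before every erased parity unit in erasedIndexes (it counted numErasedDataUnits and split the loop on it), so an order such as {7, 1} picked decode rows from the wrong source; the fix builds one decode-matrix row per erased index in the caller-supplied order. The sketch below is an illustration with simplified names, not a drop-in replacement, and the GF(2^8) multiplication is passed in (for example a wrapper around Hadoop's GF256.gfMul) so nothing is assumed about the field implementation.

    import java.util.function.IntBinaryOperator;

    /** Hedged sketch of per-index decode-matrix construction; not the Hadoop class itself. */
    final class DecodeMatrixSketch {

      /**
       * Builds one decode-matrix row per erased index, honouring the caller-supplied order,
       * so erasedIndexes = {7, 1} (parity before data) decodes the same as {1, 7}.
       */
      static void generateDecodeMatrix(byte[] decodeMatrix, byte[] invertMatrix,
          byte[] encodeMatrix, int[] erasedIndexes, int numDataUnits, IntBinaryOperator gfMul) {
        for (int p = 0; p < erasedIndexes.length; p++) {
          int erased = erasedIndexes[p];
          if (erased < numDataUnits) {
            // Erased data unit: row p of the decode matrix is the matching row of the inverse.
            System.arraycopy(invertMatrix, numDataUnits * erased,
                decodeMatrix, numDataUnits * p, numDataUnits);
          } else {
            // Erased parity unit: row p is the erased parity's encode row folded through the inverse.
            for (int i = 0; i < numDataUnits; i++) {
              int s = 0;
              for (int j = 0; j < numDataUnits; j++) {
                s ^= gfMul.applyAsInt(invertMatrix[j * numDataUnits + i] & 0xFF,
                    encodeMatrix[numDataUnits * erased + j] & 0xFF);
              }
              decodeMatrix[numDataUnits * p + i] = (byte) s;
            }
          }
        }
      }
    }

The native erasure_coder.c change that follows applies the same per-index construction.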
a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_coder.c b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_coder.c index b2d856b6f889e..e7ea07af4cae5 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_coder.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_coder.c @@ -132,9 +132,6 @@ static int processErasures(IsalDecoder* pCoder, unsigned char** inputs, index = erasedIndexes[i]; pCoder->erasedIndexes[i] = index; pCoder->erasureFlags[index] = 1; - if (index < numDataUnits) { - pCoder->numErasedDataUnits++; - } } pCoder->numErased = numErased; @@ -175,7 +172,6 @@ int decode(IsalDecoder* pCoder, unsigned char** inputs, // Clear variables used per decode call void clearDecoder(IsalDecoder* decoder) { - decoder->numErasedDataUnits = 0; decoder->numErased = 0; memset(decoder->gftbls, 0, sizeof(decoder->gftbls)); memset(decoder->decodeMatrix, 0, sizeof(decoder->decodeMatrix)); @@ -205,24 +201,24 @@ int generateDecodeMatrix(IsalDecoder* pCoder) { h_gf_invert_matrix(pCoder->tmpMatrix, pCoder->invertMatrix, numDataUnits); - for (i = 0; i < pCoder->numErasedDataUnits; i++) { + for (p = 0; p < pCoder->numErased; p++) { for (j = 0; j < numDataUnits; j++) { - pCoder->decodeMatrix[numDataUnits * i + j] = - pCoder->invertMatrix[numDataUnits * - pCoder->erasedIndexes[i] + j]; - } - } - - for (p = pCoder->numErasedDataUnits; p < pCoder->numErased; p++) { - for (i = 0; i < numDataUnits; i++) { - s = 0; - for (j = 0; j < numDataUnits; j++) { - s ^= h_gf_mul(pCoder->invertMatrix[j * numDataUnits + i], - pCoder->encodeMatrix[numDataUnits * - pCoder->erasedIndexes[p] + j]); + int erasedIndex = pCoder->erasedIndexes[p]; + if (erasedIndex < numDataUnits) { + pCoder->decodeMatrix[numDataUnits * p + j] = + pCoder->invertMatrix[numDataUnits * + pCoder->erasedIndexes[p] + j]; + } else { + for (i = 0; i < numDataUnits; i++) { + s = 0; + for (j = 0; j < numDataUnits; j++) { + s ^= h_gf_mul(pCoder->invertMatrix[j * numDataUnits + i], + pCoder->encodeMatrix[numDataUnits * + pCoder->erasedIndexes[p] + j]); + } + pCoder->decodeMatrix[numDataUnits * p + i] = s; + } } - - pCoder->decodeMatrix[numDataUnits * p + i] = s; } } diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_coder.h b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_coder.h index 8f5bf8a3ca7fe..d2ab24cc30b1e 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_coder.h +++ b/hadoop-common-project/hadoop-common/src/main/native/src/org/apache/hadoop/io/erasurecode/erasure_coder.h @@ -62,7 +62,6 @@ typedef struct _IsalDecoder { unsigned char erasureFlags[MMAX]; int erasedIndexes[MMAX]; int numErased; - int numErasedDataUnits; unsigned char* realInputs[MMAX]; } IsalDecoder; diff --git a/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/io/erasurecode/erasure_code_test.c b/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/io/erasurecode/erasure_code_test.c index 331bb219b7faf..ed439805baedb 100644 --- a/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/io/erasurecode/erasure_code_test.c +++ b/hadoop-common-project/hadoop-common/src/main/native/src/test/org/apache/hadoop/io/erasurecode/erasure_code_test.c @@ 
-27,25 +27,27 @@ #include "erasure_code.h" #include "gf_util.h" #include "erasure_coder.h" +#include "dump.h" #include #include #include int main(int argc, char *argv[]) { - int i, j; + int i, j, k, l; char err[256]; size_t err_len = sizeof(err); int chunkSize = 1024; int numDataUnits = 6; int numParityUnits = 3; + int numTotalUnits = numDataUnits + numParityUnits; unsigned char** dataUnits; unsigned char** parityUnits; IsalEncoder* pEncoder; - int erasedIndexes[2]; + int erasedIndexes[3]; unsigned char* allUnits[MMAX]; IsalDecoder* pDecoder; - unsigned char* decodingOutput[2]; + unsigned char* decodingOutput[3]; unsigned char** backupUnits; if (0 == build_support_erasurecode()) { @@ -82,6 +84,11 @@ int main(int argc, char *argv[]) { } } + // Allocate decode output + for (i = 0; i < 3; i++) { + decodingOutput[i] = malloc(chunkSize); + } + pEncoder = (IsalEncoder*)malloc(sizeof(IsalEncoder)); memset(pEncoder, 0, sizeof(*pEncoder)); initEncoder(pEncoder, numDataUnits, numParityUnits); @@ -95,26 +102,53 @@ int main(int argc, char *argv[]) { memcpy(allUnits + numDataUnits, parityUnits, numParityUnits * (sizeof (unsigned char*))); - erasedIndexes[0] = 1; - erasedIndexes[1] = 7; - - backupUnits[0] = allUnits[1]; - backupUnits[1] = allUnits[7]; - - allUnits[0] = NULL; // Not to read - allUnits[1] = NULL; - allUnits[7] = NULL; - - decodingOutput[0] = malloc(chunkSize); - decodingOutput[1] = malloc(chunkSize); - - decode(pDecoder, allUnits, erasedIndexes, 2, decodingOutput, chunkSize); - - for (i = 0; i < pDecoder->numErased; i++) { - if (0 != memcmp(decodingOutput[i], backupUnits[i], chunkSize)) { - fprintf(stderr, "Decoding failed\n\n"); - dumpDecoder(pDecoder); - return -1; + for (i = 0; i < numTotalUnits; i++) { + for (j = 0; j < numTotalUnits; j++) { + for (k = 0; k < numTotalUnits; k++) { + int numErased; + if (i == j && j == k) { + erasedIndexes[0] = i; + numErased = 1; + backupUnits[0] = allUnits[i]; + allUnits[i] = NULL; + } else if (i == j) { + erasedIndexes[0] = i; + erasedIndexes[1] = k; + numErased = 2; + backupUnits[0] = allUnits[i]; + backupUnits[1] = allUnits[k]; + allUnits[i] = NULL; + allUnits[k] = NULL; + } else if (i == k || j == k) { + erasedIndexes[0] = i; + erasedIndexes[1] = j; + numErased = 2; + backupUnits[0] = allUnits[i]; + backupUnits[1] = allUnits[j]; + allUnits[i] = NULL; + allUnits[j] = NULL; + } else { + erasedIndexes[0] = i; + erasedIndexes[1] = j; + erasedIndexes[2] = k; + numErased = 3; + backupUnits[0] = allUnits[i]; + backupUnits[1] = allUnits[j]; + backupUnits[2] = allUnits[k]; + allUnits[i] = NULL; + allUnits[j] = NULL; + allUnits[k] = NULL; + } + decode(pDecoder, allUnits, erasedIndexes, numErased, decodingOutput, chunkSize); + for (l = 0; l < pDecoder->numErased; l++) { + if (0 != memcmp(decodingOutput[l], backupUnits[l], chunkSize)) { + printf("Decoding failed\n"); + dumpDecoder(pDecoder); + return -1; + } + allUnits[erasedIndexes[l]] = backupUnits[l]; + } + } } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestErasureCodingEncodeAndDecode.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestErasureCodingEncodeAndDecode.java new file mode 100644 index 0000000000000..e61f64e423f30 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/erasurecode/TestErasureCodingEncodeAndDecode.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.io.erasurecode; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureDecoder; +import org.apache.hadoop.io.erasurecode.rawcoder.RawErasureEncoder; +import org.junit.Test; + +import java.util.Random; + +import static org.junit.Assert.assertArrayEquals; + +public class TestErasureCodingEncodeAndDecode { + + private final static int CHUNCK = 1024; + private final static int DATAB_LOCKS = 6; + private final static int PARITY_BLOCKS = 3; + private final static int TOTAL_BLOCKS = DATAB_LOCKS + PARITY_BLOCKS; + + @Test + public void testEncodeAndDecode() throws Exception { + Configuration conf = new Configuration(); + int totalBytes = CHUNCK * DATAB_LOCKS; + Random random = new Random(); + byte[] tmpBytes = new byte[totalBytes]; + random.nextBytes(tmpBytes); + byte[][] data = new byte[DATAB_LOCKS][CHUNCK]; + for (int i = 0; i < DATAB_LOCKS; i++) { + System.arraycopy(tmpBytes, i * CHUNCK, data[i], 0, CHUNCK); + } + ErasureCoderOptions coderOptions = new ErasureCoderOptions(DATAB_LOCKS, PARITY_BLOCKS); + + // 1 Encode + RawErasureEncoder encoder = + CodecUtil.createRawEncoder(conf, ErasureCodeConstants.RS_CODEC_NAME, coderOptions); + byte[][] parity = new byte[PARITY_BLOCKS][CHUNCK]; + encoder.encode(data, parity); + + // 2 Compose the complete data + byte[][] all = new byte[DATAB_LOCKS + PARITY_BLOCKS][CHUNCK]; + for (int i = 0; i < DATAB_LOCKS; i++) { + System.arraycopy(data[i], 0, all[i], 0, CHUNCK); + } + for (int i = 0; i < PARITY_BLOCKS; i++) { + System.arraycopy(parity[i], 0, all[i + DATAB_LOCKS], 0, CHUNCK); + } + + // 3 Decode + RawErasureDecoder rawDecoder = + CodecUtil.createRawDecoder(conf, ErasureCodeConstants.RS_CODEC_NAME, coderOptions); + byte[][] backup = new byte[PARITY_BLOCKS][CHUNCK]; + for (int i = 0; i < TOTAL_BLOCKS; i++) { + for (int j = 0; j < TOTAL_BLOCKS; j++) { + for (int k = 0; k < TOTAL_BLOCKS; k++) { + int[] erasedIndexes; + if (i == j && j == k) { + erasedIndexes = new int[]{i}; + backup[0] = all[i]; + all[i] = null; + } else if (i == j) { + erasedIndexes = new int[]{i, k}; + backup[0] = all[i]; + backup[1] = all[k]; + all[i] = null; + all[k] = null; + } else if ((i == k) || ((j == k))) { + erasedIndexes = new int[]{i, j}; + backup[0] = all[i]; + backup[1] = all[j]; + all[i] = null; + all[j] = null; + } else { + erasedIndexes = new int[]{i, j, k}; + backup[0] = all[i]; + backup[1] = all[j]; + backup[2] = all[k]; + all[i] = null; + all[j] = null; + all[k] = null; + } + byte[][] decoded = new byte[erasedIndexes.length][CHUNCK]; + rawDecoder.decode(all, erasedIndexes, decoded); + for (int l = 0; l < erasedIndexes.length; l++) { + assertArrayEquals(backup[l], decoded[l]); + all[erasedIndexes[l]] = backup[l]; + } + } + } + } + } +} From df08e0de41aa9cd85c50c0f293ae6d72b997582a Mon Sep 17 00:00:00 
2001 From: Stephen O'Donnell Date: Mon, 19 Aug 2024 11:35:11 +0100 Subject: [PATCH 091/113] HDFS-17605. Reduce memory overhead of TestBPOfferService (#6996) --- .../hadoop/hdfs/server/datanode/TestBPOfferService.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java index fd1b5609b1f0a..5de131e6e411c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java @@ -144,8 +144,9 @@ public void setupMocks() throws Exception { mockNN2 = setupNNMock(1); // Set up a mock DN with the bare-bones configuration - // objects, etc. - mockDn = Mockito.mock(DataNode.class); + // objects, etc. Set as stubOnly to save memory and avoid Mockito holding + // references to each invocation. This can cause OOM in some runs. + mockDn = Mockito.mock(DataNode.class, Mockito.withSettings().stubOnly()); Mockito.doReturn(true).when(mockDn).shouldRun(); Configuration conf = new Configuration(); File dnDataDir = new File(new File(TEST_BUILD_DATA, "dfs"), "data"); From 2fd7cf53facec3aa649f1f2cc53f8e21c209e178 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 19 Aug 2024 19:54:47 +0100 Subject: [PATCH 092/113] HADOOP-19253. Google GCS compilation fails due to VectorIO changes (#7002) Fixes a compilation failure caused by HADOOP-19098 Restore original sortRanges() method signature, FileRange[] sortRanges(List) This ensures that google GCS connector will compile again. It has also been marked as Stable so it is left alone The version returning List has been renamed sortRangeList() Contributed by Steve Loughran --- .../apache/hadoop/fs/VectoredReadUtils.java | 17 +++++++-- .../hadoop/fs/impl/TestVectoredReadUtils.java | 35 +++++++++++++++---- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java index fa0440620a409..2f99edc910c16 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java @@ -308,7 +308,7 @@ public static List validateAndSortRanges( validateRangeRequest(input.get(0)); sortedRanges = input; } else { - sortedRanges = sortRanges(input); + sortedRanges = sortRangeList(input); FileRange prev = null; for (final FileRange current : sortedRanges) { validateRangeRequest(current); @@ -341,12 +341,25 @@ public static List validateAndSortRanges( * @param input input ranges. * @return a new list of the ranges, sorted by offset. */ - public static List sortRanges(List input) { + public static List sortRangeList(List input) { final List l = new ArrayList<>(input); l.sort(Comparator.comparingLong(FileRange::getOffset)); return l; } + /** + * Sort the input ranges by offset; no validation is done. + *

    + * This method is used externally and must be retained with + * the signature unchanged. + * @param input input ranges. + * @return a new list of the ranges, sorted by offset. + */ + @InterfaceStability.Stable + public static FileRange[] sortRanges(List input) { + return sortRangeList(input).toArray(new FileRange[0]); + } + /** * Merge sorted ranges to optimize the access from the underlying file * system. diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java index 3fd3fe4d1f451..b08fc95279a82 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/impl/TestVectoredReadUtils.java @@ -23,6 +23,7 @@ import java.nio.ByteBuffer; import java.nio.IntBuffer; import java.util.Collections; +import java.util.Comparator; import java.util.List; import java.util.Optional; import java.util.concurrent.CompletableFuture; @@ -47,6 +48,7 @@ import static org.apache.hadoop.fs.VectoredReadUtils.mergeSortedRanges; import static org.apache.hadoop.fs.VectoredReadUtils.readRangeFrom; import static org.apache.hadoop.fs.VectoredReadUtils.readVectored; +import static org.apache.hadoop.fs.VectoredReadUtils.sortRangeList; import static org.apache.hadoop.fs.VectoredReadUtils.sortRanges; import static org.apache.hadoop.fs.VectoredReadUtils.validateAndSortRanges; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -196,7 +198,7 @@ public void testSortAndMerge() { ); assertIsNotOrderedDisjoint(input, 100, 800); final List outputList = mergeSortedRanges( - sortRanges(input), 100, 1001, 2500); + sortRangeList(input), 100, 1001, 2500); assertRangeListSize(outputList, 1); CombinedFileRange output = outputList.get(0); @@ -208,7 +210,7 @@ public void testSortAndMerge() { // the minSeek doesn't allow the first two to merge assertIsNotOrderedDisjoint(input, 100, 100); final List list2 = mergeSortedRanges( - sortRanges(input), + sortRangeList(input), 100, 1000, 2100); assertRangeListSize(list2, 2); assertRangeElement(list2, 0, 1000, 100); @@ -219,7 +221,7 @@ public void testSortAndMerge() { // the maxSize doesn't allow the third range to merge assertIsNotOrderedDisjoint(input, 100, 800); final List list3 = mergeSortedRanges( - sortRanges(input), + sortRangeList(input), 100, 1001, 2099); assertRangeListSize(list3, 2); CombinedFileRange range0 = list3.get(0); @@ -240,7 +242,7 @@ public void testSortAndMerge() { // test the round up and round down (the maxSize doesn't allow any merges) assertIsNotOrderedDisjoint(input, 16, 700); final List list4 = mergeSortedRanges( - sortRanges(input), + sortRangeList(input), 16, 1001, 100); assertRangeListSize(list4, 3); // range[992,1104) @@ -273,6 +275,27 @@ private static void assertFileRange( .isEqualTo(length); } + /** + * Verify that {@link VectoredReadUtils#sortRanges(List)} + * returns an array matching the list sort ranges. 
+ */ + @Test + public void testArraySortRange() throws Throwable { + List input = asList( + createFileRange(3000, 100, "1"), + createFileRange(2100, 100, null), + createFileRange(1000, 100, "3") + ); + final FileRange[] rangeArray = sortRanges(input); + final List rangeList = sortRangeList(input); + Assertions.assertThat(rangeArray) + .describedAs("range array from sortRanges()") + .isSortedAccordingTo(Comparator.comparingLong(FileRange::getOffset)); + Assertions.assertThat(rangeList.toArray(new FileRange[0])) + .describedAs("range from sortRangeList()") + .isEqualTo(rangeArray); + } + /** * Assert that a file range satisfies the conditions. * @param range range to validate @@ -399,7 +422,7 @@ public void testSortAndMergeMoreCases() throws Exception { ); assertIsNotOrderedDisjoint(input, 100, 800); List outputList = mergeSortedRanges( - sortRanges(input), 1, 1001, 2500); + sortRangeList(input), 1, 1001, 2500); Assertions.assertThat(outputList) .describedAs("merged range size") .hasSize(1); @@ -411,7 +434,7 @@ public void testSortAndMergeMoreCases() throws Exception { assertOrderedDisjoint(outputList, 1, 800); outputList = mergeSortedRanges( - sortRanges(input), 100, 1001, 2500); + sortRangeList(input), 100, 1001, 2500); assertRangeListSize(outputList, 1); output = outputList.get(0); From 33c9ecb6521ca98b76123c91669be6a6c7833060 Mon Sep 17 00:00:00 2001 From: dhavalshah9131 <35031652+dhavalshah9131@users.noreply.github.com> Date: Tue, 20 Aug 2024 18:33:05 +0530 Subject: [PATCH 093/113] HADOOP-19249. KMSClientProvider raises NPE with unauthed user (#6984) KMSClientProvider raises a NullPointerException when an unauthorised user tries to perform the key operation Contributed by Dhaval Shah --- .../hadoop/crypto/key/kms/KMSClientProvider.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java index b5a6d882334d9..fcacf1481a757 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java @@ -18,6 +18,7 @@ package org.apache.hadoop.crypto.key.kms; import org.apache.commons.codec.binary.Base64; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProvider; @@ -561,17 +562,19 @@ private T call(HttpURLConnection conn, Object jsonOutput, } throw ex; } + if ((conn.getResponseCode() == HttpURLConnection.HTTP_FORBIDDEN - && (conn.getResponseMessage().equals(ANONYMOUS_REQUESTS_DISALLOWED) || - conn.getResponseMessage().contains(INVALID_SIGNATURE))) + && (!StringUtils.isEmpty(conn.getResponseMessage()) + && (conn.getResponseMessage().equals(ANONYMOUS_REQUESTS_DISALLOWED) + || conn.getResponseMessage().contains(INVALID_SIGNATURE)))) || conn.getResponseCode() == HttpURLConnection.HTTP_UNAUTHORIZED) { // Ideally, this should happen only when there is an Authentication // failure. Unfortunately, the AuthenticationFilter returns 403 when it // cannot authenticate (Since a 401 requires Server to send // WWW-Authenticate header as well).. 
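An aside on the guard added above: HttpURLConnection.getResponseMessage() may return null when the response carries no reason phrase, which is what produced the NullPointerException for unauthenticated callers. A minimal, hedged sketch of the same null-safe pattern, with the two message constants passed in rather than assumed:

    import java.io.IOException;
    import java.net.HttpURLConnection;

    import org.apache.commons.lang3.StringUtils;

    /** Illustrative only; mirrors the check in KMSClientProvider#call above. */
    final class AuthFailureCheckSketch {

      /** True if the response should be treated as an authentication failure. */
      static boolean isAuthFailure(HttpURLConnection conn,
          String anonymousDisallowed, String invalidSignature) throws IOException {
        final String reason = conn.getResponseMessage(); // may legitimately be null
        return (conn.getResponseCode() == HttpURLConnection.HTTP_FORBIDDEN
                && !StringUtils.isEmpty(reason)
                && (reason.equals(anonymousDisallowed) || reason.contains(invalidSignature)))
            || conn.getResponseCode() == HttpURLConnection.HTTP_UNAUTHORIZED;
      }
    }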
if (LOG.isDebugEnabled()) { - LOG.debug("Response={}({}), resetting authToken", - conn.getResponseCode(), conn.getResponseMessage()); + LOG.debug("Response={}, resetting authToken", + conn.getResponseCode()); } KMSClientProvider.this.authToken = new DelegationTokenAuthenticatedURL.Token(); @@ -797,6 +800,7 @@ public EncryptedKeyVersion generateEncryptedKey( @SuppressWarnings("rawtypes") @Override public KeyVersion decryptEncryptedKey( + EncryptedKeyVersion encryptedKeyVersion) throws IOException, GeneralSecurityException { checkNotNull(encryptedKeyVersion.getEncryptionKeyVersionName(), From b15ed27cfbfe80e83ce835aa882cc3f62211b300 Mon Sep 17 00:00:00 2001 From: Anuj Modi <128447756+anujmodi2021@users.noreply.github.com> Date: Tue, 20 Aug 2024 22:37:07 +0530 Subject: [PATCH 094/113] HADOOP-19187: [ABFS][FNSOverBlob] AbfsClient Refactoring to Support Multiple Implementation of Clients. (#6879) Refactor AbfsClient into DFS and Blob Client. Contributed by Anuj Modi --- .../src/config/checkstyle-suppressions.xml | 2 + .../hadoop/fs/azurebfs/AbfsConfiguration.java | 83 +- .../fs/azurebfs/AzureBlobFileSystem.java | 56 +- .../fs/azurebfs/AzureBlobFileSystemStore.java | 102 +- .../azurebfs/constants/AbfsHttpConstants.java | 2 + .../azurebfs/constants/AbfsServiceType.java | 37 + .../azurebfs/constants/ConfigurationKeys.java | 28 +- .../azurebfs/constants/FSOperationType.java | 3 +- .../constants/FileSystemConfigurations.java | 1 + .../constants/FileSystemUriSchemes.java | 5 +- .../InvalidConfigurationValueException.java | 4 + .../fs/azurebfs/services/AbfsClient.java | 1249 ++++++---------- .../azurebfs/services/AbfsClientHandler.java | 127 ++ .../fs/azurebfs/services/AbfsDfsClient.java | 1302 +++++++++++++++++ .../hadoop/fs/azurebfs/utils/UriUtils.java | 36 + .../src/site/markdown/fns_blob.md | 82 ++ .../hadoop-azure/src/site/markdown/index.md | 1 + .../azurebfs/ITestAbfsCustomEncryption.java | 3 +- .../ITestAzureBlobFileSystemCheckAccess.java | 9 +- ...ITestAzureBlobFileSystemInitAndCreate.java | 44 +- .../fs/azurebfs/ITestGetNameSpaceEnabled.java | 14 +- .../fs/azurebfs/services/ITestAbfsClient.java | 9 +- 22 files changed, 2287 insertions(+), 912 deletions(-) create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsServiceType.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientHandler.java create mode 100644 hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java create mode 100644 hadoop-tools/hadoop-azure/src/site/markdown/fns_blob.md diff --git a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml index 2065746b76611..27ab4329043f4 100644 --- a/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml +++ b/hadoop-tools/hadoop-azure/src/config/checkstyle-suppressions.xml @@ -44,6 +44,8 @@ + HTTP_BAD_REQUEST && statusCode < HTTP_INTERNAL_ERROR) { + LOG.debug("getNamespace failed with non 400 user error", ex); + statIncrement(ERROR_IGNORED); + return true; + } + throw ex; + } + } + private boolean fileSystemExists() throws IOException { LOG.debug( "AzureBlobFileSystem.fileSystemExists uri: {}", uri); @@ -1660,7 +1706,7 @@ AbfsDelegationTokenManager getDelegationTokenManager() { @VisibleForTesting boolean getIsNamespaceEnabled(TracingContext tracingContext) throws AzureBlobFileSystemException { - return abfsStore.getIsNamespaceEnabled(tracingContext); + return 
getAbfsStore().getIsNamespaceEnabled(tracingContext); } /** diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java index 784e3f25c621f..e903421f469a7 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AzureBlobFileSystemStore.java @@ -29,11 +29,9 @@ import java.net.URISyntaxException; import java.net.URL; import java.nio.ByteBuffer; -import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetDecoder; -import java.nio.charset.CharsetEncoder; import java.nio.charset.StandardCharsets; import java.text.SimpleDateFormat; import java.time.Instant; @@ -55,11 +53,13 @@ import java.util.concurrent.TimeUnit; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; import org.apache.hadoop.fs.azurebfs.services.AbfsHttpOperation; import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; import org.apache.hadoop.fs.azurebfs.security.ContextProviderEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; import org.apache.hadoop.fs.azurebfs.security.NoContextEncryptionAdapter; +import org.apache.hadoop.fs.azurebfs.services.AbfsClientHandler; import org.apache.hadoop.fs.azurebfs.utils.EncryptionType; import org.apache.hadoop.fs.impl.BackReference; import org.apache.hadoop.fs.PathIOException; @@ -158,6 +158,7 @@ import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.AZURE_FOOTER_READ_BUFFER_SIZE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_BUFFERED_PREAD_DISABLE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_IDENTITY_TRANSFORM_CLASS; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_BLOB_DOMAIN_NAME; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_ENCRYPTION_CONTEXT; /** @@ -169,6 +170,13 @@ public class AzureBlobFileSystemStore implements Closeable, ListingSupport { private static final Logger LOG = LoggerFactory.getLogger(AzureBlobFileSystemStore.class); private AbfsClient client; + + /** + * Variable to hold the client handler which will determine the operative + * client based on the service type configured. + * Initialized in the {@link #initializeClient(URI, String, String, boolean)}. + */ + private AbfsClientHandler clientHandler; private URI uri; private String userName; private String primaryUserGroup; @@ -221,7 +229,8 @@ public AzureBlobFileSystemStore( leaseRefs = Collections.synchronizedMap(new WeakHashMap<>()); try { - this.abfsConfiguration = new AbfsConfiguration(abfsStoreBuilder.configuration, accountName); + this.abfsConfiguration = new AbfsConfiguration(abfsStoreBuilder.configuration, + accountName, getAbfsServiceTypeFromUrl()); } catch (IllegalAccessException exception) { throw new FileSystemOperationUnhandledException(exception); } @@ -286,6 +295,8 @@ public AzureBlobFileSystemStore( /** * Checks if the given key in Azure Storage should be stored as a page * blob instead of block blob. + * @param key The key to check. + * @return True if the key should be stored as a page blob, false otherwise. 
*/ public boolean isAppendBlobKey(String key) { return isKeyForDirectorySet(key, appendBlobDirSet); @@ -497,15 +508,9 @@ public void setFilesystemProperties( try (AbfsPerfInfo perfInfo = startTracking("setFilesystemProperties", "setFilesystemProperties")) { - final String commaSeparatedProperties; - try { - commaSeparatedProperties = convertXmsPropertiesToCommaSeparatedString(properties); - } catch (CharacterCodingException ex) { - throw new InvalidAbfsRestOperationException(ex); - } - final AbfsRestOperation op = client - .setFilesystemProperties(commaSeparatedProperties, tracingContext); + final AbfsRestOperation op = getClient() + .setFilesystemProperties(properties, tracingContext); perfInfo.registerResult(op.getResult()).registerSuccess(true); } } @@ -590,18 +595,13 @@ public void setPathProperties(final Path path, path, properties); - final String commaSeparatedProperties; - try { - commaSeparatedProperties = convertXmsPropertiesToCommaSeparatedString(properties); - } catch (CharacterCodingException ex) { - throw new InvalidAbfsRestOperationException(ex); - } + final String relativePath = getRelativePath(path); final ContextEncryptionAdapter contextEncryptionAdapter = createEncryptionAdapterFromServerStoreContext(relativePath, tracingContext); final AbfsRestOperation op = client - .setPathProperties(getRelativePath(path), commaSeparatedProperties, + .setPathProperties(getRelativePath(path), properties, tracingContext, contextEncryptionAdapter); contextEncryptionAdapter.destroy(); perfInfo.registerResult(op.getResult()).registerSuccess(true); @@ -1090,7 +1090,7 @@ public boolean rename(final Path source, final AbfsClientRenameResult abfsClientRenameResult = client.renamePath(sourceRelativePath, destinationRelativePath, continuation, tracingContext, sourceEtag, false, - isNamespaceEnabled); + isNamespaceEnabled); AbfsRestOperation op = abfsClientRenameResult.getOp(); perfInfo.registerResult(op.getResult()); @@ -1369,7 +1369,7 @@ private String generateContinuationTokenForNonXns(String path, final String firs SimpleDateFormat simpleDateFormat = new SimpleDateFormat(TOKEN_DATE_PATTERN, Locale.US); String date = simpleDateFormat.format(new Date()); String token = String.format("%06d!%s!%06d!%s!%06d!%s!", - path.length(), path, startFrom.length(), startFrom, date.length(), date); + path.length(), path, startFrom.length(), startFrom, date.length(), date); String base64EncodedToken = Base64.encode(token.getBytes(StandardCharsets.UTF_8)); StringBuilder encodedTokenBuilder = new StringBuilder(base64EncodedToken.length() + 5); @@ -1810,18 +1810,29 @@ private void initializeClient(URI uri, String fileSystemName, LOG.trace("Initializing AbfsClient for {}", baseUrl); if (tokenProvider != null) { - this.client = new AbfsClient(baseUrl, creds, abfsConfiguration, + this.clientHandler = new AbfsClientHandler(baseUrl, creds, abfsConfiguration, tokenProvider, encryptionContextProvider, populateAbfsClientContext()); } else { - this.client = new AbfsClient(baseUrl, creds, abfsConfiguration, + this.clientHandler = new AbfsClientHandler(baseUrl, creds, abfsConfiguration, sasTokenProvider, encryptionContextProvider, populateAbfsClientContext()); } + this.client = getClientHandler().getClient(); LOG.trace("AbfsClient init complete"); } + private AbfsServiceType getAbfsServiceTypeFromUrl() { + if (uri.toString().contains(ABFS_BLOB_DOMAIN_NAME)) { + return AbfsServiceType.BLOB; + } + // In case of DFS Domain name or any other custom endpoint, the service + // type is to be identified as default DFS. 
+ LOG.debug("Falling back to default service type DFS"); + return AbfsServiceType.DFS; + } + /** * Populate a new AbfsClientContext instance with the desired properties. * @@ -1861,43 +1872,6 @@ private boolean parseIsDirectory(final String resourceType) { && resourceType.equalsIgnoreCase(AbfsHttpConstants.DIRECTORY); } - /** - * Convert properties stored in a Map into a comma separated string. For map - * , method would convert to: - * key1=value1,key2=value,...,keyN=valueN - * */ - @VisibleForTesting - String convertXmsPropertiesToCommaSeparatedString(final Map properties) throws - CharacterCodingException { - StringBuilder commaSeparatedProperties = new StringBuilder(); - - final CharsetEncoder encoder = Charset.forName(XMS_PROPERTIES_ENCODING).newEncoder(); - - for (Map.Entry propertyEntry : properties.entrySet()) { - String key = propertyEntry.getKey(); - String value = propertyEntry.getValue(); - - Boolean canEncodeValue = encoder.canEncode(value); - if (!canEncodeValue) { - throw new CharacterCodingException(); - } - - String encodedPropertyValue = Base64.encode(encoder.encode(CharBuffer.wrap(value)).array()); - commaSeparatedProperties.append(key) - .append(AbfsHttpConstants.EQUAL) - .append(encodedPropertyValue); - - commaSeparatedProperties.append(AbfsHttpConstants.COMMA); - } - - if (commaSeparatedProperties.length() != 0) { - commaSeparatedProperties.deleteCharAt(commaSeparatedProperties.length() - 1); - } - - return commaSeparatedProperties.toString(); - } - private Hashtable parseCommaSeparatedXmsProperties(String xMsProperties) throws InvalidFileSystemPropertyException, InvalidAbfsRestOperationException { Hashtable properties = new Hashtable<>(); @@ -2176,6 +2150,16 @@ public AbfsClient getClient() { return this.client; } + @VisibleForTesting + public AbfsClient getClient(AbfsServiceType serviceType) { + return getClientHandler().getClient(serviceType); + } + + @VisibleForTesting + public AbfsClientHandler getClientHandler() { + return this.clientHandler; + } + @VisibleForTesting void setClient(AbfsClient client) { this.client = client; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java index 84127d9d5738f..26106a717c94f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java @@ -101,6 +101,8 @@ public final class AbfsHttpConstants { public static final String GMT_TIMEZONE = "GMT"; public static final String APPLICATION_JSON = "application/json"; public static final String APPLICATION_OCTET_STREAM = "application/octet-stream"; + public static final String XMS_PROPERTIES_ENCODING_ASCII = "ISO-8859-1"; + public static final String XMS_PROPERTIES_ENCODING_UNICODE = "UTF-8"; public static final String ROOT_PATH = "/"; public static final String ACCESS_MASK = "mask:"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsServiceType.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsServiceType.java new file mode 100644 index 0000000000000..c84d4b0dfafde --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsServiceType.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or 
more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.constants; + +/** + * Azure Storage Offers two sets of Rest APIs for interacting with the storage account. + *

+ * <ol>
+ *   <li>Blob Rest API</li>
+ *   <li>Data Lake Rest API</li>
+ * </ol>
    + */ +public enum AbfsServiceType { + /** + * Service type to set operative endpoint as Data Lake Rest API. + */ + DFS, + /** + * Service type to set operative endpoint as Blob Rest API. + */ + BLOB +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java index ed749c7885b39..620182f5bd20a 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java @@ -32,9 +32,35 @@ public final class ConfigurationKeys { /** * Config to specify if the configured account is HNS enabled or not. If * this config is not set, getacl call is made on account filesystem root - * path to determine HNS status. + * path on DFS Endpoint to determine HNS status. */ public static final String FS_AZURE_ACCOUNT_IS_HNS_ENABLED = "fs.azure.account.hns.enabled"; + + /** + * Config to specify which {@link AbfsServiceType} to use with HNS-Disabled Account type. + * Default value will be identified from URL used to initialize filesystem. + * This will allow an override to choose service endpoint in cases where any + * local DNS resolution is set for account and driver is unable to detect + * intended endpoint from the url used to initialize filesystem. + * If configured Blob for HNS-Enabled account, FS init will fail. + * Value {@value} case-insensitive "DFS" or "BLOB" + */ + public static final String FS_AZURE_FNS_ACCOUNT_SERVICE_TYPE = "fs.azure.fns.account.service.type"; + + /** + * Config to specify which {@link AbfsServiceType} to use only for Ingress Operations. + * Other operations will continue to move to the FS configured service endpoint. + * Value {@value} case-insensitive "DFS" or "BLOB" + */ + public static final String FS_AZURE_INGRESS_SERVICE_TYPE = "fs.azure.ingress.service.type"; + + /** + * Config to be set only for cases where traffic over dfs endpoint is + * experiencing compatibility issues and need to move to blob for mitigation. + * Value {@value} case-insensitive "True" or "False" + */ + public static final String FS_AZURE_ENABLE_DFSTOBLOB_FALLBACK = "fs.azure.enable.dfstoblob.fallback"; + /** * Enable or disable expect hundred continue header. * Value: {@value}. 
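Taken together, the new AbfsServiceType enum and the configuration keys above let the operative endpoint be chosen per account. As a usage sketch only (not part of this patch; the class name and the chosen account settings below are illustrative), the three keys are ordinary Hadoop configuration entries and could be set programmatically before the filesystem is created; per the javadoc above, configuring BLOB for an HNS-enabled account is expected to fail filesystem initialization.

import org.apache.hadoop.conf.Configuration;

/** Illustrative sketch: sets the new ABFS service-type keys introduced above. */
public class AbfsServiceTypeConfigExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Pin the operative endpoint for an HNS-disabled (FNS) account;
    // accepted values are "DFS" or "BLOB", case-insensitive.
    conf.set("fs.azure.fns.account.service.type", "DFS");
    // Route only ingress operations through a specific endpoint; other
    // operations keep using the filesystem's configured service endpoint.
    conf.set("fs.azure.ingress.service.type", "BLOB");
    // Opt in to falling back from the DFS endpoint to Blob when DFS
    // traffic hits compatibility issues.
    conf.setBoolean("fs.azure.enable.dfstoblob.fallback", true);
  }
}

The fallback flag only takes effect when explicitly enabled: its default, DEFAULT_FS_AZURE_ENABLE_DFSTOBLOB_FALLBACK, is added as false in FileSystemConfigurations further down in this patch.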
diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FSOperationType.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FSOperationType.java index 6b6e98c9c7082..8c9c8af75b53d 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FSOperationType.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FSOperationType.java @@ -45,7 +45,8 @@ public enum FSOperationType { SET_OWNER("SO"), SET_ACL("SA"), TEST_OP("TS"), - WRITE("WR"); + WRITE("WR"), + INIT("IN"); private final String opCode; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java index bd2d6e4b57334..f8c97b031b402 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java @@ -32,6 +32,7 @@ public final class FileSystemConfigurations { public static final String DEFAULT_FS_AZURE_ACCOUNT_IS_HNS_ENABLED = ""; + public static final boolean DEFAULT_FS_AZURE_ENABLE_DFSTOBLOB_FALLBACK = false; public static final boolean DEFAULT_FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED = true; public static final String USER_HOME_DIRECTORY_PREFIX = "/user"; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemUriSchemes.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemUriSchemes.java index c7a0cdad605ab..0b5cba72f126d 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemUriSchemes.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemUriSchemes.java @@ -38,5 +38,8 @@ public final class FileSystemUriSchemes { public static final String WASB_SECURE_SCHEME = "wasbs"; public static final String WASB_DNS_PREFIX = "blob"; + public static final String ABFS_DFS_DOMAIN_NAME = "dfs.core.windows.net"; + public static final String ABFS_BLOB_DOMAIN_NAME = "blob.core.windows.net"; + private FileSystemUriSchemes() {} -} \ No newline at end of file +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidConfigurationValueException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidConfigurationValueException.java index 7591bac59e292..fef2b073ce9a0 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidConfigurationValueException.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/contracts/exceptions/InvalidConfigurationValueException.java @@ -34,4 +34,8 @@ public InvalidConfigurationValueException(String configKey, Exception innerExcep public InvalidConfigurationValueException(String configKey) { super("Invalid configuration value detected for " + configKey); } + + public InvalidConfigurationValueException(String configKey, String message) { + super(String.format("Invalid configuration value detected for \"%s\". 
%s ", configKey, message)); + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java index a2d65c145b625..ca35015b1939e 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClient.java @@ -31,17 +31,18 @@ import java.security.NoSuchAlgorithmException; import java.util.ArrayList; import java.util.Base64; +import java.util.Hashtable; import java.util.List; import java.util.Locale; import java.util.Timer; import java.util.TimerTask; -import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.ThreadFactory; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.constants.HttpOperationType; import org.apache.hadoop.fs.azurebfs.constants.FSOperationType; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsInvalidChecksumException; @@ -67,9 +68,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ApiVersion; import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; -import org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidUriException; @@ -83,25 +83,52 @@ import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; import org.apache.hadoop.util.concurrent.HadoopExecutors; -import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.apache.commons.lang3.StringUtils.isNotEmpty; import static org.apache.hadoop.fs.azurebfs.AbfsStatistic.RENAME_PATH_ATTEMPTS; import static org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.extractEtagHeader; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.*; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APN_VERSION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CLIENT_VERSION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DEFAULT_TIMEOUT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FILESYSTEM; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH_ENCODE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_DELETE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_HEAD; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JAVA_VENDOR; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.JAVA_VERSION; 
+import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.MD5; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.OS_ARCH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.OS_NAME; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.OS_VERSION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.PLUS; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.PLUS_ENCODE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SEMICOLON; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SINGLE_WHITE_SPACE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.UTF_8; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_DELETE_CONSIDERED_IDEMPOTENT; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_MB; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.SERVER_SIDE_ENCRYPTION_ALGORITHM; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.HTTPS_SCHEME; -import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.*; -import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.*; -import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND; -import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.ACCEPT_CHARSET; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.CONTENT_MD5; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.CONTENT_TYPE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.USER_AGENT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_ENCRYPTION_ALGORITHM; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_ENCRYPTION_CONTEXT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_ENCRYPTION_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_ENCRYPTION_KEY_SHA256; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_VERSION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_RESOURCE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_TIMEOUT; import static org.apache.hadoop.fs.azurebfs.services.RetryReasonConstants.CONNECTION_TIMEOUT_ABBREVIATION; /** * AbfsClient. */ -public class AbfsClient implements Closeable { +public abstract class AbfsClient implements Closeable { public static final Logger LOG = LoggerFactory.getLogger(AbfsClient.class); public static final String HUNDRED_CONTINUE_USER_AGENT = SINGLE_WHITE_SPACE + HUNDRED_CONTINUE + SEMICOLON; @@ -147,7 +174,7 @@ public class AbfsClient implements Closeable { /** * logging the rename failure if metadata is in an incomplete state. 
*/ - private static final LogExactlyOnce ABFS_METADATA_INCOMPLETE_RENAME_FAILURE = new LogExactlyOnce(LOG); + protected static final LogExactlyOnce ABFS_METADATA_INCOMPLETE_RENAME_FAILURE = new LogExactlyOnce(LOG); private AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCredentials, @@ -255,7 +282,7 @@ public AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCredent public AbfsClient(final URL baseUrl, final SharedKeyCredentials sharedKeyCredentials, final AbfsConfiguration abfsConfiguration, final SASTokenProvider sasTokenProvider, - final EncryptionContextProvider encryptionContextProvider, + final EncryptionContextProvider encryptionContextProvider, final AbfsClientContext abfsClientContext) throws IOException { this(baseUrl, sharedKeyCredentials, abfsConfiguration, @@ -335,22 +362,25 @@ AbfsThrottlingIntercept getIntercept() { * @return default request headers */ @VisibleForTesting - protected List createDefaultHeaders() { - return createDefaultHeaders(this.xMsVersion); - } + protected abstract List createDefaultHeaders(); /** * Create request headers for Rest Operation using the specified API version. - * @param xMsVersion + * @param xMsVersion Azure services API version to be used. * @return default request headers */ - private List createDefaultHeaders(ApiVersion xMsVersion) { + @VisibleForTesting + public abstract List createDefaultHeaders(ApiVersion xMsVersion); + + /** + * Create request headers common to both service endpoints. + * @param xMsVersion azure services API version to be used. + * @return common request headers + */ + protected List createCommonHeaders(ApiVersion xMsVersion) { final List requestHeaders = new ArrayList(); requestHeaders.add(new AbfsHttpHeader(X_MS_VERSION, xMsVersion.toString())); - requestHeaders.add(new AbfsHttpHeader(ACCEPT, APPLICATION_JSON - + COMMA + SINGLE_WHITE_SPACE + APPLICATION_OCTET_STREAM)); - requestHeaders.add(new AbfsHttpHeader(ACCEPT_CHARSET, - UTF_8)); + requestHeaders.add(new AbfsHttpHeader(ACCEPT_CHARSET, UTF_8)); requestHeaders.add(new AbfsHttpHeader(CONTENT_TYPE, EMPTY_STRING)); requestHeaders.add(new AbfsHttpHeader(USER_AGENT, userAgent)); return requestHeaders; @@ -372,8 +402,15 @@ private List createDefaultHeaders(ApiVersion xMsVersion) { *
 * <ul>
 *   <li>getPathStatus for fs.setXAttr and fs.getXAttr</li>
 *   <li>read</li>
 * </ul>
  • * + * @param path path of the file / directory to be created / overwritten. + * @param requestHeaders list of headers to be added to the request. + * @param isCreateFileRequest defines if file or directory has to be created / overwritten. + * @param contextEncryptionAdapter object that contains the encryptionContext and + * encryptionKey created from the developer provided implementation of {@link EncryptionContextProvider} + * @param tracingContext to trace service calls. + * @throws AzureBlobFileSystemException if namespace is not enabled. */ - private void addEncryptionKeyRequestHeaders(String path, + protected void addEncryptionKeyRequestHeaders(String path, List requestHeaders, boolean isCreateFileRequest, ContextEncryptionAdapter contextEncryptionAdapter, TracingContext tracingContext) throws AzureBlobFileSystemException { @@ -405,106 +442,66 @@ private void addEncryptionKeyRequestHeaders(String path, SERVER_SIDE_ENCRYPTION_ALGORITHM)); } - AbfsUriQueryBuilder createDefaultUriQueryBuilder() { + /** + * Creates a AbfsUriQueryBuilder with default query parameter timeout. + * @return default AbfsUriQueryBuilder. + */ + protected AbfsUriQueryBuilder createDefaultUriQueryBuilder() { final AbfsUriQueryBuilder abfsUriQueryBuilder = new AbfsUriQueryBuilder(); abfsUriQueryBuilder.addQuery(QUERY_PARAM_TIMEOUT, DEFAULT_TIMEOUT); return abfsUriQueryBuilder; } - public AbfsRestOperation createFilesystem(TracingContext tracingContext) - throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - - final AbfsUriQueryBuilder abfsUriQueryBuilder = new AbfsUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM); - - final URL url = createRequestUrl(abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.CreateFileSystem, - HTTP_METHOD_PUT, url, requestHeaders); - op.execute(tracingContext); - return op; - } - - public AbfsRestOperation setFilesystemProperties(final String properties, - TracingContext tracingContext) throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - // JDK7 does not support PATCH, so to work around the issue we will use - // PUT and specify the real method in the X-Http-Method-Override header. 
- requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, - HTTP_METHOD_PATCH)); - - requestHeaders.add(new AbfsHttpHeader(X_MS_PROPERTIES, - properties)); - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM); - - final URL url = createRequestUrl(abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.SetFileSystemProperties, - HTTP_METHOD_PUT, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } - - public AbfsRestOperation listPath(final String relativePath, final boolean recursive, final int listMaxResults, - final String continuation, TracingContext tracingContext) - throws IOException { - final List requestHeaders = createDefaultHeaders(); - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_DIRECTORY, getDirectoryQueryParameter(relativePath)); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_RECURSIVE, String.valueOf(recursive)); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_CONTINUATION, continuation); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_MAXRESULTS, String.valueOf(listMaxResults)); - abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_UPN, String.valueOf(abfsConfiguration.isUpnUsed())); - appendSASTokenToQuery(relativePath, SASTokenProvider.LIST_OPERATION, abfsUriQueryBuilder); - - final URL url = createRequestUrl(abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.ListPaths, - HTTP_METHOD_GET, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } - - public AbfsRestOperation getFilesystemProperties(TracingContext tracingContext) throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM); + /** + * Create a new filesystem using Azure REST API Service. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation createFilesystem(TracingContext tracingContext) + throws AzureBlobFileSystemException; - final URL url = createRequestUrl(abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.GetFileSystemProperties, - HTTP_METHOD_HEAD, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } + /** + * Sets user-defined metadata on filesystem. + * @param properties list of metadata key-value pairs. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation setFilesystemProperties(Hashtable properties, + TracingContext tracingContext) throws AzureBlobFileSystemException; - public AbfsRestOperation deleteFilesystem(TracingContext tracingContext) throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); + /** + * List paths and their properties in the current filesystem. + * @param relativePath to return only blobs within this directory. 
+ * @param recursive to return all blobs in the path, including those in subdirectories. + * @param listMaxResults maximum number of blobs to return. + * @param continuation marker to specify the continuation token. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation or response parsing fails. + */ + public abstract AbfsRestOperation listPath(String relativePath, boolean recursive, + int listMaxResults, String continuation, TracingContext tracingContext) + throws IOException; - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM); + /** + * Retrieves user-defined metadata on filesystem. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + * */ + public abstract AbfsRestOperation getFilesystemProperties(TracingContext tracingContext) + throws AzureBlobFileSystemException; - final URL url = createRequestUrl(abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.DeleteFileSystem, - HTTP_METHOD_DELETE, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } + /** + * Deletes the filesystem using Azure REST API Service. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation deleteFilesystem(TracingContext tracingContext) + throws AzureBlobFileSystemException; /** * Method for calling createPath API to the backend. Method can be called from: @@ -533,150 +530,57 @@ public AbfsRestOperation deleteFilesystem(TracingContext tracingContext) throws * @throws AzureBlobFileSystemException throws back the exception it receives from the * {@link AbfsRestOperation#execute(TracingContext)} method call. */ - public AbfsRestOperation createPath(final String path, - final boolean isFile, - final boolean overwrite, - final Permissions permissions, - final boolean isAppendBlob, - final String eTag, - final ContextEncryptionAdapter contextEncryptionAdapter, - final TracingContext tracingContext) - throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - if (isFile) { - addEncryptionKeyRequestHeaders(path, requestHeaders, true, - contextEncryptionAdapter, tracingContext); - } - if (!overwrite) { - requestHeaders.add(new AbfsHttpHeader(IF_NONE_MATCH, AbfsHttpConstants.STAR)); - } - - if (permissions.hasPermission()) { - requestHeaders.add( - new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_PERMISSIONS, - permissions.getPermission())); - } - - if (permissions.hasUmask()) { - requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_UMASK, - permissions.getUmask())); - } - - if (eTag != null && !eTag.isEmpty()) { - requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.IF_MATCH, eTag)); - } - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, isFile ? FILE : DIRECTORY); - if (isAppendBlob) { - abfsUriQueryBuilder.addQuery(QUERY_PARAM_BLOBTYPE, APPEND_BLOB_TYPE); - } - - String operation = isFile - ? 
SASTokenProvider.CREATE_FILE_OPERATION - : SASTokenProvider.CREATE_DIRECTORY_OPERATION; - appendSASTokenToQuery(path, operation, abfsUriQueryBuilder); - - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.CreatePath, - HTTP_METHOD_PUT, - url, - requestHeaders); - try { - op.execute(tracingContext); - } catch (AzureBlobFileSystemException ex) { - // If we have no HTTP response, throw the original exception. - if (!op.hasResult()) { - throw ex; - } - if (!isFile && op.getResult().getStatusCode() == HttpURLConnection.HTTP_CONFLICT) { - String existingResource = - op.getResult().getResponseHeader(X_MS_EXISTING_RESOURCE_TYPE); - if (existingResource != null && existingResource.equals(DIRECTORY)) { - return op; //don't throw ex on mkdirs for existing directory - } - } - throw ex; - } - return op; - } - - public AbfsRestOperation acquireLease(final String path, int duration, TracingContext tracingContext) throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - - requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, ACQUIRE_LEASE_ACTION)); - requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_DURATION, Integer.toString(duration))); - requestHeaders.add(new AbfsHttpHeader(X_MS_PROPOSED_LEASE_ID, UUID.randomUUID().toString())); - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.LeasePath, - HTTP_METHOD_POST, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } - - public AbfsRestOperation renewLease(final String path, final String leaseId, - TracingContext tracingContext) throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - - requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, RENEW_LEASE_ACTION)); - requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, leaseId)); - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.LeasePath, - HTTP_METHOD_POST, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } - - public AbfsRestOperation releaseLease(final String path, - final String leaseId, TracingContext tracingContext) throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - - requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, RELEASE_LEASE_ACTION)); - requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, leaseId)); - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.LeasePath, - HTTP_METHOD_POST, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } + public abstract AbfsRestOperation createPath(String path, + boolean isFile, + boolean overwrite, + Permissions permissions, + boolean isAppendBlob, + String eTag, + ContextEncryptionAdapter contextEncryptionAdapter, + TracingContext tracingContext) throws AzureBlobFileSystemException; - public AbfsRestOperation breakLease(final String path, - TracingContext tracingContext) throws AzureBlobFileSystemException { - final List 
requestHeaders = createDefaultHeaders(); + /** + * Acquire lease on specified path. + * @param path on which lease has to be acquired. + * @param duration for which lease has to be acquired. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation acquireLease(String path, int duration, + TracingContext tracingContext) throws AzureBlobFileSystemException; - requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, BREAK_LEASE_ACTION)); - requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_BREAK_PERIOD, DEFAULT_LEASE_BREAK_PERIOD)); + /** + * Renew lease on specified path. + * @param path on which lease has to be renewed. + * @param leaseId of the lease to be renewed. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation renewLease(String path, String leaseId, + TracingContext tracingContext) throws AzureBlobFileSystemException; - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + /** + * Release lease on specified path. + * @param path on which lease has to be released. + * @param leaseId of the lease to be released. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation releaseLease(String path, String leaseId, + TracingContext tracingContext) throws AzureBlobFileSystemException; - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.LeasePath, - HTTP_METHOD_POST, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } + /** + * Break lease on specified path. + * @param path on which lease has to be broke. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation breakLease(String path, + TracingContext tracingContext) throws AzureBlobFileSystemException; /** * Rename a file or directory. @@ -700,127 +604,29 @@ public AbfsRestOperation breakLease(final String path, * AbfsRest operation, rename recovery and incomplete metadata state failure. * @throws AzureBlobFileSystemException failure, excluding any recovery from overload failures. 
*/ - public AbfsClientRenameResult renamePath( - final String source, - final String destination, - final String continuation, - final TracingContext tracingContext, - String sourceEtag, - boolean isMetadataIncompleteState, - boolean isNamespaceEnabled) - throws IOException { - final List requestHeaders = createDefaultHeaders(); - - final boolean hasEtag = !isEmpty(sourceEtag); - - boolean shouldAttemptRecovery = renameResilience && isNamespaceEnabled; - if (!hasEtag && shouldAttemptRecovery) { - // in case eTag is already not supplied to the API - // and rename resilience is expected and it is an HNS enabled account - // fetch the source etag to be used later in recovery - try { - final AbfsRestOperation srcStatusOp = getPathStatus(source, - false, tracingContext, null); - if (srcStatusOp.hasResult()) { - final AbfsHttpOperation result = srcStatusOp.getResult(); - sourceEtag = extractEtagHeader(result); - // and update the directory status. - boolean isDir = checkIsDir(result); - shouldAttemptRecovery = !isDir; - LOG.debug("Retrieved etag of source for rename recovery: {}; isDir={}", sourceEtag, isDir); - } - } catch (AbfsRestOperationException e) { - throw new AbfsRestOperationException(e.getStatusCode(), SOURCE_PATH_NOT_FOUND.getErrorCode(), - e.getMessage(), e); - } + public abstract AbfsClientRenameResult renamePath( + String source, + String destination, + String continuation, + TracingContext tracingContext, + String sourceEtag, + boolean isMetadataIncompleteState, + boolean isNamespaceEnabled) + throws IOException; - } - - String encodedRenameSource = urlEncode(FORWARD_SLASH + this.getFileSystem() + source); - if (authType == AuthType.SAS) { - final AbfsUriQueryBuilder srcQueryBuilder = new AbfsUriQueryBuilder(); - appendSASTokenToQuery(source, SASTokenProvider.RENAME_SOURCE_OPERATION, srcQueryBuilder); - encodedRenameSource += srcQueryBuilder.toString(); - } - - LOG.trace("Rename source queryparam added {}", encodedRenameSource); - requestHeaders.add(new AbfsHttpHeader(X_MS_RENAME_SOURCE, encodedRenameSource)); - requestHeaders.add(new AbfsHttpHeader(IF_NONE_MATCH, STAR)); - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_CONTINUATION, continuation); - appendSASTokenToQuery(destination, SASTokenProvider.RENAME_DESTINATION_OPERATION, abfsUriQueryBuilder); - - final URL url = createRequestUrl(destination, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = createRenameRestOperation(url, requestHeaders); - try { - incrementAbfsRenamePath(); - op.execute(tracingContext); - // AbfsClientResult contains the AbfsOperation, If recovery happened or - // not, and the incompleteMetaDataState is true or false. - // If we successfully rename a path and isMetadataIncompleteState was - // true, then rename was recovered, else it didn't, this is why - // isMetadataIncompleteState is used for renameRecovery(as the 2nd param). - return new AbfsClientRenameResult(op, isMetadataIncompleteState, isMetadataIncompleteState); - } catch (AzureBlobFileSystemException e) { - // If we have no HTTP response, throw the original exception. - if (!op.hasResult()) { - throw e; - } - - // ref: HADOOP-18242. Rename failure occurring due to a rare case of - // tracking metadata being in incomplete state. 
- if (op.getResult().getStorageErrorCode() - .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode()) - && !isMetadataIncompleteState) { - //Logging - ABFS_METADATA_INCOMPLETE_RENAME_FAILURE - .info("Rename Failure attempting to resolve tracking metadata state and retrying."); - // rename recovery should be attempted in this case also - shouldAttemptRecovery = true; - isMetadataIncompleteState = true; - String sourceEtagAfterFailure = sourceEtag; - if (isEmpty(sourceEtagAfterFailure)) { - // Doing a HEAD call resolves the incomplete metadata state and - // then we can retry the rename operation. - AbfsRestOperation sourceStatusOp = getPathStatus(source, false, - tracingContext, null); - isMetadataIncompleteState = true; - // Extract the sourceEtag, using the status Op, and set it - // for future rename recovery. - AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult(); - sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult); - } - renamePath(source, destination, continuation, tracingContext, - sourceEtagAfterFailure, isMetadataIncompleteState, isNamespaceEnabled); - } - // if we get out of the condition without a successful rename, then - // it isn't metadata incomplete state issue. - isMetadataIncompleteState = false; - - // setting default rename recovery success to false - boolean etagCheckSucceeded = false; - if (shouldAttemptRecovery) { - etagCheckSucceeded = renameIdempotencyCheckOp( - source, - sourceEtag, op, destination, tracingContext); - } - if (!etagCheckSucceeded) { - // idempotency did not return different result - // throw back the exception - throw e; - } - return new AbfsClientRenameResult(op, true, isMetadataIncompleteState); - } - } - - private boolean checkIsDir(AbfsHttpOperation result) { - String resourceType = result.getResponseHeader( - HttpHeaderConfigurations.X_MS_RESOURCE_TYPE); - return resourceType != null - && resourceType.equalsIgnoreCase(AbfsHttpConstants.DIRECTORY); - } + /** + * Checks if the rest operation results indicate if the path is a directory. + * @param result executed rest operation containing response from server. + * @return True if the path is a directory, False otherwise. + */ + protected abstract boolean checkIsDir(AbfsHttpOperation result); + /** + * Creates a rest operation for rename. + * @param url to be used for the operation. + * @param requestHeaders list of headers to be added to the request. + * @return un-executed rest operation. + */ @VisibleForTesting AbfsRestOperation createRenameRestOperation(URL url, List requestHeaders) { AbfsRestOperation op = getAbfsRestOperation( @@ -831,7 +637,11 @@ AbfsRestOperation createRenameRestOperation(URL url, List reques return op; } - private void incrementAbfsRenamePath() { + /** + * Increments AbfsCounters for rename path attempts by 1. + * Will be called each time a rename path operation is attempted. + */ + protected void incrementAbfsRenamePath() { abfsCounters.incrementCounter(RENAME_PATH_ATTEMPTS, 1); } @@ -896,142 +706,35 @@ public boolean renameIdempotencyCheckOp( return false; } - @VisibleForTesting - boolean isSourceDestEtagEqual(String sourceEtag, AbfsHttpOperation result) { - return sourceEtag.equals(extractEtagHeader(result)); - } - - public AbfsRestOperation append(final String path, final byte[] buffer, - AppendRequestParameters reqParams, final String cachedSasToken, + /** + * Uploads data to be appended to a file. + * @param path to which data has to be appended. + * @param buffer containing data to be appended. 
+ * @param reqParams containing parameters for append operation like offset, length etc. + * @param cachedSasToken to be used for the authenticating operation. + * @param contextEncryptionAdapter to provide encryption context. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation append(String path, byte[] buffer, + AppendRequestParameters reqParams, String cachedSasToken, ContextEncryptionAdapter contextEncryptionAdapter, TracingContext tracingContext) - throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - addEncryptionKeyRequestHeaders(path, requestHeaders, false, - contextEncryptionAdapter, tracingContext); - if (reqParams.isExpectHeaderEnabled()) { - requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE)); - } - // JDK7 does not support PATCH, so to workaround the issue we will use - // PUT and specify the real method in the X-Http-Method-Override header. - requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, - HTTP_METHOD_PATCH)); - if (reqParams.getLeaseId() != null) { - requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, reqParams.getLeaseId())); - } - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, APPEND_ACTION); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, Long.toString(reqParams.getPosition())); - - if ((reqParams.getMode() == AppendRequestParameters.Mode.FLUSH_MODE) || ( - reqParams.getMode() == AppendRequestParameters.Mode.FLUSH_CLOSE_MODE)) { - abfsUriQueryBuilder.addQuery(QUERY_PARAM_FLUSH, TRUE); - if (reqParams.getMode() == AppendRequestParameters.Mode.FLUSH_CLOSE_MODE) { - abfsUriQueryBuilder.addQuery(QUERY_PARAM_CLOSE, TRUE); - } - } - - // Check if the retry is with "Expect: 100-continue" header being present in the previous request. - if (reqParams.isRetryDueToExpect()) { - String userAgentRetry = userAgent; - // Remove the specific marker related to "Expect: 100-continue" from the User-Agent string. - userAgentRetry = userAgentRetry.replace(HUNDRED_CONTINUE_USER_AGENT, EMPTY_STRING); - requestHeaders.removeIf(header -> header.getName().equalsIgnoreCase(USER_AGENT)); - requestHeaders.add(new AbfsHttpHeader(USER_AGENT, userAgentRetry)); - } - - // Add MD5 Hash of request content as request header if feature is enabled - if (isChecksumValidationEnabled()) { - addCheckSumHeaderForWrite(requestHeaders, reqParams, buffer); - } - - // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance - String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.WRITE_OPERATION, - abfsUriQueryBuilder, cachedSasToken); - - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.Append, - HTTP_METHOD_PUT, - url, - requestHeaders, - buffer, - reqParams.getoffset(), - reqParams.getLength(), - sasTokenForReuse); - try { - op.execute(tracingContext); - } catch (AbfsRestOperationException e) { - /* - If the http response code indicates a user error we retry - the same append request with expect header being disabled. - When "100-continue" header is enabled but a non Http 100 response comes, - the response message might not get set correctly by the server. 
- So, this handling is to avoid breaking of backward compatibility - if someone has taken dependency on the exception message, - which is created using the error string present in the response header. - */ - int responseStatusCode = e.getStatusCode(); - if (checkUserError(responseStatusCode) && reqParams.isExpectHeaderEnabled()) { - LOG.debug("User error, retrying without 100 continue enabled for the given path {}", path); - reqParams.setExpectHeaderEnabled(false); - reqParams.setRetryDueToExpect(true); - return this.append(path, buffer, reqParams, cachedSasToken, - contextEncryptionAdapter, tracingContext); - } - // If we have no HTTP response, throw the original exception. - if (!op.hasResult()) { - throw e; - } - - if (isMd5ChecksumError(e)) { - throw new AbfsInvalidChecksumException(e); - } - - if (reqParams.isAppendBlob() - && appendSuccessCheckOp(op, path, - (reqParams.getPosition() + reqParams.getLength()), tracingContext)) { - final AbfsRestOperation successOp = getAbfsRestOperation( - AbfsRestOperationType.Append, - HTTP_METHOD_PUT, - url, - requestHeaders, - buffer, - reqParams.getoffset(), - reqParams.getLength(), - sasTokenForReuse); - successOp.hardSetResult(HttpURLConnection.HTTP_OK); - return successOp; - } - throw e; - } - - catch (AzureBlobFileSystemException e) { - // Any server side issue will be returned as AbfsRestOperationException and will be handled above. - LOG.debug("Append request failed with non server issues for path: {}, offset: {}, position: {}", - path, reqParams.getoffset(), reqParams.getPosition()); - throw e; - } - - return op; - } + throws AzureBlobFileSystemException; /** * Returns true if the status code lies in the range of user error. * @param responseStatusCode http response status code. * @return True or False. */ - private boolean checkUserError(int responseStatusCode) { - return (responseStatusCode >= HttpURLConnection.HTTP_BAD_REQUEST - && responseStatusCode < HttpURLConnection.HTTP_INTERNAL_ERROR); - } + public abstract boolean checkUserError(int responseStatusCode); /** * To check if the failure exception returned by server is due to MD5 Mismatch * @param e Exception returned by AbfsRestOperation * @return boolean whether exception is due to MD5Mismatch or not */ - private boolean isMd5ChecksumError(final AbfsRestOperationException e) { + protected boolean isMd5ChecksumError(final AbfsRestOperationException e) { AzureServiceErrorCode storageErrorCode = e.getErrorCode(); return storageErrorCode == AzureServiceErrorCode.MD5_MISMATCH; } @@ -1040,7 +743,7 @@ private boolean isMd5ChecksumError(final AbfsRestOperationException e) { // However a retry would fail with an InvalidQueryParameterValue // (as the current offset would be unacceptable). // Hence, we pass/succeed the appendblob append call - // in case we are doing a retry after checking the length of the file + // in case we are doing a retry after checking the length of the file. public boolean appendSuccessCheckOp(AbfsRestOperation op, final String path, final long length, TracingContext tracingContext) throws AzureBlobFileSystemException { @@ -1059,203 +762,111 @@ public boolean appendSuccessCheckOp(AbfsRestOperation op, final String path, return false; } - public AbfsRestOperation flush(final String path, final long position, + /** + * Flush previously uploaded data to a file. + * @param path on which data has to be flushed. + * @param position to which data has to be flushed. + * @param retainUncommittedData whether to retain uncommitted data after flush. 
+ * @param isClose specify if this is the last flush to the file. + * @param cachedSasToken to be used for the authenticating operation. + * @param leaseId if there is an active lease on the path. + * @param contextEncryptionAdapter to provide encryption context. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation flush(String path, long position, boolean retainUncommittedData, boolean isClose, - final String cachedSasToken, final String leaseId, + String cachedSasToken, String leaseId, ContextEncryptionAdapter contextEncryptionAdapter, TracingContext tracingContext) - throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - addEncryptionKeyRequestHeaders(path, requestHeaders, false, - contextEncryptionAdapter, tracingContext); - // JDK7 does not support PATCH, so to workaround the issue we will use - // PUT and specify the real method in the X-Http-Method-Override header. - requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, - HTTP_METHOD_PATCH)); - if (leaseId != null) { - requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, leaseId)); - } - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, FLUSH_ACTION); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, Long.toString(position)); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_RETAIN_UNCOMMITTED_DATA, String.valueOf(retainUncommittedData)); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_CLOSE, String.valueOf(isClose)); - - // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance - String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.WRITE_OPERATION, - abfsUriQueryBuilder, cachedSasToken); - - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.Flush, - HTTP_METHOD_PUT, - url, - requestHeaders, sasTokenForReuse); - op.execute(tracingContext); - return op; - } - - public AbfsRestOperation setPathProperties(final String path, final String properties, - final TracingContext tracingContext, final ContextEncryptionAdapter contextEncryptionAdapter) - throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - addEncryptionKeyRequestHeaders(path, requestHeaders, false, - contextEncryptionAdapter, tracingContext); - // JDK7 does not support PATCH, so to workaround the issue we will use - // PUT and specify the real method in the X-Http-Method-Override header. - requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, - HTTP_METHOD_PATCH)); - - requestHeaders.add(new AbfsHttpHeader(X_MS_PROPERTIES, properties)); + throws AzureBlobFileSystemException; - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, SET_PROPERTIES_ACTION); - appendSASTokenToQuery(path, SASTokenProvider.SET_PROPERTIES_OPERATION, abfsUriQueryBuilder); - - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.SetPathProperties, - HTTP_METHOD_PUT, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } + /** + * Flush previously uploaded data to a file. + * @param buffer containing blockIds to be flushed. 
+ * @param path on which data has to be flushed. + * @param isClose specify if this is the last flush to the file. + * @param cachedSasToken to be used for the authenticating operation. + * @param leaseId if there is an active lease on the path. + * @param eTag to specify conditional headers. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation flush(byte[] buffer, + String path, + boolean isClose, + String cachedSasToken, + String leaseId, + String eTag, + TracingContext tracingContext) throws AzureBlobFileSystemException; - public AbfsRestOperation getPathStatus(final String path, - final boolean includeProperties, final TracingContext tracingContext, - final ContextEncryptionAdapter contextEncryptionAdapter) - throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - String operation = SASTokenProvider.GET_PROPERTIES_OPERATION; - if (!includeProperties) { - // The default action (operation) is implicitly to get properties and this action requires read permission - // because it reads user defined properties. If the action is getStatus or getAclStatus, then - // only traversal (execute) permission is required. - abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_ACTION, AbfsHttpConstants.GET_STATUS); - operation = SASTokenProvider.GET_STATUS_OPERATION; - } else { - addEncryptionKeyRequestHeaders(path, requestHeaders, false, - contextEncryptionAdapter, - tracingContext); - } - abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_UPN, String.valueOf(abfsConfiguration.isUpnUsed())); - appendSASTokenToQuery(path, operation, abfsUriQueryBuilder); + /** + * Set the properties of a file or directory. + * @param path on which properties have to be set. + * @param properties list of metadata key-value pairs. + * @param tracingContext for tracing the server calls. + * @param contextEncryptionAdapter to provide encryption context. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation setPathProperties(String path, Hashtable properties, + TracingContext tracingContext, ContextEncryptionAdapter contextEncryptionAdapter) + throws AzureBlobFileSystemException; - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.GetPathStatus, - HTTP_METHOD_HEAD, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } + /** + * Get the properties of a file or directory. + * @param path of which properties have to be fetched. + * @param includeProperties to include user defined properties. + * @param tracingContext for tracing the server calls. + * @param contextEncryptionAdapter to provide encryption context. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. 
+ */ + public abstract AbfsRestOperation getPathStatus(String path, + boolean includeProperties, TracingContext tracingContext, + ContextEncryptionAdapter contextEncryptionAdapter) + throws AzureBlobFileSystemException; - public AbfsRestOperation read(final String path, - final long position, - final byte[] buffer, - final int bufferOffset, - final int bufferLength, - final String eTag, + /** + * Read the contents of the file at specified path. + * @param path of the file to be read. + * @param position in the file from where data has to be read. + * @param buffer to store the data read. + * @param bufferOffset offset in the buffer to start storing the data. + * @param bufferLength length of data to be read. + * @param eTag to specify conditional headers. + * @param cachedSasToken to be used for the authenticating operation. + * @param contextEncryptionAdapter to provide encryption context. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation read(String path, + long position, + byte[] buffer, + int bufferOffset, + int bufferLength, + String eTag, String cachedSasToken, ContextEncryptionAdapter contextEncryptionAdapter, - TracingContext tracingContext) throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - addEncryptionKeyRequestHeaders(path, requestHeaders, false, - contextEncryptionAdapter, tracingContext); - AbfsHttpHeader rangeHeader = new AbfsHttpHeader(RANGE, - String.format("bytes=%d-%d", position, position + bufferLength - 1)); - requestHeaders.add(rangeHeader); - requestHeaders.add(new AbfsHttpHeader(IF_MATCH, eTag)); - - // Add request header to fetch MD5 Hash of data returned by server. - if (isChecksumValidationEnabled(requestHeaders, rangeHeader, bufferLength)) { - requestHeaders.add(new AbfsHttpHeader(X_MS_RANGE_GET_CONTENT_MD5, TRUE)); - } + TracingContext tracingContext) throws AzureBlobFileSystemException; - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - - // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance - String sasTokenForReuse = appendSASTokenToQuery(path, SASTokenProvider.READ_OPERATION, - abfsUriQueryBuilder, cachedSasToken); - - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.ReadFile, - HTTP_METHOD_GET, - url, - requestHeaders, - buffer, - bufferOffset, - bufferLength, sasTokenForReuse); - op.execute(tracingContext); - - // Verify the MD5 hash returned by server holds valid on the data received. - if (isChecksumValidationEnabled(requestHeaders, rangeHeader, bufferLength)) { - verifyCheckSumForRead(buffer, op.getResult(), bufferOffset); - } - - return op; - } - - public AbfsRestOperation deletePath(final String path, final boolean recursive, - final String continuation, - TracingContext tracingContext, - final boolean isNamespaceEnabled) - throws AzureBlobFileSystemException { - /* - * If Pagination is enabled and current API version is old, - * use the minimum required version for pagination. - * If Pagination is enabled and current API version is later than minimum required - * version for pagination, use current version only as azure service is backward compatible. - * If pagination is disabled, use the current API version only. 
- */ - final List requestHeaders = (isPaginatedDelete(recursive, - isNamespaceEnabled) && xMsVersion.compareTo(ApiVersion.AUG_03_2023) < 0) - ? createDefaultHeaders(ApiVersion.AUG_03_2023) - : createDefaultHeaders(); - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - - if (isPaginatedDelete(recursive, isNamespaceEnabled)) { - // Add paginated query parameter - abfsUriQueryBuilder.addQuery(QUERY_PARAM_PAGINATED, TRUE); - } - - abfsUriQueryBuilder.addQuery(QUERY_PARAM_RECURSIVE, String.valueOf(recursive)); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_CONTINUATION, continuation); - String operation = recursive ? SASTokenProvider.DELETE_RECURSIVE_OPERATION : SASTokenProvider.DELETE_OPERATION; - appendSASTokenToQuery(path, operation, abfsUriQueryBuilder); - - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = new AbfsRestOperation( - AbfsRestOperationType.DeletePath, - this, - HTTP_METHOD_DELETE, - url, - requestHeaders, - abfsConfiguration); - try { - op.execute(tracingContext); - } catch (AzureBlobFileSystemException e) { - // If we have no HTTP response, throw the original exception. - if (!op.hasResult()) { - throw e; - } - final AbfsRestOperation idempotencyOp = deleteIdempotencyCheckOp(op); - if (idempotencyOp.getResult().getStatusCode() - == op.getResult().getStatusCode()) { - // idempotency did not return different result - // throw back the exception - throw e; - } else { - return idempotencyOp; - } - } - - return op; - } + /** + * Delete the file or directory at specified path. + * @param path to be deleted. + * @param recursive if the path is a directory, delete recursively. + * @param continuation to specify continuation token. + * @param tracingContext for tracing the server calls. + * @param isNamespaceEnabled specify if the namespace is enabled. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation deletePath(String path, boolean recursive, + String continuation, + TracingContext tracingContext, + boolean isNamespaceEnabled) + throws AzureBlobFileSystemException; /** * Check if the delete request failure is post a retry and if delete failure @@ -1269,8 +880,8 @@ public AbfsRestOperation deletePath(final String path, final boolean recursive, * delete issued from this filesystem instance. * These are few corner cases and usually returning a success at this stage * should help the job to continue. - * @param op Delete request REST operation response with non-null HTTP response - * @return REST operation response post idempotency check + * @param op Delete request REST operation response with non-null HTTP response. + * @return REST operation response post idempotency check. */ public AbfsRestOperation deleteIdempotencyCheckOp(final AbfsRestOperation op) { Preconditions.checkArgument(op.hasResult(), "Operations has null HTTP response"); @@ -1292,117 +903,79 @@ public AbfsRestOperation deleteIdempotencyCheckOp(final AbfsRestOperation op) { return op; } - public AbfsRestOperation setOwner(final String path, final String owner, final String group, - TracingContext tracingContext) - throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - // JDK7 does not support PATCH, so to workaround the issue we will use - // PUT and specify the real method in the X-Http-Method-Override header. 
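
The JDK7 note above (repeated wherever the client issues PATCH-style updates) refers to HttpURLConnection rejecting the PATCH verb, so the operation is tunnelled through PUT with the real verb declared in an override header. A small stand-alone illustration of the pattern, with a placeholder endpoint and no authentication wired up:

    import java.net.HttpURLConnection;
    import java.net.URL;

    public class PatchOverPutSketch {
      public static void main(String[] args) throws Exception {
        URL url = new URL(
            "https://myaccount.dfs.core.windows.net/myfs/dir?action=setAccessControl");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        // HttpURLConnection.setRequestMethod("PATCH") throws ProtocolException,
        // so send PUT and name the intended verb in an override header.
        conn.setRequestMethod("PUT");
        conn.setRequestProperty("X-Http-Method-Override", "PATCH");
        conn.setDoOutput(true);
        // ... add auth headers, write the request body, read the response ...
      }
    }
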
- requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, - HTTP_METHOD_PATCH)); - - if (owner != null && !owner.isEmpty()) { - requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_OWNER, owner)); - } - if (group != null && !group.isEmpty()) { - requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_GROUP, group)); - } - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_ACTION, AbfsHttpConstants.SET_ACCESS_CONTROL); - appendSASTokenToQuery(path, SASTokenProvider.SET_OWNER_OPERATION, abfsUriQueryBuilder); - - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.SetOwner, - AbfsHttpConstants.HTTP_METHOD_PUT, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } - - public AbfsRestOperation setPermission(final String path, final String permission, - TracingContext tracingContext) - throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - // JDK7 does not support PATCH, so to workaround the issue we will use - // PUT and specify the real method in the X-Http-Method-Override header. - requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, - HTTP_METHOD_PATCH)); - - requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_PERMISSIONS, permission)); - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_ACTION, AbfsHttpConstants.SET_ACCESS_CONTROL); - appendSASTokenToQuery(path, SASTokenProvider.SET_PERMISSION_OPERATION, abfsUriQueryBuilder); + /** + * Sets the owner on tha path. + * @param path on which owner has to be set. + * @param owner to be set. + * @param group to be set. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation setOwner(String path, String owner, String group, + TracingContext tracingContext) + throws AzureBlobFileSystemException; - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.SetPermissions, - AbfsHttpConstants.HTTP_METHOD_PUT, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } + /** + * Sets the permission on the path. + * @param path on which permission has to be set. + * @param permission to be set. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation setPermission(String path, String permission, + TracingContext tracingContext) + throws AzureBlobFileSystemException; + /** + * Sets the ACL. + * @param path on which ACL has to be set. + * @param aclSpecString to be set. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. 
+ */ public AbfsRestOperation setAcl(final String path, final String aclSpecString, - TracingContext tracingContext) throws AzureBlobFileSystemException { - return setAcl(path, aclSpecString, AbfsHttpConstants.EMPTY_STRING, tracingContext); + TracingContext tracingContext) throws AzureBlobFileSystemException { + return setAcl(path, aclSpecString, EMPTY_STRING, tracingContext); } - public AbfsRestOperation setAcl(final String path, final String aclSpecString, final String eTag, - TracingContext tracingContext) - throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - // JDK7 does not support PATCH, so to workaround the issue we will use - // PUT and specify the real method in the X-Http-Method-Override header. - requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, - HTTP_METHOD_PATCH)); - - requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_ACL, aclSpecString)); - - if (eTag != null && !eTag.isEmpty()) { - requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.IF_MATCH, eTag)); - } - - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_ACTION, AbfsHttpConstants.SET_ACCESS_CONTROL); - appendSASTokenToQuery(path, SASTokenProvider.SET_ACL_OPERATION, abfsUriQueryBuilder); - - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.SetAcl, - AbfsHttpConstants.HTTP_METHOD_PUT, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } + /** + * Sets the ACL on the path that matches ETag. + * @param path on which ACL has to be set. + * @param aclSpecString to be set. + * @param eTag to specify conditional headers. Set only if etag matches. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation setAcl(String path, String aclSpecString, String eTag, + TracingContext tracingContext) + throws AzureBlobFileSystemException; + /** + * Retrieves the ACL properties of blob at specified path. + * @param path of which properties have to be fetched. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. 
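
The etag-guarded setAcl variant above is a standard HTTP conditional update: the ACL is applied only while the path still carries the etag that was read earlier, and a concurrent change surfaces as a precondition failure. A sketch of the header shape, with placeholder URL, etag and ACL spec, and authentication omitted:

    import java.net.HttpURLConnection;
    import java.net.URL;

    public class ConditionalSetAclSketch {
      public static void main(String[] args) throws Exception {
        String eTag = "\"0x8D0000000000000\"";   // captured from an earlier getPathStatus call
        URL url = new URL(
            "https://myaccount.dfs.core.windows.net/myfs/dir?action=setAccessControl");
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        conn.setRequestMethod("PUT");                          // PATCH tunnelled through PUT
        conn.setRequestProperty("X-Http-Method-Override", "PATCH");
        // Only apply the ACL if the path is unchanged since the etag was read.
        conn.setRequestProperty("If-Match", eTag);
        conn.setRequestProperty("x-ms-acl", "user::rwx,group::r-x,other::---");
        conn.setDoOutput(true);
      }
    }
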
+ */ public AbfsRestOperation getAclStatus(final String path, TracingContext tracingContext) throws AzureBlobFileSystemException { return getAclStatus(path, abfsConfiguration.isUpnUsed(), tracingContext); } - public AbfsRestOperation getAclStatus(final String path, final boolean useUPN, - TracingContext tracingContext) throws AzureBlobFileSystemException { - final List requestHeaders = createDefaultHeaders(); - final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_ACTION, AbfsHttpConstants.GET_ACCESS_CONTROL); - abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_UPN, String.valueOf(useUPN)); - appendSASTokenToQuery(path, SASTokenProvider.GET_ACL_OPERATION, abfsUriQueryBuilder); - - final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - final AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.GetAcl, - AbfsHttpConstants.HTTP_METHOD_HEAD, - url, - requestHeaders); - op.execute(tracingContext); - return op; - } + /** + * Retrieves the ACL properties of blob at specified path. + * @param path of which properties have to be fetched. + * @param useUPN whether to use UPN with rest operation. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + public abstract AbfsRestOperation getAclStatus(String path, boolean useUPN, + TracingContext tracingContext) throws AzureBlobFileSystemException; /** * Talks to the server to check whether the permission specified in @@ -1414,21 +987,8 @@ public AbfsRestOperation getAclStatus(final String path, final boolean useUPN, * @return The {@link AbfsRestOperation} object for the operation * @throws AzureBlobFileSystemException in case of bad requests */ - public AbfsRestOperation checkAccess(String path, String rwx, TracingContext tracingContext) - throws AzureBlobFileSystemException { - AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); - abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, CHECK_ACCESS); - abfsUriQueryBuilder.addQuery(QUERY_FS_ACTION, rwx); - appendSASTokenToQuery(path, SASTokenProvider.CHECK_ACCESS_OPERATION, abfsUriQueryBuilder); - URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); - AbfsRestOperation op = getAbfsRestOperation( - AbfsRestOperationType.CheckAccess, - AbfsHttpConstants.HTTP_METHOD_HEAD, - url, - createDefaultHeaders()); - op.execute(tracingContext); - return op; - } + public abstract AbfsRestOperation checkAccess(String path, String rwx, TracingContext tracingContext) + throws AzureBlobFileSystemException; /** * Get the directory query parameter used by the List Paths REST API and used @@ -1442,7 +1002,7 @@ public AbfsRestOperation checkAccess(String path, String rwx, TracingContext tra public static String getDirectoryQueryParameter(final String path) { String directory = path; if (Strings.isNullOrEmpty(directory)) { - directory = AbfsHttpConstants.EMPTY_STRING; + directory = EMPTY_STRING; } else if (directory.charAt(0) == '/') { directory = directory.substring(1); } @@ -1451,29 +1011,29 @@ public static String getDirectoryQueryParameter(final String path) { /** * If configured for SAS AuthType, appends SAS token to queryBuilder. - * @param path - * @param operation - * @param queryBuilder + * @param path for which SAS token is required. + * @param operation for which SAS token is required. 
+ * @param queryBuilder to which SAS token is appended. * @return sasToken - returned for optional re-use. - * @throws SASTokenProviderException + * @throws SASTokenProviderException if SAS token cannot be acquired. */ - private String appendSASTokenToQuery(String path, String operation, AbfsUriQueryBuilder queryBuilder) throws SASTokenProviderException { + protected String appendSASTokenToQuery(String path, String operation, AbfsUriQueryBuilder queryBuilder) throws SASTokenProviderException { return appendSASTokenToQuery(path, operation, queryBuilder, null); } /** * If configured for SAS AuthType, appends SAS token to queryBuilder. - * @param path - * @param operation - * @param queryBuilder + * @param path for which SAS token is required. + * @param operation for which SAS token is required. + * @param queryBuilder to which SAS token is appended. * @param cachedSasToken - previously acquired SAS token to be reused. * @return sasToken - returned for optional re-use. - * @throws SASTokenProviderException + * @throws SASTokenProviderException if SAS token cannot be acquired. */ - private String appendSASTokenToQuery(String path, - String operation, - AbfsUriQueryBuilder queryBuilder, - String cachedSasToken) + protected String appendSASTokenToQuery(String path, + String operation, + AbfsUriQueryBuilder queryBuilder, + String cachedSasToken) throws SASTokenProviderException { String sasToken = null; if (this.authType == AuthType.SAS) { @@ -1506,17 +1066,38 @@ private String appendSASTokenToQuery(String path, return sasToken; } + /** + * Creates REST operation URL with empty path for the given query. + * @param query to be added to the URL. + * @return URL for the REST operation. + * @throws AzureBlobFileSystemException if URL creation fails. + */ @VisibleForTesting - private URL createRequestUrl(final String query) throws AzureBlobFileSystemException { + protected URL createRequestUrl(final String query) throws AzureBlobFileSystemException { return createRequestUrl(EMPTY_STRING, query); } + /** + * Creates REST operation URL with given path and query. + * @param path for which URL has to be created. + * @param query to be added to the URL. + * @return URL for the REST operation. + * @throws AzureBlobFileSystemException if URL creation fails. + */ @VisibleForTesting protected URL createRequestUrl(final String path, final String query) throws AzureBlobFileSystemException { return createRequestUrl(baseUrl, path, query); } + /** + * Creates REST operation URL with given baseUrl, path and query. + * @param baseUrl to be used for the operation. + * @param path for which URL has to be created. + * @param query to be added to the URL. + * @return URL for the REST operation. + * @throws AzureBlobFileSystemException if URL creation fails. + */ @VisibleForTesting protected URL createRequestUrl(final URL baseUrl, final String path, final String query) throws AzureBlobFileSystemException { @@ -1545,6 +1126,12 @@ protected URL createRequestUrl(final URL baseUrl, final String path, final Strin return url; } + /** + * returns the url encoded string for a given value. + * @param value to be encoded. + * @return url encoded string. + * @throws AzureBlobFileSystemException if encoding fails. 
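
Putting the URL helpers documented above together: the path is percent-encoded (urlEncode, declared just below, exists for this), the operation's query string is appended, and under SAS auth appendSASTokenToQuery adds the token as extra query parameters. A rough, JDK-only sketch with placeholder account, container and token; the real encoding rules live in urlEncode itself:

    import java.net.URL;
    import java.net.URLEncoder;
    import java.nio.charset.StandardCharsets;

    public class RequestUrlSketch {
      public static void main(String[] args) throws Exception {
        URL baseUrl = new URL("https://myaccount.dfs.core.windows.net/myfs");
        String path = "/data/part 0001";

        // Simplified stand-in for urlEncode(): percent-encode, but keep "/" as a
        // path separator and use %20 rather than "+" for spaces.
        String encodedPath = URLEncoder.encode(path, StandardCharsets.UTF_8.name())
            .replace("+", "%20")
            .replace("%2F", "/");

        String query = "?action=getStatus&upn=false";
        String sasToken = "sv=2023-11-03&sig=PLACEHOLDER";   // what appendSASTokenToQuery would add
        URL requestUrl = new URL(baseUrl + encodedPath + query + "&" + sasToken);
        System.out.println(requestUrl);
        // https://myaccount.dfs.core.windows.net/myfs/data/part%200001?action=getStatus&upn=false&sv=2023-11-03&sig=PLACEHOLDER
      }
    }
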
+ */ public static String urlEncode(final String value) throws AzureBlobFileSystemException { String encodedString; try { @@ -1570,7 +1157,7 @@ protected Boolean getIsPaginatedDeleteEnabled() { return abfsConfiguration.isPaginatedDeleteEnabled(); } - private Boolean isPaginatedDelete(boolean isRecursiveDelete, boolean isNamespaceEnabled) { + protected Boolean isPaginatedDelete(boolean isRecursiveDelete, boolean isNamespaceEnabled) { return getIsPaginatedDeleteEnabled() && isNamespaceEnabled && isRecursiveDelete; } @@ -1656,7 +1243,7 @@ private void appendIfNotEmpty(StringBuilder sb, String regEx, * @param buffer for getting input data for MD5 computation * @throws AbfsRestOperationException if Md5 computation fails */ - private void addCheckSumHeaderForWrite(List requestHeaders, + protected void addCheckSumHeaderForWrite(List requestHeaders, final AppendRequestParameters reqParams, final byte[] buffer) throws AbfsRestOperationException { String md5Hash = computeMD5Hash(buffer, reqParams.getoffset(), @@ -1671,7 +1258,7 @@ private void addCheckSumHeaderForWrite(List requestHeaders, * @param bufferOffset Position where data returned by server is saved in buffer. * @throws AbfsRestOperationException if Md5Mismatch. */ - private void verifyCheckSumForRead(final byte[] buffer, + protected void verifyCheckSumForRead(final byte[] buffer, final AbfsHttpOperation result, final int bufferOffset) throws AbfsRestOperationException { // Number of bytes returned by server could be less than or equal to what @@ -1694,9 +1281,8 @@ private void verifyCheckSumForRead(final byte[] buffer, /** * Conditions check for allowing checksum support for read operation. - * Sending MD5 Hash in request headers. For more details see - * @see - * Path - Read Azure Storage Rest API. + * Sending MD5 Hash in request headers. For more details refer to + * Path - Read Azure Storage Rest API. * 1. Range header must be present as one of the request headers. * 2. buffer length must be less than or equal to 4 MB. * @param requestHeaders to be checked for range header. @@ -1704,7 +1290,7 @@ private void verifyCheckSumForRead(final byte[] buffer, * @param bufferLength must be less than or equal to 4 MB. * @return true if all conditions are met. */ - private boolean isChecksumValidationEnabled(List requestHeaders, + protected boolean isChecksumValidationEnabled(List requestHeaders, final AbfsHttpHeader rangeHeader, final int bufferLength) { return getAbfsConfiguration().getIsChecksumValidationEnabled() && requestHeaders.contains(rangeHeader) && bufferLength <= 4 * ONE_MB; @@ -1713,12 +1299,11 @@ private boolean isChecksumValidationEnabled(List requestHeaders, /** * Conditions check for allowing checksum support for write operation. * Server will support this if client sends the MD5 Hash as a request header. - * For azure stoage service documentation see - * @see - * Path - Update Azure Rest API. + * For azure stoage service documentation and more details refer to + * Path - Update Azure Rest API. * @return true if checksum validation enabled. 
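
The checksum hooks above reduce to a simple contract: on writes, hash exactly the buffer slice being sent and attach it as an MD5 request header; on reads, recompute the hash over the bytes received and fail fast on a mismatch, and only do any of this when a Range header is present and the buffer is at most 4 MB. A self-contained sketch of the hashing side (header wiring and the Range/4 MB checks omitted):

    import java.security.MessageDigest;
    import java.util.Base64;

    public class Md5ChecksumSketch {
      public static void main(String[] args) throws Exception {
        byte[] buffer = "example payload".getBytes("UTF-8");
        int offset = 0;
        int length = buffer.length;

        // Write path: hash the exact slice being appended and send it, base64
        // encoded, as the MD5 request header so the server can validate the upload.
        MessageDigest digest = MessageDigest.getInstance("MD5");
        digest.update(buffer, offset, length);
        String sentMd5 = Base64.getEncoder().encodeToString(digest.digest());

        // Read path: recompute over the bytes actually received and compare with
        // the value the server returned; a mismatch means corruption in transit.
        String receivedMd5 = sentMd5;   // placeholder for the response header value
        if (!sentMd5.equals(receivedMd5)) {
          throw new IllegalStateException("MD5 mismatch: data corrupted in transit");
        }
        System.out.println("MD5 OK: " + sentMd5);
      }
    }
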
*/ - private boolean isChecksumValidationEnabled() { + protected boolean isChecksumValidationEnabled() { return getAbfsConfiguration().getIsChecksumValidationEnabled(); } @@ -2011,4 +1596,12 @@ AbfsApacheHttpClient getAbfsApacheHttpClient() { KeepAliveCache getKeepAliveCache() { return keepAliveCache; } + + protected String getUserAgent() { + return userAgent; + } + + protected boolean isRenameResilience() { + return renameResilience; + } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientHandler.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientHandler.java new file mode 100644 index 0000000000000..12d800939ae95 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsClientHandler.java @@ -0,0 +1,127 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.net.URL; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.constants.AbfsServiceType; +import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; +import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; +import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; + +import static org.apache.hadoop.fs.azurebfs.utils.UriUtils.changeUrlFromBlobToDfs; + +/** + * AbfsClientHandler is a class that provides a way to get the AbfsClient + * based on the service type. 
+ */ +public class AbfsClientHandler { + public static final Logger LOG = LoggerFactory.getLogger(AbfsClientHandler.class); + + private AbfsServiceType defaultServiceType; + private final AbfsDfsClient dfsAbfsClient; + + public AbfsClientHandler(final URL baseUrl, + final SharedKeyCredentials sharedKeyCredentials, + final AbfsConfiguration abfsConfiguration, + final AccessTokenProvider tokenProvider, + final EncryptionContextProvider encryptionContextProvider, + final AbfsClientContext abfsClientContext) throws IOException { + this.dfsAbfsClient = createDfsClient(baseUrl, sharedKeyCredentials, + abfsConfiguration, tokenProvider, null, encryptionContextProvider, + abfsClientContext); + initServiceType(abfsConfiguration); + } + + public AbfsClientHandler(final URL baseUrl, + final SharedKeyCredentials sharedKeyCredentials, + final AbfsConfiguration abfsConfiguration, + final SASTokenProvider sasTokenProvider, + final EncryptionContextProvider encryptionContextProvider, + final AbfsClientContext abfsClientContext) throws IOException { + this.dfsAbfsClient = createDfsClient(baseUrl, sharedKeyCredentials, + abfsConfiguration, null, sasTokenProvider, encryptionContextProvider, + abfsClientContext); + initServiceType(abfsConfiguration); + } + + /** + * Initialize the default service type based on the user configuration. + * @param abfsConfiguration set by user. + */ + private void initServiceType(final AbfsConfiguration abfsConfiguration) { + this.defaultServiceType = abfsConfiguration.getFsConfiguredServiceType(); + } + + /** + * Get the AbfsClient based on the default service type. + * @return AbfsClient + */ + public AbfsClient getClient() { + return getClient(defaultServiceType); + } + + /** + * Get the AbfsClient based on the service type. + * @param serviceType AbfsServiceType + * @return AbfsClient + */ + public AbfsClient getClient(AbfsServiceType serviceType) { + return serviceType == AbfsServiceType.DFS ? dfsAbfsClient : null; + } + + /** + * Create the AbfsDfsClient using the url used to configure file system. + * If URL is for Blob endpoint, it will be converted to DFS endpoint. + * @param baseUrl URL + * @param creds SharedKeyCredentials + * @param abfsConfiguration AbfsConfiguration + * @param tokenProvider AccessTokenProvider + * @param sasTokenProvider SASTokenProvider + * @param encryptionContextProvider EncryptionContextProvider + * @param abfsClientContext AbfsClientContext + * @return AbfsDfsClient with DFS endpoint URL + * @throws IOException if URL conversion fails. 
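
Taken together, the handler gives callers one place to resolve the endpoint-specific client. A sketch of the intended call pattern; this is a fragment with the constructor arguments (which mirror the parameters listed above) elided, and note that with this change only the DFS client is created, so any other service type currently resolves to null:

    // Fragment: constructor arguments are whatever the file system already
    // holds today for building its AbfsClient.
    AbfsClientHandler handler = new AbfsClientHandler(baseUrl, sharedKeyCredentials,
        abfsConfiguration, tokenProvider, encryptionContextProvider, abfsClientContext);

    AbfsClient client = handler.getClient();                        // configured default service type
    AbfsClient dfsClient = handler.getClient(AbfsServiceType.DFS);  // explicitly the DFS endpoint
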
+ */ + private AbfsDfsClient createDfsClient(final URL baseUrl, + final SharedKeyCredentials creds, + final AbfsConfiguration abfsConfiguration, + final AccessTokenProvider tokenProvider, + final SASTokenProvider sasTokenProvider, + final EncryptionContextProvider encryptionContextProvider, + final AbfsClientContext abfsClientContext) throws IOException { + URL dfsUrl = changeUrlFromBlobToDfs(baseUrl); + if (tokenProvider != null) { + LOG.debug("Creating AbfsDfsClient with access token provider using the URL: {}", dfsUrl); + return new AbfsDfsClient(dfsUrl, creds, abfsConfiguration, + tokenProvider, encryptionContextProvider, + abfsClientContext); + } else { + LOG.debug("Creating AbfsDfsClient with SAS token provider using the URL: {}", dfsUrl); + return new AbfsDfsClient(dfsUrl, creds, abfsConfiguration, + sasTokenProvider, encryptionContextProvider, + abfsClientContext); + } + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java new file mode 100644 index 0000000000000..f2eebd8800f15 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsDfsClient.java @@ -0,0 +1,1302 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *
+ * http://www.apache.org/licenses/LICENSE-2.0 + *
    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.azurebfs.services; + +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.util.Hashtable; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import org.apache.hadoop.fs.azurebfs.AbfsConfiguration; +import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; +import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ApiVersion; +import org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations; +import org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsInvalidChecksumException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidAbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.services.AppendRequestParameters; +import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider; +import org.apache.hadoop.fs.azurebfs.extensions.SASTokenProvider; +import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; +import org.apache.hadoop.fs.azurebfs.security.ContextEncryptionAdapter; +import org.apache.hadoop.fs.azurebfs.utils.Base64; +import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import org.apache.hadoop.util.StringUtils; + +import static org.apache.commons.lang3.StringUtils.isEmpty; +import static org.apache.hadoop.fs.azurebfs.AzureBlobFileSystemStore.extractEtagHeader; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.ACQUIRE_LEASE_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_BLOB_TYPE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPLICATION_JSON; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPLICATION_OCTET_STREAM; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.BREAK_LEASE_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.CHECK_ACCESS; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COMMA; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DEFAULT_LEASE_BREAK_PERIOD; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.DIRECTORY; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EMPTY_STRING; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FILE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FILESYSTEM; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FLUSH_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH; +import static 
org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.GET_ACCESS_CONTROL; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.GET_STATUS; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_DELETE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_GET; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_HEAD; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_POST; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.RELEASE_LEASE_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.RENEW_LEASE_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SET_ACCESS_CONTROL; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SET_PROPERTIES_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.SINGLE_WHITE_SPACE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.STAR; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.XMS_PROPERTIES_ENCODING_ASCII; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.ACCEPT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.IF_MATCH; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.IF_NONE_MATCH; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.RANGE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.USER_AGENT; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_HTTP_METHOD_OVERRIDE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_EXISTING_RESOURCE_TYPE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_BREAK_PERIOD; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_DURATION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_LEASE_ID; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_PROPERTIES; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_PROPOSED_LEASE_ID; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_RANGE_GET_CONTENT_MD5; +import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.X_MS_RENAME_SOURCE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_FS_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_BLOBTYPE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_CLOSE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_CONTINUATION; 
+import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_DIRECTORY; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_FLUSH; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_MAXRESULTS; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_PAGINATED; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_POSITION; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_RECURSIVE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_RESOURCE; +import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_RETAIN_UNCOMMITTED_DATA; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.RENAME_DESTINATION_PARENT_PATH_NOT_FOUND; +import static org.apache.hadoop.fs.azurebfs.contracts.services.AzureServiceErrorCode.SOURCE_PATH_NOT_FOUND; + +/** + * AbfsClient interacting with the DFS Endpoint. + */ +public class AbfsDfsClient extends AbfsClient { + + public AbfsDfsClient(final URL baseUrl, + final SharedKeyCredentials sharedKeyCredentials, + final AbfsConfiguration abfsConfiguration, + final AccessTokenProvider tokenProvider, + final EncryptionContextProvider encryptionContextProvider, + final AbfsClientContext abfsClientContext) throws IOException { + super(baseUrl, sharedKeyCredentials, abfsConfiguration, tokenProvider, + encryptionContextProvider, abfsClientContext); + } + + public AbfsDfsClient(final URL baseUrl, + final SharedKeyCredentials sharedKeyCredentials, + final AbfsConfiguration abfsConfiguration, + final SASTokenProvider sasTokenProvider, + final EncryptionContextProvider encryptionContextProvider, + final AbfsClientContext abfsClientContext) throws IOException { + super(baseUrl, sharedKeyCredentials, abfsConfiguration, sasTokenProvider, + encryptionContextProvider, abfsClientContext); + } + + /** + * Create request headers for Rest Operation using the default API version. + * @return default request headers. + */ + @Override + public List createDefaultHeaders() { + return this.createDefaultHeaders(getxMsVersion()); + } + + /** + * Create request headers for Rest Operation using the specified API version. + * DFS Endpoint API responses are in JSON/Stream format. + * @param xMsVersion API version to be used. + * @return default request headers. + */ + @Override + public List createDefaultHeaders(ApiVersion xMsVersion) { + List requestHeaders = createCommonHeaders(xMsVersion); + requestHeaders.add(new AbfsHttpHeader(ACCEPT, APPLICATION_JSON + + COMMA + SINGLE_WHITE_SPACE + APPLICATION_OCTET_STREAM)); + return requestHeaders; + } + + /** + * Get Rest Operation for API + * + * Filesystem - Create. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. 
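
For orientation, the filesystem-level calls assembled below all share this rough request shape; the Accept value mirrors what createDefaultHeaders() adds, the resource=filesystem query parameter is what createFilesystem() and its siblings append, and the account and filesystem names are placeholders (authorization headers omitted):

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class CreateFilesystemSketch {
      public static void main(String[] args) {
        // Filesystem - Create, approximately as assembled by createFilesystem() below.
        String requestLine = "PUT https://myaccount.dfs.core.windows.net/myfs?resource=filesystem";

        Map<String, String> headers = new LinkedHashMap<>();
        // Added by createDefaultHeaders(): DFS responses are JSON or a raw byte stream.
        headers.put("Accept", "application/json, application/octet-stream");
        // Shared key / OAuth / SAS authorization headers are omitted here.

        System.out.println(requestLine);
        headers.forEach((name, value) -> System.out.println(name + ": " + value));
      }
    }
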
+ */ + @Override + public AbfsRestOperation createFilesystem(TracingContext tracingContext) + throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = new AbfsUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM); + + final URL url = createRequestUrl(abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.CreateFileSystem, + HTTP_METHOD_PUT, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Filesystem - Set Properties. + * @param properties list of metadata key-value pairs. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation setFilesystemProperties(final Hashtable properties, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final String commaSeparatedProperties; + try { + commaSeparatedProperties = convertXmsPropertiesToCommaSeparatedString(properties); + } catch (CharacterCodingException ex) { + throw new InvalidAbfsRestOperationException(ex); + } + + final List requestHeaders = createDefaultHeaders(); + // JDK7 does not support PATCH, so to work around the issue we will use + // PUT and specify the real method in the X-Http-Method-Override header. + requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, + HTTP_METHOD_PATCH)); + requestHeaders.add(new AbfsHttpHeader(X_MS_PROPERTIES, commaSeparatedProperties)); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM); + + final URL url = createRequestUrl(abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.SetFileSystemProperties, + HTTP_METHOD_PUT, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Filesystem - Get Properties. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + * */ + @Override + public AbfsRestOperation getFilesystemProperties(TracingContext tracingContext) + throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM); + + final URL url = createRequestUrl(abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.GetFileSystemProperties, + HTTP_METHOD_HEAD, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Filesystem - Delete. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. 
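
setFilesystemProperties() above (and setPathProperties() further down) collapses the metadata table into a single x-ms-properties header value via convertXmsPropertiesToCommaSeparatedString(). A simplified approximation of that shape, comma-separated key=base64(value) pairs; the real helper also enforces an ASCII-compatible encoding, so treat this only as an illustration:

    import java.nio.charset.StandardCharsets;
    import java.util.Base64;
    import java.util.Hashtable;
    import java.util.StringJoiner;

    public class XmsPropertiesSketch {
      public static void main(String[] args) {
        Hashtable<String, String> properties = new Hashtable<>();
        properties.put("project", "analytics");
        properties.put("owner", "alice");

        // Approximate wire format: key=base64(value) pairs joined by commas.
        StringJoiner joined = new StringJoiner(",");
        properties.forEach((key, value) -> joined.add(
            key + "=" + Base64.getEncoder()
                .encodeToString(value.getBytes(StandardCharsets.UTF_8))));
        System.out.println("x-ms-properties: " + joined);
      }
    }
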
+ */ + @Override + public AbfsRestOperation deleteFilesystem(TracingContext tracingContext) + throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM); + + final URL url = createRequestUrl(abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.DeleteFileSystem, + HTTP_METHOD_DELETE, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Filesystem - List. + * List paths and their properties in the current filesystem. + * @param relativePath to return only blobs within this directory. + * @param recursive to return all blobs in the path, including those in subdirectories. + * @param listMaxResults maximum number of blobs to return. + * @param continuation marker to specify the continuation token. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation or response parsing fails. + */ + @Override + public AbfsRestOperation listPath(final String relativePath, + final boolean recursive, + final int listMaxResults, + final String continuation, + TracingContext tracingContext) throws IOException { + final List requestHeaders = createDefaultHeaders(); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_DIRECTORY, + getDirectoryQueryParameter(relativePath)); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_RECURSIVE, String.valueOf(recursive)); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_CONTINUATION, continuation); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_MAXRESULTS, + String.valueOf(listMaxResults)); + abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_UPN, + String.valueOf(getAbfsConfiguration().isUpnUsed())); + appendSASTokenToQuery(relativePath, SASTokenProvider.LIST_OPERATION, + abfsUriQueryBuilder); + + final URL url = createRequestUrl(abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.ListPaths, + HTTP_METHOD_GET, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Create. + * Create a path (file or directory) in the current filesystem. + * @param path to be created inside the filesystem. + * @param isFile to specify if the created path is file or directory. + * @param overwrite to specify if the path should be overwritten if it already exists. + * @param permissions to specify the permissions of the path. + * @param isAppendBlob to specify if the path to be created is an append blob. + * @param eTag to specify conditional headers. + * @param contextEncryptionAdapter to provide encryption context. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. 
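
listPath() above translates almost one-to-one into the List Paths query string; the parameter names below follow the HttpQueryParams constants it references, and the leading-slash handling matches getDirectoryQueryParameter() from the base client. Values are placeholders, and the directory value would be URL-encoded in practice:

    public class ListPathsQuerySketch {
      public static void main(String[] args) {
        String relativePath = "/data/2024";
        // getDirectoryQueryParameter(): strip a single leading slash.
        String directory = (!relativePath.isEmpty() && relativePath.charAt(0) == '/')
            ? relativePath.substring(1)
            : relativePath;

        String query = "?resource=filesystem"
            + "&directory=" + directory
            + "&recursive=" + true
            + "&maxResults=" + 500
            + "&upn=" + false;
        // A continuation token from a previous page would be appended as
        // "&continuation=<token>".
        System.out.println("GET https://myaccount.dfs.core.windows.net/myfs" + query);
      }
    }
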
+ */ + @Override + public AbfsRestOperation createPath(final String path, + final boolean isFile, + final boolean overwrite, + final AzureBlobFileSystemStore.Permissions permissions, + final boolean isAppendBlob, + final String eTag, + final ContextEncryptionAdapter contextEncryptionAdapter, + final TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + if (isFile) { + addEncryptionKeyRequestHeaders(path, requestHeaders, true, + contextEncryptionAdapter, tracingContext); + } + if (!overwrite) { + requestHeaders.add(new AbfsHttpHeader(IF_NONE_MATCH, STAR)); + } + + if (permissions.hasPermission()) { + requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_PERMISSIONS, + permissions.getPermission())); + } + + if (permissions.hasUmask()) { + requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_UMASK, + permissions.getUmask())); + } + + if (eTag != null && !eTag.isEmpty()) { + requestHeaders.add(new AbfsHttpHeader(IF_MATCH, eTag)); + } + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, isFile ? FILE : DIRECTORY); + if (isAppendBlob) { + abfsUriQueryBuilder.addQuery(QUERY_PARAM_BLOBTYPE, APPEND_BLOB_TYPE); + } + + String operation = isFile + ? SASTokenProvider.CREATE_FILE_OPERATION + : SASTokenProvider.CREATE_DIRECTORY_OPERATION; + appendSASTokenToQuery(path, operation, abfsUriQueryBuilder); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.CreatePath, + HTTP_METHOD_PUT, url, requestHeaders); + try { + op.execute(tracingContext); + } catch (AzureBlobFileSystemException ex) { + // If we have no HTTP response, throw the original exception. + if (!op.hasResult()) { + throw ex; + } + if (!isFile && op.getResult().getStatusCode() == HttpURLConnection.HTTP_CONFLICT) { + String existingResource = + op.getResult().getResponseHeader(X_MS_EXISTING_RESOURCE_TYPE); + if (existingResource != null && existingResource.equals(DIRECTORY)) { + return op; //don't throw ex on mkdirs for existing directory + } + } + throw ex; + } + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Lease. + * Acquire lease on specified path. + * @param path on which lease has to be acquired. + * @param duration for which lease has to be acquired. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation acquireLease(final String path, final int duration, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, ACQUIRE_LEASE_ACTION)); + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_DURATION, Integer.toString(duration))); + requestHeaders.add(new AbfsHttpHeader(X_MS_PROPOSED_LEASE_ID, + UUID.randomUUID().toString())); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.LeasePath, + HTTP_METHOD_POST, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Lease. 
+ * Renew lease on specified path. + * @param path on which lease has to be renewed. + * @param leaseId of the lease to be renewed. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation renewLease(final String path, final String leaseId, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, RENEW_LEASE_ACTION)); + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, leaseId)); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.LeasePath, + HTTP_METHOD_POST, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Lease. + * Release lease on specified path. + * @param path on which lease has to be released. + * @param leaseId of the lease to be released. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation releaseLease(final String path, final String leaseId, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, RELEASE_LEASE_ACTION)); + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, leaseId)); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.LeasePath, + HTTP_METHOD_POST, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Lease. + * Break lease on specified path. + * @param path on which lease has to be broke. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation breakLease(final String path, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, BREAK_LEASE_ACTION)); + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_BREAK_PERIOD, + DEFAULT_LEASE_BREAK_PERIOD)); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.LeasePath, + HTTP_METHOD_POST, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Create. + * @param source path to source file + * @param destination destination of rename. + * @param continuation continuation. + * @param tracingContext for tracing the server calls. + * @param sourceEtag etag of source file. 
may be null or empty + * @param isMetadataIncompleteState was there a rename failure due to incomplete metadata state? + * @param isNamespaceEnabled whether namespace enabled account or not + * @return executed rest operation containing response from server. + * @throws IOException if rest operation fails. + */ + @Override + public AbfsClientRenameResult renamePath( + final String source, + final String destination, + final String continuation, + final TracingContext tracingContext, + String sourceEtag, + boolean isMetadataIncompleteState, + boolean isNamespaceEnabled) throws IOException { + final List requestHeaders = createDefaultHeaders(); + + final boolean hasEtag = !isEmpty(sourceEtag); + + boolean shouldAttemptRecovery = isRenameResilience() && isNamespaceEnabled; + if (!hasEtag && shouldAttemptRecovery) { + // in case eTag is already not supplied to the API + // and rename resilience is expected and it is an HNS enabled account + // fetch the source etag to be used later in recovery + try { + final AbfsRestOperation srcStatusOp = getPathStatus(source, + false, tracingContext, null); + if (srcStatusOp.hasResult()) { + final AbfsHttpOperation result = srcStatusOp.getResult(); + sourceEtag = extractEtagHeader(result); + // and update the directory status. + boolean isDir = checkIsDir(result); + shouldAttemptRecovery = !isDir; + LOG.debug( + "Retrieved etag of source for rename recovery: {}; isDir={}", + sourceEtag, isDir); + } + } catch (AbfsRestOperationException e) { + throw new AbfsRestOperationException(e.getStatusCode(), + SOURCE_PATH_NOT_FOUND.getErrorCode(), + e.getMessage(), e); + } + + } + + String encodedRenameSource = urlEncode( + FORWARD_SLASH + this.getFileSystem() + source); + if (getAuthType() == AuthType.SAS) { + final AbfsUriQueryBuilder srcQueryBuilder = new AbfsUriQueryBuilder(); + appendSASTokenToQuery(source, SASTokenProvider.RENAME_SOURCE_OPERATION, + srcQueryBuilder); + encodedRenameSource += srcQueryBuilder.toString(); + } + + LOG.trace("Rename source queryparam added {}", encodedRenameSource); + requestHeaders.add(new AbfsHttpHeader(X_MS_RENAME_SOURCE, encodedRenameSource)); + requestHeaders.add(new AbfsHttpHeader(IF_NONE_MATCH, STAR)); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_CONTINUATION, continuation); + appendSASTokenToQuery(destination, + SASTokenProvider.RENAME_DESTINATION_OPERATION, abfsUriQueryBuilder); + + final URL url = createRequestUrl(destination, + abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = createRenameRestOperation(url, requestHeaders); + try { + incrementAbfsRenamePath(); + op.execute(tracingContext); + // AbfsClientResult contains the AbfsOperation, If recovery happened or + // not, and the incompleteMetaDataState is true or false. + // If we successfully rename a path and isMetadataIncompleteState was + // true, then rename was recovered, else it didn't, this is why + // isMetadataIncompleteState is used for renameRecovery(as the 2nd param). + return new AbfsClientRenameResult(op, isMetadataIncompleteState, + isMetadataIncompleteState); + } catch (AzureBlobFileSystemException e) { + // If we have no HTTP response, throw the original exception. + if (!op.hasResult()) { + throw e; + } + + // ref: HADOOP-18242. Rename failure occurring due to a rare case of + // tracking metadata being in incomplete state. 
+ if (op.getResult().getStorageErrorCode() + .equals(RENAME_DESTINATION_PARENT_PATH_NOT_FOUND.getErrorCode()) + && !isMetadataIncompleteState) { + //Logging + ABFS_METADATA_INCOMPLETE_RENAME_FAILURE + .info( + "Rename Failure attempting to resolve tracking metadata state and retrying."); + // rename recovery should be attempted in this case also + shouldAttemptRecovery = true; + isMetadataIncompleteState = true; + String sourceEtagAfterFailure = sourceEtag; + if (isEmpty(sourceEtagAfterFailure)) { + // Doing a HEAD call resolves the incomplete metadata state and + // then we can retry the rename operation. + AbfsRestOperation sourceStatusOp = getPathStatus(source, false, + tracingContext, null); + isMetadataIncompleteState = true; + // Extract the sourceEtag, using the status Op, and set it + // for future rename recovery. + AbfsHttpOperation sourceStatusResult = sourceStatusOp.getResult(); + sourceEtagAfterFailure = extractEtagHeader(sourceStatusResult); + } + renamePath(source, destination, continuation, tracingContext, + sourceEtagAfterFailure, isMetadataIncompleteState, + isNamespaceEnabled); + } + // if we get out of the condition without a successful rename, then + // it isn't metadata incomplete state issue. + isMetadataIncompleteState = false; + + // setting default rename recovery success to false + boolean etagCheckSucceeded = false; + if (shouldAttemptRecovery) { + etagCheckSucceeded = renameIdempotencyCheckOp( + source, + sourceEtag, op, destination, tracingContext); + } + if (!etagCheckSucceeded) { + // idempotency did not return different result + // throw back the exception + throw e; + } + return new AbfsClientRenameResult(op, true, isMetadataIncompleteState); + } + } + + /** + * Get Rest Operation for API + * + * Path - Update. + * Uploads data to be appended to a file. + * @param path to which data has to be appended. + * @param buffer containing data to be appended. + * @param reqParams containing parameters for append operation like offset, length etc. + * @param cachedSasToken to be used for the authenticating operation. + * @param contextEncryptionAdapter to provide encryption context. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation append(final String path, + final byte[] buffer, + AppendRequestParameters reqParams, + final String cachedSasToken, + ContextEncryptionAdapter contextEncryptionAdapter, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + addEncryptionKeyRequestHeaders(path, requestHeaders, false, + contextEncryptionAdapter, tracingContext); + if (reqParams.isExpectHeaderEnabled()) { + requestHeaders.add(new AbfsHttpHeader(EXPECT, HUNDRED_CONTINUE)); + } + // JDK7 does not support PATCH, so to workaround the issue we will use + // PUT and specify the real method in the X-Http-Method-Override header. 
+ requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, HTTP_METHOD_PATCH)); + if (reqParams.getLeaseId() != null) { + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, reqParams.getLeaseId())); + } + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, APPEND_ACTION); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, + Long.toString(reqParams.getPosition())); + + if ((reqParams.getMode() == AppendRequestParameters.Mode.FLUSH_MODE) || ( + reqParams.getMode() == AppendRequestParameters.Mode.FLUSH_CLOSE_MODE)) { + abfsUriQueryBuilder.addQuery(QUERY_PARAM_FLUSH, TRUE); + if (reqParams.getMode() == AppendRequestParameters.Mode.FLUSH_CLOSE_MODE) { + abfsUriQueryBuilder.addQuery(QUERY_PARAM_CLOSE, TRUE); + } + } + + // Check if the retry is with "Expect: 100-continue" header being present in the previous request. + if (reqParams.isRetryDueToExpect()) { + String userAgentRetry = getUserAgent(); + // Remove the specific marker related to "Expect: 100-continue" from the User-Agent string. + userAgentRetry = userAgentRetry.replace(HUNDRED_CONTINUE_USER_AGENT, EMPTY_STRING); + requestHeaders.removeIf(header -> header.getName().equalsIgnoreCase(USER_AGENT)); + requestHeaders.add(new AbfsHttpHeader(USER_AGENT, userAgentRetry)); + } + + // Add MD5 Hash of request content as request header if feature is enabled + if (isChecksumValidationEnabled()) { + addCheckSumHeaderForWrite(requestHeaders, reqParams, buffer); + } + + // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance + String sasTokenForReuse = appendSASTokenToQuery(path, + SASTokenProvider.WRITE_OPERATION, + abfsUriQueryBuilder, cachedSasToken); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.Append, + HTTP_METHOD_PUT, url, requestHeaders, + buffer, reqParams.getoffset(), reqParams.getLength(), + sasTokenForReuse); + try { + op.execute(tracingContext); + } catch (AbfsRestOperationException e) { + /* + If the http response code indicates a user error we retry + the same append request with expect header being disabled. + When "100-continue" header is enabled but a non Http 100 response comes, + the response message might not get set correctly by the server. + So, this handling is to avoid breaking of backward compatibility + if someone has taken dependency on the exception message, + which is created using the error string present in the response header. + */ + int responseStatusCode = e.getStatusCode(); + if (checkUserError(responseStatusCode) + && reqParams.isExpectHeaderEnabled()) { + LOG.debug( + "User error, retrying without 100 continue enabled for the given path {}", + path); + reqParams.setExpectHeaderEnabled(false); + reqParams.setRetryDueToExpect(true); + return this.append(path, buffer, reqParams, cachedSasToken, + contextEncryptionAdapter, tracingContext); + } + // If we have no HTTP response, throw the original exception. 
+ if (!op.hasResult()) { + throw e; + } + + if (isMd5ChecksumError(e)) { + throw new AbfsInvalidChecksumException(e); + } + + if (reqParams.isAppendBlob() + && appendSuccessCheckOp(op, path, + (reqParams.getPosition() + reqParams.getLength()), tracingContext)) { + final AbfsRestOperation successOp = getAbfsRestOperation( + AbfsRestOperationType.Append, + HTTP_METHOD_PUT, url, requestHeaders, + buffer, reqParams.getoffset(), reqParams.getLength(), + sasTokenForReuse); + successOp.hardSetResult(HttpURLConnection.HTTP_OK); + return successOp; + } + throw e; + } catch (AzureBlobFileSystemException e) { + // Any server side issue will be returned as AbfsRestOperationException and will be handled above. + LOG.debug( + "Append request failed with non server issues for path: {}, offset: {}, position: {}", + path, reqParams.getoffset(), reqParams.getPosition()); + throw e; + } + + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Update. + * Flush previously uploaded data to a file. + * @param path on which data has to be flushed. + * @param position to which data has to be flushed. + * @param retainUncommittedData whether to retain uncommitted data after flush. + * @param isClose specify if this is the last flush to the file. + * @param cachedSasToken to be used for the authenticating operation. + * @param leaseId if there is an active lease on the path. + * @param contextEncryptionAdapter to provide encryption context. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation flush(final String path, + final long position, + boolean retainUncommittedData, + boolean isClose, + final String cachedSasToken, + final String leaseId, + ContextEncryptionAdapter contextEncryptionAdapter, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + addEncryptionKeyRequestHeaders(path, requestHeaders, false, + contextEncryptionAdapter, tracingContext); + // JDK7 does not support PATCH, so to workaround the issue we will use + // PUT and specify the real method in the X-Http-Method-Override header. 
+ requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, HTTP_METHOD_PATCH)); + if (leaseId != null) { + requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, leaseId)); + } + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, FLUSH_ACTION); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_POSITION, Long.toString(position)); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_RETAIN_UNCOMMITTED_DATA, + String.valueOf(retainUncommittedData)); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_CLOSE, String.valueOf(isClose)); + // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance + String sasTokenForReuse = appendSASTokenToQuery(path, + SASTokenProvider.WRITE_OPERATION, + abfsUriQueryBuilder, cachedSasToken); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.Flush, + HTTP_METHOD_PUT, url, requestHeaders, + sasTokenForReuse); + op.execute(tracingContext); + return op; + } + + @Override + public AbfsRestOperation flush(byte[] buffer, + final String path, + boolean isClose, + final String cachedSasToken, + final String leaseId, + final String eTag, + final TracingContext tracingContext) throws AzureBlobFileSystemException { + throw new UnsupportedOperationException( + "Flush with blockIds not supported on DFS Endpoint"); + } + + /** + * Get Rest Operation for API + * + * Path - Update. + * Set the properties of a file or directory. + * @param path on which properties have to be set. + * @param properties list of metadata key-value pairs. + * @param tracingContext for tracing the server calls. + * @param contextEncryptionAdapter to provide encryption context. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation setPathProperties(final String path, + final Hashtable properties, + final TracingContext tracingContext, + final ContextEncryptionAdapter contextEncryptionAdapter) + throws AzureBlobFileSystemException { + final String commaSeparatedProperties; + try { + commaSeparatedProperties = convertXmsPropertiesToCommaSeparatedString(properties); + } catch (CharacterCodingException ex) { + throw new InvalidAbfsRestOperationException(ex); + } + + final List requestHeaders = createDefaultHeaders(); + addEncryptionKeyRequestHeaders(path, requestHeaders, false, + contextEncryptionAdapter, tracingContext); + // JDK7 does not support PATCH, so to workaround the issue we will use + // PUT and specify the real method in the X-Http-Method-Override header. + requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, HTTP_METHOD_PATCH)); + requestHeaders.add(new AbfsHttpHeader(X_MS_PROPERTIES, commaSeparatedProperties)); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, SET_PROPERTIES_ACTION); + appendSASTokenToQuery(path, SASTokenProvider.SET_PROPERTIES_OPERATION, + abfsUriQueryBuilder); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.SetPathProperties, + HTTP_METHOD_PUT, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Get Properties. + * Get the properties of a file or directory. 
+ * @param path of which properties have to be fetched. + * @param includeProperties to include user defined properties. + * @param tracingContext for tracing the server calls. + * @param contextEncryptionAdapter to provide encryption context. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation getPathStatus(final String path, + final boolean includeProperties, + final TracingContext tracingContext, + final ContextEncryptionAdapter contextEncryptionAdapter) + throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + String operation = SASTokenProvider.GET_PROPERTIES_OPERATION; + if (!includeProperties) { + // The default action (operation) is implicitly to get properties and this action requires read permission + // because it reads user defined properties. If the action is getStatus or getAclStatus, then + // only traversal (execute) permission is required. + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, GET_STATUS); + operation = SASTokenProvider.GET_STATUS_OPERATION; + } else { + addEncryptionKeyRequestHeaders(path, requestHeaders, false, + contextEncryptionAdapter, tracingContext); + } + abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_UPN, + String.valueOf(getAbfsConfiguration().isUpnUsed())); + appendSASTokenToQuery(path, operation, abfsUriQueryBuilder); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.GetPathStatus, + HTTP_METHOD_HEAD, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Read. + * Read the contents of the file at specified path + * @param path of the file to be read. + * @param position in the file from where data has to be read. + * @param buffer to store the data read. + * @param bufferOffset offset in the buffer to start storing the data. + * @param bufferLength length of data to be read. + * @param eTag to specify conditional headers. + * @param cachedSasToken to be used for the authenticating operation. + * @param contextEncryptionAdapter to provide encryption context. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation read(final String path, + final long position, + final byte[] buffer, + final int bufferOffset, + final int bufferLength, + final String eTag, + String cachedSasToken, + ContextEncryptionAdapter contextEncryptionAdapter, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + addEncryptionKeyRequestHeaders(path, requestHeaders, false, + contextEncryptionAdapter, tracingContext); + AbfsHttpHeader rangeHeader = new AbfsHttpHeader(RANGE, + String.format("bytes=%d-%d", position, position + bufferLength - 1)); + requestHeaders.add(rangeHeader); + requestHeaders.add(new AbfsHttpHeader(IF_MATCH, eTag)); + + // Add request header to fetch MD5 Hash of data returned by server. 
+ if (isChecksumValidationEnabled(requestHeaders, rangeHeader, bufferLength)) { + requestHeaders.add(new AbfsHttpHeader(X_MS_RANGE_GET_CONTENT_MD5, TRUE)); + } + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + // AbfsInputStream/AbfsOutputStream reuse SAS tokens for better performance + String sasTokenForReuse = appendSASTokenToQuery(path, + SASTokenProvider.READ_OPERATION, + abfsUriQueryBuilder, cachedSasToken); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.ReadFile, + HTTP_METHOD_GET, url, requestHeaders, + buffer, bufferOffset, bufferLength, + sasTokenForReuse); + op.execute(tracingContext); + + // Verify the MD5 hash returned by server holds valid on the data received. + if (isChecksumValidationEnabled(requestHeaders, rangeHeader, bufferLength)) { + verifyCheckSumForRead(buffer, op.getResult(), bufferOffset); + } + + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Delete. + * Delete the file or directory at specified path. + * @param path to be deleted. + * @param recursive if the path is a directory, delete recursively. + * @param continuation to specify continuation token. + * @param tracingContext for tracing the server calls. + * @param isNamespaceEnabled specify if the namespace is enabled. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation deletePath(final String path, + final boolean recursive, + final String continuation, + TracingContext tracingContext, + final boolean isNamespaceEnabled) throws AzureBlobFileSystemException { + /* + * If Pagination is enabled and current API version is old, + * use the minimum required version for pagination. + * If Pagination is enabled and current API version is later than minimum required + * version for pagination, use current version only as azure service is backward compatible. + * If pagination is disabled, use the current API version only. + */ + final List requestHeaders = (isPaginatedDelete(recursive, + isNamespaceEnabled) && getxMsVersion().compareTo( + ApiVersion.AUG_03_2023) < 0) + ? createDefaultHeaders(ApiVersion.AUG_03_2023) + : createDefaultHeaders(); + final AbfsUriQueryBuilder abfsUriQueryBuilder + = createDefaultUriQueryBuilder(); + + if (isPaginatedDelete(recursive, isNamespaceEnabled)) { + // Add paginated query parameter + abfsUriQueryBuilder.addQuery(QUERY_PARAM_PAGINATED, TRUE); + } + + abfsUriQueryBuilder.addQuery(QUERY_PARAM_RECURSIVE, + String.valueOf(recursive)); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_CONTINUATION, continuation); + String operation = recursive + ? SASTokenProvider.DELETE_RECURSIVE_OPERATION + : SASTokenProvider.DELETE_OPERATION; + appendSASTokenToQuery(path, operation, abfsUriQueryBuilder); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = new AbfsRestOperation( + AbfsRestOperationType.DeletePath, this, + HTTP_METHOD_DELETE, url, requestHeaders, getAbfsConfiguration()); + try { + op.execute(tracingContext); + } catch (AzureBlobFileSystemException e) { + // If we have no HTTP response, throw the original exception. 
+ if (!op.hasResult()) { + throw e; + } + final AbfsRestOperation idempotencyOp = deleteIdempotencyCheckOp(op); + if (idempotencyOp.getResult().getStatusCode() + == op.getResult().getStatusCode()) { + // idempotency did not return different result + // throw back the exception + throw e; + } else { + return idempotencyOp; + } + } + + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Update. + * @param path on which owner has to be set. + * @param owner to be set. + * @param group to be set. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation setOwner(final String path, + final String owner, + final String group, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + // JDK7 does not support PATCH, so to workaround the issue we will use + // PUT and specify the real method in the X-Http-Method-Override header. + requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, HTTP_METHOD_PATCH)); + if (owner != null && !owner.isEmpty()) { + requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_OWNER, owner)); + } + if (group != null && !group.isEmpty()) { + requestHeaders.add(new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_GROUP, group)); + } + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, SET_ACCESS_CONTROL); + appendSASTokenToQuery(path, SASTokenProvider.SET_OWNER_OPERATION, + abfsUriQueryBuilder); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.SetOwner, + HTTP_METHOD_PUT, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Update. + * @param path on which permission has to be set. + * @param permission to be set. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation setPermission(final String path, + final String permission, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + // JDK7 does not support PATCH, so to workaround the issue we will use + // PUT and specify the real method in the X-Http-Method-Override header. + requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, HTTP_METHOD_PATCH)); + requestHeaders.add(new AbfsHttpHeader( + HttpHeaderConfigurations.X_MS_PERMISSIONS, permission)); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, SET_ACCESS_CONTROL); + appendSASTokenToQuery(path, SASTokenProvider.SET_PERMISSION_OPERATION, + abfsUriQueryBuilder); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.SetPermissions, + HTTP_METHOD_PUT, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Update. + * @param path on which ACL has to be set. + * @param aclSpecString to be set. + * @param eTag to specify conditional headers. 
Set only if etag matches. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation setAcl(final String path, + final String aclSpecString, + final String eTag, + TracingContext tracingContext) + throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + // JDK7 does not support PATCH, so to workaround the issue we will use + // PUT and specify the real method in the X-Http-Method-Override header. + requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE, + HTTP_METHOD_PATCH)); + requestHeaders.add( + new AbfsHttpHeader(HttpHeaderConfigurations.X_MS_ACL, aclSpecString)); + if (eTag != null && !eTag.isEmpty()) { + requestHeaders.add( + new AbfsHttpHeader(IF_MATCH, eTag)); + } + + final AbfsUriQueryBuilder abfsUriQueryBuilder + = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, + SET_ACCESS_CONTROL); + appendSASTokenToQuery(path, SASTokenProvider.SET_ACL_OPERATION, + abfsUriQueryBuilder); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.SetAcl, + HTTP_METHOD_PUT, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Get Properties. + * Retrieves the ACL properties of blob at specified path. + * @param path of which properties have to be fetched. + * @param useUPN whether to use UPN with rest operation. + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. + */ + @Override + public AbfsRestOperation getAclStatus(final String path, + final boolean useUPN, + TracingContext tracingContext) throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + + final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, GET_ACCESS_CONTROL); + abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_UPN, + String.valueOf(useUPN)); + appendSASTokenToQuery(path, SASTokenProvider.GET_ACL_OPERATION, + abfsUriQueryBuilder); + + final URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + final AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.GetAcl, + HTTP_METHOD_HEAD, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Get Rest Operation for API + * + * Path - Get Properties. + * @param path Path for which access check needs to be performed + * @param rwx The permission to be checked on the path + * @param tracingContext for tracing the server calls. + * @return executed rest operation containing response from server. + * @throws AzureBlobFileSystemException if rest operation fails. 
+ */ + @Override + public AbfsRestOperation checkAccess(String path, + String rwx, + TracingContext tracingContext) + throws AzureBlobFileSystemException { + final List requestHeaders = createDefaultHeaders(); + + AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder(); + abfsUriQueryBuilder.addQuery(QUERY_PARAM_ACTION, CHECK_ACCESS); + abfsUriQueryBuilder.addQuery(QUERY_FS_ACTION, rwx); + appendSASTokenToQuery(path, SASTokenProvider.CHECK_ACCESS_OPERATION, + abfsUriQueryBuilder); + + URL url = createRequestUrl(path, abfsUriQueryBuilder.toString()); + AbfsRestOperation op = getAbfsRestOperation( + AbfsRestOperationType.CheckAccess, + HTTP_METHOD_HEAD, url, requestHeaders); + op.execute(tracingContext); + return op; + } + + /** + * Checks if the rest operation results indicate if the path is a directory. + * @param result executed rest operation containing response from server. + * @return True if the path is a directory, False otherwise. + */ + @Override + public boolean checkIsDir(AbfsHttpOperation result) { + String resourceType = result.getResponseHeader( + HttpHeaderConfigurations.X_MS_RESOURCE_TYPE); + return StringUtils.equalsIgnoreCase(resourceType, DIRECTORY); + } + + /** + * Returns true if the status code lies in the range of user error. + * @param responseStatusCode http response status code. + * @return True or False. + */ + @Override + public boolean checkUserError(int responseStatusCode) { + return (responseStatusCode >= HttpURLConnection.HTTP_BAD_REQUEST + && responseStatusCode < HttpURLConnection.HTTP_INTERNAL_ERROR); + } + + private String convertXmsPropertiesToCommaSeparatedString(final Map properties) throws CharacterCodingException { + StringBuilder commaSeparatedProperties = new StringBuilder(); + + final CharsetEncoder encoder = Charset.forName(XMS_PROPERTIES_ENCODING_ASCII).newEncoder(); + + for (Map.Entry propertyEntry : properties.entrySet()) { + String key = propertyEntry.getKey(); + String value = propertyEntry.getValue(); + + Boolean canEncodeValue = encoder.canEncode(value); + if (!canEncodeValue) { + throw new CharacterCodingException(); + } + + String encodedPropertyValue = Base64.encode(encoder.encode(CharBuffer.wrap(value)).array()); + commaSeparatedProperties.append(key) + .append(AbfsHttpConstants.EQUAL) + .append(encodedPropertyValue); + + commaSeparatedProperties.append(AbfsHttpConstants.COMMA); + } + + if (commaSeparatedProperties.length() != 0) { + commaSeparatedProperties.deleteCharAt(commaSeparatedProperties.length() - 1); + } + + return commaSeparatedProperties.toString(); + } +} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java index e27d54b443ca2..c769186692b07 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/utils/UriUtils.java @@ -19,6 +19,7 @@ package org.apache.hadoop.fs.azurebfs.utils; import java.io.UnsupportedEncodingException; +import java.net.MalformedURLException; import java.net.URL; import java.net.URLEncoder; import java.nio.charset.StandardCharsets; @@ -30,11 +31,14 @@ import java.util.regex.Pattern; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidUriException; import org.apache.http.NameValuePair; import org.apache.http.client.utils.URLEncodedUtils; import static 
org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.AND_MARK;
 import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EQUAL;
+import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_BLOB_DOMAIN_NAME;
+import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_DFS_DOMAIN_NAME;
 import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SAOID;
 import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SIGNATURE;
 import static org.apache.hadoop.fs.azurebfs.constants.HttpQueryParams.QUERY_PARAM_SKOID;
@@ -169,6 +173,38 @@ public static String getMaskedUrl(URL url) {
     return url.toString().replace(queryString, maskedQueryString);
   }

+  /**
+   * Changes Blob Endpoint URL to DFS Endpoint URL.
+   * If the original URL is not a Blob Endpoint URL, it will return the original URL.
+   * @param url to be converted.
+   * @return updated URL.
+   * @throws InvalidUriException in case of MalformedURLException.
+   */
+  public static URL changeUrlFromBlobToDfs(URL url) throws InvalidUriException {
+    try {
+      url = new URL(url.toString().replace(ABFS_BLOB_DOMAIN_NAME, ABFS_DFS_DOMAIN_NAME));
+    } catch (MalformedURLException ex) {
+      throw new InvalidUriException(url.toString());
+    }
+    return url;
+  }
+
+  /**
+   * Changes DFS Endpoint URL to Blob Endpoint URL.
+   * If the original URL is not a DFS Endpoint URL, it will return the original URL.
+   * @param url to be converted.
+   * @return updated URL.
+   * @throws InvalidUriException in case of MalformedURLException.
+   */
+  public static URL changeUrlFromDfsToBlob(URL url) throws InvalidUriException {
+    try {
+      url = new URL(url.toString().replace(ABFS_DFS_DOMAIN_NAME, ABFS_BLOB_DOMAIN_NAME));
+    } catch (MalformedURLException ex) {
+      throw new InvalidUriException(url.toString());
+    }
+    return url;
+  }
+
   private UriUtils() {
   }
 }
diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/fns_blob.md b/hadoop-tools/hadoop-azure/src/site/markdown/fns_blob.md
new file mode 100644
index 0000000000000..f93593cecfb5b
--- /dev/null
+++ b/hadoop-tools/hadoop-azure/src/site/markdown/fns_blob.md
@@ -0,0 +1,82 @@
+
+
+# ABFS Driver for Namespace Disabled Accounts (FNS: Flat Namespace)
+
+### Note: FNS-BLOB support is still being built and is not yet ready for use.
+
+## Background
+The ABFS driver is recommended for use only with HNS Enabled ADLS Gen-2 accounts
+for big data analytics, as these are more performant and scalable.
+
+However, to enable users of the legacy WASB driver to migrate to the ABFS driver
+without requiring them to upgrade their general purpose V2 (HNS-Disabled) accounts,
+support for FNS accounts is being added to the ABFS driver.
+Refer to [WASB Deprecation](./wasb.html) for more details.
+
+## Azure Service Endpoints Used by ABFS Driver
+Azure offers two sets of endpoints for interacting with storage accounts:
+1. [Azure Blob Storage](https://learn.microsoft.com/en-us/rest/api/storageservices/blob-service-rest-api), referred to as the Blob Endpoint
+2. [Azure Data Lake Storage](https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/operation-groups), referred to as the DFS Endpoint
+
+By default, the ABFS Driver is designed to work with the DFS Endpoint only, which
+primarily supports HNS Enabled Accounts.
+
+To enable the ABFS Driver to work with FNS Accounts, support for the Blob Endpoint
+is being added, because Azure services do not recommend using the DFS Endpoint for
+FNS Accounts. The ABFS Driver will only allow FNS Accounts to be accessed using the
+Blob Endpoint.
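A minimal sketch of how the two endpoint host names relate, using the `UriUtils.changeUrlFromDfsToBlob()` and `changeUrlFromBlobToDfs()` helpers added in this patch. This is illustrative only and not part of the patch itself; the account name `myaccount` and the container path are placeholders.

```java
import java.net.URL;

import org.apache.hadoop.fs.azurebfs.utils.UriUtils;

public class EndpointSwitchSketch {
  public static void main(String[] args) throws Exception {
    // Illustrative DFS endpoint URL of the form <account>.dfs.core.windows.net
    URL dfsUrl = new URL("https://myaccount.dfs.core.windows.net/mycontainer/dir/file");

    // Same account addressed through the Blob endpoint, <account>.blob.core.windows.net
    URL blobUrl = UriUtils.changeUrlFromDfsToBlob(dfsUrl);

    // Converting back is expected to yield the DFS form again; per the javadoc,
    // a URL that does not match the source endpoint is returned unchanged.
    URL dfsAgain = UriUtils.changeUrlFromBlobToDfs(blobUrl);

    System.out.println(blobUrl);   // expected: https://myaccount.blob.core.windows.net/mycontainer/dir/file
    System.out.println(dfsAgain);  // expected: https://myaccount.dfs.core.windows.net/mycontainer/dir/file
  }
}
```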
+HNS Enabled accounts will still use the DFS Endpoint, which continues to be the
+recommended stack based on performance and feature capabilities.
+
+## Configuring ABFS Driver for FNS Accounts
+The following configurations will be introduced to configure the ABFS Driver for FNS Accounts:
+1. Account Type: Must be set to `false` to indicate an FNS Account.
+   ```xml
+   <property>
+     <name>fs.azure.account.hns.enabled</name>
+     <value>false</value>
+   </property>
+   ```
+
+2. Account URL: The URL used to initialize the file system. It is either passed
+directly to the file system or configured as the default URI using the `fs.defaultFS`
+configuration. In both cases the URL used must be the Blob Endpoint URL of the account.
+   ```xml
+   <property>
+     <name>fs.defaultFS</name>
+     <value>https://ACCOUNT_NAME.blob.core.windows.net</value>
+   </property>
+   ```
+3. Service Type for FNS Accounts: This allows an override to choose the service
+type, especially in cases where a local DNS resolution is set for the account and the
+driver is unable to detect the intended endpoint from the URL configured above. If this
+is set to blob for HNS Enabled Accounts, FS init will fail with an InvalidConfiguration error.
+   ```xml
+   <property>
+     <name>fs.azure.fns.account.service.type</name>
+     <value>BLOB</value>
+   </property>
+   ```
+
+4. Service Type for Ingress Operations: This allows an override to choose the service
+type only for ingress-related operations such as [Create](https://learn.microsoft.com/en-us/rest/api/storageservices/put-blob?tabs=microsoft-entra-id),
+[Append](https://learn.microsoft.com/en-us/rest/api/storageservices/put-block?tabs=microsoft-entra-id)
+and [Flush](https://learn.microsoft.com/en-us/rest/api/storageservices/put-block-list?tabs=microsoft-entra-id). All other operations will still use the
+configured service type.
+   ```xml
+   <property>
+     <name>fs.azure.fns.account.service.type</name>
+     <value>BLOB</value>
+   </property>
+   ```
\ No newline at end of file
diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/index.md b/hadoop-tools/hadoop-azure/src/site/markdown/index.md
index 177ab282c112b..143cba8a7aae1 100644
--- a/hadoop-tools/hadoop-azure/src/site/markdown/index.md
+++ b/hadoop-tools/hadoop-azure/src/site/markdown/index.md
@@ -20,6 +20,7 @@ See also:
 * [WASB](./wasb.html)
 * [ABFS](./abfs.html)
+* [Namespace Disabled Accounts on ABFS](./fns_blob.html)
 * [Testing](./testing_azure.html)

 ## Introduction
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java
index 0951ed9a0303b..b121fb9420185 100644
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java
+++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsCustomEncryption.java
@@ -339,8 +339,7 @@ private AbfsRestOperation callOperation(AzureBlobFileSystem fs,
       case SET_ATTR:
         Hashtable properties = new Hashtable<>();
         properties.put("key", "{ value: valueTest }");
-        return client.setPathProperties(path, fs.getAbfsStore()
-            .convertXmsPropertiesToCommaSeparatedString(properties),
+        return client.setPathProperties(path, properties,
             getTestTracingContext(fs, false),
             createEncryptionAdapterFromServerStoreContext(path,
                 getTestTracingContext(fs, false), client));
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCheckAccess.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCheckAccess.java
index e185ab2e75e53..71c77ce82c8e2 100644
---
a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCheckAccess.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemCheckAccess.java @@ -22,6 +22,7 @@ import java.lang.reflect.Field; import java.util.List; +import org.apache.hadoop.fs.azurebfs.enums.Trilean; import org.apache.hadoop.util.Lists; import org.junit.Assume; import org.junit.Test; @@ -97,8 +98,12 @@ private void setTestUserFs() throws Exception { + getAccountName(), ClientCredsTokenProvider.class.getName()); conf.setBoolean(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, false); - conf.unset(FS_AZURE_ACCOUNT_IS_HNS_ENABLED); + // Since FS init now needs to know account type setting it before init to avoid that. + conf.setBoolean(FS_AZURE_ACCOUNT_IS_HNS_ENABLED, isHNSEnabled); this.testUserFs = FileSystem.newInstance(conf); + // Resetting the namespace enabled flag to unknown after file system init. + ((AzureBlobFileSystem) testUserFs).getAbfsStore().setNamespaceEnabled( + Trilean.UNKNOWN); } private void setTestFsConf(final String fsConfKey, @@ -306,11 +311,11 @@ public void testFsActionALL() throws Exception { } private void checkPrerequisites() throws Exception { - setTestUserFs(); Assume.assumeTrue(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT + " is false", isHNSEnabled); Assume.assumeTrue(FS_AZURE_ENABLE_CHECK_ACCESS + " is false", isCheckAccessEnabled); + setTestUserFs(); checkIfConfigIsSet(FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_ID); checkIfConfigIsSet(FS_AZURE_BLOB_FS_CHECKACCESS_TEST_CLIENT_SECRET); checkIfConfigIsSet(FS_AZURE_BLOB_FS_CHECKACCESS_TEST_USER_GUID); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemInitAndCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemInitAndCreate.java index 1ff3458fdbaac..44b1685a3f3f2 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemInitAndCreate.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemInitAndCreate.java @@ -20,19 +20,32 @@ import java.io.FileNotFoundException; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; + import org.junit.Test; import org.mockito.Mockito; import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AzureBlobFileSystemException; +import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.TrileanConversionException; import org.apache.hadoop.fs.azurebfs.enums.Trilean; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.fs.azurebfs.utils.TracingContext; +import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; +import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_IS_HNS_ENABLED; +import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_BLOB_DOMAIN_NAME; +import static 
org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_DFS_DOMAIN_NAME; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.mockito.ArgumentMatchers.any; + /** * Test filesystem initialization and creation. */ @@ -73,11 +86,11 @@ public void testGetAclCallOnHnsConfigAbsence() throws Exception { TracingContext tracingContext = getSampleTracingContext(fs, true); Mockito.doReturn(Mockito.mock(AbfsRestOperation.class)) .when(client) - .getAclStatus(Mockito.anyString(), Mockito.any(TracingContext.class)); + .getAclStatus(Mockito.anyString(), any(TracingContext.class)); store.getIsNamespaceEnabled(tracingContext); Mockito.verify(client, Mockito.times(1)) - .getAclStatus(Mockito.anyString(), Mockito.any(TracingContext.class)); + .getAclStatus(Mockito.anyString(), any(TracingContext.class)); } @Test @@ -96,6 +109,31 @@ public void testNoGetAclCallOnHnsConfigPresence() throws Exception { store.getIsNamespaceEnabled(tracingContext); Mockito.verify(client, Mockito.times(0)) - .getAclStatus(Mockito.anyString(), Mockito.any(TracingContext.class)); + .getAclStatus(Mockito.anyString(), any(TracingContext.class)); + } + + // Todo: [FnsOverBlob] Remove this test case once Blob Endpoint Support is ready and enabled. + @Test + public void testFileSystemInitFailsWithBlobEndpoitUrl() throws Exception { + Configuration configuration = getRawConfiguration(); + String defaultUri = configuration.get(FS_DEFAULT_NAME_KEY); + String blobUri = defaultUri.replace(ABFS_DFS_DOMAIN_NAME, ABFS_BLOB_DOMAIN_NAME); + intercept(InvalidConfigurationValueException.class, + "Blob Endpoint Support not yet available", () -> + FileSystem.newInstance(new Path(blobUri).toUri(), configuration)); + } + + @Test + public void testFileSystemInitFailsIfNotAbleToDetermineAccountType() throws Exception { + AzureBlobFileSystem fs = ((AzureBlobFileSystem) FileSystem.newInstance( + getRawConfiguration())); + AzureBlobFileSystem mockedFs = Mockito.spy(fs); + Mockito.doThrow( + new AbfsRestOperationException(HTTP_UNAVAILABLE, "Throttled", + "Throttled", null)).when(mockedFs).getIsNamespaceEnabled(any()); + + intercept(AzureBlobFileSystemException.class, + FS_AZURE_ACCOUNT_IS_HNS_ENABLED, () -> + mockedFs.initialize(fs.getUri(), getRawConfiguration())); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java index d168ed38844df..d4c58c9705a68 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestGetNameSpaceEnabled.java @@ -30,6 +30,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; import org.apache.hadoop.fs.azurebfs.services.AbfsClient; +import org.apache.hadoop.fs.azurebfs.services.AbfsDfsClient; import org.apache.hadoop.fs.azurebfs.services.AbfsRestOperation; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -140,12 +141,15 @@ private String getNonExistingUrl() { @Test public void testFailedRequestWhenFSNotExist() throws Exception { + assumeValidTestConfigPresent(getRawConfiguration(), FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT); AbfsConfiguration config = this.getConfiguration(); config.setBoolean(AZURE_CREATE_REMOTE_FILESYSTEM_DURING_INITIALIZATION, false); String testUri = this.getTestUrl(); 
String nonExistingFsUrl = getAbfsScheme() + "://" + UUID.randomUUID() + testUri.substring(testUri.indexOf("@")); + config.setBoolean(FS_AZURE_ACCOUNT_IS_HNS_ENABLED, isUsingXNSAccount); AzureBlobFileSystem fs = this.getFileSystem(nonExistingFsUrl); + fs.getAbfsStore().setNamespaceEnabled(Trilean.UNKNOWN); intercept(FileNotFoundException.class, "\"The specified filesystem does not exist.\", 404", @@ -214,12 +218,14 @@ private void unsetConfAndEnsureGetAclCallIsMadeOnce() throws IOException { private AbfsClient callAbfsGetIsNamespaceEnabledAndReturnMockAbfsClient() throws IOException { - final AzureBlobFileSystem abfs = this.getFileSystem(); - final AzureBlobFileSystemStore abfsStore = abfs.getAbfsStore(); - final AbfsClient mockClient = mock(AbfsClient.class); + final AzureBlobFileSystem abfs = Mockito.spy(this.getFileSystem()); + final AzureBlobFileSystemStore abfsStore = Mockito.spy(abfs.getAbfsStore()); + final AbfsClient mockClient = mock(AbfsDfsClient.class); + doReturn(abfsStore).when(abfs).getAbfsStore(); + doReturn(mockClient).when(abfsStore).getClient(); + doReturn(mockClient).when(abfsStore).getClient(any()); doReturn(mock(AbfsRestOperation.class)).when(mockClient) .getAclStatus(anyString(), any(TracingContext.class)); - abfsStore.setClient(mockClient); getIsNamespaceEnabled(abfs); return mockClient; } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java index 909e7cf1749a1..81897a568763e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java @@ -160,7 +160,8 @@ private String getUserAgentString(AbfsConfiguration config, boolean includeSSLProvider) throws IOException, URISyntaxException { AbfsCounters abfsCounters = Mockito.spy(new AbfsCountersImpl(new URI("abcd"))); AbfsClientContext abfsClientContext = new AbfsClientContextBuilder().withAbfsCounters(abfsCounters).build(); - AbfsClient client = new AbfsClient(new URL("https://azure.com"), null, + // Todo : [FnsOverBlob] Update to work with Blob Endpoint as well when Fns Over Blob is ready. + AbfsClient client = new AbfsDfsClient(new URL("https://azure.com"), null, config, (AccessTokenProvider) null, null, abfsClientContext); String sslProviderName = null; if (includeSSLProvider) { @@ -363,7 +364,8 @@ public static AbfsClient createTestClientFromCurrentContext( .build(); // Create test AbfsClient - AbfsClient testClient = new AbfsClient( + // Todo : [FnsOverBlob] Update to work with Blob Endpoint as well when Fns Over Blob is ready. + AbfsClient testClient = new AbfsDfsClient( baseAbfsClientInstance.getBaseUrl(), (currentAuthType == AuthType.SharedKey ? new SharedKeyCredentials( @@ -391,7 +393,8 @@ public static AbfsClient getMockAbfsClient(AbfsClient baseAbfsClientInstance, (currentAuthType == AuthType.SharedKey) || (currentAuthType == AuthType.OAuth)); - AbfsClient client = mock(AbfsClient.class); + // Todo : [FnsOverBlob] Update to work with Blob Endpoint as well when Fns Over Blob is ready. + AbfsClient client = mock(AbfsDfsClient.class); AbfsPerfTracker tracker = new AbfsPerfTracker( "test", abfsConfig.getAccountName(), From 012ae9d1aa04f9636a7d872126bda727737e9e70 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Tue, 20 Aug 2024 16:32:53 -0700 Subject: [PATCH 095/113] HDFS-17606. 
Do not require implementing CustomizedCallbackHandler. (#7005) --- .../sasl/CustomizedCallbackHandler.java | 25 +++++- .../sasl/SaslDataTransferServer.java | 14 +++- .../sasl/TestCustomizedCallbackHandler.java | 76 ++++++++++++++++--- 3 files changed, 99 insertions(+), 16 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/CustomizedCallbackHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/CustomizedCallbackHandler.java index eff093490bcd1..a15282bd6307f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/CustomizedCallbackHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/CustomizedCallbackHandler.java @@ -20,13 +20,15 @@ import javax.security.auth.callback.Callback; import javax.security.auth.callback.UnsupportedCallbackException; import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.List; /** For handling customized {@link Callback}. */ public interface CustomizedCallbackHandler { class DefaultHandler implements CustomizedCallbackHandler{ @Override - public void handleCallback(List callbacks, String username, char[] password) + public void handleCallbacks(List callbacks, String username, char[] password) throws UnsupportedCallbackException { if (!callbacks.isEmpty()) { throw new UnsupportedCallbackException(callbacks.get(0)); @@ -34,6 +36,25 @@ public void handleCallback(List callbacks, String username, char[] pas } } - void handleCallback(List callbacks, String name, char[] password) + static CustomizedCallbackHandler delegate(Object delegated) { + final String methodName = "handleCallbacks"; + final Class clazz = delegated.getClass(); + final Method method; + try { + method = clazz.getMethod(methodName, List.class, String.class, char[].class); + } catch (NoSuchMethodException e) { + throw new IllegalStateException("Failed to get method " + methodName + " from " + clazz, e); + } + + return (callbacks, name, password) -> { + try { + method.invoke(delegated, callbacks, name, password); + } catch (IllegalAccessException | InvocationTargetException e) { + throw new IOException("Failed to invoke " + method, e); + } + }; + } + + void handleCallbacks(List callbacks, String name, char[] password) throws UnsupportedCallbackException, IOException; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java index ae79800b3ed37..d71544fc77dc6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/SaslDataTransferServer.java @@ -225,14 +225,20 @@ static final class SaslServerCallbackHandler SaslServerCallbackHandler(Configuration conf, PasswordFunction passwordFunction) { this.passwordFunction = passwordFunction; - final Class clazz = conf.getClass( + final Class clazz = conf.getClass( HdfsClientConfigKeys.DFS_DATA_TRANSFER_SASL_CUSTOMIZEDCALLBACKHANDLER_CLASS_KEY, - CustomizedCallbackHandler.DefaultHandler.class, CustomizedCallbackHandler.class); + 
CustomizedCallbackHandler.DefaultHandler.class); + final Object callbackHandler; try { - this.customizedCallbackHandler = clazz.newInstance(); + callbackHandler = clazz.newInstance(); } catch (Exception e) { throw new IllegalStateException("Failed to create a new instance of " + clazz, e); } + if (callbackHandler instanceof CustomizedCallbackHandler) { + customizedCallbackHandler = (CustomizedCallbackHandler) callbackHandler; + } else { + customizedCallbackHandler = CustomizedCallbackHandler.delegate(callbackHandler); + } } @Override @@ -271,7 +277,7 @@ public void handle(Callback[] callbacks) throws IOException, if (unknownCallbacks != null) { final String name = nc != null ? nc.getDefaultName() : null; final char[] password = name != null ? passwordFunction.apply(name) : null; - customizedCallbackHandler.handleCallback(unknownCallbacks, name, password); + customizedCallbackHandler.handleCallbacks(unknownCallbacks, name, password); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestCustomizedCallbackHandler.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestCustomizedCallbackHandler.java index 88d1d66bc40ff..37de661720839 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestCustomizedCallbackHandler.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/protocol/datatransfer/sasl/TestCustomizedCallbackHandler.java @@ -20,6 +20,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.protocol.datatransfer.sasl.SaslDataTransferServer.SaslServerCallbackHandler; +import org.apache.hadoop.test.LambdaTestUtils; import org.junit.Assert; import org.junit.Test; import org.slf4j.Logger; @@ -27,18 +28,37 @@ import javax.security.auth.callback.Callback; import javax.security.auth.callback.UnsupportedCallbackException; -import java.util.Arrays; +import java.io.IOException; import java.util.List; +import java.util.concurrent.atomic.AtomicReference; +/** For testing {@link CustomizedCallbackHandler}. */ public class TestCustomizedCallbackHandler { - public static final Logger LOG = LoggerFactory.getLogger(TestCustomizedCallbackHandler.class); + static final Logger LOG = LoggerFactory.getLogger(TestCustomizedCallbackHandler.class); + + static final AtomicReference> LAST_CALLBACKS = new AtomicReference<>(); + + static void runHandleCallbacks(Object caller, List callbacks, String name) { + LOG.info("{}: handling {} for {}", caller.getClass().getSimpleName(), callbacks, name); + LAST_CALLBACKS.set(callbacks); + } + + /** Assert if the callbacks in {@link #LAST_CALLBACKS} are the same as the expected callbacks. 
*/ + static void assertCallbacks(Callback[] expected) { + final List computed = LAST_CALLBACKS.getAndSet(null); + Assert.assertNotNull(computed); + Assert.assertEquals(expected.length, computed.size()); + for (int i = 0; i < expected.length; i++) { + Assert.assertSame(expected[i], computed.get(i)); + } + } static class MyCallback implements Callback { } static class MyCallbackHandler implements CustomizedCallbackHandler { @Override - public void handleCallback(List callbacks, String name, char[] password) { - LOG.info("{}: handling {} for {}", getClass().getSimpleName(), callbacks, name); + public void handleCallbacks(List callbacks, String name, char[] password) { + runHandleCallbacks(this, callbacks, name); } } @@ -48,16 +68,52 @@ public void testCustomizedCallbackHandler() throws Exception { final Callback[] callbacks = {new MyCallback()}; // without setting conf, expect UnsupportedCallbackException - try { - new SaslServerCallbackHandler(conf, String::toCharArray).handle(callbacks); - Assert.fail("Expected UnsupportedCallbackException for " + Arrays.asList(callbacks)); - } catch (UnsupportedCallbackException e) { - LOG.info("The failure is expected", e); - } + LambdaTestUtils.intercept(UnsupportedCallbackException.class, () -> runTest(conf, callbacks)); // set conf and expect success conf.setClass(HdfsClientConfigKeys.DFS_DATA_TRANSFER_SASL_CUSTOMIZEDCALLBACKHANDLER_CLASS_KEY, MyCallbackHandler.class, CustomizedCallbackHandler.class); new SaslServerCallbackHandler(conf, String::toCharArray).handle(callbacks); + assertCallbacks(callbacks); + } + + static class MyCallbackMethod { + public void handleCallbacks(List callbacks, String name, char[] password) + throws UnsupportedCallbackException { + runHandleCallbacks(this, callbacks, name); + } + } + + static class MyExceptionMethod { + public void handleCallbacks(List callbacks, String name, char[] password) + throws UnsupportedCallbackException { + runHandleCallbacks(this, callbacks, name); + throw new UnsupportedCallbackException(callbacks.get(0)); + } + } + + @Test + public void testCustomizedCallbackMethod() throws Exception { + final Configuration conf = new Configuration(); + final Callback[] callbacks = {new MyCallback()}; + + // without setting conf, expect UnsupportedCallbackException + LambdaTestUtils.intercept(UnsupportedCallbackException.class, () -> runTest(conf, callbacks)); + + // set conf and expect success + conf.setClass(HdfsClientConfigKeys.DFS_DATA_TRANSFER_SASL_CUSTOMIZEDCALLBACKHANDLER_CLASS_KEY, + MyCallbackMethod.class, Object.class); + new SaslServerCallbackHandler(conf, String::toCharArray).handle(callbacks); + assertCallbacks(callbacks); + + // set conf and expect exception + conf.setClass(HdfsClientConfigKeys.DFS_DATA_TRANSFER_SASL_CUSTOMIZEDCALLBACKHANDLER_CLASS_KEY, + MyExceptionMethod.class, Object.class); + LambdaTestUtils.intercept(IOException.class, () -> runTest(conf, callbacks)); + } + + static void runTest(Configuration conf, Callback... callbacks) + throws IOException, UnsupportedCallbackException { + new SaslServerCallbackHandler(conf, String::toCharArray).handle(callbacks); } } From 68fcd7234ca271b99cb26ba2759d873ade311c13 Mon Sep 17 00:00:00 2001 From: Carl Levasseur Date: Wed, 21 Aug 2024 15:15:28 +0200 Subject: [PATCH 096/113] HADOOP-18542. 
Keep MSI tenant ID and client ID optional (#4262) Contributed by Carl Levasseur --- .../hadoop/fs/azurebfs/AbfsConfiguration.java | 4 +-- .../fs/azurebfs/TestAccountConfiguration.java | 33 +++++++++++++++---- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java index e4d3a1b8b07ad..1892b13d372f6 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java @@ -1084,9 +1084,9 @@ public AccessTokenProvider getTokenProvider() throws TokenAccessProviderExceptio FS_AZURE_ACCOUNT_OAUTH_MSI_ENDPOINT, AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_MSI_ENDPOINT); String tenantGuid = - getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_MSI_TENANT); + getPasswordString(FS_AZURE_ACCOUNT_OAUTH_MSI_TENANT); String clientId = - getMandatoryPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID); + getPasswordString(FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID); String authority = getTrimmedPasswordString( FS_AZURE_ACCOUNT_OAUTH_MSI_AUTHORITY, AuthConfigurations.DEFAULT_FS_AZURE_ACCOUNT_OAUTH_MSI_AUTHORITY); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAccountConfiguration.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAccountConfiguration.java index 17da772d0819b..483a7e3d5d58e 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAccountConfiguration.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/TestAccountConfiguration.java @@ -27,6 +27,7 @@ import org.apache.hadoop.fs.azurebfs.contracts.exceptions.ConfigurationPropertyNotFoundException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.InvalidConfigurationValueException; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.TokenAccessProviderException; +import org.apache.hadoop.fs.azurebfs.oauth2.AccessTokenProvider; import org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider; import org.apache.hadoop.fs.azurebfs.oauth2.CustomTokenProviderAdapter; import org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider; @@ -66,6 +67,7 @@ */ public class TestAccountConfiguration { private static final String TEST_OAUTH_PROVIDER_CLASS_CONFIG = "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"; + private static final String TEST_OAUTH_MSI_TOKEN_PROVIDER_CLASS_CONFIG = "org.apache.hadoop.fs.azurebfs.oauth2.MsiTokenProvider"; private static final String TEST_CUSTOM_PROVIDER_CLASS_CONFIG = "org.apache.hadoop.fs.azurebfs.oauth2.RetryTestTokenProvider"; private static final String TEST_SAS_PROVIDER_CLASS_CONFIG_1 = "org.apache.hadoop.fs.azurebfs.extensions.MockErrorSASTokenProvider"; private static final String TEST_SAS_PROVIDER_CLASS_CONFIG_2 = "org.apache.hadoop.fs.azurebfs.extensions.MockSASTokenProvider"; @@ -90,11 +92,6 @@ public class TestAccountConfiguration { FS_AZURE_ACCOUNT_OAUTH_USER_NAME, FS_AZURE_ACCOUNT_OAUTH_USER_PASSWORD)); - private static final List MSI_TOKEN_OAUTH_CONFIG_KEYS = - Collections.unmodifiableList(Arrays.asList( - FS_AZURE_ACCOUNT_OAUTH_MSI_TENANT, - FS_AZURE_ACCOUNT_OAUTH_CLIENT_ID)); - private static final List REFRESH_TOKEN_OAUTH_CONFIG_KEYS = Collections.unmodifiableList(Arrays.asList( FS_AZURE_ACCOUNT_OAUTH_REFRESH_TOKEN, @@ -410,10 +407,8 @@ 
public void testAccessTokenProviderPrecedence() public void testOAuthConfigPropNotFound() throws Throwable { testConfigPropNotFound(CLIENT_CREDENTIAL_OAUTH_CONFIG_KEYS, ClientCredsTokenProvider.class.getName()); testConfigPropNotFound(USER_PASSWORD_OAUTH_CONFIG_KEYS, UserPasswordTokenProvider.class.getName()); - testConfigPropNotFound(MSI_TOKEN_OAUTH_CONFIG_KEYS, MsiTokenProvider.class.getName()); testConfigPropNotFound(REFRESH_TOKEN_OAUTH_CONFIG_KEYS, RefreshTokenBasedTokenProvider.class.getName()); testConfigPropNotFound(WORKLOAD_IDENTITY_OAUTH_CONFIG_KEYS, WorkloadIdentityTokenProvider.class.getName()); - } private void testConfigPropNotFound(List configKeys, @@ -444,6 +439,30 @@ private static void testMissingConfigKey(final AbfsConfiguration abfsConf, () -> abfsConf.getTokenProvider().getClass().getTypeName()))); } + @Test + public void testClientAndTenantIdOptionalWhenUsingMsiTokenProvider() throws Throwable { + final String accountName = "account"; + final Configuration conf = new Configuration(); + final AbfsConfiguration abfsConf = new AbfsConfiguration(conf, accountName); + + final String accountNameSuffix = "." + abfsConf.getAccountName(); + String authKey = FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME + accountNameSuffix; + String providerClassKey = ""; + String providerClassValue = ""; + + providerClassKey = FS_AZURE_ACCOUNT_TOKEN_PROVIDER_TYPE_PROPERTY_NAME + accountNameSuffix; + providerClassValue = TEST_OAUTH_MSI_TOKEN_PROVIDER_CLASS_CONFIG; + + abfsConf.set(authKey, AuthType.OAuth.toString()); + abfsConf.set(providerClassKey, providerClassValue); + + AccessTokenProvider tokenProviderTypeName = abfsConf.getTokenProvider(); + // Test that we managed to instantiate an MsiTokenProvider without having to define the tenant and client ID. + // Those 2 fields are optional as they can automatically be determined by the Azure Metadata service when + // running on an Azure VM. + Assertions.assertThat(tokenProviderTypeName).describedAs("Token Provider Should be MsiTokenProvider").isInstanceOf(MsiTokenProvider.class); + } + public void testGlobalAndAccountOAuthPrecedence(AbfsConfiguration abfsConf, AuthType globalAuthType, AuthType accountSpecificAuthType) From f6c45e0bcf4aeaba31515e548dcc98b33245fe0e Mon Sep 17 00:00:00 2001 From: Heagan A Date: Wed, 21 Aug 2024 13:11:32 -0700 Subject: [PATCH 097/113] HDFS-17546. Follow-up backport from branch3.3 (#6908) HDFS-17546. Follow-up backport from branch3.3 --- .../apache/hadoop/hdfs/util/TestCombinedHostsFileReader.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestCombinedHostsFileReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestCombinedHostsFileReader.java index 9c536dbf26a08..751f2ad18ae12 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestCombinedHostsFileReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/util/TestCombinedHostsFileReader.java @@ -30,10 +30,10 @@ import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; import org.mockito.Mock; -import org.mockito.Mockito; import static org.junit.Assert.assertEquals; import static org.mockito.Mockito.when; +import org.mockito.MockitoAnnotations; /** * Test for JSON based HostsFileReader. 
@@ -56,7 +56,7 @@ public class TestCombinedHostsFileReader { @Before public void setUp() throws Exception { - callable = Mockito.mock(Callable.class); + MockitoAnnotations.initMocks(this); } @After From 6be04633b55bbd67c2875e39977cd9d2308dc1d1 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Thu, 22 Aug 2024 17:49:42 +0800 Subject: [PATCH 098/113] YARN-11711. Clean Up ServiceScheduler Code. (#6977) Contributed by Shilun Fan. Reviewed-by: Steve Loughran Signed-off-by: Shilun Fan --- .../dev-support/findbugs-exclude.xml | 9 ++ .../hadoop/yarn/service/ServiceScheduler.java | 106 ++++++++---------- 2 files changed, 58 insertions(+), 57 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 309c028580081..bad2bacb1a57b 100644 --- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -725,4 +725,13 @@ + + + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java index 9da8f31fe4b8b..b95e2c31849f6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceScheduler.java @@ -123,6 +123,8 @@ .EXIT_FALSE; import static org.apache.hadoop.yarn.service.exceptions.LauncherExitCodes .EXIT_SUCCESS; +import static org.apache.hadoop.yarn.webapp.util.WebAppUtils.HTTPS_PREFIX; +import static org.apache.hadoop.yarn.webapp.util.WebAppUtils.HTTP_PREFIX; /** * @@ -153,10 +155,10 @@ public class ServiceScheduler extends CompositeService { private boolean timelineServiceEnabled; - // Global diagnostics that will be reported to RM on eRxit. + // Global diagnostics that will be reported to RM on exit. // The unit the number of characters. This will be limited to 64 * 1024 // characters. - private BoundedAppender diagnostics = new BoundedAppender(64 * 1024); + private final BoundedAppender diagnostics = new BoundedAppender(64 * 1024); // A cache for loading config files from remote such as hdfs public LoadingCache configFileCache = null; @@ -168,7 +170,7 @@ public class ServiceScheduler extends CompositeService { private NMClientAsync nmClient; private AsyncDispatcher dispatcher; private YarnRegistryViewForProviders yarnRegistryOperations; - private ServiceContext context; + private final ServiceContext context; private ContainerLaunchService containerLaunchService; private final Map unRecoveredInstances = new ConcurrentHashMap<>(); @@ -185,10 +187,10 @@ public class ServiceScheduler extends CompositeService { private volatile FinalApplicationStatus finalApplicationStatus = FinalApplicationStatus.ENDED; - private Clock systemClock; + private final Clock systemClock; // For unit test override since we don't want to terminate UT process. 
- private ServiceUtils.ProcessTerminationHandler + private final ServiceUtils.ProcessTerminationHandler terminationHandler = new ServiceUtils.ProcessTerminationHandler(); public ServiceScheduler(ServiceContext context) { @@ -199,10 +201,10 @@ public ServiceScheduler(ServiceContext context) { } public void buildInstance(ServiceContext context, Configuration configuration) - throws YarnException, IOException { + throws YarnException { app = context.service; executorService = Executors.newScheduledThreadPool(10); - RegistryOperations registryClient = null; + RegistryOperations registryClient; if (UserGroupInformation.isSecurityEnabled() && !StringUtils.isEmpty(context.principal) && !StringUtils.isEmpty(context.keytab)) { @@ -480,7 +482,7 @@ private void recoverComponents(RegisterApplicationMasterResponse response) { } }); - if (unRecoveredInstances.size() > 0) { + if (!unRecoveredInstances.isEmpty()) { executorService.schedule(() -> { synchronized (unRecoveredInstances) { // after containerRecoveryTimeout, all the containers that haven't be @@ -532,7 +534,8 @@ private void createConfigFileCache(final FileSystem fileSystem) { this.configFileCache = CacheBuilder.newBuilder().expireAfterAccess(10, TimeUnit.MINUTES) .build(new CacheLoader() { - @Override public Object load(ConfigFile key) throws Exception { + @Override + public Object load(ConfigFile key) throws Exception { switch (key.getType()) { case HADOOP_XML: try (FSDataInputStream input = fileSystem @@ -560,9 +563,8 @@ private void createConfigFileCache(final FileSystem fileSystem) { } private void registerServiceInstance(ApplicationAttemptId attemptId, - Service service) throws IOException { - LOG.info("Registering " + attemptId + ", " + service.getName() - + " into registry"); + Service service) { + LOG.info("Registering {}, {} into registry.", attemptId, service.getName()); ServiceRecord serviceRecord = new ServiceRecord(); serviceRecord.set(YarnRegistryAttributes.YARN_ID, attemptId.getApplicationId().toString()); @@ -570,24 +572,21 @@ private void registerServiceInstance(ApplicationAttemptId attemptId, PersistencePolicies.APPLICATION); serviceRecord.description = "YarnServiceMaster"; - executorService.submit(new Runnable() { - @Override public void run() { - try { - yarnRegistryOperations.registerSelf(serviceRecord, false); - LOG.info("Registered service under {}; absolute path {}", - yarnRegistryOperations.getSelfRegistrationPath(), - yarnRegistryOperations.getAbsoluteSelfRegistrationPath()); - boolean isFirstAttempt = 1 == attemptId.getAttemptId(); - // delete the children in case there are any and this is an AM startup. - // just to make sure everything underneath is purged - if (isFirstAttempt) { - yarnRegistryOperations.deleteChildren( - yarnRegistryOperations.getSelfRegistrationPath(), true); - } - } catch (IOException e) { - LOG.error( - "Failed to register app " + app.getName() + " in registry", e); + executorService.submit(() -> { + try { + yarnRegistryOperations.registerSelf(serviceRecord, false); + LOG.info("Registered service under {}; absolute path {}", + yarnRegistryOperations.getSelfRegistrationPath(), + yarnRegistryOperations.getAbsoluteSelfRegistrationPath()); + boolean isFirstAttempt = 1 == attemptId.getAttemptId(); + // delete the children in case there are any and this is an AM startup. 
+ // just to make sure everything underneath is purged + if (isFirstAttempt) { + yarnRegistryOperations.deleteChildren( + yarnRegistryOperations.getSelfRegistrationPath(), true); } + } catch (IOException e) { + LOG.error("Failed to register app {} in registry.", app.getName(), e); } }); if (YarnConfiguration.timelineServiceV2Enabled(getConfig())) { @@ -637,7 +636,7 @@ public void handle(ComponentEvent event) { Component component = componentsByName.get(event.getName()); if (component == null) { - LOG.error("No component exists for " + event.getName()); + LOG.error("No component exists for {}.", event.getName()); return; } try { @@ -657,14 +656,14 @@ public void handle(ComponentInstanceEvent event) { ComponentInstance instance = liveInstances.get(event.getContainerId()); if (instance == null) { - LOG.error("No component instance exists for " + event.getContainerId()); + LOG.error("No component instance exists for {}.", event.getContainerId()); return; } try { instance.handle(event); } catch (Throwable t) { - LOG.error(instance.getCompInstanceId() + - ": Error in handling event type " + event.getType(), t); + LOG.error("{} : Error in handling event type {}.", + instance.getCompInstanceId(), event.getType(), t); } } } @@ -673,7 +672,7 @@ class AMRMClientCallback extends AMRMClientAsync.AbstractCallbackHandler { @Override public void onContainersAllocated(List containers) { - LOG.info(containers.size() + " containers allocated. "); + LOG.info("{} containers allocated. ", containers.size()); for (Container container : containers) { Component comp = componentsById.get(container.getAllocationRequestId()); ComponentEvent event = @@ -684,8 +683,8 @@ public void onContainersAllocated(List containers) { Collection requests = amRMClient .getMatchingRequests(container.getAllocationRequestId()); LOG.info("[COMPONENT {}]: remove {} outstanding container requests " + - "for allocateId " + container.getAllocationRequestId(), - comp.getName(), requests.size()); + "for allocateId {}.", comp.getName(), requests.size(), + container.getAllocationRequestId()); // remove the corresponding request if (requests.iterator().hasNext()) { AMRMClient.ContainerRequest request = requests.iterator().next(); @@ -799,7 +798,7 @@ private class NMClientCallback extends NMClientAsync.AbstractCallbackHandler { Map allServiceResponse) { ComponentInstance instance = liveInstances.get(containerId); if (instance == null) { - LOG.error("No component instance exists for " + containerId); + LOG.error("No component instance exists for {}.", containerId); return; } ComponentEvent event = @@ -821,10 +820,10 @@ private class NMClientCallback extends NMClientAsync.AbstractCallbackHandler { public void onStartContainerError(ContainerId containerId, Throwable t) { ComponentInstance instance = liveInstances.get(containerId); if (instance == null) { - LOG.error("No component instance exists for " + containerId); + LOG.error("No component instance exists for {}.", containerId); return; } - LOG.error("Failed to start " + containerId, t); + LOG.error("Failed to start {}.", containerId, t); amRMClient.releaseAssignedContainer(containerId); // After container released, it'll get CONTAINER_COMPLETED event from RM // automatically which will trigger stopping COMPONENT INSTANCE @@ -950,15 +949,14 @@ public boolean hasAtLeastOnePlacementConstraint() { } public boolean terminateServiceIfNeeded(Component component) { - boolean serviceIsTerminated = + return terminateServiceIfDominantComponentFinished(component) || 
terminateServiceIfAllComponentsFinished(); - return serviceIsTerminated; } /** * If the service state component is finished, the service is also terminated. - * @param component + * @param component service component. */ private boolean terminateServiceIfDominantComponentFinished(Component component) { @@ -981,8 +979,7 @@ private boolean terminateServiceIfDominantComponentFinished(Component state); component.getComponentSpec().setState(state); LOG.info("Dominate component {} finished, exiting Service Master... " + - ", final status=" + (isSucceeded ? "Succeeded" : "Failed"), - component.getName()); + ", final status={}.", component.getName(), (isSucceeded ? "Succeeded" : "Failed")); terminateService(isSucceeded); } } @@ -1042,14 +1039,10 @@ private boolean terminateServiceIfAllComponentsFinished() { } if (shouldTerminate) { - LOG.info("All component finished, exiting Service Master... " - + ", final status=" + (failedComponents.isEmpty() ? - "Succeeded" : - "Failed")); - LOG.info("Succeeded components: [" + org.apache.commons.lang3.StringUtils - .join(succeededComponents, ",") + "]"); - LOG.info("Failed components: [" + org.apache.commons.lang3.StringUtils - .join(failedComponents, ",") + "]"); + LOG.info("All component finished, exiting Service Master... " + + ", final status={}", (failedComponents.isEmpty() ? "Succeeded" : "Failed")); + LOG.info("Succeeded components: [" + StringUtils.join(succeededComponents, ",") + "]"); + LOG.info("Failed components: [" + StringUtils.join(failedComponents, ",") + "]"); terminateService(failedComponents.isEmpty()); } @@ -1093,7 +1086,7 @@ public void syncSysFs(Service yarnApp) { spec = ServiceApiUtil.jsonSerDeser.toJson(yarnApp); for (org.apache.hadoop.yarn.service.api.records.Component c : yarnApp.getComponents()) { - Set nodes = new HashSet(); + Set nodes = new HashSet<>(); boolean update = Boolean.parseBoolean(c.getConfiguration() .getEnv(ApplicationConstants.Environment .YARN_CONTAINER_RUNTIME_YARN_SYSFS_ENABLE.name())); @@ -1109,9 +1102,9 @@ public void syncSysFs(Service yarnApp) { for (String bareHost : nodes) { StringBuilder requestPath = new StringBuilder(); if (YarnConfiguration.useHttps(conf)) { - requestPath.append("https://"); + requestPath.append(HTTPS_PREFIX); } else { - requestPath.append("http://"); + requestPath.append(HTTP_PREFIX); } requestPath.append(bareHost) .append(":") @@ -1129,8 +1122,7 @@ public void syncSysFs(Service yarnApp) { Builder builder = HttpUtil.connect(requestPath.toString()); ClientResponse response = builder.put(ClientResponse.class, spec); if (response.getStatus()!=ClientResponse.Status.OK.getStatusCode()) { - LOG.warn("Error synchronize YARN sysfs: " + - response.getEntity(String.class)); + LOG.warn("Error synchronize YARN sysfs: {}.", response.getEntity(String.class)); success = false; } } From 5745a7dd754dd8f07fad3c5b8a36f89da489aaf7 Mon Sep 17 00:00:00 2001 From: Kevin Cai Date: Sun, 25 Aug 2024 16:47:05 +0800 Subject: [PATCH 099/113] HDFS-16084. Fix getJNIEnv crash due to incorrect state set to tls var (#6969). Contributed by Kevin Cai. 
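Many of the ServiceScheduler changes above switch string-concatenated log calls to SLF4J parameterized logging. Two properties of that style are worth spelling out: the message is only formatted when the level is enabled, and an extra Throwable argument after the last placeholder is logged with its stack trace rather than substituted into the message. A small, stand-alone sketch (logger name and messages are illustrative, not from the patch):

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class ParameterizedLoggingSketch {
  private static final Logger LOG =
      LoggerFactory.getLogger(ParameterizedLoggingSketch.class);

  public static void main(String[] args) {
    // The message string is only built when INFO is enabled, so no
    // concatenation cost is paid for suppressed levels.
    LOG.info("{} containers allocated.", 3);

    try {
      throw new IllegalStateException("simulated failure");
    } catch (IllegalStateException e) {
      // Two placeholders, three arguments: the trailing Throwable is not
      // substituted into the message, it is appended as a stack trace.
      LOG.error("{} : Error in handling event type {}.", "comp-0", "START", e);
    }
  }
}
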
Signed-off-by: He Xiaoqiao --- .../src/main/native/libhdfs/jni_helper.c | 19 +++++--- .../native/libhdfspp/tests/CMakeLists.txt | 4 ++ .../libhdfspp/tests/libhdfs_getjni_test.cc | 44 +++++++++++++++++++ 3 files changed, 60 insertions(+), 7 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_getjni_test.cc diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c index 8f00a08b0a98b..47dce0086a93c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfs/jni_helper.c @@ -818,26 +818,31 @@ JNIEnv* getJNIEnv(void) fprintf(stderr, "getJNIEnv: Unable to create ThreadLocalState\n"); return NULL; } - if (threadLocalStorageSet(state)) { - mutexUnlock(&jvmMutex); - goto fail; - } - THREAD_LOCAL_STORAGE_SET_QUICK(state); state->env = getGlobalJNIEnv(); - mutexUnlock(&jvmMutex); - if (!state->env) { + mutexUnlock(&jvmMutex); goto fail; } jthrowable jthr = NULL; jthr = initCachedClasses(state->env); if (jthr) { + mutexUnlock(&jvmMutex); printExceptionAndFree(state->env, jthr, PRINT_EXC_ALL, "initCachedClasses failed"); goto fail; } + + if (threadLocalStorageSet(state)) { + mutexUnlock(&jvmMutex); + goto fail; + } + + // set the TLS var only when the state passes all the checks + THREAD_LOCAL_STORAGE_SET_QUICK(state); + mutexUnlock(&jvmMutex); + return state->env; fail: diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/CMakeLists.txt b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/CMakeLists.txt index 7eb432f31ac0b..3e52c6d965a01 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/CMakeLists.txt +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/CMakeLists.txt @@ -74,6 +74,10 @@ add_executable(uri_test uri_test.cc) target_link_libraries(uri_test common gmock_main ${CMAKE_THREAD_LIBS_INIT}) add_memcheck_test(uri uri_test) +add_executable(get_jni_test libhdfs_getjni_test.cc) +target_link_libraries(get_jni_test gmock_main hdfs_static ${CMAKE_THREAD_LIBS_INIT}) +add_memcheck_test(get_jni get_jni_test) + add_executable(remote_block_reader_test remote_block_reader_test.cc) target_link_libraries(remote_block_reader_test test_common reader proto common connection ${PROTOBUF_LIBRARIES} ${OPENSSL_LIBRARIES} gmock_main ${CMAKE_THREAD_LIBS_INIT}) add_memcheck_test(remote_block_reader remote_block_reader_test) diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_getjni_test.cc b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_getjni_test.cc new file mode 100644 index 0000000000000..b2648da23bb4d --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/src/main/native/libhdfspp/tests/libhdfs_getjni_test.cc @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +// hook the jvm runtime function. expect always failure +_JNI_IMPORT_OR_EXPORT_ jint JNICALL JNI_GetDefaultJavaVMInitArgs(void*) { + return 1; +} + +// hook the jvm runtime function. expect always failure +_JNI_IMPORT_OR_EXPORT_ jint JNICALL JNI_CreateJavaVM(JavaVM**, void**, void*) { + return 1; +} + +TEST(GetJNITest, TestRepeatedGetJNIFailsButNoCrash) { + // connect to nothing, should fail but not crash + EXPECT_EQ(NULL, hdfsConnectNewInstance(NULL, 0)); + + // try again, should fail but not crash + EXPECT_EQ(NULL, hdfsConnectNewInstance(NULL, 0)); +} + +int main(int argc, char* argv[]) { + ::testing::InitGoogleMock(&argc, argv); + return RUN_ALL_TESTS(); +} From 89e38f08ae00b97e2c98b331a9f04a5bf8d705b6 Mon Sep 17 00:00:00 2001 From: Sung Dong Kim <67620856+Last-remote11@users.noreply.github.com> Date: Sun, 25 Aug 2024 18:51:14 +0900 Subject: [PATCH 100/113] HDFS-17573. Allow turn on both FSImage parallelization and compression (#6929). Contributed by Sung Dong Kim. Signed-off-by: He Xiaoqiao --- .../server/namenode/FSImageFormatPBINode.java | 2 + .../namenode/FSImageFormatProtobuf.java | 52 +++++++++++------ .../snapshot/FSImageFormatPBSnapshot.java | 4 +- .../hdfs/server/namenode/TestFSImage.java | 17 ++++-- .../namenode/TestFSImageWithSnapshot.java | 40 +++++++++++-- ...SImageWithSnapshotParallelAndCompress.java | 58 +++++++++++++++++++ 6 files changed, 141 insertions(+), 32 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshotParallelAndCompress.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java index 1f21871ac7b02..08fd39f481d10 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatPBINode.java @@ -790,6 +790,7 @@ void serializeINodeDirectorySection(OutputStream out) throws IOException { outputInodes = 0; parent.commitSubSection(summary, FSImageFormatProtobuf.SectionName.INODE_DIR_SUB); + out = parent.getSectionOutputStream(); } } parent.commitSectionAndSubSection(summary, @@ -817,6 +818,7 @@ void serializeINodeSection(OutputStream out) throws IOException { if (i % parent.getInodesPerSubSection() == 0) { parent.commitSubSection(summary, FSImageFormatProtobuf.SectionName.INODE_SUB); + out = parent.getSectionOutputStream(); } } parent.commitSectionAndSubSection(summary, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java index 58c24d4377be0..edacb7eaafd00 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageFormatProtobuf.java @@ -584,18 +584,6 @@ private void loadErasureCodingSection(InputStream in) private static boolean enableParallelSaveAndLoad(Configuration conf) { boolean loadInParallel = enableParallelLoad; - boolean compressionEnabled = conf.getBoolean( - DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, - DFSConfigKeys.DFS_IMAGE_COMPRESS_DEFAULT); - - if (loadInParallel) { - if (compressionEnabled) { - LOG.warn("Parallel Image loading and saving is not supported when {}" + - " is set to true. Parallel will be disabled.", - DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY); - loadInParallel = false; - } - } return loadInParallel; } @@ -653,7 +641,11 @@ public int getInodesPerSubSection() { return inodesPerSubSection; } - /** + public OutputStream getSectionOutputStream() { + return sectionOutputStream; + } + + /** * Commit the length and offset of a fsimage section to the summary index, * including the sub section, which will be committed before the section is * committed. @@ -664,14 +656,22 @@ public int getInodesPerSubSection() { */ public void commitSectionAndSubSection(FileSummary.Builder summary, SectionName name, SectionName subSectionName) throws IOException { - commitSubSection(summary, subSectionName); - commitSection(summary, name); + commitSubSection(summary, subSectionName, true); + commitSection(summary, name, true); } public void commitSection(FileSummary.Builder summary, SectionName name) - throws IOException { + throws IOException { + commitSection(summary, name, false); + } + + public void commitSection(FileSummary.Builder summary, SectionName name, + boolean afterSubSectionCommit) throws IOException { long oldOffset = currentOffset; - flushSectionOutputStream(); + boolean subSectionCommitted = afterSubSectionCommit && writeSubSections; + if (!subSectionCommitted) { + flushSectionOutputStream(); + } if (codec != null) { sectionOutputStream = codec.createOutputStream(underlyingOutputStream); @@ -685,14 +685,20 @@ public void commitSection(FileSummary.Builder summary, SectionName name) subSectionOffset = currentOffset; } + public void commitSubSection(FileSummary.Builder summary, SectionName name) + throws IOException { + this.commitSubSection(summary, name, false); + } + /** * Commit the length and offset of a fsimage sub-section to the summary * index. * @param summary The image summary object * @param name The name of the sub-section to commit + * @param isLast True if sub-section is the last sub-section of each section * @throws IOException */ - public void commitSubSection(FileSummary.Builder summary, SectionName name) + public void commitSubSection(FileSummary.Builder summary, SectionName name, boolean isLast) throws IOException { if (!writeSubSections) { return; @@ -701,7 +707,15 @@ public void commitSubSection(FileSummary.Builder summary, SectionName name) LOG.debug("Saving a subsection for {}", name.toString()); // The output stream must be flushed before the length is obtained // as the flush can move the length forward. 
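The comment retained above ("the output stream must be flushed before the length is obtained") is the crux of the sub-section bookkeeping: bytes still buffered in the section stream have not reached the underlying FileChannel, so its position would under-report the section length. A stand-alone illustration of that constraint, using BufferedOutputStream as a stand-in for the codec-wrapped section stream (this is not the HDFS code itself):

import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.channels.FileChannel;

public class SectionLengthSketch {
  public static void main(String[] args) throws IOException {
    try (FileOutputStream fos = new FileOutputStream("sections.bin")) {
      FileChannel channel = fos.getChannel();
      // Stand-in for the codec-wrapped section output stream.
      OutputStream sectionOut = new BufferedOutputStream(fos);

      long sectionStart = channel.position();
      sectionOut.write(new byte[100]);   // write one "section"

      // Still buffered: the channel has not seen the bytes yet.
      System.out.println("before flush: " + (channel.position() - sectionStart)); // 0
      sectionOut.flush();                // push buffered bytes to the channel
      System.out.println("after flush:  " + (channel.position() - sectionStart)); // 100
    }
  }
}

In the patch itself, commitSubSection additionally re-creates the codec output stream for every sub-section except the last, which appears to be what lets a parallel loader decompress each sub-section independently from its recorded offset.
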
- sectionOutputStream.flush(); + flushSectionOutputStream(); + + if (codec == null || isLast) { + // To avoid empty sub-section, Do not create CompressionOutputStream + // if sub-section is last sub-section of each section + sectionOutputStream = underlyingOutputStream; + } else { + sectionOutputStream = codec.createOutputStream(underlyingOutputStream); + } long length = fileChannel.position() - subSectionOffset; if (length == 0) { LOG.warn("The requested section for {} is empty. It will not be " + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java index a9b2191976465..2365a6a266783 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/FSImageFormatPBSnapshot.java @@ -530,8 +530,8 @@ public void serializeSnapshotDiffSection(OutputStream out) context.checkCancelled(); } if (i % parent.getInodesPerSubSection() == 0) { - parent.commitSubSection(headers, - FSImageFormatProtobuf.SectionName.SNAPSHOT_DIFF_SUB); + parent.commitSubSection(headers, FSImageFormatProtobuf.SectionName.SNAPSHOT_DIFF_SUB); + out = parent.getSectionOutputStream(); } } parent.commitSectionAndSubSection(headers, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java index 2a7a7105fd6e3..cdc067aeb2880 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImage.java @@ -1120,7 +1120,7 @@ public void testParallelSaveAndLoad() throws IOException { } @Test - public void testNoParallelSectionsWithCompressionEnabled() + public void testParallelSaveAndLoadWithCompression() throws IOException { Configuration conf = new Configuration(); conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true); @@ -1137,16 +1137,21 @@ public void testNoParallelSectionsWithCompressionEnabled() getLatestImageSummary(cluster); ArrayList

<Section> sections = Lists.newArrayList( summary.getSectionsList()); + Section inodeSection = + getSubSectionsOfName(sections, SectionName.INODE).get(0); + Section dirSection = getSubSectionsOfName(sections, + SectionName.INODE_DIR).get(0); ArrayList<Section>
inodeSubSections = getSubSectionsOfName(sections, SectionName.INODE_SUB); ArrayList<Section>
    dirSubSections = getSubSectionsOfName(sections, SectionName.INODE_DIR_SUB); + // Compression and parallel can be enabled at the same time. + assertEquals(4, inodeSubSections.size()); + assertEquals(4, dirSubSections.size()); - // As compression is enabled, there should be no sub-sections in the - // image header - assertEquals(0, inodeSubSections.size()); - assertEquals(0, dirSubSections.size()); + ensureSubSectionsAlignWithParent(inodeSubSections, inodeSection); + ensureSubSectionsAlignWithParent(dirSubSections, dirSection); } finally { if (cluster != null) { cluster.shutdown(); @@ -1229,4 +1234,4 @@ public void testUpdateBlocksMapAndNameCacheAsync() throws IOException { SnapshotTestHelper.compareDumpedTreeInFile( preRestartTree, postRestartTree, true); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java index 1c1bb61721a76..48a6a2b77795b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshot.java @@ -23,6 +23,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.SafeModeAction; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ -77,15 +78,18 @@ public class TestFSImageWithSnapshot { MiniDFSCluster cluster; FSNamesystem fsn; DistributedFileSystem hdfs; + + public void createCluster() throws IOException { + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build(); + cluster.waitActive(); + fsn = cluster.getNamesystem(); + hdfs = cluster.getFileSystem(); + } @Before public void setUp() throws Exception { conf = new Configuration(); - cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES) - .build(); - cluster.waitActive(); - fsn = cluster.getNamesystem(); - hdfs = cluster.getFileSystem(); + createCluster(); } @After @@ -512,6 +516,32 @@ public void testSaveLoadImageAfterSnapshotDeletion() hdfs = cluster.getFileSystem(); } + /** + * Test parallel compressed fsimage can be loaded serially. 
+ */ + @Test + public void testLoadParallelCompressedImageSerial() throws Exception { + int s = 0; + cluster.shutdown(); + + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build(); + cluster.waitActive(); + fsn = cluster.getNamesystem(); + hdfs = cluster.getFileSystem(); + hdfs.mkdirs(dir); + SnapshotTestHelper.createSnapshot(hdfs, dir, "s"); + + Path sub1 = new Path(dir, "sub1"); + Path sub1file1 = new Path(sub1, "sub1file1"); + Path sub1file2 = new Path(sub1, "sub1file2"); + DFSTestUtil.createFile(hdfs, sub1file1, BLOCKSIZE, (short) 1, seed); + DFSTestUtil.createFile(hdfs, sub1file2, BLOCKSIZE, (short) 1, seed); + + conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, false); + conf.setBoolean(DFSConfigKeys.DFS_IMAGE_PARALLEL_LOAD_KEY, false); + checkImage(s); + } + void rename(Path src, Path dst) throws Exception { printTree("Before rename " + src + " -> " + dst); hdfs.rename(src, dst); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshotParallelAndCompress.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshotParallelAndCompress.java new file mode 100644 index 0000000000000..0a80bc9d5456e --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSImageWithSnapshotParallelAndCompress.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

    + * http://www.apache.org/licenses/LICENSE-2.0 + *

    + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; + +import org.slf4j.event.Level; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotTestHelper; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.test.GenericTestUtils; + +/** + * This test extends TestFSImageWithSnapshot to test + * enable both fsimage load parallel and fsimage compress. + */ +public class TestFSImageWithSnapshotParallelAndCompress extends TestFSImageWithSnapshot { + { + SnapshotTestHelper.disableLogs(); + GenericTestUtils.setLogLevel(INode.LOG, Level.TRACE); + } + + @Override + public void createCluster() throws IOException { + + // turn on both parallelization and compression + conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true); + conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY, GzipCodec.class.getCanonicalName()); + conf.setBoolean(DFSConfigKeys.DFS_IMAGE_PARALLEL_LOAD_KEY, true); + conf.setInt(DFSConfigKeys.DFS_IMAGE_PARALLEL_INODE_THRESHOLD_KEY, 2); + conf.setInt(DFSConfigKeys.DFS_IMAGE_PARALLEL_TARGET_SECTIONS_KEY, 2); + conf.setInt(DFSConfigKeys.DFS_IMAGE_PARALLEL_THREADS_KEY, 2); + + conf = new Configuration(); + cluster = new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build(); + cluster.waitActive(); + fsn = cluster.getNamesystem(); + hdfs = cluster.getFileSystem(); + } +} From e4ee3d560bddc27a495cc9a158278a9c18276dd0 Mon Sep 17 00:00:00 2001 From: K0K0V0K <109747532+K0K0V0K@users.noreply.github.com> Date: Tue, 27 Aug 2024 17:55:07 +0200 Subject: [PATCH 101/113] YARN-10345 HsWebServices containerlogs does not honor ACLs for completed jobs (#7013) - following rest apis did not have access control - - /ws/v1/history/containerlogs/{containerid}/{filename} - - /ws/v1/history/containers/{containerid}/logs Change-Id: I434f6138966ab22583d356509e40b70d328d9e7c --- .../v2/app/webapp/AMWebServices.java | 15 ++++-- .../mapreduce/v2/hs/webapp/HsWebServices.java | 11 ++++- .../v2/hs/webapp/TestHsWebServicesAcls.java | 46 +++++++++++++++---- 3 files changed, 60 insertions(+), 12 deletions(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMWebServices.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMWebServices.java index 78174afb6f892..e95a5d7d33e3a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMWebServices.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/AMWebServices.java @@ -40,6 +40,7 @@ import org.apache.hadoop.http.JettyUtils; import org.apache.hadoop.mapreduce.JobACL; +import org.apache.hadoop.mapreduce.JobID; import org.apache.hadoop.mapreduce.v2.api.protocolrecords.KillTaskAttemptRequest; import org.apache.hadoop.mapreduce.v2.api.protocolrecords.KillTaskAttemptResponse; 
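The access check added in the AMWebServices/HsWebServices changes below keys off the job that owns a container, derived from the container id string itself (see getJobFromContainerIdString and its example comment). That mapping can be sketched in isolation as follows; this mirrors the parts[2]/parts[3] split used by the patch rather than the real ContainerId parser:

public class ContainerIdToJobIdSketch {
  public static void main(String[] args) {
    // Example taken from the patch's code comment.
    String cid = "container_e06_1724414851587_0004_01_000001";
    String[] parts = cid.split("_");

    // parts[2] is the cluster timestamp and parts[3] the application number,
    // so the owning job is job_<clusterTimestamp>_<appNumber>.
    String jobId = "job" + "_" + parts[2] + "_" + parts[3];

    System.out.println(jobId); // prints job_1724414851587_0004
  }
}

With the job resolved, the existing job-ACL check is reused for the two history-server log endpoints named in the commit message.
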
import org.apache.hadoop.mapreduce.v2.api.protocolrecords.impl.pb.KillTaskAttemptRequestPBImpl; @@ -113,9 +114,17 @@ private void init() { response.setContentType(null); } - /** - * convert a job id string to an actual job and handle all the error checking. - */ + public static Job getJobFromContainerIdString(String cid, AppContext appCtx) + throws NotFoundException { + //example container_e06_1724414851587_0004_01_000001 + String[] parts = cid.split("_"); + return getJobFromJobIdString(JobID.JOB + "_" + parts[2] + "_" + parts[3], appCtx); + } + + + /** + * convert a job id string to an actual job and handle all the error checking. + */ public static Job getJobFromJobIdString(String jid, AppContext appCtx) throws NotFoundException { JobId jobId; Job job; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java index a26724b1bb6b0..d16b70ac6f024 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsWebServices.java @@ -42,6 +42,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.http.JettyUtils; import org.apache.hadoop.mapreduce.JobACL; +import org.apache.hadoop.mapreduce.MRConfig; import org.apache.hadoop.mapreduce.v2.api.records.AMInfo; import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.api.records.TaskId; @@ -87,6 +88,7 @@ public class HsWebServices extends WebServices { private final HistoryContext ctx; private WebApp webapp; private LogServlet logServlet; + private boolean mrAclsEnabled; private @Context HttpServletResponse response; @Context UriInfo uriInfo; @@ -100,6 +102,7 @@ public HsWebServices(final HistoryContext ctx, this.ctx = ctx; this.webapp = webapp; this.logServlet = new LogServlet(conf, this); + this.mrAclsEnabled = conf.getBoolean(MRConfig.MR_ACLS_ENABLED, false); } private boolean hasAccess(Job job, HttpServletRequest request) { @@ -116,6 +119,11 @@ private void checkAccess(Job job, HttpServletRequest request) { throw new WebApplicationException(Status.UNAUTHORIZED); } } + private void checkAccess(String containerIdStr, HttpServletRequest hsr) { + if (mrAclsEnabled) { + checkAccess(AMWebServices.getJobFromContainerIdString(containerIdStr, ctx), hsr); + } + } private void init() { //clear content type @@ -500,7 +508,7 @@ public Response getContainerLogs(@Context HttpServletRequest hsr, @QueryParam(YarnWebServiceParams.MANUAL_REDIRECTION) @DefaultValue("false") boolean manualRedirection) { init(); - + checkAccess(containerIdStr, hsr); WrappedLogMetaRequest.Builder logMetaRequestBuilder = LogServlet.createRequestFromContainerId(containerIdStr); @@ -527,6 +535,7 @@ public Response getContainerLogFile(@Context HttpServletRequest req, @QueryParam(YarnWebServiceParams.MANUAL_REDIRECTION) @DefaultValue("false") boolean manualRedirection) { init(); + checkAccess(containerIdStr, req); return logServlet.getLogFile(req, containerIdStr, filename, format, size, nmId, redirectedFromNode, null, manualRedirection); } diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesAcls.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesAcls.java index 8d4f635e11d68..bb25a97c6cb52 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesAcls.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/webapp/TestHsWebServicesAcls.java @@ -18,23 +18,20 @@ package org.apache.hadoop.mapreduce.v2.hs.webapp; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.fail; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - import java.io.IOException; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; - import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.WebApplicationException; import javax.ws.rs.core.Response.Status; +import org.junit.Before; +import org.junit.Test; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.Path; @@ -60,9 +57,19 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.yarn.api.records.Priority; +import org.apache.hadoop.yarn.server.webapp.LogServlet; import org.apache.hadoop.yarn.webapp.WebApp; -import org.junit.Before; -import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.fail; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyBoolean; +import static org.mockito.Matchers.anyString; +import static org.mockito.Matchers.eq; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; public class TestHsWebServicesAcls { private static String FRIENDLY_USER = "friendly"; @@ -253,6 +260,29 @@ public void testGetJobTaskAttemptIdCountersAcls() { this.taskIdStr, this.taskAttemptIdStr); } + @Test + public void testLogs() { + HttpServletRequest hsr = mock(HttpServletRequest.class); + when(hsr.getRemoteUser()).thenReturn(ENEMY_USER); + hsWebServices.setLogServlet(mock(LogServlet.class)); + String cid = "container_e02_" + jobIdStr.substring(4) + "_01_000001"; + try { + hsWebServices.getContainerLogFile(hsr, cid, "syslog", + null, null, null, false, false); + fail("enemy can access job"); + } catch (WebApplicationException e) { + assertEquals(Status.UNAUTHORIZED, + Status.fromStatusCode(e.getResponse().getStatus())); + } + + when(hsr.getRemoteUser()).thenReturn(FRIENDLY_USER); + hsWebServices.getContainerLogFile(hsr, cid, "syslog", + "format", "1024", "nmid", false, false); + verify(hsWebServices.getLogServlet(), times(1)) + .getLogFile(any(), anyString(), anyString(), + anyString(), anyString(), anyString(), anyBoolean(), eq(null), anyBoolean()); + } + private static HistoryContext buildHistoryContext(final Configuration conf) throws IOException { HistoryContext ctx = new MockHistoryContext(1, 1, 1); From 0aab1a297647688173a024b003e88e98d9ae92ad Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Thu, 29 Aug 2024 
03:18:46 +0800 Subject: [PATCH 102/113] HADOOP-19248. Protobuf code generate and replace should happen together (#6975) Contributed by Cheng Pan --- hadoop-project/pom.xml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 4c69012f08d18..05dccb6298501 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -2309,7 +2309,7 @@ replace-generated-sources - process-sources + generate-sources replace @@ -2329,7 +2329,7 @@ replace-generated-test-sources - process-test-resources + generate-test-resources replace @@ -2349,7 +2349,7 @@ replace-sources - process-sources + generate-sources replace @@ -2369,7 +2369,7 @@ replace-test-sources - process-test-sources + generate-test-sources replace From 0837c84a9f2e80d64e4e170964a5058fa0d61e5e Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Thu, 29 Aug 2024 00:29:19 +0530 Subject: [PATCH 103/113] Revert "HADOOP-19231. Add JacksonUtil to manage Jackson classes (#6953)" This reverts commit fa9bb0d1ac4b27a37ba9df0ee3e1104f1cd85e64. --- .../org/apache/hadoop/conf/Configuration.java | 8 +- .../crypto/key/kms/KMSClientProvider.java | 5 +- .../apache/hadoop/ipc/DecayRpcScheduler.java | 4 +- .../java/org/apache/hadoop/ipc/Server.java | 5 +- .../org/apache/hadoop/jmx/JMXJsonServlet.java | 10 +- .../hadoop/metrics2/MetricsJsonBuilder.java | 5 +- .../DelegationTokenAuthenticationHandler.java | 5 +- .../org/apache/hadoop/util/JacksonUtil.java | 123 ------------------ .../apache/hadoop/util/JsonSerialization.java | 9 +- .../crypto/key/kms/server/KMSJSONReader.java | 6 +- .../server/datanode/DiskBalancerWorkItem.java | 7 +- .../datanode/DiskBalancerWorkStatus.java | 12 +- .../hdfs/util/CombinedHostsFileReader.java | 14 +- .../hdfs/util/CombinedHostsFileWriter.java | 6 +- .../hadoop/hdfs/web/JsonUtilClient.java | 4 +- .../fs/http/client/HttpFSFileSystem.java | 3 +- .../blockmanagement/SlowDiskTracker.java | 4 +- .../blockmanagement/SlowPeerTracker.java | 5 +- .../datanode/fsdataset/impl/FsVolumeImpl.java | 9 +- .../fsdataset/impl/ProvidedVolumeImpl.java | 4 +- .../server/diskbalancer/command/Command.java | 5 +- .../connectors/JsonNodeConnector.java | 7 +- .../datamodel/DiskBalancerCluster.java | 6 +- .../datamodel/DiskBalancerVolume.java | 4 +- .../server/diskbalancer/planner/NodePlan.java | 8 +- .../namenode/NetworkTopologyServlet.java | 5 +- .../namenode/StartupProgressServlet.java | 4 +- .../org/apache/hadoop/hdfs/web/JsonUtil.java | 24 ++-- .../apache/hadoop/mapred/QueueManager.java | 5 +- .../mapreduce/util/JobHistoryEventUtils.java | 3 +- .../hadoop/fs/s3a/impl/S3AEncryption.java | 6 +- .../fs/azure/NativeAzureFileSystem.java | 4 +- .../fs/azure/RemoteSASKeyGeneratorImpl.java | 8 +- .../fs/azure/RemoteWasbAuthorizerImpl.java | 10 +- .../azurebfs/oauth2/AzureADAuthenticator.java | 8 +- .../azurebfs/services/AbfsHttpOperation.java | 7 +- .../tools/dynamometer/DynoInfraUtils.java | 5 +- .../apache/hadoop/tools/rumen/Anonymizer.java | 11 +- .../tools/rumen/JsonObjectMapperParser.java | 9 +- .../tools/rumen/JsonObjectMapperWriter.java | 5 +- .../hadoop/tools/rumen/state/StatePool.java | 11 +- .../hadoop/tools/rumen/TestHistograms.java | 4 +- .../org/apache/hadoop/yarn/sls/AMRunner.java | 10 +- .../hadoop/yarn/sls/RumenToSLSConverter.java | 8 +- .../sls/synthetic/SynthTraceJobProducer.java | 4 +- .../hadoop/yarn/sls/utils/SLSUtils.java | 12 +- .../yarn/sls/TestSynthJobGeneration.java | 5 +- .../application/AppCatalogSolrClient.java | 54 ++++---- 
.../application/YarnServiceClient.java | 34 +++-- .../component/instance/ComponentInstance.java | 3 +- .../yarn/service/utils/JsonSerDeser.java | 5 +- .../service/utils/PublishedConfiguration.java | 17 +-- .../api/impl/FileSystemTimelineWriter.java | 8 +- .../client/api/impl/TimelineClientImpl.java | 3 +- .../yarn/util/DockerClientConfigHandler.java | 9 +- .../yarn/util/timeline/TimelineUtils.java | 10 +- .../apache/hadoop/yarn/webapp/Controller.java | 5 +- .../server/timeline/GenericObjectMapper.java | 12 +- .../containermanager/AuxServices.java | 3 +- .../NetworkTagMappingJsonManager.java | 5 +- .../linux/runtime/RuncContainerRuntime.java | 4 +- .../runc/ImageTagToManifestPlugin.java | 8 +- .../resource/ResourceProfilesManagerImpl.java | 5 +- .../placement/MappingRuleCreator.java | 8 +- .../converter/LegacyMappingRuleToJson.java | 15 +-- .../FSConfigToCSConfigConverter.java | 6 +- .../timeline/EntityGroupFSTimelineStore.java | 3 +- .../timeline/LevelDBCacheTimelineStore.java | 5 +- .../server/timeline/PluginStoreTestUtils.java | 9 +- .../documentstore/JsonUtils.java | 3 +- .../storage/FileSystemTimelineReaderImpl.java | 8 +- 71 files changed, 296 insertions(+), 392 deletions(-) delete mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JacksonUtil.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 4f372374abe1b..94285a4dfb7e5 100755 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -22,6 +22,7 @@ import com.ctc.wstx.io.StreamBootstrapper; import com.ctc.wstx.io.SystemId; import com.ctc.wstx.stax.WstxInputFactory; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import java.io.BufferedInputStream; @@ -100,7 +101,6 @@ import org.apache.hadoop.security.alias.CredentialProviderFactory; import org.apache.hadoop.thirdparty.com.google.common.base.Strings; import org.apache.hadoop.util.ConfigurationHelper; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.StringInterner; @@ -3792,7 +3792,8 @@ public static void dumpConfiguration(Configuration config, throw new IllegalArgumentException("Property " + propertyName + " not found"); } else { - JsonGenerator dumpGenerator = JacksonUtil.getSharedWriter().createGenerator(out); + JsonFactory dumpFactory = new JsonFactory(); + JsonGenerator dumpGenerator = dumpFactory.createGenerator(out); dumpGenerator.writeStartObject(); dumpGenerator.writeFieldName("property"); appendJSONProperty(dumpGenerator, config, propertyName, @@ -3830,7 +3831,8 @@ public static void dumpConfiguration(Configuration config, */ public static void dumpConfiguration(Configuration config, Writer out) throws IOException { - JsonGenerator dumpGenerator = JacksonUtil.getSharedWriter().createGenerator(out); + JsonFactory dumpFactory = new JsonFactory(); + JsonGenerator dumpGenerator = dumpFactory.createGenerator(out); dumpGenerator.writeStartObject(); dumpGenerator.writeFieldName("properties"); dumpGenerator.writeStartArray(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java index fcacf1481a757..a3293620ab9e4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/KMSClientProvider.java @@ -42,7 +42,6 @@ import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSelector; import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticatedURL; import org.apache.hadoop.util.HttpExceptionUtils; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.JsonSerialization; import org.apache.hadoop.util.KMSUtil; import org.apache.http.client.utils.URIBuilder; @@ -80,6 +79,7 @@ import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension; import org.apache.hadoop.crypto.key.KeyProviderCryptoExtension.CryptoExtension; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.thirdparty.com.google.common.base.Strings; @@ -595,10 +595,11 @@ private T call(HttpURLConnection conn, Object jsonOutput, && conn.getContentType().trim().toLowerCase() .startsWith(APPLICATION_JSON_MIME) && klass != null) { + ObjectMapper mapper = new ObjectMapper(); InputStream is = null; try { is = conn.getInputStream(); - ret = JacksonUtil.getSharedReader().readValue(is, klass); + ret = mapper.readValue(is, klass); } finally { IOUtils.closeStream(is); } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java index 4d7cd023b5afa..63274bb01e72d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/DecayRpcScheduler.java @@ -38,10 +38,10 @@ import javax.management.ObjectName; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.AtomicDoubleArray; import org.apache.commons.lang3.exception.ExceptionUtils; @@ -146,7 +146,7 @@ public class DecayRpcScheduler implements RpcScheduler, public static final Logger LOG = LoggerFactory.getLogger(DecayRpcScheduler.class); - private static final ObjectWriter WRITER = JacksonUtil.getSharedWriter(); + private static final ObjectWriter WRITER = new ObjectMapper().writer(); // Track the decayed and raw (no decay) number of calls for each schedulable // identity from all previous decay windows: idx 0 for decayed call cost and diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index a808f07b0c0aa..0d9e7296d2a4c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -121,7 +121,6 @@ import org.apache.hadoop.security.token.SecretManager.InvalidToken; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.util.ExitUtil; 
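The revert above removes the shared JacksonUtil helpers and restores the earlier call sites, several of which construct a new ObjectMapper per call while others keep a static ObjectWriter. For context, the trade-off the reverted helper addressed is that an ObjectMapper is thread-safe once configured but comparatively expensive to build, so sharing one instance (or a writer/reader derived from it) avoids per-call setup cost. A purely illustrative sketch of that pattern, not Hadoop code:

import java.util.Collections;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;

public class SharedMapperSketch {
  // Thread-safe once configured; constructing it is the expensive part.
  private static final ObjectMapper MAPPER = new ObjectMapper();
  private static final ObjectWriter WRITER = MAPPER.writer();

  public static String toJson(Object value) throws Exception {
    // Every caller reuses the same configured writer.
    return WRITER.writeValueAsString(value);
  }

  public static void main(String[] args) throws Exception {
    System.out.println(toJson(Collections.singletonMap("user", "alice")));
  }
}
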
-import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.ProtoUtil; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; @@ -131,6 +130,7 @@ import org.apache.hadoop.tracing.TraceScope; import org.apache.hadoop.tracing.Tracer; import org.apache.hadoop.tracing.TraceUtils; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; @@ -3843,8 +3843,9 @@ public int getNumOpenConnections() { * @return Get the NumOpenConnections/User. */ public String getNumOpenConnectionsPerUser() { + ObjectMapper mapper = new ObjectMapper(); try { - return JacksonUtil.getSharedWriter() + return mapper .writeValueAsString(connectionManager.getUserToConnectionsMap()); } catch (IOException ignored) { } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java index 6f54364fff4e0..f089db502783e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/jmx/JMXJsonServlet.java @@ -43,13 +43,13 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.commons.lang3.NotImplementedException; import org.apache.hadoop.http.HttpServer2; -import org.apache.hadoop.util.JacksonUtil; /* * This servlet is based off of the JMXProxyServlet from Tomcat 7.0.14. It has @@ -134,6 +134,11 @@ public class JMXJsonServlet extends HttpServlet { */ protected transient MBeanServer mBeanServer; + /** + * Json Factory to create Json generators for write objects in json format + */ + protected transient JsonFactory jsonFactory; + /** * Initialize this servlet. 
*/ @@ -141,6 +146,7 @@ public class JMXJsonServlet extends HttpServlet { public void init() throws ServletException { // Retrieve the MBean server mBeanServer = ManagementFactory.getPlatformMBeanServer(); + jsonFactory = new JsonFactory(); } protected boolean isInstrumentationAccessAllowed(HttpServletRequest request, @@ -181,7 +187,7 @@ public void doGet(HttpServletRequest request, HttpServletResponse response) { response.setHeader(ACCESS_CONTROL_ALLOW_METHODS, "GET"); response.setHeader(ACCESS_CONTROL_ALLOW_ORIGIN, "*"); - jg = JacksonUtil.getSharedWriter().createGenerator(writer); + jg = jsonFactory.createGenerator(writer); jg.disable(JsonGenerator.Feature.AUTO_CLOSE_TARGET); jg.useDefaultPrettyPrinter(); jg.writeStartObject(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsJsonBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsJsonBuilder.java index 3534adfd6903e..3a9be12803143 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsJsonBuilder.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsJsonBuilder.java @@ -21,8 +21,8 @@ import org.apache.commons.lang3.exception.ExceptionUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.util.JacksonUtil; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -46,7 +46,8 @@ public class MetricsJsonBuilder extends MetricsRecordBuilder { private final MetricsCollector parent; private Map innerMetrics = new LinkedHashMap<>(); - private static final ObjectWriter WRITER = JacksonUtil.getSharedWriter(); + private static final ObjectWriter WRITER = + new ObjectMapper().writer(); /** * Build an instance. 
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java index 62c7c4ba6e024..f4ede6f35edb0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/token/delegation/web/DelegationTokenAuthenticationHandler.java @@ -46,7 +46,6 @@ import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenIdentifier; import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager; import org.apache.hadoop.util.HttpExceptionUtils; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -166,7 +165,7 @@ public void initTokenManager(Properties config) { @VisibleForTesting public void initJsonFactory(Properties config) { boolean hasFeature = false; - JsonFactory tmpJsonFactory = JacksonUtil.createBasicJsonFactory(); + JsonFactory tmpJsonFactory = new JsonFactory(); for (Map.Entry entry : config.entrySet()) { String key = (String)entry.getKey(); @@ -336,7 +335,7 @@ public boolean managementOperation(AuthenticationToken token, if (map != null) { response.setContentType(MediaType.APPLICATION_JSON); Writer writer = response.getWriter(); - ObjectMapper jsonMapper = JacksonUtil.createObjectMapper(jsonFactory); + ObjectMapper jsonMapper = new ObjectMapper(jsonFactory); jsonMapper.writeValue(writer, map); writer.write(ENTER); writer.flush(); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JacksonUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JacksonUtil.java deleted file mode 100644 index 7d90555c8780b..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JacksonUtil.java +++ /dev/null @@ -1,123 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.util; - -import com.fasterxml.jackson.core.JsonFactory; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.ObjectReader; -import com.fasterxml.jackson.databind.ObjectWriter; -import com.fasterxml.jackson.databind.json.JsonMapper; - -import org.apache.hadoop.classification.InterfaceAudience.Private; - -/** - * Utility for sharing code related to Jackson usage in Hadoop. 
- */ -@Private -public final class JacksonUtil { - - private static final ObjectMapper SHARED_BASIC_OBJECT_MAPPER = createBasicObjectMapper(); - private static final ObjectReader SHARED_BASIC_OBJECT_READER = - SHARED_BASIC_OBJECT_MAPPER.reader(); - private static final ObjectWriter SHARED_BASIC_OBJECT_WRITER = - SHARED_BASIC_OBJECT_MAPPER.writer(); - private static final ObjectWriter SHARED_BASIC_OBJECT_WRITER_PRETTY = - SHARED_BASIC_OBJECT_MAPPER.writerWithDefaultPrettyPrinter(); - - /** - * Creates a new {@link JsonFactory} instance with basic configuration. - * - * @return an {@link JsonFactory} with basic configuration - */ - public static JsonFactory createBasicJsonFactory() { - // deliberately return a new instance instead of sharing one because we can't trust - // that users won't modify this instance - return new JsonFactory(); - } - - /** - * Creates a new {@link ObjectMapper} instance with basic configuration. - * - * @return an {@link ObjectMapper} with basic configuration - */ - public static ObjectMapper createBasicObjectMapper() { - // deliberately return a new instance instead of sharing one because we can't trust - // that users won't modify this instance - return JsonMapper.builder(createBasicJsonFactory()).build(); - } - - /** - * Creates a new {@link ObjectMapper} instance based on the configuration - * in the input {@link JsonFactory}. - * - * @param jsonFactory a pre-configured {@link JsonFactory} - * @return an {@link ObjectMapper} with configuration set by the input {@link JsonFactory}. - */ - public static ObjectMapper createObjectMapper(final JsonFactory jsonFactory) { - return JsonMapper.builder(jsonFactory).build(); - } - - /** - * Returns a shared {@link ObjectReader} instance with basic configuration. - * - * @return a shared {@link ObjectReader} instance with basic configuration - */ - public static ObjectReader getSharedReader() { - return SHARED_BASIC_OBJECT_READER; - } - - /** - * Returns an {@link ObjectReader} for the given type instance with basic configuration. - * - * @param type the class that the reader has to support - * @return an {@link ObjectReader} instance with basic configuration - */ - public static ObjectReader createBasicReaderFor(Class type) { - return SHARED_BASIC_OBJECT_MAPPER.readerFor(type); - } - - /** - * Returns a shared {@link ObjectWriter} instance with basic configuration. - * - * @return a shared {@link ObjectWriter} instance with basic configuration - */ - public static ObjectWriter getSharedWriter() { - return SHARED_BASIC_OBJECT_WRITER; - } - - /** - * Returns a shared {@link ObjectWriter} instance with pretty print and basic configuration. - * - * @return a shared {@link ObjectWriter} instance with pretty print and basic configuration - */ - public static ObjectWriter getSharedWriterWithPrettyPrint() { - return SHARED_BASIC_OBJECT_WRITER_PRETTY; - } - - /** - * Returns an {@link ObjectWriter} for the given type instance with basic configuration. 
- * - * @param type the class that the writer has to support - * @return an {@link ObjectWriter} instance with basic configuration - */ - public static ObjectWriter createBasicWriterFor(Class type) { - return SHARED_BASIC_OBJECT_MAPPER.writerFor(type); - } - - private JacksonUtil() {} -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java index 05b069c3ad9b8..52c6c4505226a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java @@ -76,8 +76,11 @@ public class JsonSerialization { private final Class classType; private final ObjectMapper mapper; - private static final ObjectWriter WRITER = JacksonUtil.getSharedWriterWithPrettyPrint(); - private static final ObjectReader MAP_READER = JacksonUtil.createBasicReaderFor(Map.class); + private static final ObjectWriter WRITER = + new ObjectMapper().writerWithDefaultPrettyPrinter(); + + private static final ObjectReader MAP_READER = + new ObjectMapper().readerFor(Map.class); /** * @return an ObjectWriter which pretty-prints its output @@ -103,7 +106,7 @@ public JsonSerialization(Class classType, boolean failOnUnknownProperties, boolean pretty) { Preconditions.checkArgument(classType != null, "null classType"); this.classType = classType; - this.mapper = JacksonUtil.createBasicObjectMapper(); + this.mapper = new ObjectMapper(); mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, failOnUnknownProperties); mapper.configure(SerializationFeature.INDENT_OUTPUT, pretty); diff --git a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSJSONReader.java b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSJSONReader.java index 2f7a6d8557731..af781f5277850 100644 --- a/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSJSONReader.java +++ b/hadoop-common-project/hadoop-kms/src/main/java/org/apache/hadoop/crypto/key/kms/server/KMSJSONReader.java @@ -17,8 +17,9 @@ */ package org.apache.hadoop.crypto.key.kms.server; +import com.fasterxml.jackson.databind.ObjectMapper; + import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.util.JacksonUtil; import javax.ws.rs.Consumes; import javax.ws.rs.WebApplicationException; @@ -37,6 +38,7 @@ @Consumes(MediaType.APPLICATION_JSON) @InterfaceAudience.Private public class KMSJSONReader implements MessageBodyReader { + private static final ObjectMapper MAPPER = new ObjectMapper(); @Override public boolean isReadable(Class type, Type genericType, @@ -50,6 +52,6 @@ public Object readFrom(Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, InputStream entityStream) throws IOException, WebApplicationException { - return JacksonUtil.getSharedReader().readValue(entityStream, type); + return MAPPER.readValue(entityStream, type); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkItem.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkItem.java index 041eb2912be50..d1ad5a2079f5f 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkItem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkItem.java @@ -20,8 +20,8 @@ package org.apache.hadoop.hdfs.server.datanode; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -35,8 +35,9 @@ @InterfaceStability.Unstable @JsonInclude(JsonInclude.Include.NON_DEFAULT) public class DiskBalancerWorkItem { + private static final ObjectMapper MAPPER = new ObjectMapper(); private static final ObjectReader READER = - JacksonUtil.createBasicReaderFor(DiskBalancerWorkItem.class); + new ObjectMapper().readerFor(DiskBalancerWorkItem.class); private long startTime; private long secondsElapsed; @@ -172,7 +173,7 @@ public void incBlocksCopied() { * @throws IOException */ public String toJson() throws IOException { - return JacksonUtil.getSharedWriter().writeValueAsString(this); + return MAPPER.writeValueAsString(this); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkStatus.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkStatus.java index 7ea6e9d885e9e..5a5da7326a4e0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkStatus.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/server/datanode/DiskBalancerWorkStatus.java @@ -23,7 +23,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.SerializationFeature; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -40,13 +39,14 @@ @InterfaceAudience.Private @InterfaceStability.Unstable public class DiskBalancerWorkStatus { - private static final ObjectMapper MAPPER = JacksonUtil.createBasicObjectMapper(); + private static final ObjectMapper MAPPER = new ObjectMapper(); private static final ObjectMapper MAPPER_WITH_INDENT_OUTPUT = - JacksonUtil.createBasicObjectMapper().enable(SerializationFeature.INDENT_OUTPUT); + new ObjectMapper().enable(SerializationFeature.INDENT_OUTPUT); private static final ObjectReader READER_WORKSTATUS = - MAPPER.readerFor(DiskBalancerWorkStatus.class); - private static final ObjectReader READER_WORKENTRY = MAPPER.readerFor( - defaultInstance().constructCollectionType(List.class, DiskBalancerWorkEntry.class)); + new ObjectMapper().readerFor(DiskBalancerWorkStatus.class); + private static final ObjectReader READER_WORKENTRY = new ObjectMapper() + .readerFor(defaultInstance().constructCollectionType(List.class, + DiskBalancerWorkEntry.class)); private final List currentState; private Result result; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java index 
a41b727ab2d20..33f4934e5489d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileReader.java @@ -18,7 +18,9 @@ package org.apache.hadoop.hdfs.util; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import java.io.File; @@ -40,7 +42,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.protocol.DatanodeAdminProperties; -import org.apache.hadoop.util.JacksonUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -82,6 +83,7 @@ private CombinedHostsFileReader() { public static DatanodeAdminProperties[] readFile(final String hostsFilePath) throws IOException { DatanodeAdminProperties[] allDNs = new DatanodeAdminProperties[0]; + ObjectMapper objectMapper = new ObjectMapper(); File hostFile = new File(hostsFilePath); boolean tryOldFormat = false; @@ -89,8 +91,7 @@ private CombinedHostsFileReader() { try (Reader input = new InputStreamReader( Files.newInputStream(hostFile.toPath()), StandardCharsets.UTF_8)) { - allDNs = JacksonUtil.getSharedReader() - .readValue(input, DatanodeAdminProperties[].class); + allDNs = objectMapper.readValue(input, DatanodeAdminProperties[].class); } catch (JsonMappingException jme) { // The old format doesn't have json top-level token to enclose // the array. @@ -102,12 +103,15 @@ private CombinedHostsFileReader() { } if (tryOldFormat) { - ObjectReader objectReader = JacksonUtil.createBasicReaderFor(DatanodeAdminProperties.class); + ObjectReader objectReader = + objectMapper.readerFor(DatanodeAdminProperties.class); + JsonFactory jsonFactory = new JsonFactory(); List all = new ArrayList<>(); try (Reader input = new InputStreamReader(Files.newInputStream(Paths.get(hostsFilePath)), StandardCharsets.UTF_8)) { - Iterator iterator = objectReader.readValues(input); + Iterator iterator = + objectReader.readValues(jsonFactory.createParser(input)); while (iterator.hasNext()) { DatanodeAdminProperties properties = iterator.next(); all.add(properties); diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileWriter.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileWriter.java index dcd08cfc7010f..de4c12d556cc7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileWriter.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/util/CombinedHostsFileWriter.java @@ -26,11 +26,11 @@ import java.nio.file.Paths; import java.util.Set; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.protocol.DatanodeAdminProperties; -import org.apache.hadoop.util.JacksonUtil; /** * Writer support for JSON-based datanode configuration, an alternative format @@ -59,10 +59,12 @@ private CombinedHostsFileWriter() { */ public static void writeFile(final String hostsFile, final Set allDNs) throws IOException { + final ObjectMapper objectMapper = new ObjectMapper(); + try (Writer output = new 
OutputStreamWriter(Files.newOutputStream(Paths.get(hostsFile)), StandardCharsets.UTF_8)) { - JacksonUtil.getSharedWriter().writeValue(output, allDNs); + objectMapper.writeValue(output, allDNs); } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java index 54a44b33b17b7..108f74997a63e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/web/JsonUtilClient.java @@ -17,12 +17,12 @@ */ package org.apache.hadoop.hdfs.web; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.thirdparty.com.google.common.collect.Maps; import org.apache.hadoop.fs.ContentSummary; @@ -654,7 +654,7 @@ static List toXAttrNames(final Map json) } final String namesInJson = (String) json.get("XAttrNames"); - ObjectReader reader = JacksonUtil.createBasicReaderFor(List.class); + ObjectReader reader = new ObjectMapper().readerFor(List.class); final List xattrs = reader.readValue(namesInJson); final List names = Lists.newArrayListWithCapacity(json.keySet().size()); diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java index 1ec907004bd26..dab4776575bff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/fs/http/client/HttpFSFileSystem.java @@ -71,7 +71,6 @@ import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticator; import org.apache.hadoop.security.token.delegation.web.KerberosDelegationTokenAuthenticator; import org.apache.hadoop.util.HttpExceptionUtils; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Lists; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.ReflectionUtils; @@ -1819,7 +1818,7 @@ public Collection getTrashRoots(boolean allUsers) { @VisibleForTesting static BlockLocation[] toBlockLocations(JSONObject json) throws IOException { - ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + ObjectMapper mapper = new ObjectMapper(); MapType subType = mapper.getTypeFactory().constructMapType(Map.class, String.class, BlockLocation[].class); MapType rootType = mapper.getTypeFactory().constructMapType(Map.class, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java index 312d63daed4e1..798b5fb5966f7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowDiskTracker.java @@ -21,6 +21,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import 
com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableList; @@ -31,7 +32,6 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports.DiskOp; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Lists; import org.apache.hadoop.util.Timer; import org.slf4j.Logger; @@ -71,7 +71,7 @@ public class SlowDiskTracker { /** * ObjectWriter to convert JSON reports to String. */ - private static final ObjectWriter WRITER = JacksonUtil.getSharedWriter(); + private static final ObjectWriter WRITER = new ObjectMapper().writer(); /** * Number of disks to include in JSON report per operation. We will return diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerTracker.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerTracker.java index 3774a9dbdff21..e4feb4815eee4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerTracker.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/SlowPeerTracker.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hdfs.server.blockmanagement; import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; @@ -29,7 +30,6 @@ import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.server.protocol.OutlierMetrics; import org.apache.hadoop.hdfs.server.protocol.SlowPeerReports; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Timer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -75,8 +75,7 @@ public class SlowPeerTracker { /** * ObjectWriter to convert JSON reports to String. */ - private static final ObjectWriter WRITER = JacksonUtil.getSharedWriter(); - + private static final ObjectWriter WRITER = new ObjectMapper().writer(); /** * Number of nodes to include in JSON report. We will return nodes with * the highest number of votes from peers. 
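For reference, the shape these hunks converge on in SlowDiskTracker and SlowPeerTracker is a statically held ObjectWriter created from a locally constructed ObjectMapper, which is safe because ObjectWriter is immutable and thread-safe. A minimal sketch of that idiom follows; the class, method, and report names are illustrative and are not taken from this patch.

    import com.fasterxml.jackson.core.JsonProcessingException;
    import com.fasterxml.jackson.databind.ObjectMapper;
    import com.fasterxml.jackson.databind.ObjectWriter;

    import java.util.Map;
    import java.util.TreeMap;

    /** Illustrative only: serialize a small report with a statically held writer. */
    public final class JsonReportExample {

      // ObjectWriter is immutable and thread-safe, so one shared instance is enough.
      private static final ObjectWriter WRITER = new ObjectMapper().writer();

      private JsonReportExample() {
      }

      /** Convert a report map to its JSON string form. */
      public static String toJson(Map<String, Long> report) throws JsonProcessingException {
        return WRITER.writeValueAsString(report);
      }

      public static void main(String[] args) throws JsonProcessingException {
        Map<String, Long> report = new TreeMap<>();
        report.put("slowDisks", 3L);
        System.out.println(toJson(report)); // prints {"slowDisks":3}
      }
    }
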
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java index 080418db08afa..6b026823f19f9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/FsVolumeImpl.java @@ -79,18 +79,18 @@ import org.apache.hadoop.util.DataChecksum; import org.apache.hadoop.util.DiskChecker.DiskErrorException; import org.apache.hadoop.util.DiskChecker.DiskOutOfSpaceException; -import org.apache.hadoop.util.Preconditions; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Timer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.base.Joiner; +import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; /** @@ -103,9 +103,10 @@ public class FsVolumeImpl implements FsVolumeSpi { public static final Logger LOG = LoggerFactory.getLogger(FsVolumeImpl.class); - private static final ObjectWriter WRITER = JacksonUtil.getSharedWriterWithPrettyPrint(); + private static final ObjectWriter WRITER = + new ObjectMapper().writerWithDefaultPrettyPrinter(); private static final ObjectReader READER = - JacksonUtil.createBasicReaderFor(BlockIteratorState.class); + new ObjectMapper().readerFor(BlockIteratorState.class); private final FsDatasetImpl dataset; private final String storageID; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ProvidedVolumeImpl.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ProvidedVolumeImpl.java index 816a765c52907..69a46257317bf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ProvidedVolumeImpl.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/fsdataset/impl/ProvidedVolumeImpl.java @@ -32,6 +32,7 @@ import java.util.concurrent.atomic.AtomicLong; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.InterfaceAudience; @@ -59,7 +60,6 @@ import org.apache.hadoop.hdfs.server.datanode.checker.VolumeCheckResult; import org.apache.hadoop.hdfs.server.datanode.fsdataset.FsVolumeSpi; import org.apache.hadoop.util.DiskChecker.DiskErrorException; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Timer; @@ -369,7 +369,7 @@ public void releaseReservedSpace(long bytesToRelease) { } private static final ObjectWriter WRITER = - JacksonUtil.getSharedWriterWithPrettyPrint(); + new ObjectMapper().writerWithDefaultPrettyPrinter(); private static class ProvidedBlockIteratorState { ProvidedBlockIteratorState() { diff 
--git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java index e9ba658ecdc91..c90b77e98d2e8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/command/Command.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.server.diskbalancer.command; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import org.apache.commons.cli.CommandLine; @@ -46,7 +47,6 @@ import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.HostsFileReader; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Lists; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -77,7 +77,8 @@ * Common interface for command handling. */ public abstract class Command extends Configured implements Closeable { - private static final ObjectReader READER = JacksonUtil.createBasicReaderFor(HashMap.class); + private static final ObjectReader READER = + new ObjectMapper().readerFor(HashMap.class); static final Logger LOG = LoggerFactory.getLogger(Command.class); private Map validArgs = new HashMap<>(); private URI clusterURI; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/JsonNodeConnector.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/JsonNodeConnector.java index 4e76c7e45e999..1cc82253f9885 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/JsonNodeConnector.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/connectors/JsonNodeConnector.java @@ -17,14 +17,15 @@ package org.apache.hadoop.hdfs.server.diskbalancer.connectors; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hdfs.server.diskbalancer.datamodel.DiskBalancerCluster; -import org.apache.hadoop.hdfs.server.diskbalancer.datamodel.DiskBalancerDataNode; +import org.apache.hadoop.hdfs.server.diskbalancer.datamodel + .DiskBalancerDataNode; import java.io.File; import java.net.URL; @@ -37,7 +38,7 @@ public class JsonNodeConnector implements ClusterConnector { private static final Logger LOG = LoggerFactory.getLogger(JsonNodeConnector.class); private static final ObjectReader READER = - JacksonUtil.createBasicReaderFor(DiskBalancerCluster.class); + new ObjectMapper().readerFor(DiskBalancerCluster.class); private final URL clusterURI; /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerCluster.java index f24f92ff1392d..7e935a3f82058 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerCluster.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerCluster.java @@ -19,7 +19,9 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; +import org.apache.hadoop.util.Preconditions; import org.apache.commons.io.FileUtils; import org.slf4j.Logger; @@ -29,8 +31,6 @@ import org.apache.hadoop.hdfs.server.diskbalancer.planner.Planner; import org.apache.hadoop.hdfs.server.diskbalancer.planner.PlannerFactory; import org.apache.hadoop.hdfs.web.JsonUtil; -import org.apache.hadoop.util.JacksonUtil; -import org.apache.hadoop.util.Preconditions; import java.io.File; import java.io.IOException; @@ -73,7 +73,7 @@ public class DiskBalancerCluster { private static final Logger LOG = LoggerFactory.getLogger(DiskBalancerCluster.class); private static final ObjectReader READER = - JacksonUtil.createBasicReaderFor(DiskBalancerCluster.class); + new ObjectMapper().readerFor(DiskBalancerCluster.class); private final Set exclusionList; private final Set inclusionList; private ClusterConnector clusterConnector; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolume.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolume.java index e354a23519ff2..e43b83e39ce3a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolume.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/datamodel/DiskBalancerVolume.java @@ -19,10 +19,10 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import org.apache.hadoop.hdfs.web.JsonUtil; -import org.apache.hadoop.util.JacksonUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -34,7 +34,7 @@ @JsonIgnoreProperties(ignoreUnknown = true) public class DiskBalancerVolume { private static final ObjectReader READER = - JacksonUtil.createBasicReaderFor(DiskBalancerVolume.class); + new ObjectMapper().readerFor(DiskBalancerVolume.class); private static final Logger LOG = LoggerFactory.getLogger(DiskBalancerVolume.class); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/NodePlan.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/NodePlan.java index 3dfd27dde4d2d..39a7c57bca2cd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/NodePlan.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/diskbalancer/planner/NodePlan.java @@ -18,9 +18,9 @@ package org.apache.hadoop.hdfs.server.diskbalancer.planner; import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.ObjectWriter; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Preconditions; import java.io.IOException; @@ -39,8 +39,10 @@ public class NodePlan { private int port; private long timeStamp; - private static final 
ObjectReader READER = JacksonUtil.createBasicReaderFor(NodePlan.class); - private static final ObjectWriter WRITER = JacksonUtil.createBasicWriterFor(NodePlan.class); + private static final ObjectMapper MAPPER = new ObjectMapper(); + private static final ObjectReader READER = MAPPER.readerFor(NodePlan.class); + private static final ObjectWriter WRITER = MAPPER.writerFor( + MAPPER.constructType(NodePlan.class)); /** * returns timestamp when this plan was created. * diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NetworkTopologyServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NetworkTopologyServlet.java index 16d9e203d3143..a6460280835d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NetworkTopologyServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NetworkTopologyServlet.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; @@ -25,7 +26,6 @@ import org.apache.hadoop.net.NodeBase; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.net.HttpHeaders; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.StringUtils; import javax.servlet.ServletContext; @@ -123,7 +123,8 @@ protected void printTopology(PrintStream stream, List leaves, protected void printJsonFormat(PrintStream stream, Map> tree, ArrayList racks) throws IOException { - JsonGenerator dumpGenerator = JacksonUtil.getSharedWriter().createGenerator(stream); + JsonFactory dumpFactory = new JsonFactory(); + JsonGenerator dumpGenerator = dumpFactory.createGenerator(stream); dumpGenerator.writeStartArray(); for(String r : racks) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java index 17cd49c2d5708..449a1aa62ab46 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/StartupProgressServlet.java @@ -21,6 +21,7 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import org.apache.hadoop.hdfs.server.namenode.startupprogress.Phase; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; @@ -28,7 +29,6 @@ import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.classification.InterfaceAudience; @@ -61,7 +61,7 @@ protected void doGet(HttpServletRequest req, HttpServletResponse resp) StartupProgress prog = NameNodeHttpServer.getStartupProgressFromContext( getServletContext()); StartupProgressView view = prog.createView(); - JsonGenerator json = JacksonUtil.getSharedWriter().createGenerator(resp.getWriter()); + JsonGenerator json = new 
JsonFactory().createGenerator(resp.getWriter()); try { json.writeStartObject(); json.writeNumberField(ELAPSED_TIME, view.getElapsedTime()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java index 1ec6730bb87d2..5f90404ebee25 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/JsonUtil.java @@ -17,7 +17,6 @@ */ package org.apache.hadoop.hdfs.web; -import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.ContentSummary; @@ -39,12 +38,13 @@ import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Lists; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.thirdparty.com.google.common.collect.ImmutableMap; +import com.fasterxml.jackson.databind.ObjectMapper; + import java.io.IOException; import java.util.*; @@ -52,11 +52,11 @@ public class JsonUtil { private static final Object[] EMPTY_OBJECT_ARRAY = {}; - // Reuse ObjectWriter instance for improving performance. - // ObjectWriter is thread safe as long as we always configure instance + // Reuse ObjectMapper instance for improving performance. + // ObjectMapper is thread safe as long as we always configure instance // before use. We don't have a re-entrant call pattern in WebHDFS, // so we just need to worry about thread-safety. - private static final ObjectWriter SHARED_WRITER = JacksonUtil.getSharedWriter(); + private static final ObjectMapper MAPPER = new ObjectMapper(); /** Convert a token object to a Json string. */ public static String toJsonString(final Token token @@ -93,7 +93,7 @@ public static String toJsonString(final String key, final Object value) { final Map m = new TreeMap(); m.put(key, value); try { - return SHARED_WRITER.writeValueAsString(m); + return MAPPER.writeValueAsString(m); } catch (IOException ignored) { } return null; @@ -113,7 +113,7 @@ public static String toJsonString(final HdfsFileStatus status, final Map m = toJsonMap(status); try { return includeType ? 
- toJsonString(FileStatus.class, m) : SHARED_WRITER.writeValueAsString(m); + toJsonString(FileStatus.class, m) : MAPPER.writeValueAsString(m); } catch (IOException ignored) { } return null; @@ -453,7 +453,7 @@ public static String toJsonString(final AclStatus status) { finalMap.put(AclStatus.class.getSimpleName(), m); try { - return SHARED_WRITER.writeValueAsString(finalMap); + return MAPPER.writeValueAsString(finalMap); } catch (IOException ignored) { } return null; @@ -491,7 +491,7 @@ public static String toJsonString(final List xAttrs, final XAttrCodec encoding) throws IOException { final Map finalMap = new TreeMap(); finalMap.put("XAttrs", toJsonArray(xAttrs, encoding)); - return SHARED_WRITER.writeValueAsString(finalMap); + return MAPPER.writeValueAsString(finalMap); } public static String toJsonString(final List xAttrs) @@ -500,14 +500,14 @@ public static String toJsonString(final List xAttrs) for (XAttr xAttr : xAttrs) { names.add(XAttrHelper.getPrefixedName(xAttr)); } - String ret = SHARED_WRITER.writeValueAsString(names); + String ret = MAPPER.writeValueAsString(names); final Map finalMap = new TreeMap(); finalMap.put("XAttrNames", ret); - return SHARED_WRITER.writeValueAsString(finalMap); + return MAPPER.writeValueAsString(finalMap); } public static String toJsonString(Object obj) throws IOException { - return SHARED_WRITER.writeValueAsString(obj); + return MAPPER.writeValueAsString(obj); } public static String toJsonString(BlockStoragePolicy[] storagePolicies) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/QueueManager.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/QueueManager.java index 3a44b427928d8..ec43bce678b26 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/QueueManager.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/QueueManager.java @@ -18,6 +18,7 @@ package org.apache.hadoop.mapred; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerationException; import com.fasterxml.jackson.core.JsonGenerator; import org.apache.hadoop.classification.InterfaceAudience; @@ -27,7 +28,6 @@ import org.apache.hadoop.mapreduce.QueueState; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.authorize.AccessControlList; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -531,7 +531,8 @@ static void dumpConfiguration(Writer out, String configFile, return; } - JsonGenerator dumpGenerator = JacksonUtil.getSharedWriter().createGenerator(out); + JsonFactory dumpFactory = new JsonFactory(); + JsonGenerator dumpGenerator = dumpFactory.createGenerator(out); QueueConfigurationParser parser; boolean aclsEnabled = false; if (conf != null) { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/JobHistoryEventUtils.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/JobHistoryEventUtils.java index c7cd7a63a8692..b5c8b1178d1dd 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/JobHistoryEventUtils.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/JobHistoryEventUtils.java @@ -28,7 +28,6 @@ import org.apache.hadoop.mapreduce.Counter; import org.apache.hadoop.mapreduce.CounterGroup; import org.apache.hadoop.mapreduce.Counters; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.timelineservice.TimelineMetric; /** @@ -42,7 +41,7 @@ private JobHistoryEventUtils() { public static final int ATS_CONFIG_PUBLISH_SIZE_BYTES = 10 * 1024; public static JsonNode countersToJSON(Counters counters) { - ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + ObjectMapper mapper = new ObjectMapper(); ArrayNode nodes = mapper.createArrayNode(); if (counters != null) { for (CounterGroup counterGroup : counters) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AEncryption.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AEncryption.java index 00692abcf182f..a720d2ca10000 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AEncryption.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/S3AEncryption.java @@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets; import java.util.Map; +import com.fasterxml.jackson.databind.ObjectMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -29,7 +30,6 @@ import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.S3AUtils; -import org.apache.hadoop.util.JacksonUtil; import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_CONTEXT; @@ -91,8 +91,8 @@ public static String getS3EncryptionContextBase64Encoded( if (encryptionContextMap.isEmpty()) { return ""; } - final String encryptionContextJson = JacksonUtil.getSharedWriter() - .writeValueAsString(encryptionContextMap); + final String encryptionContextJson = new ObjectMapper().writeValueAsString( + encryptionContextMap); return Base64.encodeBase64String(encryptionContextJson.getBytes(StandardCharsets.UTF_8)); } catch (IOException e) { if (propagateExceptions) { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java index 2b59452a32d86..4e777da8b409f 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java @@ -84,7 +84,6 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticatedURL; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.LambdaUtils; import org.apache.hadoop.util.Progressable; import org.apache.hadoop.util.Time; @@ -97,6 +96,7 @@ import static org.apache.hadoop.fs.azure.NativeAzureFileSystemHelper.*; import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.VisibleForTesting; import com.microsoft.azure.storage.StorageException; @@ -127,7 +127,7 @@ public static class 
FolderRenamePending { private static final int FORMATTING_BUFFER = 10000; private boolean committed; public static final String SUFFIX = "-RenamePending.json"; - private static final ObjectReader READER = JacksonUtil.createBasicObjectMapper() + private static final ObjectReader READER = new ObjectMapper() .configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true) .readerFor(JsonNode.class); diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteSASKeyGeneratorImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteSASKeyGeneratorImpl.java index 3f8862e6d1def..473fa54f97c83 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteSASKeyGeneratorImpl.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteSASKeyGeneratorImpl.java @@ -24,11 +24,11 @@ import java.util.List; import java.util.concurrent.TimeUnit; +import com.fasterxml.jackson.databind.ObjectReader; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.azure.security.Constants; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryUtils; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.http.NameValuePair; @@ -40,7 +40,7 @@ import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.databind.JsonMappingException; -import com.fasterxml.jackson.databind.ObjectReader; +import com.fasterxml.jackson.databind.ObjectMapper; import static org.apache.hadoop.fs.azure.WasbRemoteCallHelper.REMOTE_CALL_SUCCESS_CODE; @@ -53,8 +53,8 @@ public class RemoteSASKeyGeneratorImpl extends SASKeyGeneratorImpl { public static final Logger LOG = LoggerFactory.getLogger(AzureNativeFileSystemStore.class); - private static final ObjectReader RESPONSE_READER = JacksonUtil - .createBasicReaderFor(RemoteSASKeyGenerationResponse.class); + private static final ObjectReader RESPONSE_READER = new ObjectMapper() + .readerFor(RemoteSASKeyGenerationResponse.class); /** * Configuration parameter name expected in the Configuration diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java index 7bcaecdba5b0b..eca8443b6c587 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java @@ -20,6 +20,7 @@ import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.commons.lang3.StringUtils; @@ -28,14 +29,13 @@ import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryUtils; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.JacksonUtil; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.utils.URIBuilder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.concurrent.TimeUnit; import java.io.IOException; -import java.util.concurrent.TimeUnit; import static org.apache.hadoop.fs.azure.WasbRemoteCallHelper.REMOTE_CALL_SUCCESS_CODE; @@ -49,8 +49,8 @@ public class 
RemoteWasbAuthorizerImpl implements WasbAuthorizerInterface { public static final Logger LOG = LoggerFactory .getLogger(RemoteWasbAuthorizerImpl.class); - private static final ObjectReader RESPONSE_READER = JacksonUtil - .createBasicReaderFor(RemoteWasbAuthorizerResponse.class); + private static final ObjectReader RESPONSE_READER = new ObjectMapper() + .readerFor(RemoteWasbAuthorizerResponse.class); /** * Configuration parameter name expected in the Configuration object to @@ -176,7 +176,7 @@ private boolean authorizeInternal(String wasbAbsolutePath, String accessType, St uriBuilder .addParameter(WASB_ABSOLUTE_PATH_QUERY_PARAM_NAME, wasbAbsolutePath); uriBuilder.addParameter(ACCESS_OPERATION_QUERY_PARAM_NAME, accessType); - if (StringUtils.isNotEmpty(resourceOwner)) { + if (resourceOwner != null && StringUtils.isNotEmpty(resourceOwner)) { uriBuilder.addParameter(WASB_RESOURCE_OWNER_QUERY_PARAM_NAME, resourceOwner); } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java index ab0282e19fc1d..dab4d79658451 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/oauth2/AzureADAuthenticator.java @@ -29,6 +29,9 @@ import java.util.Hashtable; import java.util.Map; +import org.apache.hadoop.util.Preconditions; + +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; import org.slf4j.Logger; @@ -39,8 +42,6 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.azurebfs.services.AbfsIoUtils; import org.apache.hadoop.fs.azurebfs.services.ExponentialRetryPolicy; -import org.apache.hadoop.util.JacksonUtil; -import org.apache.hadoop.util.Preconditions; /** * This class provides convenience methods to obtain AAD tokens. @@ -492,7 +493,8 @@ private static AzureADToken parseTokenFromStream( int expiryPeriodInSecs = 0; long expiresOnInSecs = -1; - JsonParser jp = JacksonUtil.createBasicJsonFactory().createParser(httpResponseStream); + JsonFactory jf = new JsonFactory(); + JsonParser jp = jf.createParser(httpResponseStream); String fieldName, fieldValue; jp.nextToken(); while (jp.hasCurrentToken()) { diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 658f2cfe65167..e2ce5c628a4b6 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -30,6 +30,7 @@ import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; +import com.fasterxml.jackson.databind.ObjectMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -39,7 +40,6 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.AbfsPerfLoggable; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; import org.apache.hadoop.fs.azurebfs.utils.UriUtils; -import org.apache.hadoop.util.JacksonUtil; /** * Base Http operation class for orchestrating server IO calls. 
Child classes would @@ -447,7 +447,7 @@ private void processStorageErrorResponse() { if (stream == null) { return; } - JsonFactory jf = JacksonUtil.createBasicJsonFactory(); + JsonFactory jf = new JsonFactory(); try (JsonParser jp = jf.createParser(stream)) { String fieldName, fieldValue; jp.nextToken(); // START_OBJECT - { @@ -509,7 +509,8 @@ private void parseListFilesResponse(final InputStream stream) } try { - this.listResultSchema = JacksonUtil.getSharedReader().readValue(stream, + final ObjectMapper objectMapper = new ObjectMapper(); + this.listResultSchema = objectMapper.readValue(stream, ListResultSchema.class); } catch (IOException ex) { log.error("Unable to deserialize list results", ex); diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java index 04e98754ca837..f6c8a6ac4d58b 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/src/main/java/org/apache/hadoop/tools/dynamometer/DynoInfraUtils.java @@ -51,7 +51,6 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Time; import org.apache.hadoop.yarn.YarnUncaughtExceptionHandler; import org.apache.hadoop.yarn.api.ApplicationConstants.Environment; @@ -485,7 +484,7 @@ static Set parseStaleDataNodeList(String liveNodeJsonString, final int blockThreshold, final Logger log) throws IOException { final Set dataNodesToReport = new HashSet<>(); - JsonFactory fac = JacksonUtil.createBasicJsonFactory(); + JsonFactory fac = new JsonFactory(); JsonParser parser = fac.createParser(IOUtils .toInputStream(liveNodeJsonString, StandardCharsets.UTF_8.name())); @@ -555,7 +554,7 @@ static String fetchNameNodeJMXValue(Properties nameNodeProperties, "Unable to retrieve JMX: " + conn.getResponseMessage()); } InputStream in = conn.getInputStream(); - JsonFactory fac = JacksonUtil.createBasicJsonFactory(); + JsonFactory fac = new JsonFactory(); JsonParser parser = fac.createParser(in); if (parser.nextToken() != JsonToken.START_OBJECT || parser.nextToken() != JsonToken.FIELD_NAME diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Anonymizer.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Anonymizer.java index dc0856cd58a09..3c85a93ddbfc9 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Anonymizer.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/Anonymizer.java @@ -22,6 +22,7 @@ import java.io.OutputStream; import com.fasterxml.jackson.core.JsonEncoding; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.Version; import com.fasterxml.jackson.databind.ObjectMapper; @@ -35,7 +36,6 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.io.compress.Compressor; import org.apache.hadoop.mapreduce.ID; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.tools.rumen.datatypes.*; @@ -55,7 +55,8 @@ public class 
Anonymizer extends Configured implements Tool { private StatePool statePool; private ObjectMapper outMapper = null; - + private JsonFactory outFactory = null; + private void initialize(String[] args) throws Exception { try { for (int i = 0; i < args.length; ++i) { @@ -84,7 +85,7 @@ private void initialize(String[] args) throws Exception { // initialize the state manager after the anonymizers are registered statePool.initialize(getConf()); - outMapper = JacksonUtil.createBasicObjectMapper(); + outMapper = new ObjectMapper(); // define a module SimpleModule module = new SimpleModule( "Anonymization Serializer", new Version(0, 1, 1, "FINAL", "", "")); @@ -103,6 +104,8 @@ private void initialize(String[] args) throws Exception { // register the module with the object-mapper outMapper.registerModule(module); + + outFactory = outMapper.getFactory(); } // anonymize the job trace file @@ -188,7 +191,7 @@ private JsonGenerator createJsonGenerator(Configuration conf, Path path) } JsonGenerator outGen = - outMapper.createGenerator(output, JsonEncoding.UTF8); + outFactory.createGenerator(output, JsonEncoding.UTF8); outGen.useDefaultPrettyPrinter(); return outGen; diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperParser.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperParser.java index 3d644b5ad2272..f95878dde95e3 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperParser.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperParser.java @@ -26,7 +26,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.util.JacksonUtil; /** * A simple wrapper for parsing JSON-encoded data using ObjectMapper. @@ -49,10 +48,10 @@ class JsonObjectMapperParser implements Closeable { */ public JsonObjectMapperParser(Path path, Class clazz, Configuration conf) throws IOException { - mapper = JacksonUtil.createBasicObjectMapper(); + mapper = new ObjectMapper(); this.clazz = clazz; InputStream input = new PossiblyDecompressedInputStream(path, conf); - jsonParser = mapper.createParser(input); + jsonParser = mapper.getFactory().createParser(input); } /** @@ -63,9 +62,9 @@ public JsonObjectMapperParser(Path path, Class clazz, */ public JsonObjectMapperParser(InputStream input, Class clazz) throws IOException { - mapper = JacksonUtil.createBasicObjectMapper(); + mapper = new ObjectMapper(); this.clazz = clazz; - jsonParser = mapper.createParser(input); + jsonParser = mapper.getFactory().createParser(input); } /** diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperWriter.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperWriter.java index e0caa18fff792..747b141fd98be 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperWriter.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/JsonObjectMapperWriter.java @@ -30,7 +30,6 @@ import org.apache.hadoop.tools.rumen.datatypes.DataType; import org.apache.hadoop.tools.rumen.serializers.DefaultRumenSerializer; import org.apache.hadoop.tools.rumen.serializers.ObjectStringSerializer; -import org.apache.hadoop.util.JacksonUtil; /** * Simple wrapper around {@link JsonGenerator} to write objects in JSON format. 
@@ -40,7 +39,7 @@ public class JsonObjectMapperWriter implements Closeable { private JsonGenerator writer; public JsonObjectMapperWriter(OutputStream output, boolean prettyPrint) throws IOException { - ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + ObjectMapper mapper = new ObjectMapper(); // define a module SimpleModule module = new SimpleModule( @@ -54,7 +53,7 @@ public JsonObjectMapperWriter(OutputStream output, boolean prettyPrint) throws I // register the module with the object-mapper mapper.registerModule(module); - writer = mapper.createGenerator(output, JsonEncoding.UTF8); + writer = mapper.getFactory().createGenerator(output, JsonEncoding.UTF8); if (prettyPrint) { writer.useDefaultPrettyPrinter(); } diff --git a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/state/StatePool.java b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/state/StatePool.java index 0c594afc3b72c..ab6f8942e7cfb 100644 --- a/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/state/StatePool.java +++ b/hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/state/StatePool.java @@ -30,6 +30,7 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.core.JsonEncoding; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.Version; @@ -43,7 +44,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.tools.rumen.Anonymizer; import org.apache.hadoop.tools.rumen.datatypes.DataType; -import org.apache.hadoop.util.JacksonUtil; /** * A pool of states. States used by {@link DataType}'s can be managed the @@ -206,7 +206,7 @@ private boolean reloadState(Path stateFile, Configuration configuration) } private void read(DataInput in) throws IOException { - ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + ObjectMapper mapper = new ObjectMapper(); // define a module SimpleModule module = new SimpleModule("State Serializer", new Version(0, 1, 1, "FINAL", "", "")); @@ -216,7 +216,7 @@ private void read(DataInput in) throws IOException { // register the module with the object-mapper mapper.registerModule(module); - JsonParser parser = mapper.createParser((InputStream)in); + JsonParser parser = mapper.getFactory().createParser((InputStream)in); StatePool statePool = mapper.readValue(parser, StatePool.class); this.setStates(statePool.getStates()); parser.close(); @@ -273,7 +273,7 @@ public void persist() throws IOException { private void write(DataOutput out) throws IOException { // This is just a JSON experiment System.out.println("Dumping the StatePool's in JSON format."); - ObjectMapper outMapper = JacksonUtil.createBasicObjectMapper(); + ObjectMapper outMapper = new ObjectMapper(); // define a module SimpleModule module = new SimpleModule("State Serializer", new Version(0, 1, 1, "FINAL", "", "")); @@ -283,8 +283,9 @@ private void write(DataOutput out) throws IOException { // register the module with the object-mapper outMapper.registerModule(module); + JsonFactory outFactory = outMapper.getFactory(); JsonGenerator jGen = - outMapper.createGenerator((OutputStream)out, JsonEncoding.UTF8); + outFactory.createGenerator((OutputStream)out, JsonEncoding.UTF8); jGen.useDefaultPrettyPrinter(); jGen.writeObject(this); diff --git a/hadoop-tools/hadoop-rumen/src/test/java/org/apache/hadoop/tools/rumen/TestHistograms.java 
b/hadoop-tools/hadoop-rumen/src/test/java/org/apache/hadoop/tools/rumen/TestHistograms.java index db6d47cf0726e..187251900b75d 100644 --- a/hadoop-tools/hadoop-rumen/src/test/java/org/apache/hadoop/tools/rumen/TestHistograms.java +++ b/hadoop-tools/hadoop-rumen/src/test/java/org/apache/hadoop/tools/rumen/TestHistograms.java @@ -23,6 +23,7 @@ import java.util.List; import com.fasterxml.jackson.core.JsonEncoding; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.ObjectMapper; @@ -140,8 +141,9 @@ public static void main(String[] args) throws IOException { Path goldFilePath = new Path(filePath.getParent(), "gold"+testName); ObjectMapper mapper = new ObjectMapper(); + JsonFactory factory = mapper.getFactory(); FSDataOutputStream ostream = lfs.create(goldFilePath, true); - JsonGenerator gen = mapper.createGenerator((OutputStream)ostream, + JsonGenerator gen = factory.createGenerator((OutputStream)ostream, JsonEncoding.UTF8); gen.useDefaultPrettyPrinter(); diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/AMRunner.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/AMRunner.java index 0d943471c6f9c..2dc09de665368 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/AMRunner.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/AMRunner.java @@ -16,13 +16,13 @@ package org.apache.hadoop.yarn.sls; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.databind.JavaType; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.tools.rumen.JobTraceReader; import org.apache.hadoop.tools.rumen.LoggedJob; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ReservationId; @@ -44,8 +44,11 @@ import java.io.InputStreamReader; import java.io.Reader; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -119,14 +122,15 @@ public void startAM() throws YarnException, IOException { * Parse workload from a SLS trace file. */ private void startAMFromSLSTrace(String inputTrace) throws IOException { - ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + JsonFactory jsonF = new JsonFactory(); + ObjectMapper mapper = new ObjectMapper(); try (Reader input = new InputStreamReader( new FileInputStream(inputTrace), StandardCharsets.UTF_8)) { JavaType type = mapper.getTypeFactory(). 
constructMapType(Map.class, String.class, String.class); Iterator> jobIter = mapper.readValues( - mapper.createParser(input), type); + jsonF.createParser(input), type); while (jobIter.hasNext()) { try { diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/RumenToSLSConverter.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/RumenToSLSConverter.java index 9b25275912377..2cdfe236c410d 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/RumenToSLSConverter.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/RumenToSLSConverter.java @@ -35,6 +35,7 @@ import java.util.TreeMap; import java.util.TreeSet; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.commons.cli.CommandLine; @@ -43,7 +44,6 @@ import org.apache.commons.cli.Options; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.sls.utils.SLSUtils; @Private @@ -126,10 +126,10 @@ private static void generateSLSLoadFile(String inputFile, String outputFile) StandardCharsets.UTF_8)) { try (Writer output = new OutputStreamWriter(new FileOutputStream(outputFile), StandardCharsets.UTF_8)) { - ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + ObjectMapper mapper = new ObjectMapper(); ObjectWriter writer = mapper.writerWithDefaultPrettyPrinter(); Iterator i = mapper.readValues( - mapper.createParser(input), Map.class); + new JsonFactory().createParser(input), Map.class); while (i.hasNext()) { Map m = i.next(); output.write(writer.writeValueAsString(createSLSJob(m)) + EOL); @@ -143,7 +143,7 @@ private static void generateSLSNodeFile(String outputFile) throws IOException { try (Writer output = new OutputStreamWriter(new FileOutputStream(outputFile), StandardCharsets.UTF_8)) { - ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + ObjectMapper mapper = new ObjectMapper(); ObjectWriter writer = mapper.writerWithDefaultPrettyPrinter(); for (Map.Entry> entry : rackNodeMap.entrySet()) { Map rack = new LinkedHashMap(); diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthTraceJobProducer.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthTraceJobProducer.java index 58f8b59ba65e9..18b1c034bdf3a 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthTraceJobProducer.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/synthetic/SynthTraceJobProducer.java @@ -34,7 +34,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.tools.rumen.JobStory; import org.apache.hadoop.tools.rumen.JobStoryProducer; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.sls.appmaster.MRAMSimulator; @@ -89,8 +88,7 @@ public SynthTraceJobProducer(Configuration conf, Path path) JsonFactoryBuilder jsonFactoryBuilder = new JsonFactoryBuilder(); jsonFactoryBuilder.configure(JsonFactory.Feature.INTERN_FIELD_NAMES, true); - - ObjectMapper mapper = JacksonUtil.createObjectMapper(jsonFactoryBuilder.build()); + ObjectMapper mapper = new ObjectMapper(jsonFactoryBuilder.build()); 
mapper.configure(FAIL_ON_UNKNOWN_PROPERTIES, false); FileSystem ifs = path.getFileSystem(conf); diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java index 676ef13b5a8e4..af0b4f6caf3ab 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/utils/SLSUtils.java @@ -34,6 +34,7 @@ import java.util.Map; import java.util.Set; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; @@ -44,7 +45,6 @@ import org.apache.hadoop.tools.rumen.LoggedJob; import org.apache.hadoop.tools.rumen.LoggedTask; import org.apache.hadoop.tools.rumen.LoggedTaskAttempt; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.NodeLabel; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceInformation; @@ -120,11 +120,12 @@ public static Set parseNodesFromRumenTrace( public static Set parseNodesFromSLSTrace( String jobTrace) throws IOException { Set nodeSet = new HashSet<>(); - ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + JsonFactory jsonF = new JsonFactory(); + ObjectMapper mapper = new ObjectMapper(); Reader input = new InputStreamReader(new FileInputStream(jobTrace), StandardCharsets.UTF_8); try { - Iterator i = mapper.readValues(mapper.createParser(input), Map.class); + Iterator i = mapper.readValues(jsonF.createParser(input), Map.class); while (i.hasNext()) { addNodes(nodeSet, i.next()); } @@ -166,11 +167,12 @@ private static void addNodes(Set nodeSet, public static Set parseNodesFromNodeFile( String nodeFile, Resource nmDefaultResource) throws IOException { Set nodeSet = new HashSet<>(); - ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + JsonFactory jsonF = new JsonFactory(); + ObjectMapper mapper = new ObjectMapper(); Reader input = new InputStreamReader(new FileInputStream(nodeFile), StandardCharsets.UTF_8); try { - Iterator i = mapper.readValues(mapper.createParser(input), Map.class); + Iterator i = mapper.readValues(jsonF.createParser(input), Map.class); while (i.hasNext()) { Map jsonE = i.next(); String rack = "/" + jsonE.get("rack"); diff --git a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSynthJobGeneration.java b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSynthJobGeneration.java index f690808f8e143..dd12a10f94612 100644 --- a/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSynthJobGeneration.java +++ b/hadoop-tools/hadoop-sls/src/test/java/org/apache/hadoop/yarn/sls/TestSynthJobGeneration.java @@ -18,7 +18,6 @@ package org.apache.hadoop.yarn.sls; import org.apache.commons.math3.random.JDKRandomGenerator; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.sls.synthetic.SynthJob; @@ -61,7 +60,7 @@ public void testWorkloadGenerateTime() JsonFactoryBuilder jsonFactoryBuilder = new JsonFactoryBuilder(); jsonFactoryBuilder.configure(JsonFactory.Feature.INTERN_FIELD_NAMES, true); - ObjectMapper mapper = JacksonUtil.createObjectMapper(jsonFactoryBuilder.build()); + ObjectMapper mapper = new 
ObjectMapper(jsonFactoryBuilder.build()); mapper.configure(FAIL_ON_UNKNOWN_PROPERTIES, false); SynthTraceJobProducer.Workload wl = mapper.readValue(workloadJson, SynthTraceJobProducer.Workload.class); @@ -182,7 +181,7 @@ public void testSample() throws IOException { JsonFactoryBuilder jsonFactoryBuilder = new JsonFactoryBuilder(); jsonFactoryBuilder.configure(JsonFactory.Feature.INTERN_FIELD_NAMES, true); - ObjectMapper mapper = JacksonUtil.createObjectMapper(jsonFactoryBuilder.build()); + ObjectMapper mapper = new ObjectMapper(jsonFactoryBuilder.build()); mapper.configure(FAIL_ON_UNKNOWN_PROPERTIES, false); JDKRandomGenerator rand = new JDKRandomGenerator(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/AppCatalogSolrClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/AppCatalogSolrClient.java index ea7a0ecdef669..ac8dbbac61d35 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/AppCatalogSolrClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/AppCatalogSolrClient.java @@ -28,7 +28,6 @@ import java.util.Properties; import java.util.Random; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.appcatalog.model.AppEntry; import org.apache.hadoop.yarn.appcatalog.model.AppStoreEntry; import org.apache.hadoop.yarn.appcatalog.model.Application; @@ -58,18 +57,6 @@ public class AppCatalogSolrClient { private static final Logger LOG = LoggerFactory.getLogger(AppCatalogSolrClient.class); private static String urlString; - /** - * It is more performant to reuse ObjectMapper instances but keeping the instance - * private makes it harder for someone to reconfigure it which might have unwanted - * side effects. 
- */ - private static final ObjectMapper OBJECT_MAPPER; - - static { - OBJECT_MAPPER = JacksonUtil.createBasicObjectMapper(); - OBJECT_MAPPER.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - } - public AppCatalogSolrClient() { // Locate Solr URL ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); @@ -159,6 +146,8 @@ public List search(String keyword) { public List listAppEntries() { List list = new ArrayList(); + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); SolrClient solr = getSolrClient(); SolrQuery query = new SolrQuery(); @@ -175,7 +164,7 @@ public List listAppEntries() { entry.setId(d.get("id").toString()); entry.setName(d.get("name_s").toString()); entry.setApp(d.get("app_s").toString()); - entry.setYarnfile(OBJECT_MAPPER.readValue(d.get("yarnfile_s").toString(), + entry.setYarnfile(mapper.readValue(d.get("yarnfile_s").toString(), Service.class)); list.add(entry); } @@ -187,6 +176,8 @@ public List listAppEntries() { public AppStoreEntry findAppStoreEntry(String id) { AppStoreEntry entry = new AppStoreEntry(); + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); SolrClient solr = getSolrClient(); SolrQuery query = new SolrQuery(); @@ -206,7 +197,7 @@ public AppStoreEntry findAppStoreEntry(String id) { entry.setDesc(d.get("desc_s").toString()); entry.setLike(Integer.parseInt(d.get("like_i").toString())); entry.setDownload(Integer.parseInt(d.get("download_i").toString())); - Service yarnApp = OBJECT_MAPPER.readValue(d.get("yarnfile_s").toString(), + Service yarnApp = mapper.readValue(d.get("yarnfile_s").toString(), Service.class); String name; try { @@ -231,6 +222,9 @@ public AppStoreEntry findAppStoreEntry(String id) { public AppEntry findAppEntry(String id) { AppEntry entry = new AppEntry(); + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + SolrClient solr = getSolrClient(); SolrQuery query = new SolrQuery(); query.setQuery("id:" + id); @@ -246,7 +240,7 @@ public AppEntry findAppEntry(String id) { entry.setId(d.get("id").toString()); entry.setApp(d.get("app_s").toString()); entry.setName(d.get("name_s").toString()); - entry.setYarnfile(OBJECT_MAPPER.readValue(d.get("yarnfile_s").toString(), + entry.setYarnfile(mapper.readValue(d.get("yarnfile_s").toString(), Service.class)); } } catch (SolrServerException | IOException e) { @@ -258,6 +252,8 @@ public AppEntry findAppEntry(String id) { public void deployApp(String id, Service service) throws SolrServerException, IOException { long download = 0; + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); Collection docs = new HashSet(); SolrClient solr = getSolrClient(); // Find application information from AppStore @@ -291,7 +287,7 @@ public void deployApp(String id, Service service) throws SolrServerException, request.addField("id", name); request.addField("name_s", name); request.addField("app_s", entry.getOrg()+"/"+entry.getName()); - request.addField("yarnfile_s", OBJECT_MAPPER.writeValueAsString(service)); + request.addField("yarnfile_s", mapper.writeValueAsString(service)); docs.add(request); } @@ -330,6 +326,8 @@ public void deleteApp(String id) { public void register(Application app) throws IOException { Collection docs = new HashSet(); SolrClient solr = getSolrClient(); + ObjectMapper mapper = new 
ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); try { SolrInputDocument buffer = new SolrInputDocument(); buffer.setField("id", java.util.UUID.randomUUID().toString() @@ -345,10 +343,10 @@ public void register(Application app) throws IOException { buffer.setField("download_i", 0); // Keep only YARN data model for yarnfile field - String yarnFile = OBJECT_MAPPER.writeValueAsString(app); - LOG.info("app:{}", yarnFile); - Service yarnApp = OBJECT_MAPPER.readValue(yarnFile, Service.class); - buffer.setField("yarnfile_s", OBJECT_MAPPER.writeValueAsString(yarnApp)); + String yarnFile = mapper.writeValueAsString(app); + LOG.info("app:"+yarnFile); + Service yarnApp = mapper.readValue(yarnFile, Service.class); + buffer.setField("yarnfile_s", mapper.writeValueAsString(yarnApp)); docs.add(buffer); commitSolrChanges(solr, docs); @@ -361,6 +359,8 @@ public void register(Application app) throws IOException { protected void register(AppStoreEntry app) throws IOException { Collection docs = new HashSet(); SolrClient solr = getSolrClient(); + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); try { SolrInputDocument buffer = new SolrInputDocument(); buffer.setField("id", java.util.UUID.randomUUID().toString() @@ -376,10 +376,10 @@ protected void register(AppStoreEntry app) throws IOException { buffer.setField("download_i", app.getDownload()); // Keep only YARN data model for yarnfile field - String yarnFile = OBJECT_MAPPER.writeValueAsString(app); - LOG.info("app:{}", yarnFile); - Service yarnApp = OBJECT_MAPPER.readValue(yarnFile, Service.class); - buffer.setField("yarnfile_s", OBJECT_MAPPER.writeValueAsString(yarnApp)); + String yarnFile = mapper.writeValueAsString(app); + LOG.info("app:"+yarnFile); + Service yarnApp = mapper.readValue(yarnFile, Service.class); + buffer.setField("yarnfile_s", mapper.writeValueAsString(yarnApp)); docs.add(buffer); commitSolrChanges(solr, docs); @@ -391,6 +391,8 @@ protected void register(AppStoreEntry app) throws IOException { public void upgradeApp(Service service) throws IOException, SolrServerException { + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); Collection docs = new HashSet(); SolrClient solr = getSolrClient(); if (service!=null) { @@ -418,7 +420,7 @@ public void upgradeApp(Service service) throws IOException, request.addField("id", name); request.addField("name_s", name); request.addField("app_s", app); - request.addField("yarnfile_s", OBJECT_MAPPER.writeValueAsString(service)); + request.addField("yarnfile_s", mapper.writeValueAsString(service)); docs.add(request); } try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/YarnServiceClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/YarnServiceClient.java index 57c4b353d099c..185b1c8ddebd5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/YarnServiceClient.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/src/main/java/org/apache/hadoop/yarn/appcatalog/application/YarnServiceClient.java @@ -23,7 +23,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.appcatalog.model.AppEntry; import org.apache.hadoop.yarn.service.api.records.Service; import org.apache.hadoop.yarn.service.api.records.ServiceState; @@ -47,19 +46,6 @@ public class YarnServiceClient { private static final Logger LOG = LoggerFactory.getLogger(YarnServiceClient.class); - - /** - * It is more performant to reuse ObjectMapper instances but keeping the instance - * private makes it harder for someone to reconfigure it which might have unwanted - * side effects. - */ - private static final ObjectMapper OBJECT_MAPPER; - - static { - OBJECT_MAPPER = JacksonUtil.createBasicObjectMapper(); - OBJECT_MAPPER.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); - } - private static Configuration conf = new Configuration(); private static ClientConfig getClientConfig() { ClientConfig config = new DefaultClientConfig(); @@ -80,6 +66,8 @@ public YarnServiceClient() { } public void createApp(Service app) { + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); ClientResponse response; try { boolean useKerberos = UserGroupInformation.isSecurityEnabled(); @@ -102,7 +90,7 @@ public void createApp(Service app) { app.setKerberosPrincipal(kerberos); } response = asc.getApiClient().post(ClientResponse.class, - OBJECT_MAPPER.writeValueAsString(app)); + mapper.writeValueAsString(app)); if (response.getStatus() >= 299) { String message = response.getEntity(String.class); throw new RuntimeException("Failed : HTTP error code : " @@ -131,8 +119,10 @@ public void deleteApp(String appInstanceId) { } public void restartApp(Service app) throws JsonProcessingException { + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); String appInstanceId = app.getName(); - String yarnFile = OBJECT_MAPPER.writeValueAsString(app); + String yarnFile = mapper.writeValueAsString(app); ClientResponse response; try { response = asc.getApiClient(asc.getServicePath(appInstanceId)) @@ -149,8 +139,10 @@ public void restartApp(Service app) throws JsonProcessingException { } public void stopApp(Service app) throws JsonProcessingException { + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); String appInstanceId = app.getName(); - String yarnFile = OBJECT_MAPPER.writeValueAsString(app); + String yarnFile = mapper.writeValueAsString(app); ClientResponse response; try { response = asc.getApiClient(asc.getServicePath(appInstanceId)) @@ -167,12 +159,14 @@ public void stopApp(Service app) throws JsonProcessingException { } public void getStatus(AppEntry entry) { + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); String appInstanceId = entry.getName(); Service app = null; try { String yarnFile = asc.getApiClient(asc.getServicePath(appInstanceId)) .get(String.class); - app = OBJECT_MAPPER.readValue(yarnFile, Service.class); + app = mapper.readValue(yarnFile, Service.class); entry.setYarnfile(app); } catch (UniformInterfaceException | IOException e) { 
LOG.error("Error in fetching application status: ", e); @@ -180,9 +174,11 @@ public void getStatus(AppEntry entry) { } public void upgradeApp(Service app) throws JsonProcessingException { + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); String appInstanceId = app.getName(); app.setState(ServiceState.EXPRESS_UPGRADING); - String yarnFile = OBJECT_MAPPER.writeValueAsString(app); + String yarnFile = mapper.writeValueAsString(app); ClientResponse response; try { response = asc.getApiClient(asc.getServicePath(appInstanceId)) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java index 1e30fbd5ba1ec..cab4870493561 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/component/instance/ComponentInstance.java @@ -26,7 +26,6 @@ import org.apache.hadoop.registry.client.binding.RegistryPathUtils; import org.apache.hadoop.registry.client.types.ServiceRecord; import org.apache.hadoop.registry.client.types.yarn.PersistencePolicies; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.api.records.Container; import org.apache.hadoop.yarn.api.records.ContainerExitStatus; @@ -876,7 +875,7 @@ public void updateContainerStatus(ContainerStatus status) { doRegistryUpdate = false; } } - final ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + ObjectMapper mapper = new ObjectMapper(); try { Map>> ports = null; ports = mapper.readValue(status.getExposedPorts(), diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/JsonSerDeser.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/JsonSerDeser.java index cf3d785a22ea6..254d6c5d37954 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/JsonSerDeser.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/JsonSerDeser.java @@ -30,7 +30,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.util.JacksonUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -62,10 +61,9 @@ public class JsonSerDeser { @SuppressWarnings("deprecation") public JsonSerDeser(Class classType) { this.classType = classType; - this.mapper = JacksonUtil.createBasicObjectMapper(); + this.mapper = new ObjectMapper(); mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); mapper.configure(SerializationFeature.WRITE_NULL_MAP_VALUES, false); - 
mapper.configure(SerializationFeature.INDENT_OUTPUT, true); } public JsonSerDeser(Class classType, PropertyNamingStrategy namingStrategy) { @@ -233,6 +231,7 @@ private void writeJsonAsBytes(T instance, * @throws JsonProcessingException parse problems */ public String toJson(T instance) throws JsonProcessingException { + mapper.configure(SerializationFeature.INDENT_OUTPUT, true); return mapper.writeValueAsString(instance); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/PublishedConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/PublishedConfiguration.java index ac30480fd8856..e7ec2d6f5e7c2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/PublishedConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/utils/PublishedConfiguration.java @@ -23,7 +23,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializationFeature; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.service.exceptions.BadConfigException; import java.io.IOException; @@ -42,18 +41,6 @@ @JsonInclude(value = JsonInclude.Include.NON_NULL) public class PublishedConfiguration { - /** - * It is more performant to reuse ObjectMapper instances but keeping the instance - * private makes it harder for someone to reconfigure it which might have unwanted - * side effects. 
- */ - private static final ObjectMapper OBJECT_MAPPER; - - static { - OBJECT_MAPPER = JacksonUtil.createBasicObjectMapper(); - OBJECT_MAPPER.configure(SerializationFeature.INDENT_OUTPUT, true); - } - public String description; public long updated; @@ -167,7 +154,9 @@ public Properties asProperties() { * @throws IOException marshalling failure */ public String asJson() throws IOException { - String json = OBJECT_MAPPER.writeValueAsString(entries); + ObjectMapper mapper = new ObjectMapper(); + mapper.configure(SerializationFeature.INDENT_OUTPUT, true); + String json = mapper.writeValueAsString(entries); return json; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/FileSystemTimelineWriter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/FileSystemTimelineWriter.java index dc60f9b274ede..b92f4e412347c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/FileSystemTimelineWriter.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/FileSystemTimelineWriter.java @@ -49,7 +49,6 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.Time; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -61,6 +60,7 @@ import org.apache.hadoop.yarn.exceptions.YarnException; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.util.MinimalPrettyPrinter; import com.fasterxml.jackson.databind.ObjectMapper; @@ -274,7 +274,7 @@ public void flush() throws IOException { } private ObjectMapper createObjectMapper() { - ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + ObjectMapper mapper = new ObjectMapper(); mapper.setAnnotationIntrospector( new JaxbAnnotationIntrospector(TypeFactory.defaultInstance())); mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); @@ -365,8 +365,8 @@ public long getLastModifiedTime() { protected void prepareForWrite() throws IOException{ this.stream = createLogFileStream(fs, logPath); - this.jsonGenerator = JacksonUtil.getSharedWriter() - .createGenerator((OutputStream)stream); + this.jsonGenerator = new JsonFactory().createGenerator( + (OutputStream)stream); this.jsonGenerator.setPrettyPrinter(new MinimalPrettyPrinter("\n")); this.lastModifiedTime = Time.monotonicNow(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java index 0264e40c7be28..45da0f444ba0d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java @@ -30,7 +30,6 @@ import org.apache.commons.cli.Options; import org.apache.hadoop.security.authentication.server.KerberosAuthenticationHandler; import 
org.apache.hadoop.security.authentication.server.PseudoAuthenticationHandler; -import org.apache.hadoop.util.JacksonUtil; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.classification.InterfaceAudience.Private; @@ -63,7 +62,7 @@ public class TimelineClientImpl extends TimelineClient { private static final Logger LOG = LoggerFactory.getLogger(TimelineClientImpl.class); - private static final ObjectMapper MAPPER = JacksonUtil.createBasicObjectMapper(); + private static final ObjectMapper MAPPER = new ObjectMapper(); private static final String RESOURCE_URI_STR_V1 = "/ws/v1/timeline/"; private static Options opts; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java index 83b6a09607512..6351cb69c82e7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/DockerClientConfigHandler.java @@ -27,9 +27,9 @@ import org.apache.hadoop.security.Credentials; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.security.DockerCredentialTokenIdentifier; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; @@ -96,8 +96,9 @@ public static Credentials readCredentialsFromConfigFile(Path configFile, } // Parse the JSON and create the Tokens/Credentials. 
- ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); - JsonParser parser = mapper.createParser(contents); + ObjectMapper mapper = new ObjectMapper(); + JsonFactory factory = mapper.getFactory(); + JsonParser parser = factory.createParser(contents); JsonNode rootNode = mapper.readTree(parser); Credentials credentials = new Credentials(); @@ -160,7 +161,7 @@ public static boolean writeDockerCredentialsToPath(File outConfigFile, Credentials credentials) throws IOException { boolean foundDockerCred = false; if (credentials.numberOfTokens() > 0) { - ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + ObjectMapper mapper = new ObjectMapper(); ObjectNode rootNode = mapper.createObjectNode(); ObjectNode registryUrlNode = mapper.createObjectNode(); for (Token tk : credentials.getAllTokens()) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/timeline/TimelineUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/timeline/TimelineUtils.java index a36b96dca205a..14b9b0ceb7d12 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/timeline/TimelineUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/timeline/TimelineUtils.java @@ -31,7 +31,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.SecurityUtil; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.VersionInfo; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.timeline.TimelineAbout; @@ -54,10 +53,11 @@ public class TimelineUtils { "TIMELINE_FLOW_RUN_ID_TAG"; public final static String DEFAULT_FLOW_VERSION = "1"; - private static final ObjectMapper OBJECT_MAPPER = JacksonUtil.createBasicObjectMapper(); + private static ObjectMapper mapper; static { - YarnJacksonJaxbJsonProvider.configObjectMapper(OBJECT_MAPPER); + mapper = new ObjectMapper(); + YarnJacksonJaxbJsonProvider.configObjectMapper(mapper); } /** @@ -90,9 +90,9 @@ public static String dumpTimelineRecordtoJSON(Object o) public static String dumpTimelineRecordtoJSON(Object o, boolean pretty) throws JsonGenerationException, JsonMappingException, IOException { if (pretty) { - return OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(o); + return mapper.writerWithDefaultPrettyPrinter().writeValueAsString(o); } else { - return OBJECT_MAPPER.writeValueAsString(o); + return mapper.writeValueAsString(o); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Controller.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Controller.java index bf5500892de14..ad80a2eefe5bd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Controller.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Controller.java @@ -28,8 +28,8 @@ import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.webapp.view.DefaultPage; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -42,6 +42,7 @@ 
@InterfaceAudience.LimitedPrivate({"YARN", "MapReduce"}) public abstract class Controller implements Params { public static final Logger LOG = LoggerFactory.getLogger(Controller.class); + static final ObjectMapper jsonMapper = new ObjectMapper(); @RequestScoped public static class RequestContext{ @@ -224,7 +225,7 @@ protected void renderJSON(Object object) { context().rendered = true; context().response.setContentType(MimeType.JSON); try { - JacksonUtil.getSharedWriter().writeValue(writer(), object); + jsonMapper.writeValue(writer(), object); } catch (Exception e) { throw new WebAppException(e); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/GenericObjectMapper.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/GenericObjectMapper.java index 440c5d6f0600c..fdafcf0cd1c9d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/GenericObjectMapper.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/src/main/java/org/apache/hadoop/yarn/server/timeline/GenericObjectMapper.java @@ -19,11 +19,11 @@ import java.io.IOException; +import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectReader; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.util.JacksonUtil; /** * A utility class providing methods for serializing and deserializing @@ -38,8 +38,14 @@ public class GenericObjectMapper { private static final byte[] EMPTY_BYTES = new byte[0]; - public static final ObjectReader OBJECT_READER = JacksonUtil.createBasicReaderFor(Object.class); - public static final ObjectWriter OBJECT_WRITER = JacksonUtil.getSharedWriter(); + public static final ObjectReader OBJECT_READER; + public static final ObjectWriter OBJECT_WRITER; + + static { + ObjectMapper mapper = new ObjectMapper(); + OBJECT_READER = mapper.reader(Object.class); + OBJECT_WRITER = mapper.writer(); + } /** * Serializes an Object into a byte array. 
Along with {@link #read(byte[])}, diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java index cbbc33706db34..794ef9d9a4326 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/AuxServices.java @@ -43,7 +43,6 @@ import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.security.authorize.AccessControlList; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.server.nodemanager.containermanager.records.AuxServiceConfiguration; import org.apache.hadoop.yarn.server.nodemanager.containermanager.records.AuxServiceFile; import org.apache.hadoop.yarn.server.nodemanager.containermanager.records.AuxServiceRecord; @@ -136,7 +135,7 @@ public class AuxServices extends AbstractService this.dirsHandler = nmContext.getLocalDirsHandler(); this.delService = deletionService; this.userUGI = getRemoteUgi(); - this.mapper = JacksonUtil.createBasicObjectMapper(); + this.mapper = new ObjectMapper(); mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); // Obtain services from configuration in init() } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/NetworkTagMappingJsonManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/NetworkTagMappingJsonManager.java index 3b4e26eda1ff3..cc2ded4422b71 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/NetworkTagMappingJsonManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/resources/NetworkTagMappingJsonManager.java @@ -28,11 +28,11 @@ import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.server.nodemanager.containermanager.container.Container; @@ -58,8 +58,9 @@ public void initialize(Configuration conf) { + " we have to set the configuration:" + YarnConfiguration.NM_NETWORK_TAG_MAPPING_FILE_PATH); } + ObjectMapper mapper = new ObjectMapper(); try { - networkTagMapping = JacksonUtil.getSharedReader().readValue(new File(mappingJsonFile), + networkTagMapping = mapper.readValue(new File(mappingJsonFile), 
NetworkTagMapping.class); } catch (Exception e) { throw new YarnRuntimeException(e); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/RuncContainerRuntime.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/RuncContainerRuntime.java index 86bb5113dd26b..2c327c04ebaf1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/RuncContainerRuntime.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/RuncContainerRuntime.java @@ -27,7 +27,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair; import org.apache.hadoop.security.authorize.AccessControlList; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.concurrent.HadoopExecutors; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -92,7 +91,6 @@ import static org.apache.hadoop.yarn.conf.YarnConfiguration.NM_RUNC_MANIFEST_TO_RESOURCES_PLUGIN; import static org.apache.hadoop.yarn.conf.YarnConfiguration.NM_REAP_RUNC_LAYER_MOUNTS_INTERVAL; import static org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.LinuxContainerRuntimeConstants.*; - /** *

    This class is an extension of {@link OCIContainerRuntime} that uses the * native {@code container-executor} binary via a @@ -208,7 +206,7 @@ public void initialize(Configuration configuration, Context nmCtx) imageTagToManifestPlugin.init(conf); manifestToResourcesPlugin = chooseManifestToResourcesPlugin(); manifestToResourcesPlugin.init(conf); - mapper = JacksonUtil.createBasicObjectMapper(); + mapper = new ObjectMapper(); defaultRuncImage = conf.get(YarnConfiguration.NM_RUNC_IMAGE_NAME); allowedNetworks.clear(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/runc/ImageTagToManifestPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/runc/ImageTagToManifestPlugin.java index bb21c45f735a0..457939c9a1740 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/runc/ImageTagToManifestPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/linux/runtime/runc/ImageTagToManifestPlugin.java @@ -26,7 +26,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.service.AbstractService; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.concurrent.HadoopExecutors; import java.io.BufferedReader; @@ -43,6 +42,7 @@ import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; +import com.fasterxml.jackson.databind.ObjectMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -65,6 +65,7 @@ public class ImageTagToManifestPlugin extends AbstractService implements RuncImageTagToManifestPlugin { private Map manifestCache; + private ObjectMapper objMapper; private AtomicReference> localImageToHashCache = new AtomicReference<>(new HashMap<>()); private AtomicReference> hdfsImageToHashCache = @@ -106,7 +107,7 @@ public ImageManifest getManifestFromImageTag(String imageTag) } byte[] bytes = IOUtils.toByteArray(input); - manifest = JacksonUtil.getSharedReader().readValue(bytes, ImageManifest.class); + manifest = objMapper.readValue(bytes, ImageManifest.class); manifestCache.put(hash, manifest); return manifest; @@ -278,6 +279,7 @@ protected void serviceInit(Configuration configuration) throws Exception { DEFAULT_NM_RUNC_IMAGE_TOPLEVEL_DIR) + "/manifests/"; int numManifestsToCache = conf.getInt(NM_RUNC_NUM_MANIFESTS_TO_CACHE, DEFAULT_NUM_MANIFESTS_TO_CACHE); + this.objMapper = new ObjectMapper(); this.manifestCache = Collections.synchronizedMap( new LRUCache(numManifestsToCache, 0.75f)); @@ -313,7 +315,7 @@ protected void serviceStop() throws Exception { } private static class LRUCache extends LinkedHashMap { - private final int cacheSize; + private int cacheSize; LRUCache(int initialCapacity, float loadFactor) { super(initialCapacity, loadFactor, true); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManagerImpl.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManagerImpl.java index 8910ab48ddaaa..24cb34327b745 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/resource/ResourceProfilesManagerImpl.java @@ -20,10 +20,10 @@ import org.apache.hadoop.classification.VisibleForTesting; +import com.fasterxml.jackson.databind.ObjectMapper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.api.records.ResourceInformation; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -105,7 +105,8 @@ private void loadProfiles() throws IOException { resourcesFile = tmp.getPath(); } } - Map data = JacksonUtil.getSharedReader().readValue(new File(resourcesFile), Map.class); + ObjectMapper mapper = new ObjectMapper(); + Map data = mapper.readValue(new File(resourcesFile), Map.class); Iterator iterator = data.entrySet().iterator(); while (iterator.hasNext()) { Map.Entry entry = (Map.Entry) iterator.next(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/MappingRuleCreator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/MappingRuleCreator.java index 0fa10570d030a..174577099e48c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/MappingRuleCreator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/MappingRuleCreator.java @@ -27,7 +27,6 @@ import java.util.List; import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.server.resourcemanager.placement.csmappingrule.MappingRule; import org.apache.hadoop.yarn.server.resourcemanager.placement.csmappingrule.MappingRuleAction; import org.apache.hadoop.yarn.server.resourcemanager.placement.csmappingrule.MappingRuleActions; @@ -44,6 +43,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.VisibleForTesting; public class MappingRuleCreator { @@ -58,12 +58,14 @@ public MappingRulesDescription getMappingRulesFromJsonFile(String filePath) MappingRulesDescription getMappingRulesFromJson(byte[] contents) throws IOException { - return JacksonUtil.getSharedReader().readValue(contents, MappingRulesDescription.class); + ObjectMapper objectMapper = new ObjectMapper(); + return objectMapper.readValue(contents, MappingRulesDescription.class); } MappingRulesDescription getMappingRulesFromJson(String contents) throws IOException { - return JacksonUtil.getSharedReader().readValue(contents, 
MappingRulesDescription.class); + ObjectMapper objectMapper = new ObjectMapper(); + return objectMapper.readValue(contents, MappingRulesDescription.class); } public List getMappingRulesFromFile(String jsonPath) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/converter/LegacyMappingRuleToJson.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/converter/LegacyMappingRuleToJson.java index 6c963775be770..108d52bc40c36 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/converter/LegacyMappingRuleToJson.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/capacity/placement/converter/LegacyMappingRuleToJson.java @@ -21,7 +21,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; import com.fasterxml.jackson.databind.node.ObjectNode; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.QueuePath; @@ -53,11 +52,9 @@ public class LegacyMappingRuleToJson { public static final String JSON_NODE_MATCHES = "matches"; /** - * It is more performant to reuse ObjectMapper instances but keeping the instance - * private makes it harder for someone to reconfigure it which might have unwanted - * side effects. + * Our internal object mapper, used to create JSON nodes. */ - private static final ObjectMapper OBJECT_MAPPER = JacksonUtil.createBasicObjectMapper(); + private ObjectMapper objectMapper = new ObjectMapper(); /** * Collection to store the legacy group mapping rule strings. 
@@ -141,8 +138,8 @@ public LegacyMappingRuleToJson setAppNameMappingRules( */ public String convert() { //creating the basic JSON config structure - ObjectNode rootNode = OBJECT_MAPPER.createObjectNode(); - ArrayNode rulesNode = OBJECT_MAPPER.createArrayNode(); + ObjectNode rootNode = objectMapper.createObjectNode(); + ArrayNode rulesNode = objectMapper.createArrayNode(); rootNode.set("rules", rulesNode); //Processing and adding all the user group mapping rules @@ -161,7 +158,7 @@ public String convert() { } try { - return OBJECT_MAPPER + return objectMapper .writerWithDefaultPrettyPrinter() .writeValueAsString(rootNode); } catch (JsonProcessingException e) { @@ -249,7 +246,7 @@ private String[] splitRule(String rule, int expectedParts) { * @return The object node with the preset fields */ private ObjectNode createDefaultRuleNode(String type) { - return OBJECT_MAPPER + return objectMapper .createObjectNode() .put("type", type) //All legacy rule fallback to place to default diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/FSConfigToCSConfigConverter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/FSConfigToCSConfigConverter.java index 6a16aac686d6a..d801652377983 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/FSConfigToCSConfigConverter.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/fair/converter/FSConfigToCSConfigConverter.java @@ -32,7 +32,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.security.authorize.AccessControlList; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.QueueACL; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -56,6 +55,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.fasterxml.jackson.core.util.DefaultPrettyPrinter; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.ObjectWriter; import org.apache.hadoop.classification.VisibleForTesting; @@ -327,14 +327,14 @@ private void performRuleConversion(FairScheduler fs) placementConverter.convertPlacementPolicy(placementManager, ruleHandler, capacitySchedulerConfig, usePercentages); - final ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + ObjectMapper mapper = new ObjectMapper(); // close output stream if we write to a file, leave it open otherwise if (!consoleMode && rulesToFile) { mapper.configure(JsonGenerator.Feature.AUTO_CLOSE_TARGET, true); } else { mapper.configure(JsonGenerator.Feature.AUTO_CLOSE_TARGET, false); } - ObjectWriter writer = mapper.writerWithDefaultPrettyPrinter(); + ObjectWriter writer = mapper.writer(new DefaultPrettyPrinter()); if (consoleMode && rulesToFile) { System.out.println("======= " + MAPPING_RULES_JSON + " ======="); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStore.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStore.java index 7e49bd19aef73..1f4a9f42a9f8c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/EntityGroupFSTimelineStore.java @@ -42,7 +42,6 @@ import org.apache.hadoop.service.ServiceOperations; import org.apache.hadoop.ipc.CallerContext; import org.apache.hadoop.util.ApplicationClassLoader; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; @@ -321,7 +320,7 @@ protected void serviceStart() throws Exception { } } - objMapper = JacksonUtil.createBasicObjectMapper(); + objMapper = new ObjectMapper(); objMapper.setAnnotationIntrospector( new JaxbAnnotationIntrospector(TypeFactory.defaultInstance())); jsonFactory = new MappingJsonFactory(objMapper); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/LevelDBCacheTimelineStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/LevelDBCacheTimelineStore.java index 8ee6d1864c694..f84eeebbf0c8e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/LevelDBCacheTimelineStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/main/java/org/apache/hadoop/yarn/server/timeline/LevelDBCacheTimelineStore.java @@ -18,13 +18,13 @@ package org.apache.hadoop.yarn.server.timeline; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.timeline.util.LeveldbUtils; @@ -298,6 +298,7 @@ public void close() throws IOException { } }; } + static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); @SuppressWarnings("unchecked") private V getEntityForKey(byte[] key) throws IOException { @@ -305,7 +306,7 @@ private V getEntityForKey(byte[] key) throws IOException { if (resultRaw == null) { return null; } - return (V) JacksonUtil.getSharedReader().readValue(resultRaw, TimelineEntity.class); + return (V) OBJECT_MAPPER.readValue(resultRaw, TimelineEntity.class); } private byte[] getStartTimeKey(K entityId) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/test/java/org/apache/hadoop/yarn/server/timeline/PluginStoreTestUtils.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/test/java/org/apache/hadoop/yarn/server/timeline/PluginStoreTestUtils.java index d3885c5bc8fb4..cb887fe264fab 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/test/java/org/apache/hadoop/yarn/server/timeline/PluginStoreTestUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/src/test/java/org/apache/hadoop/yarn/server/timeline/PluginStoreTestUtils.java @@ -18,6 +18,7 @@ package org.apache.hadoop.yarn.server.timeline; import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.core.JsonFactory; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.core.util.MinimalPrettyPrinter; import com.fasterxml.jackson.databind.ObjectMapper; @@ -30,7 +31,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.JacksonUtil; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntities; import org.apache.hadoop.yarn.api.records.timeline.TimelineEntity; import org.apache.hadoop.yarn.api.records.timeline.TimelineEvent; @@ -108,7 +108,7 @@ static FSDataOutputStream createLogFile(Path logPath, FileSystem fs) } static ObjectMapper createObjectMapper() { - ObjectMapper mapper = JacksonUtil.createBasicObjectMapper(); + ObjectMapper mapper = new ObjectMapper(); mapper.setAnnotationIntrospector( new JaxbAnnotationIntrospector(TypeFactory.defaultInstance())); mapper.setSerializationInclusion(JsonInclude.Include.NON_NULL); @@ -230,9 +230,10 @@ static TimelineEvent createEvent(long timestamp, String type, Map T getTimelineRecordFromJSON( String jsonString, Class clazz) throws JsonGenerationException, JsonMappingException, IOException { - return OBJECT_MAPPER.readValue(jsonString, clazz); + return mapper.readValue(jsonString, clazz); } private static void fillFields(TimelineEntity finalEntity, From a962aa37e006faf642bfd994c0f20eb77adf3939 Mon Sep 17 00:00:00 2001 From: litao Date: Fri, 30 Aug 2024 12:56:33 +0800 Subject: [PATCH 104/113] HDFS-17599. EC: Fix the mismatch between locations and indices for mover (#6980) --- .../hdfs/server/balancer/Dispatcher.java | 8 +- .../hadoop/hdfs/server/mover/Mover.java | 18 ++- .../hadoop/hdfs/server/mover/TestMover.java | 148 ++++++++++++++++++ 3 files changed, 171 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java index 6ad0e4d22a854..acac65d774505 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java @@ -539,6 +539,10 @@ public void setIndices(byte[] indices) { this.indices = indices; } + public byte[] getIndices() { + return this.indices; + } + /** * Adjust EC block indices,it will remove the element of adjustList from indices. 
* @param adjustList the list will be removed from indices @@ -889,8 +893,8 @@ private long getBlockList() throws IOException, IllegalArgumentException { if (g != null) { // not unknown block.addLocation(g); } else if (blkLocs instanceof StripedBlockWithLocations) { - // some datanode may not in storageGroupMap due to decommission operation - // or balancer cli with "-exclude" parameter + // some datanode may not in storageGroupMap due to decommission or maintenance + // operation or balancer cli with "-exclude" parameter adjustList.add(i); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java index 63fe238cd5e07..dbe10cca92143 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java @@ -49,6 +49,7 @@ import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.Lists; +import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.Tool; @@ -222,12 +223,27 @@ DBlock newDBlock(LocatedBlock lb, List locations, } else { db = new DBlock(blk); } - for(MLocation ml : locations) { + + List adjustList = new ArrayList<>(); + for (int i = 0; i < locations.size(); i++) { + MLocation ml = locations.get(i); StorageGroup source = storages.getSource(ml); if (source != null) { db.addLocation(source); + } else if (lb.isStriped()) { + // some datanode may not in storages due to decommission or maintenance operation + // or balancer cli with "-exclude" parameter + adjustList.add(i); } } + + if (!adjustList.isEmpty()) { + // block.locations mismatch with block.indices + // adjust indices to get correct internalBlock + ((DBlockStriped) db).adjustIndices(adjustList); + Preconditions.checkArgument(((DBlockStriped) db).getIndices().length + == db.getLocations().size()); + } return db; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java index 90353c352ea41..9794ea9762d2e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java @@ -23,6 +23,7 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_HTTPS_ADDRESS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_KERBEROS_PRINCIPAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_KEYTAB_FILE_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HTTP_POLICY_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_MOVER_ADDRESS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_MOVER_KERBEROS_PRINCIPAL_KEY; @@ -73,6 +74,7 @@ import org.apache.hadoop.hdfs.StripedFileTestUtil; import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.DatanodeInfoWithStorage; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.protocol.HdfsConstants; 
import org.apache.hadoop.hdfs.protocol.LocatedBlock; @@ -82,10 +84,13 @@ import org.apache.hadoop.hdfs.server.balancer.ExitStatus; import org.apache.hadoop.hdfs.server.balancer.NameNodeConnector; import org.apache.hadoop.hdfs.server.balancer.TestBalancer; +import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; +import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeManager; import org.apache.hadoop.hdfs.server.datanode.DataNode; import org.apache.hadoop.hdfs.server.datanode.InternalDataNodeTestUtils; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; import org.apache.hadoop.hdfs.server.mover.Mover.MLocation; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.metrics2.MetricsRecordBuilder; @@ -98,6 +103,7 @@ import org.apache.hadoop.security.ssl.KeyStoreTestUtil; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.MetricsAsserts; +import org.apache.hadoop.util.Lists; import org.apache.hadoop.util.ToolRunner; import org.junit.Assert; import org.junit.Test; @@ -1005,6 +1011,148 @@ public void testMoverWithStripedFile() throws Exception { } } + @Test(timeout = 300000) + public void testMoverWithStripedFileMaintenance() throws Exception { + final Configuration conf = new HdfsConfiguration(); + initConfWithStripe(conf); + + // Start 9 datanodes + int numOfDatanodes = 9; + int storagesPerDatanode = 2; + long capacity = 9 * defaultBlockSize; + long[][] capacities = new long[numOfDatanodes][storagesPerDatanode]; + for (int i = 0; i < numOfDatanodes; i++) { + for(int j = 0; j < storagesPerDatanode; j++){ + capacities[i][j] = capacity; + } + } + final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf) + .numDataNodes(numOfDatanodes) + .storagesPerDatanode(storagesPerDatanode) + .storageTypes(new StorageType[][]{ + {StorageType.SSD, StorageType.SSD}, + {StorageType.SSD, StorageType.SSD}, + {StorageType.SSD, StorageType.SSD}, + {StorageType.SSD, StorageType.SSD}, + {StorageType.SSD, StorageType.SSD}, + {StorageType.SSD, StorageType.SSD}, + {StorageType.SSD, StorageType.SSD}, + {StorageType.SSD, StorageType.SSD}, + {StorageType.SSD, StorageType.SSD}}) + .storageCapacities(capacities) + .build(); + + try { + cluster.waitActive(); + cluster.getFileSystem().enableErasureCodingPolicy( + StripedFileTestUtil.getDefaultECPolicy().getName()); + + ClientProtocol client = NameNodeProxies.createProxy(conf, + cluster.getFileSystem(0).getUri(), ClientProtocol.class).getProxy(); + String barDir = "/bar"; + client.mkdirs(barDir, new FsPermission((short) 777), true); + // Set "/bar" directory with ALL_SSD storage policy. 
+ client.setStoragePolicy(barDir, "ALL_SSD"); + // Set an EC policy on "/bar" directory + client.setErasureCodingPolicy(barDir, + StripedFileTestUtil.getDefaultECPolicy().getName()); + + // Write file to barDir + final String fooFile = "/bar/foo"; + long fileLen = 6 * defaultBlockSize; + DFSTestUtil.createFile(cluster.getFileSystem(), new Path(fooFile), + fileLen, (short) 3, 0); + + // Verify storage types and locations + LocatedBlocks locatedBlocks = + client.getBlockLocations(fooFile, 0, fileLen); + DatanodeInfoWithStorage location = null; + for(LocatedBlock lb : locatedBlocks.getLocatedBlocks()){ + location = lb.getLocations()[8]; + for(StorageType type : lb.getStorageTypes()){ + Assert.assertEquals(StorageType.SSD, type); + } + } + + // Maintain the last datanode later + FSNamesystem ns = cluster.getNamesystem(0); + DatanodeManager datanodeManager = ns.getBlockManager().getDatanodeManager(); + DatanodeDescriptor dn = datanodeManager.getDatanode(location.getDatanodeUuid()); + + StripedFileTestUtil.verifyLocatedStripedBlocks(locatedBlocks, + dataBlocks + parityBlocks); + + // Start 5 more datanodes for mover + capacities = new long[5][storagesPerDatanode]; + for (int i = 0; i < 5; i++) { + for(int j = 0; j < storagesPerDatanode; j++){ + capacities[i][j] = capacity; + } + } + cluster.startDataNodes(conf, 5, + new StorageType[][]{ + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}, + {StorageType.DISK, StorageType.DISK}}, + true, null, null, null, capacities, + null, false, false, false, null, null, null); + cluster.triggerHeartbeats(); + + // Move blocks to DISK + client.setStoragePolicy(barDir, "HOT"); + int rc = ToolRunner.run(conf, new Mover.Cli(), + new String[]{"-p", barDir}); + // Verify the number of DISK storage types + waitForLocatedBlockWithDiskStorageType(cluster.getFileSystem(), fooFile, 5); + + // Maintain a datanode that simulates that one node in the location list + // is in ENTERING_MAINTENANCE status. + datanodeManager.getDatanode(dn.getDatanodeUuid()).startMaintenance(); + waitNodeState(dn, DatanodeInfo.AdminStates.ENTERING_MAINTENANCE); + + // Move blocks back to SSD. + // Without HDFS-17599, locations and indices lengths might not match, + // resulting in getting the wrong blockId in DBlockStriped#getInternalBlock, + // and mover will fail to run. + client.setStoragePolicy(barDir, "ALL_SSD"); + rc = ToolRunner.run(conf, new Mover.Cli(), + new String[]{"-p", barDir}); + + Assert.assertEquals("Movement to HOT should be successful", 0, rc); + } finally { + cluster.shutdown(); + } + } + + /** + * Wait till DataNode is transitioned to the expected state. + */ + protected void waitNodeState(DatanodeInfo node, DatanodeInfo.AdminStates state) { + waitNodeState(Lists.newArrayList(node), state); + } + + /** + * Wait till all DataNodes are transitioned to the expected state. 
+ */ + protected void waitNodeState(List nodes, DatanodeInfo.AdminStates state) { + for (DatanodeInfo node : nodes) { + boolean done = (state == node.getAdminState()); + while (!done) { + LOG.info("Waiting for node " + node + " to change state to " + + state + " current state: " + node.getAdminState()); + try { + Thread.sleep(DFS_HEARTBEAT_INTERVAL_DEFAULT * 10); + } catch (InterruptedException e) { + // nothing + } + done = (state == node.getAdminState()); + } + LOG.info("node " + node + " reached the state " + state); + } + } + /** * Wait until Namenode reports expected storage type for all blocks of * given file. From b404c8c8f80d015edf48c674463ed57a9af6c55c Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 30 Aug 2024 11:50:51 +0100 Subject: [PATCH 105/113] HADOOP-19252. Upgrade hadoop-thirdparty to 1.3.0 (#7007) Update the version of hadoop-thirdparty to 1.3.0 across all shaded artifacts used. This synchronizes the shaded protobuf library with those of all other shaded artifacts (guava, avro) Contributed by Steve Loughran --- LICENSE-binary | 15 ++++++++------- hadoop-project/pom.xml | 4 ++-- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index cc018ed265bbf..a716db70f72b3 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -233,19 +233,19 @@ com.google:guice:5.1.0 com.google:guice-servlet:5.1.0 com.google.api.grpc:proto-google-common-protos:1.0.0 com.google.code.gson:2.9.0 -com.google.errorprone:error_prone_annotations:2.2.0 -com.google.j2objc:j2objc-annotations:1.1 +com.google.errorprone:error_prone_annotations:2.5.1 +com.google.j2objc:j2objc-annotations:1.3 com.google.json-simple:json-simple:1.1.1 com.google.guava:failureaccess:1.0 com.google.guava:guava:20.0 -com.google.guava:guava:27.0-jre +com.google.guava:guava:32.0.1-jre com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava com.microsoft.azure:azure-storage:7.0.0 com.nimbusds:nimbus-jose-jwt:9.37.2 com.zaxxer:HikariCP:4.0.3 commons-beanutils:commons-beanutils:1.9.4 commons-cli:commons-cli:1.5.0 -commons-codec:commons-codec:1.11 +commons-codec:commons-codec:1.15 commons-collections:commons-collections:3.2.2 commons-daemon:commons-daemon:1.0.13 commons-io:commons-io:2.16.1 @@ -298,6 +298,7 @@ javax.inject:javax.inject:1 net.java.dev.jna:jna:5.2.0 net.minidev:accessors-smart:1.2 org.apache.avro:avro:1.9.2 +org.apache.avro:avro:1.11.3 org.apache.commons:commons-collections4:4.2 org.apache.commons:commons-compress:1.26.1 org.apache.commons:commons-configuration2:2.10.1 @@ -362,7 +363,7 @@ org.objenesis:objenesis:2.6 org.xerial.snappy:snappy-java:1.1.10.4 org.yaml:snakeyaml:2.0 org.wildfly.openssl:wildfly-openssl:1.1.3.Final -software.amazon.awssdk:bundle:jar:2.25.53 +software.amazon.awssdk:bundle:2.25.53 -------------------------------------------------------------------------------- @@ -395,7 +396,7 @@ hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/d3-3.5.17.min.js leveldb v1.13 com.google.protobuf:protobuf-java:2.5.0 -com.google.protobuf:protobuf-java:3.21.12 +com.google.protobuf:protobuf-java:3.25.3 com.google.re2j:re2j:1.1 com.jcraft:jsch:0.1.55 com.thoughtworks.paranamer:paranamer:2.3 @@ -485,7 +486,7 @@ com.microsoft.sqlserver:mssql-jdbc:6.2.1.jre7 org.bouncycastle:bcpkix-jdk18on:1.78.1 org.bouncycastle:bcprov-jdk18on:1.78.1 org.bouncycastle:bcutil-jdk18on:1.78.1 -org.checkerframework:checker-qual:2.5.2 +org.checkerframework:checker-qual:3.8.0 org.codehaus.mojo:animal-sniffer-annotations:1.21 org.jruby.jcodings:jcodings:1.0.13 
org.jruby.joni:joni:2.1.2 diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 05dccb6298501..33533dbbaedca 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -96,8 +96,8 @@ 3.23.4 ${env.HADOOP_PROTOC_PATH} - 1.2.0 - 1.3.0-SNAPSHOT + 1.3.0 + ${hadoop-thirdparty.version} ${hadoop-thirdparty.version} org.apache.hadoop.thirdparty ${hadoop-thirdparty-shaded-prefix}.protobuf From 1655acc5e2d5fe27e01f46ea02bd5a7dea44fe12 Mon Sep 17 00:00:00 2001 From: zhengchenyu Date: Fri, 30 Aug 2024 19:05:13 +0800 Subject: [PATCH 106/113] HADOOP-19250. [Addendum] Fix test TestServiceInterruptHandling.testRegisterAndRaise. (#7008) Contributed by Chenyu Zheng --- .../hadoop/service/launcher/TestServiceInterruptHandling.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/service/launcher/TestServiceInterruptHandling.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/service/launcher/TestServiceInterruptHandling.java index c21fa8b73073f..8181e07fae01f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/service/launcher/TestServiceInterruptHandling.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/service/launcher/TestServiceInterruptHandling.java @@ -38,7 +38,7 @@ public class TestServiceInterruptHandling @Test public void testRegisterAndRaise() throws Throwable { InterruptCatcher catcher = new InterruptCatcher(); - String name = IrqHandler.CONTROL_C; + String name = "USR2"; IrqHandler irqHandler = new IrqHandler(name, catcher); irqHandler.bind(); assertEquals(0, irqHandler.getSignalCount()); From 3bbfb2be0899053cf12986bf4b132d7006800ec9 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 3 Sep 2024 21:20:47 +0100 Subject: [PATCH 107/113] HADOOP-19257. S3A: ITestAssumeRole.testAssumeRoleBadInnerAuth failure (#7021) Remove the error string matched on so that no future message change from AWS will trigger a regression Contributed by Steve Loughran --- .../java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java index 5aa72e6949064..592529b553d24 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java @@ -283,8 +283,7 @@ public void testAssumeRoleBadInnerAuth() throws Exception { conf.set(SECRET_KEY, "not secret"); expectFileSystemCreateFailure(conf, AWSBadRequestException.class, - "not a valid " + - "key=value pair (missing equal-sign) in Authorization header"); + ""); } @Test From 9486844610473d068a4988512a7f5cfbefe4abac Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Wed, 4 Sep 2024 18:50:59 +0800 Subject: [PATCH 108/113] HADOOP-16928. 
Make javadoc work on Java 17 (#6976) Contributed by Cheng Pan --- .../org/apache/hadoop/conf/Configuration.java | 46 +++++------ .../apache/hadoop/fs/AbstractFileSystem.java | 2 +- .../apache/hadoop/fs/ChecksumFileSystem.java | 2 +- .../java/org/apache/hadoop/fs/ChecksumFs.java | 2 +- .../org/apache/hadoop/fs/FileContext.java | 32 ++++---- .../java/org/apache/hadoop/fs/FileSystem.java | 28 +++---- .../org/apache/hadoop/fs/RemoteIterator.java | 4 +- .../org/apache/hadoop/io/EnumSetWritable.java | 18 ++-- .../org/apache/hadoop/io/ObjectWritable.java | 4 +- .../org/apache/hadoop/io/SequenceFile.java | 10 +-- .../io/compress/bzip2/CBZip2InputStream.java | 8 +- .../io/compress/bzip2/CBZip2OutputStream.java | 82 +++++++++---------- .../io/compress/zlib/BuiltInZlibDeflater.java | 2 +- .../apache/hadoop/io/file/tfile/Chunk.java | 4 +- .../apache/hadoop/ipc/RpcClientException.java | 2 +- .../org/apache/hadoop/ipc/RpcException.java | 2 +- .../apache/hadoop/ipc/RpcServerException.java | 2 +- .../hadoop/ipc/UnexpectedServerException.java | 2 +- .../apache/hadoop/metrics2/package-info.java | 26 +++--- .../java/org/apache/hadoop/net/NetUtils.java | 6 +- .../security/AccessControlException.java | 6 +- .../authorize/AuthorizationException.java | 6 +- .../hadoop/util/GenericOptionsParser.java | 2 +- .../hadoop/util/InstrumentedReadLock.java | 2 +- .../util/InstrumentedReadWriteLock.java | 2 +- .../hadoop/util/InstrumentedWriteLock.java | 2 +- .../hadoop/util/ShutdownThreadsHelper.java | 16 ++-- .../org/apache/hadoop/util/StringUtils.java | 2 +- .../apache/hadoop/ipc/MiniRPCBenchmark.java | 12 +-- .../client/impl/BlockReaderLocalLegacy.java | 2 +- .../blockmanagement/DatanodeDescriptor.java | 2 +- .../namenode/EncryptionZoneManager.java | 2 +- .../hadoop/hdfs/server/namenode/NameNode.java | 2 +- .../server/namenode/snapshot/DiffList.java | 2 +- .../FileDistributionCalculator.java | 18 ++-- .../FileDistributionVisitor.java | 16 ++-- .../org/apache/hadoop/hdfs/TestSafeMode.java | 2 +- .../datanode/TestReadOnlySharedStorage.java | 6 +- .../app/rm/preemption/AMPreemptionPolicy.java | 2 +- .../hadoop/mapred/FileOutputFormat.java | 16 ++-- .../org/apache/hadoop/mapred/JobConf.java | 4 +- .../org/apache/hadoop/mapred/MapRunnable.java | 2 +- .../apache/hadoop/mapred/jobcontrol/Job.java | 2 +- .../mapred/join/CompositeInputFormat.java | 12 +-- .../mapred/join/CompositeRecordReader.java | 4 +- .../mapred/join/OverrideRecordReader.java | 2 +- .../org/apache/hadoop/mapred/join/Parser.java | 2 +- .../mapred/lib/TotalOrderPartitioner.java | 2 +- .../lib/jobcontrol/ControlledJob.java | 2 +- .../lib/join/CompositeInputFormat.java | 12 +-- .../lib/join/CompositeRecordReader.java | 4 +- .../lib/join/OverrideRecordReader.java | 2 +- .../hadoop/mapreduce/lib/join/Parser.java | 2 +- .../mapreduce/lib/join/TupleWritable.java | 2 +- .../lib/output/FileOutputFormat.java | 8 +- .../lib/partition/TotalOrderPartitioner.java | 10 +-- .../apache/hadoop/fs/AccumulatingReducer.java | 8 +- .../org/apache/hadoop/fs/IOMapperBase.java | 4 +- .../org/apache/hadoop/fs/JHLogAnalyzer.java | 42 +++++----- .../apache/hadoop/examples/pi/package.html | 71 ++++++++-------- hadoop-project/pom.xml | 23 ------ hadoop-tools/hadoop-aws/pom.xml | 1 - .../hadoop/mapred/gridmix/FilePool.java | 2 +- .../streaming/io/IdentifierResolver.java | 2 +- .../org/apache/hadoop/streaming/package.html | 2 +- .../org/apache/hadoop/typedbytes/package.html | 8 +- .../SignalContainerRequest.java | 2 +- .../timelineservice/ServiceMetricsSink.java | 2 +- 
.../yarn/security/AdminACLsManager.java | 4 +- .../hadoop/yarn/util/BoundedAppender.java | 2 +- .../yarn/server/utils/LeveldbIterator.java | 2 +- .../storage/common/BaseTable.java | 2 +- 72 files changed, 317 insertions(+), 336 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java index 94285a4dfb7e5..f7bf2b8703d97 100755 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java @@ -119,7 +119,7 @@ /** * Provides access to configuration parameters. * - *

[Javadoc hunks for the Configuration class comment elided: the markup of the "Resources", "Final Parameters", "Variable Expansion" and "Tags" sections is reworked so the file documents cleanly under the Java 17 javadoc tool. The HTML tags were stripped when this patch was extracted, which leaves most of the - and + lines looking identical, so the original hunks cannot be reproduced verbatim; the documented behaviour of Configuration is unchanged.]
      */ @InterfaceAudience.Public @InterfaceStability.Stable @@ -576,7 +576,7 @@ public static void addDeprecations(DeprecationDelta[] deltas) { * It does not override any existing entries in the deprecation map. * This is to be used only by the developers in order to add deprecation of * keys, and attempts to call this method after loading resources once, - * would lead to UnsupportedOperationException + * would lead to UnsupportedOperationException * * If a key is deprecated in favor of multiple keys, they are all treated as * aliases of each other, and setting any one of them resets all the others @@ -604,7 +604,7 @@ public static void addDeprecation(String key, String[] newKeys, * It does not override any existing entries in the deprecation map. * This is to be used only by the developers in order to add deprecation of * keys, and attempts to call this method after loading resources once, - * would lead to UnsupportedOperationException + * would lead to UnsupportedOperationException * * If you have multiple deprecation entries to add, it is more efficient to * use #addDeprecations(DeprecationDelta[] deltas) instead. @@ -624,7 +624,7 @@ public static void addDeprecation(String key, String newKey, * It does not override any existing entries in the deprecation map. * This is to be used only by the developers in order to add deprecation of * keys, and attempts to call this method after loading resources once, - * would lead to UnsupportedOperationException + * would lead to UnsupportedOperationException * * If a key is deprecated in favor of multiple keys, they are all treated as * aliases of each other, and setting any one of them resets all the others @@ -648,7 +648,7 @@ public static void addDeprecation(String key, String[] newKeys) { * It does not override any existing entries in the deprecation map. * This is to be used only by the developers in order to add deprecation of * keys, and attempts to call this method after loading resources once, - * would lead to UnsupportedOperationException + * would lead to UnsupportedOperationException * * If you have multiple deprecation entries to add, it is more efficient to * use #addDeprecations(DeprecationDelta[] deltas) instead. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java index 63b5bc7d94ac3..7988ebb7904dc 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java @@ -247,7 +247,7 @@ protected static synchronized Map getAllStatistics() { * The main factory method for creating a file system. Get a file system for * the URI's scheme and authority. The scheme of the uri * determines a configuration property name, - * fs.AbstractFileSystem.scheme.impl whose value names the + * fs.AbstractFileSystem.scheme.impl whose value names the * AbstractFileSystem class. * * The entire URI and conf is passed to the AbstractFileSystem factory method. 
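Reviewer aside: the Configuration and AbstractFileSystem hunks above are typical of this whole patch, which only touches javadoc markup. The sketch below is illustrative rather than a reconstruction of the lost hunks; the "before" comment is an assumed example of markup that newer javadoc tools reject, and the "after" comment is one Java 17 friendly way to write the same thing, not necessarily the exact form used in the patch.

    // Illustrative only (assumed "before" markup; not the literal Hadoop source):
    /**
     * Provides access to configuration parameters.
     *
     * <p><h4 id="Resources">Resources</h4>
     * Values such as <code>conf.get("tempdir")</code> are resolved at read time.
     */
    class OldStyleJavadoc {}

    // One Java 17 friendly rewrite: no heading nested inside <p>, inline code
    // expressed with {@code ...} instead of <code> tags.
    /**
     * Provides access to configuration parameters.
     *
     * <h2>Resources</h2>
     * Values such as {@code conf.get("tempdir")} are resolved at read time.
     */
    class NewStyleJavadoc {}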
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java index 716c6c5004918..586350d843997 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java @@ -866,7 +866,7 @@ boolean apply(Path p) throws IOException { /** * Set replication for an existing file. - * Implement the abstract setReplication of FileSystem + * Implement the abstract setReplication of FileSystem * @param src file name * @param replication new replication * @throws IOException if an I/O error occurs. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java index 4820c5c3045d7..5f3e5d9b8efa9 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java @@ -453,7 +453,7 @@ private boolean isDirectory(Path f) } /** * Set replication for an existing file. - * Implement the abstract setReplication of FileSystem + * Implement the abstract setReplication of FileSystem * @param src file name * @param replication new replication * @throws IOException if an I/O error occurs. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java index eb5983f098a71..a903e337de140 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java @@ -1977,9 +1977,9 @@ public RemoteIterator listFiles( LocatedFileStatus curFile; /** - * Returns true if the iterator has more files. + * Returns true if the iterator has more files. * - * @return true if the iterator has more files. + * @return true if the iterator has more files. * @throws AccessControlException if not allowed to access next * file's status or locations * @throws FileNotFoundException if next file does not exist any more @@ -2071,34 +2071,34 @@ public LocatedFileStatus next() throws IOException { *
[Hunks for the glob-pattern table in FileContext's javadoc elided: the table lists the ?, *, [abc], [a-b], [^a], \c, {ab,cd} and {ab,c{de,fh}} pattern forms, and this change only adjusts the table markup for the Java 17 javadoc tool. The table tags were stripped during extraction, so the - and + rows cannot be reproduced verbatim; the documented glob grammar is unchanged.]
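For context, the FileContext hunks above (the listFiles/RemoteIterator javadoc and the glob table) change no behaviour. A minimal usage sketch of the documented iterator API, assuming an illustrative /data path on the default filesystem:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileContext;
    import org.apache.hadoop.fs.LocatedFileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.RemoteIterator;

    public class ListFilesExample {
      public static void main(String[] args) throws Exception {
        FileContext fc = FileContext.getFileContext(new Configuration());
        // hasNext()/next() are the IOException-throwing RemoteIterator pair
        // whose javadoc the hunks above retouch.
        RemoteIterator<LocatedFileStatus> it =
            fc.util().listFiles(new Path("/data"), true);   // illustrative path
        while (it.hasNext()) {
          System.out.println(it.next().getPath());
        }
      }
    }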
      diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index 38ec611451750..930abf0b5d172 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -2178,34 +2178,34 @@ public FileStatus[] listStatus(Path[] files, PathFilter filter) *
[The matching glob-pattern table in FileSystem's javadoc gets the same treatment: identical grammar (single-character matches, character sets and ranges, escapes, and string sets), markup-only changes, and table tags lost in extraction, so the - and + rows are not reproduced here.]
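A hedged usage sketch of that documented pattern grammar against the unchanged globStatus API; the directory layout and the pattern are made up for illustration:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class GlobExample {
      public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // {2023,2024} selects from a string set, * matches any run of
        // characters, [0-9] matches one character from the range; see the
        // grammar summarised above.
        FileStatus[] matches =
            fs.globStatus(new Path("/data/{2023,2024}/part-*[0-9]"));
        if (matches != null) {              // globStatus may return null
          for (FileStatus st : matches) {
            System.out.println(st.getPath());
          }
        }
      }
    }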
      diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RemoteIterator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RemoteIterator.java index 9238c3f6fb993..06b7728ae3e9d 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RemoteIterator.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RemoteIterator.java @@ -24,9 +24,9 @@ */ public interface RemoteIterator { /** - * Returns true if the iteration has more elements. + * Returns true if the iteration has more elements. * - * @return true if the iterator has more elements. + * @return true if the iterator has more elements. * @throws IOException if any IO error occurs */ boolean hasNext() throws IOException; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/EnumSetWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/EnumSetWritable.java index 4b1dc7513d054..f2c8b76e2ab70 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/EnumSetWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/EnumSetWritable.java @@ -59,10 +59,10 @@ public boolean add(E e) { } /** - * Construct a new EnumSetWritable. If the value argument is null or - * its size is zero, the elementType argument must not be null. If - * the argument value's size is bigger than zero, the argument - * elementType is not be used. + * Construct a new EnumSetWritable. If the value argument is null or + * its size is zero, the elementType argument must not be null. If + * the argument value's size is bigger than zero, the argument + * elementType is not be used. * * @param value enumSet value. * @param elementType elementType. @@ -72,7 +72,7 @@ public EnumSetWritable(EnumSet value, Class elementType) { } /** - * Construct a new EnumSetWritable. Argument value should not be null + * Construct a new EnumSetWritable. Argument value should not be null * or empty. * * @param value enumSet value. @@ -83,10 +83,10 @@ public EnumSetWritable(EnumSet value) { /** * reset the EnumSetWritable with specified - * value and elementType. If the value argument - * is null or its size is zero, the elementType argument must not be - * null. If the argument value's size is bigger than zero, the - * argument elementType is not be used. + * value and elementType. If the value argument + * is null or its size is zero, the elementType argument must not be + * null. If the argument value's size is bigger than zero, the + * argument elementType is not be used. * * @param value enumSet Value. * @param elementType elementType. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ObjectWritable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ObjectWritable.java index 29c06a01ad6e3..831931bdace66 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ObjectWritable.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/ObjectWritable.java @@ -401,8 +401,8 @@ static Method getStaticProtobufMethod(Class declaredClass, String method, } /** - * Find and load the class with given name className by first finding - * it in the specified conf. If the specified conf is null, + * Find and load the class with given name className by first finding + * it in the specified conf. If the specified conf is null, * try load it directly. 
* * @param conf configuration. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java index 9d6727c159c06..325820d11cc1c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java @@ -91,19 +91,19 @@ *

[Class javadoc hunks for SequenceFile elided: the "SequenceFile Formats" overview (the common header fields plus the Uncompressed and Record-Compressed format listings) and the note recommending the static createWriter factory methods are reworked for the Java 17 javadoc tool. The heading and list markup was stripped during extraction, so the - and + lines cannot be reproduced verbatim; a writer sketch for the recommended API follows.]
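Since the comment above recommends the static createWriter factories, here is a hedged sketch of the option-based overload; the path and the key/value types are illustrative, and compression is left at its default:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IOUtils;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;

    public class SeqFileWriteExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/demo.seq");   // illustrative path
        SequenceFile.Writer writer = null;
        try {
          // The preferred factory: options pick the file and the key/value
          // classes; a compression option could be added here as well.
          writer = SequenceFile.createWriter(conf,
              SequenceFile.Writer.file(file),
              SequenceFile.Writer.keyClass(Text.class),
              SequenceFile.Writer.valueClass(IntWritable.class));
          writer.append(new Text("answer"), new IntWritable(42));
        } finally {
          IOUtils.closeStream(writer);
        }
      }
    }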
[The remaining SequenceFile format-list hunks, plus the javadoc hunks for CBZip2InputStream (index 61e88d80d8ce4..116a74963a8d9) and CBZip2OutputStream (index 50bdddb8136fc..f94d1387ebc29), are elided: they retouch markup around the memory-usage notes, the per-blocksize memory table (100k through 900k blocks), the MIN_BLOCKSIZE/MAX_BLOCKSIZE and chooseBlockSize descriptions, and the constructor notes that the caller must write the two BZip2 magic bytes "BZ" before wrapping a stream. The tables and tags were stripped during extraction, so the - and + lines cannot be reproduced verbatim; a sketch of the magic-byte requirement follows.]
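The constructor notes summarised above make the caller responsible for the stream header. A hedged low-level sketch with an illustrative output path; in practice most code should go through org.apache.hadoop.io.compress.BZip2Codec rather than this class:

    import java.io.BufferedOutputStream;
    import java.io.FileOutputStream;
    import java.io.OutputStream;
    import java.nio.charset.StandardCharsets;

    import org.apache.hadoop.io.compress.bzip2.CBZip2OutputStream;

    public class RawBzip2WriteExample {
      public static void main(String[] args) throws Exception {
        try (OutputStream raw =
                 new BufferedOutputStream(new FileOutputStream("/tmp/demo.bz2"))) {
          // Per the javadoc above, the caller writes the two magic bytes first.
          raw.write('B');
          raw.write('Z');
          CBZip2OutputStream bz = new CBZip2OutputStream(raw); // default 900k blocksize
          bz.write("hello bzip2".getBytes(StandardCharsets.UTF_8));
          bz.close(); // finishes the final block; closing raw again afterwards is harmless
        }
      }
    }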
        * diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibDeflater.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibDeflater.java index 739788fa5f5ec..e98980f0f26aa 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibDeflater.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/zlib/BuiltInZlibDeflater.java @@ -57,7 +57,7 @@ public synchronized int compress(byte[] b, int off, int len) /** * reinit the compressor with the given configuration. It will reset the * compressor's compression level and compression strategy. Different from - * ZlibCompressor, BuiltInZlibDeflater only support three + * ZlibCompressor, BuiltInZlibDeflater only support three * kind of compression strategy: FILTERED, HUFFMAN_ONLY and DEFAULT_STRATEGY. * It will use DEFAULT_STRATEGY as default if the configured compression * strategy is not supported. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Chunk.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Chunk.java index 05e3d48a469a2..ec508c020468a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Chunk.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/file/tfile/Chunk.java @@ -219,8 +219,8 @@ static public class ChunkEncoder extends OutputStream { /** * The number of valid bytes in the buffer. This value is always in the - * range 0 through buf.length; elements buf[0] - * through buf[count-1] contain valid byte data. + * range 0 through buf.length; elements buf[0] + * through buf[count-1] contain valid byte data. */ private int count; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcClientException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcClientException.java index 7f8d9707f9cd7..107899a9c0d4b 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcClientException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcClientException.java @@ -38,7 +38,7 @@ public class RpcClientException extends RpcException { * @param message message. * @param cause that cause this exception * @param cause the cause (can be retried by the {@link #getCause()} method). - * (A null value is permitted, and indicates that the cause + * (A null value is permitted, and indicates that the cause * is nonexistent or unknown.) */ RpcClientException(final String message, final Throwable cause) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcException.java index 8141333d717a8..ac687050d7cb1 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcException.java @@ -40,7 +40,7 @@ public class RpcException extends IOException { * @param message message. * @param cause that cause this exception * @param cause the cause (can be retried by the {@link #getCause()} method). - * (A null value is permitted, and indicates that the cause + * (A null value is permitted, and indicates that the cause * is nonexistent or unknown.) 
*/ RpcException(final String message, final Throwable cause) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcServerException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcServerException.java index ce4aac54b6cd2..31f62d4f06fe0 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcServerException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/RpcServerException.java @@ -39,7 +39,7 @@ public RpcServerException(final String message) { * * @param message message. * @param cause the cause (can be retried by the {@link #getCause()} method). - * (A null value is permitted, and indicates that the cause + * (A null value is permitted, and indicates that the cause * is nonexistent or unknown.) */ public RpcServerException(final String message, final Throwable cause) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/UnexpectedServerException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/UnexpectedServerException.java index f00948d5d5065..c683010a88029 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/UnexpectedServerException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/UnexpectedServerException.java @@ -39,7 +39,7 @@ public class UnexpectedServerException extends RpcException { * @param message message. * @param cause that cause this exception * @param cause the cause (can be retried by the {@link #getCause()} method). - * (A null value is permitted, and indicates that the cause + * (A null value is permitted, and indicates that the cause * is nonexistent or unknown.) */ UnexpectedServerException(final String message, final Throwable cause) { diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/package-info.java index 196469be9dce2..3830a58865398 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/package-info.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/package-info.java @@ -17,7 +17,7 @@ */ /** -

        Metrics 2.0

        +

        Metrics 2.0

        -

        Overview

        +

        Overview

        This package provides a framework for metrics instrumentation and publication.

        @@ -46,7 +46,7 @@ metrics from sources to sinks based on (per source/sink) configuration design document for architecture and implementation notes.

        -

        Sub-packages

        +

        Sub-packages

        org.apache.hadoop.metrics2.annotation
        Public annotation interfaces for simpler metrics instrumentation. @@ -84,9 +84,9 @@ usually does not need to reference any class here.
        -

        Getting started

        -

        Implementing metrics sources

        - +

        Getting started

        +

        Implementing metrics sources

        +
        @@ -153,7 +153,7 @@ record named "CacheStat" for reporting a number of statistics relating to allowing generated metrics names and multiple records. In fact, the annotation interface is implemented with the MetricsSource interface internally.

        -

        Implementing metrics sinks

        +

        Implementing metrics sinks

           public class MySink implements MetricsSink {
             public void putMetrics(MetricsRecord record) {
        @@ -187,7 +187,7 @@ they need to be hooked up to a metrics system. In this case (and most
           
           DefaultMetricsSystem.initialize("test"); // called once per application
           DefaultMetricsSystem.register(new MyStat());
- * Metrics system configuration
+ * Metrics system configuration

        Sinks are usually specified in a configuration file, say, "hadoop-metrics2-test.properties", as:

        @@ -209,7 +209,7 @@ identify a particular sink instance. The asterisk (*) can be for more examples.

- * Metrics Filtering
+ * Metrics Filtering

        One of the features of the default metrics system is metrics filtering configuration by source, context, record/tags and metrics. The least expensive way to filter out metrics would be at the source level, e.g., @@ -241,7 +241,7 @@ identify a particular sink instance. The asterisk (*) can be level, respectively. Filters can be combined to optimize the filtering efficiency.

- * Metrics instrumentation strategy
+ * Metrics instrumentation strategy

        In previous examples, we showed a minimal example to use the metrics framework. In a larger system (like Hadoop) that allows @@ -279,7 +279,7 @@ instrumentation interface (incrCounter0 etc.) that allows different -

* Migration from previous system
+ * Migration from previous system

        Users of the previous metrics system would notice the lack of context prefix in the configuration examples. The new metrics system decouples the concept for context (for grouping) with the @@ -289,7 +289,7 @@ metrics system decouples the concept for context (for grouping) with the configure an implementation instance per context, even if you have a backend that can handle multiple contexts (file, gangalia etc.):

- Implementing metrics sources
+
        @@ -311,7 +311,7 @@ backend that can handle multiple contexts (file, gangalia etc.):

        In the new metrics system, you can simulate the previous behavior by using the context option in the sink options like the following:

- Migration from previous system
+
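The package-info hunk above walks through annotated metrics sources, sinks and registration with the default metrics system. A minimal, hedged sketch of that registration flow follows; the class name MyStat and the counter field are illustrative and not part of the patch:

    import org.apache.hadoop.metrics2.MetricsSystem;
    import org.apache.hadoop.metrics2.annotation.Metric;
    import org.apache.hadoop.metrics2.annotation.Metrics;
    import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
    import org.apache.hadoop.metrics2.lib.MutableCounterLong;

    /** Hypothetical annotated metrics source; names are illustrative only. */
    @Metrics(about = "MyStat metrics", context = "test")
    public class MyStat {

      // The metrics system instantiates annotated mutable metrics when the
      // source is registered.
      @Metric("Number of operations observed")
      MutableCounterLong numOps;

      void onOperation() {
        numOps.incr();            // update the counter from application code
      }

      public static void main(String[] args) {
        // Initialize the default metrics system once per process, then register
        // the source so that configured sinks can poll it.
        MetricsSystem ms = DefaultMetricsSystem.initialize("test");
        MyStat stat = ms.register("MyStat", "Example source", new MyStat());
        stat.onOperation();
      }
    }

A sink configured in a properties file such as hadoop-metrics2-test.properties (for example a line like test.sink.file.class=org.apache.hadoop.metrics2.sink.FileSink, assuming that sink is on the classpath) would then receive records from this source on each poll period.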
        diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java index a647bb041066f..3a4f4fd37d3f8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/net/NetUtils.java @@ -83,9 +83,9 @@ public class NetUtils { /** * Get the socket factory for the given class according to its * configuration parameter - * hadoop.rpc.socket.factory.class.<ClassName>. When no + * hadoop.rpc.socket.factory.class.<ClassName>. When no * such parameter exists then fall back on the default socket factory as - * configured by hadoop.rpc.socket.factory.class.default. If + * configured by hadoop.rpc.socket.factory.class.default. If * this default socket factory is not configured, then fall back on the JVM * default socket factory. * @@ -111,7 +111,7 @@ public static SocketFactory getSocketFactory(Configuration conf, /** * Get the default socket factory as specified by the configuration - * parameter hadoop.rpc.socket.factory.default + * parameter hadoop.rpc.socket.factory.default * * @param conf the configuration * @return the default socket factory as specified in the configuration or diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AccessControlException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AccessControlException.java index d0a3620d6d4b2..1ed121f9616da 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AccessControlException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/AccessControlException.java @@ -48,10 +48,10 @@ public AccessControlException() { /** * Constructs a new exception with the specified cause and a detail - * message of (cause==null ? null : cause.toString()) (which - * typically contains the class and detail message of cause). + * message of (cause==null ? null : cause.toString()) (which + * typically contains the class and detail message of cause). * @param cause the cause (which is saved for later retrieval by the - * {@link #getCause()} method). (A null value is + * {@link #getCause()} method). (A null value is * permitted, and indicates that the cause is nonexistent or * unknown.) */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/AuthorizationException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/AuthorizationException.java index 79c7d1814da28..e9c3323bb5b12 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/AuthorizationException.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/authorize/AuthorizationException.java @@ -44,10 +44,10 @@ public AuthorizationException(String message) { /** * Constructs a new exception with the specified cause and a detail - * message of (cause==null ? null : cause.toString()) (which - * typically contains the class and detail message of cause). + * message of (cause==null ? null : cause.toString()) (which + * typically contains the class and detail message of cause). * @param cause the cause (which is saved for later retrieval by the - * {@link #getCause()} method). (A null value is + * {@link #getCause()} method). 
(A null value is * permitted, and indicates that the cause is nonexistent or * unknown.) */ diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java index d95878b5670cd..105a8cdcef022 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/GenericOptionsParser.java @@ -54,7 +54,7 @@ * line arguments, enabling applications to easily specify a namenode, a * ResourceManager, additional configuration resources etc. * - *

Generic Options
+ * Generic Options
 *
 * The supported generic options are:
 *
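The GenericOptionsParser hunk above describes the generic options every Hadoop command-line tool accepts. A small hedged sketch of a driver that lets the parser consume those options before reading its own arguments; the class name is illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.util.GenericOptionsParser;

    /** Hypothetical driver showing generic option handling. */
    public class MyTool {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Consumes generic options such as -D key=value, -conf, -fs, -files,
        // -libjars and -archives, applying them to the Configuration.
        GenericOptionsParser parser = new GenericOptionsParser(conf, args);
        String[] remaining = parser.getRemainingArgs();   // tool-specific arguments
        System.out.println("default filesystem: " + conf.get("fs.defaultFS"));
        System.out.println("remaining args: " + remaining.length);
      }
    }

Invoked as, say, hadoop jar mytool.jar MyTool -D mapreduce.job.queuename=default input output, the -D option is absorbed into the Configuration and only the tool-specific paths remain.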
        diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedReadLock.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedReadLock.java index c99290bc3d3ac..e001d6775c647 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedReadLock.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedReadLock.java @@ -26,7 +26,7 @@ import org.slf4j.Logger; /** - * This is a wrap class of a ReadLock. + * This is a wrap class of a ReadLock. * It extends the class {@link InstrumentedLock}, and can be used to track * whether a specific read lock is being held for too long and log * warnings if so. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedReadWriteLock.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedReadWriteLock.java index 758f1ff87cff7..caceb31cfb552 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedReadWriteLock.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedReadWriteLock.java @@ -28,7 +28,7 @@ /** * This is a wrap class of a {@link ReentrantReadWriteLock}. * It implements the interface {@link ReadWriteLock}, and can be used to - * create instrumented ReadLock and WriteLock. + * create instrumented ReadLock and WriteLock. */ @InterfaceAudience.Private @InterfaceStability.Unstable diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedWriteLock.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedWriteLock.java index 4637b5efe535b..f1cb5feb52d73 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedWriteLock.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/InstrumentedWriteLock.java @@ -26,7 +26,7 @@ import org.slf4j.Logger; /** - * This is a wrap class of a WriteLock. + * This is a wrap class of a WriteLock. * It extends the class {@link InstrumentedLock}, and can be used to track * whether a specific write lock is being held for too long and log * warnings if so. 
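The three Instrumented*Lock hunks above describe wrappers that log a warning when a read or write lock is held longer than a threshold. A hedged sketch of guarding shared state with InstrumentedReadWriteLock, assuming the five-argument constructor (fairness flag, lock name, logger, minimum gap between warnings in ms, warning threshold in ms) used elsewhere in Hadoop:

    import java.util.concurrent.locks.Lock;
    import org.apache.hadoop.util.InstrumentedReadWriteLock;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    /** Sketch of wrapping shared state in an instrumented read/write lock. */
    public class CacheState {
      private static final Logger LOG = LoggerFactory.getLogger(CacheState.class);

      // fair=false, a name for log messages, the logger, the minimum gap between
      // warnings, and the hold-time threshold above which a warning is logged.
      private final InstrumentedReadWriteLock rwLock =
          new InstrumentedReadWriteLock(false, "CacheState", LOG, 1000, 5000);

      private long version;

      public long readVersion() {
        Lock read = rwLock.readLock();
        read.lock();
        try {
          return version;          // long-held read locks are logged as warnings
        } finally {
          read.unlock();
        }
      }

      public void bumpVersion() {
        Lock write = rwLock.writeLock();
        write.lock();
        try {
          version++;
        } finally {
          write.unlock();
        }
      }
    }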
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownThreadsHelper.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownThreadsHelper.java index dc13697f158ad..f026585be285a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownThreadsHelper.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ShutdownThreadsHelper.java @@ -37,8 +37,8 @@ public class ShutdownThreadsHelper { /** * @param thread {@link Thread to be shutdown} - * @return true if the thread is successfully interrupted, - * false otherwise + * @return true if the thread is successfully interrupted, + * false otherwise */ public static boolean shutdownThread(Thread thread) { return shutdownThread(thread, SHUTDOWN_WAIT_MS); @@ -48,8 +48,8 @@ public static boolean shutdownThread(Thread thread) { * @param thread {@link Thread to be shutdown} * @param timeoutInMilliSeconds time to wait for thread to join after being * interrupted - * @return true if the thread is successfully interrupted, - * false otherwise + * @return true if the thread is successfully interrupted, + * false otherwise */ public static boolean shutdownThread(Thread thread, long timeoutInMilliSeconds) { @@ -71,8 +71,8 @@ public static boolean shutdownThread(Thread thread, * shutdownExecutorService. * * @param service {@link ExecutorService to be shutdown} - * @return true if the service is terminated, - * false otherwise + * @return true if the service is terminated, + * false otherwise * @throws InterruptedException if the thread is interrupted. */ public static boolean shutdownExecutorService(ExecutorService service) @@ -87,8 +87,8 @@ public static boolean shutdownExecutorService(ExecutorService service) * @param timeoutInMs time to wait for {@link * ExecutorService#awaitTermination(long, java.util.concurrent.TimeUnit)} * calls in milli seconds. - * @return true if the service is terminated, - * false otherwise + * @return true if the service is terminated, + * false otherwise * @throws InterruptedException if the thread is interrupted. */ public static boolean shutdownExecutorService(ExecutorService service, diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java index 14a745815750a..95e683e83778c 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/StringUtils.java @@ -245,7 +245,7 @@ public static String uriToString(URI[] uris){ /** * @param str * The string array to be parsed into an URI array. - * @return null if str is null, else the URI array + * @return null if str is null, else the URI array * equivalent to str. * @throws IllegalArgumentException * If any string in str violates RFC 2396. diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/MiniRPCBenchmark.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/MiniRPCBenchmark.java index 70ae639091421..4234f24006999 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/MiniRPCBenchmark.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/MiniRPCBenchmark.java @@ -53,18 +53,18 @@ * The benchmark supports three authentication methods: *
          *
1. simple - no authentication. In order to enter this mode the configuration file
   core-site.xml should specify hadoop.security.authentication = simple.
   This is the default mode.
2. kerberos - kerberos authentication. In order to enter this mode the configuration file
   core-site.xml should specify hadoop.security.authentication = kerberos and
   the argument string should provide qualifying keytabFile and userName parameters.
3. delegation token - authentication using delegation token. In order to enter this mode
   the benchmark should provide all the mentioned parameters for kerberos authentication
   plus the useToken argument option.
        * Input arguments: *
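The ShutdownThreadsHelper hunk further up documents boolean-returning helpers for interrupting a thread and draining an executor; their signatures are visible in that hunk. A hedged usage sketch, with the worker thread and pool purely illustrative:

    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Executors;
    import org.apache.hadoop.util.ShutdownThreadsHelper;

    /** Sketch of using ShutdownThreadsHelper during service shutdown. */
    public class WorkerShutdown {
      public static void main(String[] args) throws InterruptedException {
        Thread worker = new Thread(() -> { /* poll a queue until interrupted */ });
        worker.start();
        ExecutorService pool = Executors.newFixedThreadPool(4);

        // Interrupt the thread and wait (default timeout) for it to exit.
        boolean threadStopped = ShutdownThreadsHelper.shutdownThread(worker);

        // Shut down the executor, waiting up to 5 seconds for termination.
        boolean poolStopped = ShutdownThreadsHelper.shutdownExecutorService(pool, 5000);

        System.out.println("thread stopped: " + threadStopped
            + ", pool stopped: " + poolStopped);
      }
    }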
          diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderLocalLegacy.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderLocalLegacy.java index 1d002b6e4c63a..a69ae329c39f0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderLocalLegacy.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/client/impl/BlockReaderLocalLegacy.java @@ -503,7 +503,7 @@ public synchronized int read(ByteBuffer buf) throws IOException { * byte buffer to write bytes to. If checksums are not required, buf * can have any number of bytes remaining, otherwise there must be a * multiple of the checksum chunk size remaining. - * @return max(min(totalBytesRead, len) - offsetFromChunkBoundary, 0) + * @return max(min(totalBytesRead, len) - offsetFromChunkBoundary, 0) * that is, the the number of useful bytes (up to the amount * requested) readable from the buffer by the client. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java index a11fa1bac2598..1ec63e0ca83da 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java @@ -107,7 +107,7 @@ synchronized List poll(int numBlocks) { } /** - * Returns true if the queue contains the specified element. + * Returns true if the queue contains the specified element. */ synchronized boolean contains(E e) { return blockq.contains(e); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java index 7bf5879971615..2118b1d03fffa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/EncryptionZoneManager.java @@ -369,7 +369,7 @@ String getFullPathName(Long nodeId) { } /** - * Get the key name for an encryption zone. Returns null if iip is + * Get the key name for an encryption zone. Returns null if iip is * not within an encryption zone. *

          * Called while holding the FSDirectory lock. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index a48cfdbe5957c..bcf56a86441d3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -1120,7 +1120,7 @@ private void stopHttpServer() { *

        • {@link StartupOption#IMPORT IMPORT} - import checkpoint
        • *
        * The option is passed via configuration field: - * dfs.namenode.startup + * dfs.namenode.startup * * The conf will be modified to reflect the actual ports on which * the NameNode is up and running if the user passes the port as diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffList.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffList.java index 80ef538000977..7ad3981d9c4f2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffList.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/snapshot/DiffList.java @@ -100,7 +100,7 @@ public List getMinListForRange(int startIndex, int endIndex, * @param index index of the element to return * @return the element at the specified position in this list * @throws IndexOutOfBoundsException if the index is out of range - * (index < 0 || index >= size()) + * (index < 0 || index >= size()) */ T get(int index); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java index fbeea0f673c0e..6586d42f92d64 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionCalculator.java @@ -37,20 +37,20 @@ /** * This is the tool for analyzing file sizes in the namespace image. In order to - * run the tool one should define a range of integers [0, maxSize] by - * specifying maxSize and a step. The range of integers is - * divided into segments of size step: - * [0, s1, ..., sn-1, maxSize], and the visitor + * run the tool one should define a range of integers [0, maxSize] by + * specifying maxSize and a step. The range of integers is + * divided into segments of size step: + * [0, s1, ..., sn-1, maxSize], and the visitor * calculates how many files in the system fall into each segment - * [si-1, si). Note that files larger than - * maxSize always fall into the very last segment. + * [si-1, si). Note that files larger than + * maxSize always fall into the very last segment. * *

        Input.

        *
 *
 • filename specifies the location of the image file;
 • maxSize determines the range [0, maxSize] of files sizes considered by the visitor;
 • step the range is divided into segments of size step.
        • *
        * *

        Output.

        The output file is formatted as a tab separated two column diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java index 7dcc29998f335..a7e93fe586683 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FileDistributionVisitor.java @@ -28,20 +28,20 @@ *

        Description.

        * This is the tool for analyzing file sizes in the namespace image. * In order to run the tool one should define a range of integers - * [0, maxSize] by specifying maxSize and a step. - * The range of integers is divided into segments of size step: - * [0, s1, ..., sn-1, maxSize], + * [0, maxSize] by specifying maxSize and a step. + * The range of integers is divided into segments of size step: + * [0, s1, ..., sn-1, maxSize], * and the visitor calculates how many files in the system fall into - * each segment [si-1, si). - * Note that files larger than maxSize always fall into + * each segment [si-1, si). + * Note that files larger than maxSize always fall into * the very last segment. * *

        Input.

        *
 *
 • filename specifies the location of the image file;
 • maxSize determines the range [0, maxSize] of files sizes considered by the visitor;
 • step the range is divided into segments of size step.
        • *
        * *

        Output.
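Both offline-image-viewer hunks above describe the same binning rule: the range [0, maxSize] is divided into segments of size step, files are counted per segment, and anything larger than maxSize falls into the very last segment. A hedged, stand-alone sketch of that rule, not the actual FileDistributionCalculator/FileDistributionVisitor code:

    /**
     * Minimal sketch of the binning rule described above; class and method
     * names are illustrative only.
     */
    public class FileSizeHistogram {
      private final long step;
      private final long[] counts;

      public FileSizeHistogram(long maxSize, long step) {
        this.step = step;
        // one bucket per step plus a final bucket for sizes beyond maxSize
        this.counts = new long[(int) (maxSize / step) + 1];
      }

      public void add(long fileSize) {
        int bucket = (int) Math.min(fileSize / step, counts.length - 1);
        counts[bucket]++;
      }

      /** Emit the tab-separated two-column layout mentioned in the hunk above. */
      public void print() {
        for (int i = 0; i < counts.length; i++) {
          System.out.println((i * step) + "\t" + counts[i]);
        }
      }
    }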

        diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java index 905e3bf44f134..7264e182bf505 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestSafeMode.java @@ -110,7 +110,7 @@ public void tearDown() throws IOException { * Name-node should stay in automatic safe-mode. *
• Enter safe mode manually.
• Start the data-node.
• Wait longer than dfs.namenode.safemode.extension and verify that the name-node is still in safe mode.
      • * * diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java index 106c515d49c08..788e91b02578c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestReadOnlySharedStorage.java @@ -205,7 +205,7 @@ private void validateNumberReplicas(int expectedReplicas) throws IOException { } /** - * Verify that READ_ONLY_SHARED replicas are not counted towards the overall + * Verify that READ_ONLY_SHARED replicas are not counted towards the overall * replication count, but are included as replica locations returned to clients for reads. */ @Test @@ -221,7 +221,7 @@ public void testReplicaCounting() throws Exception { } /** - * Verify that the NameNode is able to still use READ_ONLY_SHARED replicas even + * Verify that the NameNode is able to still use READ_ONLY_SHARED replicas even * when the single NORMAL replica is offline (and the effective replication count is 0). */ @Test @@ -253,7 +253,7 @@ public void testNormalReplicaOffline() throws Exception { } /** - * Verify that corrupt READ_ONLY_SHARED replicas aren't counted + * Verify that corrupt READ_ONLY_SHARED replicas aren't counted * towards the corrupt replicas total. */ @Test diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/AMPreemptionPolicy.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/AMPreemptionPolicy.java index 85211f958d6c3..a49700d8e5587 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/AMPreemptionPolicy.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/AMPreemptionPolicy.java @@ -109,7 +109,7 @@ public abstract class Context { * TaskId}. Assigning a null is akin to remove all previous checkpoints for * this task. * @param taskId TaskID - * @param cid Checkpoint to assign or null to remove it. + * @param cid Checkpoint to assign or null to remove it. */ public void setCheckpointID(TaskId taskId, TaskCheckpointID cid); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileOutputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileOutputFormat.java index 3932e5849ea14..a89f1f1cee999 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileOutputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/FileOutputFormat.java @@ -185,7 +185,7 @@ public static Path getOutputPath(JobConf conf) { * is {@link FileOutputCommitter}. If OutputCommitter is not * a FileOutputCommitter, the task's temporary output * directory is same as {@link #getOutputPath(JobConf)} i.e. - * ${mapreduce.output.fileoutputformat.outputdir}$

        + * ${mapreduce.output.fileoutputformat.outputdir}$

        * *

        Some applications need to create/write-to side-files, which differ from * the actual job-outputs. @@ -194,27 +194,27 @@ public static Path getOutputPath(JobConf conf) { * (running simultaneously e.g. speculative tasks) trying to open/write-to the * same file (path) on HDFS. Hence the application-writer will have to pick * unique names per task-attempt (e.g. using the attemptid, say - * attempt_200709221812_0001_m_000000_0), not just per TIP.

        + * attempt_200709221812_0001_m_000000_0), not just per TIP.

        * *

        To get around this the Map-Reduce framework helps the application-writer * out by maintaining a special - * ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} + * ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} * sub-directory for each task-attempt on HDFS where the output of the * task-attempt goes. On successful completion of the task-attempt the files - * in the ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} (only) - * are promoted to ${mapreduce.output.fileoutputformat.outputdir}. Of course, the + * in the ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} (only) + * are promoted to ${mapreduce.output.fileoutputformat.outputdir}. Of course, the * framework discards the sub-directory of unsuccessful task-attempts. This * is completely transparent to the application.

        * *

        The application-writer can take advantage of this by creating any - * side-files required in ${mapreduce.task.output.dir} during execution + * side-files required in ${mapreduce.task.output.dir} during execution * of his reduce-task i.e. via {@link #getWorkOutputPath(JobConf)}, and the * framework will move them out similarly - thus she doesn't have to pick * unique paths per task-attempt.

        * - *

        Note: the value of ${mapreduce.task.output.dir} during + *

        Note: the value of ${mapreduce.task.output.dir} during * execution of a particular task-attempt is actually - * ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_{$taskid}, and this value is + * ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_{$taskid}, and this value is * set by the map-reduce framework. So, just create any side-files in the * path returned by {@link #getWorkOutputPath(JobConf)} from map/reduce * task to take advantage of this feature.
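The FileOutputFormat hunk above explains that side-files written under ${mapreduce.task.output.dir} are promoted to the job output directory only when the task attempt succeeds. A hedged sketch of creating such a side-file through getWorkOutputPath; the file name and contents are illustrative:

    import java.io.IOException;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobConf;

    /** Sketch: create a side-file under the task's work output directory. */
    public class SideFileWriter {
      public static Path writeSideFile(JobConf conf, String name) throws IOException {
        // ${mapreduce.task.output.dir} for this task attempt; files created here
        // are promoted to the job output directory only if the attempt succeeds.
        Path workDir = FileOutputFormat.getWorkOutputPath(conf);
        Path sideFile = new Path(workDir, name);
        FileSystem fs = sideFile.getFileSystem(conf);
        try (FSDataOutputStream out = fs.create(sideFile, false)) {
          out.writeUTF("side output");
        }
        return sideFile;
      }
    }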

        diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java index db398e8dbdc65..d6d3c9ebfad0a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java @@ -1873,8 +1873,8 @@ public String getJobEndNotificationURI() { * Set the uri to be invoked in-order to send a notification after the job * has completed (success/failure). * - *

        The uri can contain 2 special parameters: $jobId and - * $jobStatus. Those, if present, are replaced by the job's + *

        The uri can contain 2 special parameters: $jobId and + * $jobStatus. Those, if present, are replaced by the job's * identifier and completion-status respectively.

        * *

        This is typically used by application-writers to implement chaining of diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapRunnable.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapRunnable.java index 7aa4f336ae522..e5f585e0fbc8f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapRunnable.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/MapRunnable.java @@ -37,7 +37,7 @@ public interface MapRunnable extends JobConfigurable { /** - * Start mapping input <key, value> pairs. + * Start mapping input <key, value> pairs. * *

        Mapping of input records to output records is complete when this method * returns.
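The JobConf hunk a little above describes the job-end notification URI and its $jobId and $jobStatus placeholders. A hedged sketch of setting it; the host and query parameters are illustrative placeholders:

    import org.apache.hadoop.mapred.JobConf;

    /** Sketch of configuring the job-end notification URI. */
    public class NotificationSetup {
      public static void configure(JobConf conf) {
        // $jobId and $jobStatus are replaced by the framework with the job's
        // identifier and completion status before the notification is sent.
        conf.setJobEndNotificationURI(
            "http://workflow.example.com/notify?jobid=$jobId&status=$jobStatus");
      }
    }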

        diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/jobcontrol/Job.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/jobcontrol/Job.java index fd078372fd500..0b1a9786cabbe 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/jobcontrol/Job.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/jobcontrol/Job.java @@ -143,7 +143,7 @@ protected synchronized void setState(int state) { * is waiting to run, not during or afterwards. * * @param dependingJob Job that this Job depends on. - * @return true if the Job was added. + * @return true if the Job was added. */ public synchronized boolean addDependingJob(Job dependingJob) { return super.addDependingJob(dependingJob); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/CompositeInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/CompositeInputFormat.java index 40690e7541fdb..226363ac8caae 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/CompositeInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/CompositeInputFormat.java @@ -38,10 +38,10 @@ * and partitioned the same way. * * A user may define new join types by setting the property - * mapred.join.define.<ident> to a classname. In the expression - * mapred.join.expr, the identifier will be assumed to be a + * mapred.join.define.<ident> to a classname. In the expression + * mapred.join.expr, the identifier will be assumed to be a * ComposableRecordReader. - * mapred.join.keycomparator can be a classname used to compare keys + * mapred.join.keycomparator can be a classname used to compare keys * in the join. * @see #setFormat * @see JoinRecordReader @@ -66,9 +66,9 @@ public CompositeInputFormat() { } * class ::= @see java.lang.Class#forName(java.lang.String) * path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) * } - * Reads expression from the mapred.join.expr property and - * user-supplied join types from mapred.join.define.<ident> - * types. Paths supplied to tbl are given as input paths to the + * Reads expression from the mapred.join.expr property and + * user-supplied join types from mapred.join.define.<ident> + * types. Paths supplied to tbl are given as input paths to the * InputFormat class listed. * @see #compose(java.lang.String, java.lang.Class, java.lang.String...) 
*/ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/CompositeRecordReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/CompositeRecordReader.java index 0684268d2d79f..1bb0745d918da 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/CompositeRecordReader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/CompositeRecordReader.java @@ -61,8 +61,8 @@ public abstract class CompositeRecordReader< protected abstract boolean combine(Object[] srcs, TupleWritable value); /** - * Create a RecordReader with capacity children to position - * id in the parent reader. + * Create a RecordReader with capacity children to position + * id in the parent reader. * The id of a root CompositeRecordReader is -1 by convention, but relying * on this is not recommended. */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/OverrideRecordReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/OverrideRecordReader.java index 1671e6e895684..d36b776a94409 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/OverrideRecordReader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/OverrideRecordReader.java @@ -31,7 +31,7 @@ /** * Prefer the "rightmost" data source for this key. - * For example, override(S1,S2,S3) will prefer values + * For example, override(S1,S2,S3) will prefer values * from S3 over S2, and values from S2 over S1 for all keys * emitted from all sources. */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/Parser.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/Parser.java index 3c7a991fd045e..96792c1e6662a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/Parser.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/Parser.java @@ -275,7 +275,7 @@ public WNode(String ident) { /** * Let the first actual define the InputFormat and the second define - * the mapred.input.dir property. + * the mapred.input.dir property. 
*/ public void parse(List ll, JobConf job) throws IOException { StringBuilder sb = new StringBuilder(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TotalOrderPartitioner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TotalOrderPartitioner.java index b06961e5cfdd3..98ca9318df490 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TotalOrderPartitioner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/lib/TotalOrderPartitioner.java @@ -43,7 +43,7 @@ public void configure(JobConf job) { /** * Set the path to the SequenceFile storing the sorted partition keyset. - * It must be the case that for R reduces, there are R-1 + * It must be the case that for R reduces, there are R-1 * keys in the SequenceFile. * @deprecated Use * {@link #setPartitionFile(Configuration, Path)} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/jobcontrol/ControlledJob.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/jobcontrol/ControlledJob.java index 48cde0e5f0040..249079b8e4f91 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/jobcontrol/ControlledJob.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/jobcontrol/ControlledJob.java @@ -205,7 +205,7 @@ public List getDependentJobs() { * is waiting to run, not during or afterwards. * * @param dependingJob Job that this Job depends on. - * @return true if the Job was added. + * @return true if the Job was added. */ public synchronized boolean addDependingJob(ControlledJob dependingJob) { if (this.state == State.WAITING) { //only allowed to add jobs when waiting diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/CompositeInputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/CompositeInputFormat.java index 6189a271bc3cb..b0b459afe2a0b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/CompositeInputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/CompositeInputFormat.java @@ -41,10 +41,10 @@ * and partitioned the same way. * * A user may define new join types by setting the property - * mapreduce.join.define.<ident> to a classname. - * In the expression mapreduce.join.expr, the identifier will be + * mapreduce.join.define.<ident> to a classname. + * In the expression mapreduce.join.expr, the identifier will be * assumed to be a ComposableRecordReader. - * mapreduce.join.keycomparator can be a classname used to compare + * mapreduce.join.keycomparator can be a classname used to compare * keys in the join. 
* @see #setFormat * @see JoinRecordReader @@ -73,9 +73,9 @@ public CompositeInputFormat() { } * class ::= @see java.lang.Class#forName(java.lang.String) * path ::= @see org.apache.hadoop.fs.Path#Path(java.lang.String) * } - * Reads expression from the mapreduce.join.expr property and - * user-supplied join types from mapreduce.join.define.<ident> - * types. Paths supplied to tbl are given as input paths to the + * Reads expression from the mapreduce.join.expr property and + * user-supplied join types from mapreduce.join.define.<ident> + * types. Paths supplied to tbl are given as input paths to the * InputFormat class listed. * @see #compose(java.lang.String, java.lang.Class, java.lang.String...) */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/CompositeRecordReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/CompositeRecordReader.java index 40f3570cb59a2..45e3224a3fe08 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/CompositeRecordReader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/CompositeRecordReader.java @@ -67,8 +67,8 @@ public abstract class CompositeRecordReader< protected X value; /** - * Create a RecordReader with capacity children to position - * id in the parent reader. + * Create a RecordReader with capacity children to position + * id in the parent reader. * The id of a root CompositeRecordReader is -1 by convention, but relying * on this is not recommended. */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/OverrideRecordReader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/OverrideRecordReader.java index 5678445f11ba8..2396e9daa42da 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/OverrideRecordReader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/OverrideRecordReader.java @@ -33,7 +33,7 @@ /** * Prefer the "rightmost" data source for this key. - * For example, override(S1,S2,S3) will prefer values + * For example, override(S1,S2,S3) will prefer values * from S3 over S2, and values from S2 over S1 for all keys * emitted from all sources. */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/Parser.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/Parser.java index c557e14136622..68cf31025943f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/Parser.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/Parser.java @@ -290,7 +290,7 @@ public WNode(String ident) { /** * Let the first actual define the InputFormat and the second define - * the mapred.input.dir property. 
+ * the mapred.input.dir property. */ @Override public void parse(List ll, Configuration conf) throws IOException { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/TupleWritable.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/TupleWritable.java index aa541f3640d3a..c48f925b9833f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/TupleWritable.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/TupleWritable.java @@ -144,7 +144,7 @@ public void remove() { /** * Convert Tuple to String as in the following. - * [<child1>,<child2>,...,<childn>] + * [<child1>,<child2>,...,<childn>] */ public String toString() { StringBuilder buf = new StringBuilder("["); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java index 2b1f7e37ebe75..5dd572835ccff 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/output/FileOutputFormat.java @@ -208,15 +208,15 @@ public static Path getOutputPath(JobContext job) { * (running simultaneously e.g. speculative tasks) trying to open/write-to the * same file (path) on HDFS. Hence the application-writer will have to pick * unique names per task-attempt (e.g. using the attemptid, say - * attempt_200709221812_0001_m_000000_0), not just per TIP.

        + * attempt_200709221812_0001_m_000000_0), not just per TIP.

        * *

        To get around this the Map-Reduce framework helps the application-writer * out by maintaining a special - * ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} + * ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} * sub-directory for each task-attempt on HDFS where the output of the * task-attempt goes. On successful completion of the task-attempt the files - * in the ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} (only) - * are promoted to ${mapreduce.output.fileoutputformat.outputdir}. Of course, the + * in the ${mapreduce.output.fileoutputformat.outputdir}/_temporary/_${taskid} (only) + * are promoted to ${mapreduce.output.fileoutputformat.outputdir}. Of course, the * framework discards the sub-directory of unsuccessful task-attempts. This * is completely transparent to the application.

        * diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/partition/TotalOrderPartitioner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/partition/TotalOrderPartitioner.java index c19724e842d26..25967f92fa8bd 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/partition/TotalOrderPartitioner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/partition/TotalOrderPartitioner.java @@ -65,8 +65,8 @@ public TotalOrderPartitioner() { } /** * Read in the partition file and build indexing data structures. * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and - * total.order.partitioner.natural.order is not false, a trie - * of the first total.order.partitioner.max.trie.depth(2) + 1 bytes + * total.order.partitioner.natural.order is not false, a trie + * of the first total.order.partitioner.max.trie.depth(2) + 1 bytes * will be built. Otherwise, keys will be located using a binary search of * the partition keyset using the {@link org.apache.hadoop.io.RawComparator} * defined for this job. The input file must be sorted with the same @@ -128,7 +128,7 @@ public int getPartition(K key, V value, int numPartitions) { /** * Set the path to the SequenceFile storing the sorted partition keyset. - * It must be the case that for R reduces, there are R-1 + * It must be the case that for R reduces, there are R-1 * keys in the SequenceFile. */ public static void setPartitionFile(Configuration conf, Path p) { @@ -156,7 +156,7 @@ interface Node { /** * Base class for trie nodes. If the keytype is memcomp-able, this builds - * tries of the first total.order.partitioner.max.trie.depth + * tries of the first total.order.partitioner.max.trie.depth * bytes. */ static abstract class TrieNode implements Node { @@ -171,7 +171,7 @@ int getLevel() { /** * For types that are not {@link org.apache.hadoop.io.BinaryComparable} or - * where disabled by total.order.partitioner.natural.order, + * where disabled by total.order.partitioner.natural.order, * search the partition keyset with a binary search. */ class BinarySearchNode implements Node { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/AccumulatingReducer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/AccumulatingReducer.java index b6313494e4542..eb0972b30f5ad 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/AccumulatingReducer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/AccumulatingReducer.java @@ -31,13 +31,13 @@ * The type is specified in the key part of the key-value pair * as a prefix to the key in the following way *

- * type:key
+ * type:key

        * The values are accumulated according to the types: *

 • s: - string, concatenate
 • f: - float, summ
 • l: - long, summ
 *
        * */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/IOMapperBase.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/IOMapperBase.java index ddd2d2f126936..7ded7a1e639ed 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/IOMapperBase.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/IOMapperBase.java @@ -109,8 +109,8 @@ abstract void collectStats(OutputCollector output, * Map file name and offset into statistical data. *

        * The map task is to get the - * key, which contains the file name, and the - * value, which is the offset within the file. + * key, which contains the file name, and the + * value, which is the offset within the file. * * The parameters are passed to the abstract method * {@link #doIO(Reporter,String,long)}, which performs the io operation, diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/JHLogAnalyzer.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/JHLogAnalyzer.java index 8937bdafe35cd..125dad5cbe14d 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/JHLogAnalyzer.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/fs/JHLogAnalyzer.java @@ -76,7 +76,7 @@ * specific attempt A during hour h. * The tool then sums all slots for all attempts for every hour. * The result is the slot hour utilization of the cluster: - * slotTime(h) = SUMA slotTime(A,h). + * slotTime(h) = SUMA slotTime(A,h). *

        * Log analyzer calculates slot hours for MAP and REDUCE * attempts separately. @@ -88,8 +88,8 @@ *

        * Map-reduce clusters are usually configured to have a fixed number of MAP * and REDUCE slots per node. Thus the maximal possible number of slots on - * the cluster is total_slots = total_nodes * slots_per_node. - * Effective slot hour cannot exceed total_slots for successful + * the cluster is total_slots = total_nodes * slots_per_node. + * Effective slot hour cannot exceed total_slots for successful * attempts. *

        * Pending time characterizes the wait time of attempts. @@ -106,39 +106,39 @@ * The following input parameters can be specified in the argument string * to the job log analyzer: *

 • -historyDir inputDir specifies the location of the directory where analyzer will be looking for job history log files.
 • -resFile resultFile the name of the result file.
 • -usersIncluded | -usersExcluded userList slot utilization and pending time can be calculated for all or for all but the specified users.
   userList is a comma or semicolon separated list of users.
 • -gzip is used if history log files are compressed. Only {@link GzipCodec} is currently supported.
 • -jobDelimiter pattern one can concatenate original log files into larger file(s) with the specified delimiter to recognize the end of the log for one job from the next one.
   pattern is a java regular expression {@link java.util.regex.Pattern}, which should match only the log delimiters.
   E.g. pattern ".!!FILE=.*!!" matches delimiters, which contain the original history log file names in the following form:
   "$!!FILE=my.job.tracker.com_myJobId_user_wordcount.log!!"
 • -clean cleans up default directories used by the analyzer.
 • -test test one file locally and exit; does not require map-reduce.
 • -help print usage.
 *

        Output.

        * The output file is formatted as a tab separated table consisting of four - * columns: SERIES, PERIOD, TYPE, SLOT_HOUR. + * columns: SERIES, PERIOD, TYPE, SLOT_HOUR. *
 • SERIES one of the four statistical series;
 • PERIOD the start of the time interval in the following format: "yyyy-mm-dd hh:mm:ss";
 • TYPE the slot type, e.g. MAP or REDUCE;
 • SLOT_HOUR the value of the slot usage during this time interval.
 *
        */ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/pi/package.html b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/pi/package.html index 91484aa49c4c2..9fa0c41e0ac3a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/pi/package.html +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/pi/package.html @@ -23,7 +23,7 @@ for large n, say n > 100,000,000. For computing the lower bits of π, consider using bbp. -

The distbbp Program
+ The distbbp Program

        The main class is DistBbp and the actually computation is done by DistSum jobs. The steps for launching the jobs are: @@ -39,8 +39,10 @@

        The distbbp Program

      • Combine the job outputs and print the π bits.
      • -
Metrics2
- The Bits of π
+ "The Bits of Pi"
+ The Bits of π

        The table on the right are the results computed by distbbp.

        @@ -56,7 +58,7 @@

        The Bits of π

      • The computations in Row 13 and Row 14 were completed on May 20, 2009. It seems that the corresponding bits were never computed before.
• The first part of Row 15 (6216B06)
        • The first 30% of the computation was done in idle cycles of some clusters spread over 20 days.
        • @@ -69,7 +71,7 @@

          The Bits of π

          this YDN blog.
• The second part of Row 15 (D3611)
        • The starting position is 1,000,000,000,000,053, totally 20 bits.
        • Two computations, at positions n and n+4, were performed.
        • A single computation was divided into 14,000 jobs @@ -85,42 +87,42 @@

          The Bits of π

          computed ever in the history.
      • -
+
+"Pi in hex"

       Position n                 π bits (in hex) starting at n
   0   1                          243F6A8885A3*
   1   11                         FDAA22168C23
   2   101                        3707344A409
   3   1,001                      574E69A458F
   4   10,001                     44EC5716F2B
   5   100,001                    944F7A204
   6   1,000,001                  6FFFA4103
   7   10,000,001                 6CFDD54E3
   8   100,000,001                A306CFA7
   9   1,000,000,001              3E08FF2B
  10   10,000,000,001             0A8BD8C0
  11   100,000,000,001            B2238C1
  12   1,000,000,000,001          0FEE563
  13   10,000,000,000,001         896DC3
  14   100,000,000,000,001        C216EC
  15   1,000,000,000,000,001      6216B06 ... D3611
 * By representing π in decimal, hexadecimal and binary, we have

 "Pi in various formats"

   π = 3.1415926535 8979323846 2643383279 ...
     = 3.243F6A8885 A308D31319 8A2E037073 ...
     = 11.0010010000 1111110110 1010100010 ...

-The first ten bits of π are 0010010000.
+The first ten bits of π are 0010010000.
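As a quick check that the hexadecimal and binary lines above agree, expand the first hexadecimal fraction digits digit by digit:

    2 -> 0010, 4 -> 0100, 3 -> 0011, F -> 1111, ...

so 3.243F... written in binary begins 11.0010 0100 0011 1111 ..., whose first ten fraction bits are 0010010000, exactly as stated.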
@@ -130,7 +132,8 @@

 Command Line Usages

 $ hadoop org.apache.hadoop.examples.pi.DistBbp \
     <b> <nThreads> <nJobs> <type> <nPart> <remoteDir> <localDir>

 And the parameters are:
-
+
 "command line option"

   <b>   The number of bits to skip, i.e. compute the (b+1)th position.
+
diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml
index 33533dbbaedca..32a140a9de867 100644
--- a/hadoop-project/pom.xml
+++ b/hadoop-project/pom.xml
@@ -158,7 +158,6 @@
 1.8 - false - -html4 - - - - - -
diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml
index 595413edc4f40..997f63b0dc15f 100644
--- a/hadoop-tools/hadoop-aws/pom.xml
+++ b/hadoop-tools/hadoop-aws/pom.xml
@@ -35,7 +35,6 @@
 UTF-8
 true
 ${project.build.directory}/test
-true
 unset
diff --git a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/FilePool.java b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/FilePool.java
index 9a0cca380bcbd..9fbad6b7a9817 100644
--- a/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/FilePool.java
+++ b/hadoop-tools/hadoop-gridmix/src/main/java/org/apache/hadoop/mapred/gridmix/FilePool.java
@@ -128,7 +128,7 @@ static abstract class Node {
   /**
    * Return a set of files whose cumulative size is at least
-   * targetSize.
+   * targetSize.
    * TODO Clearly size is not the only criterion, e.g. refresh from
    * generated data without including running task output, tolerance
    * for permission issues, etc.
diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/io/IdentifierResolver.java b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/io/IdentifierResolver.java
index b0cd5b4fdb733..9db4087c0cb88 100644
--- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/io/IdentifierResolver.java
+++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/io/IdentifierResolver.java
@@ -26,7 +26,7 @@
 /**
  * This class is used to resolve a string identifier into the required IO
  * classes. By extending this class and pointing the property
- * stream.io.identifier.resolver.class to this extension, additional
+ * stream.io.identifier.resolver.class to this extension, additional
  * IO classes can be added by external code.
  */
 public class IdentifierResolver {
diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/package.html b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/package.html
index be64426757e72..d7924e8d4e75d 100644
--- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/package.html
+++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/streaming/package.html
@@ -19,7 +19,7 @@
-Hadoop Streaming is a utility which allows users to create and run
+Hadoop Streaming is a utility which allows users to create and run
 Map-Reduce jobs with any executables (e.g. Unix shell utilities) as the mapper
 and/or the reducer.
diff --git a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/typedbytes/package.html b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/typedbytes/package.html
index 3494fbd8586cd..fb72cc3a8e594 100644
--- a/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/typedbytes/package.html
+++ b/hadoop-tools/hadoop-streaming/src/main/java/org/apache/hadoop/typedbytes/package.html
@@ -22,11 +22,12 @@
 Typed bytes are sequences of bytes in which the first byte is a type code.
 They are especially useful as a (simple and very straightforward) binary format
 for transferring data to and from Hadoop Streaming programs.
-

 Type Codes

+
 Type Codes

 Each typed bytes sequence starts with an unsigned byte that contains
 the type code. Possible values are:
-
 "Type Codes"

   Code   Type
   0      A sequence of bytes.
   1      A byte.
+
+
@@ -48,7 +49,8 @@

 Subsequent Bytes

 These are the subsequent bytes for the different type codes
 (everything is big-endian and unpadded):
-
+
        + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/SignalContainerRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/SignalContainerRequest.java index 28cc8ea5b4c50..d002071e6cc09 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/SignalContainerRequest.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/SignalContainerRequest.java @@ -29,7 +29,7 @@ *

        The request sent by the client to the ResourceManager * or by the ApplicationMaster to the NodeManager * to signal a container. - * @see SignalContainerCommand

        + * @see SignalContainerCommand */ @Public @Evolving diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/timelineservice/ServiceMetricsSink.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/timelineservice/ServiceMetricsSink.java index ff4556f7cd7a3..c4417851a56a5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/timelineservice/ServiceMetricsSink.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/timelineservice/ServiceMetricsSink.java @@ -31,7 +31,7 @@ * adding the following to by This would actually be set as: * [prefix].sink.[some instance name].class * =org.apache.hadoop.yarn.service.timelineservice.ServiceMetricsSink - * , where prefix is "atsv2": and some instance name is + * , where prefix is "atsv2": and some instance name is * just any unique name, so properties can be differentiated if there are * multiple sinks of the same type created */ diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/AdminACLsManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/AdminACLsManager.java index 949c6a2e27cc7..3ff53cce8b7c3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/AdminACLsManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/AdminACLsManager.java @@ -93,7 +93,7 @@ public UserGroupInformation getOwner() { * * @see YarnConfiguration#YARN_ACL_ENABLE * @see YarnConfiguration#DEFAULT_YARN_ACL_ENABLE - * @return true if ACLs are enabled + * @return true if ACLs are enabled */ public boolean areACLsEnabled() { return aclsEnabled; @@ -103,7 +103,7 @@ public boolean areACLsEnabled() { * Returns whether the specified user/group is an administrator * * @param callerUGI user/group to to check - * @return true if the UserGroupInformation specified + * @return true if the UserGroupInformation specified * is a member of the access control list for administrators */ public boolean isAdmin(UserGroupInformation callerUGI) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BoundedAppender.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BoundedAppender.java index 6d582ca1ec799..423f029b9d59a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BoundedAppender.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/util/BoundedAppender.java @@ -56,7 +56,7 @@ * } * *

        - * Note that null values are {@link #append(CharSequence) append}ed + * Note that null values are {@link #append(CharSequence) append}ed * just like in {@link StringBuilder#append(CharSequence) original * implementation}. *

        diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/LeveldbIterator.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/LeveldbIterator.java index 463bee7ebab89..00b97aadc8a32 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/LeveldbIterator.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/utils/LeveldbIterator.java @@ -112,7 +112,7 @@ public void seekToLast() throws DBException { } /** - * Returns true if the iteration has more elements. + * Returns true if the iteration has more elements. */ public boolean hasNext() throws DBException { try { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-common/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/common/BaseTable.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-common/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/common/BaseTable.java index 433b35201644a..cac14eaa82b69 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-common/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/common/BaseTable.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-common/src/main/java/org/apache/hadoop/yarn/server/timelineservice/storage/common/BaseTable.java @@ -21,7 +21,7 @@ /** * The base type of tables. - * @param T table type + * @param table type */ public abstract class BaseTable { } From 6c01490f14b65f43196e1f235c51749a712e7338 Mon Sep 17 00:00:00 2001 From: Syed Shameerur Rahman Date: Wed, 4 Sep 2024 17:56:42 +0530 Subject: [PATCH 109/113] YARN-11664. Remove HDFS Binaries/Jars Dependency From Yarn (#6631) To support YARN deployments in clusters without HDFS some changes have been made in packaging * new hadoop-common class org.apache.hadoop.fs.HdfsCommonConstants * hdfs class org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair moved from hdfs-client to hadoop-common * YARN handlers for DSQuotaExceededException replaced by use of superclass ClusterStorageCapacityExceededException. 
Contributed by Syed Shameerur Rahman --- .../apache/hadoop/fs/HdfsCommonConstants.java | 47 +++++++++++++++++++ .../protocol/datatransfer/IOStreamPair.java | 6 +-- .../protocol/datatransfer/package-info.java | 25 ++++++++++ .../delegation/DelegationTokenIdentifier.java | 9 +++- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 7 ++- .../hadoop/yarn/service/ServiceMaster.java | 4 +- .../yarn/service/client/ServiceClient.java | 6 +-- .../yarn/service/client/TestServiceCLI.java | 4 +- .../logaggregation/AggregatedLogFormat.java | 6 +-- .../tfile/LogAggregationTFileController.java | 4 +- 10 files changed, 100 insertions(+), 18 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HdfsCommonConstants.java rename {hadoop-hdfs-project/hadoop-hdfs-client => hadoop-common-project/hadoop-common}/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java (85%) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/package-info.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HdfsCommonConstants.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HdfsCommonConstants.java new file mode 100644 index 0000000000000..f6c3ca4517d55 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HdfsCommonConstants.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.io.Text; + +/** + * This class contains constants for configuration keys and default values. + */ +@InterfaceAudience.LimitedPrivate({"YARN", "HDFS"}) +@InterfaceStability.Evolving +public final class HdfsCommonConstants { + + /** + * Represents the kind of delegation token used for HDFS. + * This is a constant string value "HDFS_DELEGATION_TOKEN". + */ + public static final Text HDFS_DELEGATION_KIND = + new Text("HDFS_DELEGATION_TOKEN"); + + /** + * DFS_ADMIN configuration: {@value}. 
+ */ + public static final String DFS_ADMIN = "dfs.cluster.administrators"; + + private HdfsCommonConstants() { + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java similarity index 85% rename from hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java rename to hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java index 4ec73e3378445..597c804d6fc51 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java @@ -22,13 +22,13 @@ import java.io.InputStream; import java.io.OutputStream; -import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.io.IOUtils; /** - * A little struct class to wrap an InputStream and an OutputStream. + * This class wraps an InputStream and an OutputStream. + * Both the InputStream and OutputStream are closed on close call. + * This class is moved from HDFS module to COMMON module for removing HDFS dependencies from YARN. */ -@InterfaceAudience.Private public class IOStreamPair implements Closeable { public final InputStream in; public final OutputStream out; diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/package-info.java new file mode 100644 index 0000000000000..d2b8638b96e20 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/package-info.java @@ -0,0 +1,25 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This package provides access to following class. + * {@link org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair} class. 
+ */ +@InterfaceAudience.Private +package org.apache.hadoop.hdfs.protocol.datatransfer; + +import org.apache.hadoop.classification.InterfaceAudience; \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenIdentifier.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenIdentifier.java index 1f4c36f679670..fa90342d419e8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenIdentifier.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenIdentifier.java @@ -26,6 +26,7 @@ import org.apache.commons.collections.map.LRUMap; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.HdfsCommonConstants; import org.apache.hadoop.hdfs.web.WebHdfsConstants; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; @@ -41,8 +42,12 @@ @InterfaceAudience.Private public class DelegationTokenIdentifier extends AbstractDelegationTokenIdentifier { - public static final Text HDFS_DELEGATION_KIND = - new Text("HDFS_DELEGATION_TOKEN"); + + /** + * The value is referenced from {@link HdfsCommonConstants#HDFS_DELEGATION_KIND}. + */ + @Deprecated + public static final Text HDFS_DELEGATION_KIND = HdfsCommonConstants.HDFS_DELEGATION_KIND; @SuppressWarnings("unchecked") private static Map ugiCache = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index dd3193fdadff2..601d9c772e795 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -20,6 +20,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.fs.HdfsCommonConstants; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.net.DFSNetworkTopology; import org.apache.hadoop.hdfs.protocol.HdfsConstants; @@ -381,7 +382,11 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_NAMENODE_XATTRS_ENABLED_KEY = "dfs.namenode.xattrs.enabled"; public static final boolean DFS_NAMENODE_XATTRS_ENABLED_DEFAULT = true; - public static final String DFS_ADMIN = "dfs.cluster.administrators"; + /** + * The value is referenced from {@link HdfsCommonConstants#DFS_ADMIN}. 
+ */ + @Deprecated + public static final String DFS_ADMIN = HdfsCommonConstants.DFS_ADMIN; public static final String DFS_SERVER_HTTPS_KEYSTORE_RESOURCE_KEY = "dfs.https.server.keystore.resource"; public static final String DFS_SERVER_HTTPS_KEYSTORE_RESOURCE_DEFAULT = "ssl-server.xml"; public static final String DFS_SERVER_HTTPS_KEYPASSWORD_KEY = "ssl.server.keystore.keypassword"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceMaster.java index 00883d94eefe6..97ed7ec2a4836 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceMaster.java @@ -23,8 +23,8 @@ import org.apache.commons.cli.Options; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.HdfsCommonConstants; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -230,7 +230,7 @@ private static void removeHdfsDelegationToken(UserGroupInformation user) { while (iter.hasNext()) { Token token = iter.next(); if (token.getKind().equals( - DelegationTokenIdentifier.HDFS_DELEGATION_KIND)) { + HdfsCommonConstants.HDFS_DELEGATION_KIND)) { LOG.info("Remove HDFS delegation token {}.", token); iter.remove(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java index 9098bb4a751fd..2fda45c9b490e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java @@ -31,10 +31,10 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.HdfsCommonConstants; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.registry.client.api.RegistryConstants; @@ -1707,12 +1707,12 @@ private boolean checkPermissions(Path dependencyLibTarGzip) throws YarnConfiguration.YARN_ADMIN_ACL, YarnConfiguration.DEFAULT_YARN_ADMIN_ACL)); AccessControlList dfsAdminAcl = new AccessControlList( - getConfig().get(DFSConfigKeys.DFS_ADMIN, " ")); + getConfig().get(HdfsCommonConstants.DFS_ADMIN, " ")); 
UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); if (!yarnAdminAcl.isUserAllowed(ugi) && !dfsAdminAcl.isUserAllowed(ugi)) { LOG.error("User must be on the {} or {} list to have permission to " + "upload AM dependency tarball", YarnConfiguration.YARN_ADMIN_ACL, - DFSConfigKeys.DFS_ADMIN); + HdfsCommonConstants.DFS_ADMIN); return false; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceCLI.java index f75c0afdbebb6..3b5f29a7ca1a3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceCLI.java @@ -20,9 +20,9 @@ import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.HdfsCommonConstants; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.yarn.client.cli.ApplicationCLI; @@ -138,7 +138,7 @@ public void setup() throws Throwable { basedir.mkdirs(); } yarnAdminNoneAclProp = YarnConfiguration.YARN_ADMIN_ACL + "=none"; - dfsAdminAclProp = DFSConfigKeys.DFS_ADMIN + "=" + + dfsAdminAclProp = HdfsCommonConstants.DFS_ADMIN + "=" + UserGroupInformation.getCurrentUser(); System.setProperty(YarnServiceConstants.PROPERTY_LIB_DIR, basedir .getAbsolutePath()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java index 5a752064bce1c..1a35a6042561b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java @@ -52,6 +52,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ClusterStorageCapacityExceededException; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -60,7 +61,6 @@ import org.apache.hadoop.fs.Options; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.SecureIOUtils; import org.apache.hadoop.io.Writable; @@ -547,7 +547,7 @@ public void append(LogKey logKey, LogValue logValue) throws IOException { } @Override - public void close() throws DSQuotaExceededException { + public void close() throws ClusterStorageCapacityExceededException { try { if (writer != null) { writer.close(); @@ -557,7 
+557,7 @@ public void close() throws DSQuotaExceededException { } finally { try { this.fsDataOStream.close(); - } catch (DSQuotaExceededException e) { + } catch (ClusterStorageCapacityExceededException e) { LOG.error("Exception in closing {}", this.fsDataOStream.getClass(), e); throw e; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/tfile/LogAggregationTFileController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/tfile/LogAggregationTFileController.java index 4b282488c6d56..ccfd8cef6c592 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/tfile/LogAggregationTFileController.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/tfile/LogAggregationTFileController.java @@ -38,12 +38,12 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ClusterStorageCapacityExceededException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.HarFs; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogKey; @@ -99,7 +99,7 @@ public void closeWriter() throws LogAggregationDFSException { if (this.writer != null) { try { this.writer.close(); - } catch (DSQuotaExceededException e) { + } catch (ClusterStorageCapacityExceededException e) { throw new LogAggregationDFSException(e); } finally { this.writer = null; From 1f302e83fd93366544ccbe2bc5ee2de305e65cb6 Mon Sep 17 00:00:00 2001 From: Shintaro Onuma <31045635+shintaroonuma@users.noreply.github.com> Date: Thu, 5 Sep 2024 14:14:04 +0100 Subject: [PATCH 110/113] HADOOP-18938. S3A: Fix endpoint region parsing for vpc endpoints. 
(#6466) Contributed by Shintaro Onuma --- .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 16 ++++++- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 13 +++++- .../hadoop/fs/s3a/TestS3AEndpointParsing.java | 43 +++++++++++++++++++ 3 files changed, 70 insertions(+), 2 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index ba9fc080c2c51..c52454ac15c81 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -21,6 +21,8 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; @@ -85,6 +87,9 @@ public class DefaultS3ClientFactory extends Configured private static final String S3_SERVICE_NAME = "s3"; + private static final Pattern VPC_ENDPOINT_PATTERN = + Pattern.compile("^(?:.+\\.)?([a-z0-9-]+)\\.vpce\\.amazonaws\\.(?:com|com\\.cn)$"); + /** * Subclasses refer to this. */ @@ -390,10 +395,19 @@ private static URI getS3Endpoint(String endpoint, final Configuration conf) { * @param endpointEndsWithCentral true if the endpoint is configured as central. * @return the S3 region, null if unable to resolve from endpoint. */ - private static Region getS3RegionFromEndpoint(final String endpoint, + @VisibleForTesting + static Region getS3RegionFromEndpoint(final String endpoint, final boolean endpointEndsWithCentral) { if (!endpointEndsWithCentral) { + // S3 VPC endpoint parsing + Matcher matcher = VPC_ENDPOINT_PATTERN.matcher(endpoint); + if (matcher.find()) { + LOG.debug("Mapping to VPCE"); + LOG.debug("Endpoint {} is vpc endpoint; parsing region as {}", endpoint, matcher.group(1)); + return Region.of(matcher.group(1)); + } + LOG.debug("Endpoint {} is not the default; parsing", endpoint); return AwsHostNameUtils.parseSigningRegion(endpoint, S3_SERVICE_NAME).orElse(null); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index 8403b6bd6cb96..d06224df5b355 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -97,6 +97,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com"; + private static final String CN_VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.cn-northwest-1.vpce.amazonaws.com.cn"; + public static final String EXCEPTION_THROWN_BY_INTERCEPTOR = "Exception thrown by interceptor"; /** @@ -294,7 +296,6 @@ public void testWithGovCloudEndpoint() throws Throwable { } @Test - @Ignore("Pending HADOOP-18938. 
S3A region logic to handle vpce and non standard endpoints") public void testWithVPCE() throws Throwable { describe("Test with vpc endpoint"); Configuration conf = getConfiguration(); @@ -304,6 +305,16 @@ public void testWithVPCE() throws Throwable { expectInterceptorException(client); } + @Test + public void testWithChinaVPCE() throws Throwable { + describe("Test with china vpc endpoint"); + Configuration conf = getConfiguration(); + + S3Client client = createS3Client(conf, CN_VPC_ENDPOINT, null, CN_NORTHWEST_1, false); + + expectInterceptorException(client); + } + @Test public void testCentralEndpointAndDifferentRegionThanBucket() throws Throwable { describe("Access public bucket using central endpoint and region " diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java new file mode 100644 index 0000000000000..8a77c102ac67d --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AEndpointParsing.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import software.amazon.awssdk.regions.Region; + +public class TestS3AEndpointParsing extends AbstractS3AMockTest { + + private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com"; + private static final String NON_VPC_ENDPOINT = "s3.eu-west-1.amazonaws.com"; + private static final String US_WEST_2 = "us-west-2"; + private static final String EU_WEST_1 = "eu-west-1"; + + @Test + public void testVPCEndpoint() { + Region region = DefaultS3ClientFactory.getS3RegionFromEndpoint(VPC_ENDPOINT, false); + Assertions.assertThat(region).isEqualTo(Region.of(US_WEST_2)); + } + + @Test + public void testNonVPCEndpoint() { + Region region = DefaultS3ClientFactory.getS3RegionFromEndpoint(NON_VPC_ENDPOINT, false); + Assertions.assertThat(region).isEqualTo(Region.of(EU_WEST_1)); + } +} From 57e62ae07f1c4eb8adccd9c61fc909080ca76c53 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 5 Sep 2024 14:35:50 +0100 Subject: [PATCH 111/113] Revert "YARN-11664. Remove HDFS Binaries/Jars Dependency From Yarn (#6631)" This reverts commit 6c01490f14b65f43196e1f235c51749a712e7338. 
--- .../apache/hadoop/fs/HdfsCommonConstants.java | 47 ------------------- .../protocol/datatransfer/package-info.java | 25 ---------- .../protocol/datatransfer/IOStreamPair.java | 6 +-- .../delegation/DelegationTokenIdentifier.java | 9 +--- .../org/apache/hadoop/hdfs/DFSConfigKeys.java | 7 +-- .../hadoop/yarn/service/ServiceMaster.java | 4 +- .../yarn/service/client/ServiceClient.java | 6 +-- .../yarn/service/client/TestServiceCLI.java | 4 +- .../logaggregation/AggregatedLogFormat.java | 6 +-- .../tfile/LogAggregationTFileController.java | 4 +- 10 files changed, 18 insertions(+), 100 deletions(-) delete mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HdfsCommonConstants.java delete mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/package-info.java rename {hadoop-common-project/hadoop-common => hadoop-hdfs-project/hadoop-hdfs-client}/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java (85%) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HdfsCommonConstants.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HdfsCommonConstants.java deleted file mode 100644 index f6c3ca4517d55..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HdfsCommonConstants.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.io.Text; - -/** - * This class contains constants for configuration keys and default values. - */ -@InterfaceAudience.LimitedPrivate({"YARN", "HDFS"}) -@InterfaceStability.Evolving -public final class HdfsCommonConstants { - - /** - * Represents the kind of delegation token used for HDFS. - * This is a constant string value "HDFS_DELEGATION_TOKEN". - */ - public static final Text HDFS_DELEGATION_KIND = - new Text("HDFS_DELEGATION_TOKEN"); - - /** - * DFS_ADMIN configuration: {@value}. 
- */ - public static final String DFS_ADMIN = "dfs.cluster.administrators"; - - private HdfsCommonConstants() { - } - -} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/package-info.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/package-info.java deleted file mode 100644 index d2b8638b96e20..0000000000000 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/package-info.java +++ /dev/null @@ -1,25 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * This package provides access to following class. - * {@link org.apache.hadoop.hdfs.protocol.datatransfer.IOStreamPair} class. - */ -@InterfaceAudience.Private -package org.apache.hadoop.hdfs.protocol.datatransfer; - -import org.apache.hadoop.classification.InterfaceAudience; \ No newline at end of file diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java similarity index 85% rename from hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java rename to hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java index 597c804d6fc51..4ec73e3378445 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/datatransfer/IOStreamPair.java @@ -22,13 +22,13 @@ import java.io.InputStream; import java.io.OutputStream; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.io.IOUtils; /** - * This class wraps an InputStream and an OutputStream. - * Both the InputStream and OutputStream are closed on close call. - * This class is moved from HDFS module to COMMON module for removing HDFS dependencies from YARN. + * A little struct class to wrap an InputStream and an OutputStream. 
*/ +@InterfaceAudience.Private public class IOStreamPair implements Closeable { public final InputStream in; public final OutputStream out; diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenIdentifier.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenIdentifier.java index fa90342d419e8..1f4c36f679670 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenIdentifier.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/security/token/delegation/DelegationTokenIdentifier.java @@ -26,7 +26,6 @@ import org.apache.commons.collections.map.LRUMap; import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.fs.HdfsCommonConstants; import org.apache.hadoop.hdfs.web.WebHdfsConstants; import org.apache.hadoop.io.Text; import org.apache.hadoop.security.UserGroupInformation; @@ -42,12 +41,8 @@ @InterfaceAudience.Private public class DelegationTokenIdentifier extends AbstractDelegationTokenIdentifier { - - /** - * The value is referenced from {@link HdfsCommonConstants#HDFS_DELEGATION_KIND}. - */ - @Deprecated - public static final Text HDFS_DELEGATION_KIND = HdfsCommonConstants.HDFS_DELEGATION_KIND; + public static final Text HDFS_DELEGATION_KIND = + new Text("HDFS_DELEGATION_TOKEN"); @SuppressWarnings("unchecked") private static Map ugiCache = diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 601d9c772e795..dd3193fdadff2 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -20,7 +20,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.CommonConfigurationKeys; -import org.apache.hadoop.fs.HdfsCommonConstants; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.net.DFSNetworkTopology; import org.apache.hadoop.hdfs.protocol.HdfsConstants; @@ -382,11 +381,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_NAMENODE_XATTRS_ENABLED_KEY = "dfs.namenode.xattrs.enabled"; public static final boolean DFS_NAMENODE_XATTRS_ENABLED_DEFAULT = true; - /** - * The value is referenced from {@link HdfsCommonConstants#DFS_ADMIN}. 
- */ - @Deprecated - public static final String DFS_ADMIN = HdfsCommonConstants.DFS_ADMIN; + public static final String DFS_ADMIN = "dfs.cluster.administrators"; public static final String DFS_SERVER_HTTPS_KEYSTORE_RESOURCE_KEY = "dfs.https.server.keystore.resource"; public static final String DFS_SERVER_HTTPS_KEYSTORE_RESOURCE_DEFAULT = "ssl-server.xml"; public static final String DFS_SERVER_HTTPS_KEYPASSWORD_KEY = "ssl.server.keystore.keypassword"; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceMaster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceMaster.java index 97ed7ec2a4836..00883d94eefe6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceMaster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/ServiceMaster.java @@ -23,8 +23,8 @@ import org.apache.commons.cli.Options; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.HdfsCommonConstants; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; @@ -230,7 +230,7 @@ private static void removeHdfsDelegationToken(UserGroupInformation user) { while (iter.hasNext()) { Token token = iter.next(); if (token.getKind().equals( - HdfsCommonConstants.HDFS_DELEGATION_KIND)) { + DelegationTokenIdentifier.HDFS_DELEGATION_KIND)) { LOG.info("Remove HDFS delegation token {}.", token); iter.remove(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java index 2fda45c9b490e..9098bb4a751fd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/main/java/org/apache/hadoop/yarn/service/client/ServiceClient.java @@ -31,10 +31,10 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.HdfsCommonConstants; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.io.DataOutputBuffer; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.registry.client.api.RegistryConstants; @@ -1707,12 +1707,12 @@ private boolean checkPermissions(Path dependencyLibTarGzip) throws YarnConfiguration.YARN_ADMIN_ACL, YarnConfiguration.DEFAULT_YARN_ADMIN_ACL)); AccessControlList dfsAdminAcl = new AccessControlList( - getConfig().get(HdfsCommonConstants.DFS_ADMIN, " 
")); + getConfig().get(DFSConfigKeys.DFS_ADMIN, " ")); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); if (!yarnAdminAcl.isUserAllowed(ugi) && !dfsAdminAcl.isUserAllowed(ugi)) { LOG.error("User must be on the {} or {} list to have permission to " + "upload AM dependency tarball", YarnConfiguration.YARN_ADMIN_ACL, - HdfsCommonConstants.DFS_ADMIN); + DFSConfigKeys.DFS_ADMIN); return false; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceCLI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceCLI.java index 3b5f29a7ca1a3..f75c0afdbebb6 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceCLI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/src/test/java/org/apache/hadoop/yarn/service/client/TestServiceCLI.java @@ -20,9 +20,9 @@ import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.HdfsCommonConstants; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.yarn.client.cli.ApplicationCLI; @@ -138,7 +138,7 @@ public void setup() throws Throwable { basedir.mkdirs(); } yarnAdminNoneAclProp = YarnConfiguration.YARN_ADMIN_ACL + "=none"; - dfsAdminAclProp = HdfsCommonConstants.DFS_ADMIN + "=" + + dfsAdminAclProp = DFSConfigKeys.DFS_ADMIN + "=" + UserGroupInformation.getCurrentUser(); System.setProperty(YarnServiceConstants.PROPERTY_LIB_DIR, basedir .getAbsolutePath()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java index 1a35a6042561b..5a752064bce1c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/AggregatedLogFormat.java @@ -52,7 +52,6 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.ClusterStorageCapacityExceededException; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; @@ -61,6 +60,7 @@ import org.apache.hadoop.fs.Options; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.SecureIOUtils; import org.apache.hadoop.io.Writable; @@ -547,7 +547,7 @@ public void append(LogKey logKey, LogValue logValue) throws IOException { } @Override - public void close() throws ClusterStorageCapacityExceededException { + public void close() throws DSQuotaExceededException { try 
{ if (writer != null) { writer.close(); @@ -557,7 +557,7 @@ public void close() throws ClusterStorageCapacityExceededException { } finally { try { this.fsDataOStream.close(); - } catch (ClusterStorageCapacityExceededException e) { + } catch (DSQuotaExceededException e) { LOG.error("Exception in closing {}", this.fsDataOStream.getClass(), e); throw e; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/tfile/LogAggregationTFileController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/tfile/LogAggregationTFileController.java index ccfd8cef6c592..4b282488c6d56 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/tfile/LogAggregationTFileController.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/logaggregation/filecontroller/tfile/LogAggregationTFileController.java @@ -38,12 +38,12 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.ClusterStorageCapacityExceededException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.HarFs; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.hdfs.protocol.DSQuotaExceededException; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.logaggregation.AggregatedLogFormat.LogKey; @@ -99,7 +99,7 @@ public void closeWriter() throws LogAggregationDFSException { if (this.writer != null) { try { this.writer.close(); - } catch (ClusterStorageCapacityExceededException e) { + } catch (DSQuotaExceededException e) { throw new LogAggregationDFSException(e); } finally { this.writer = null; From a00b1c06f32564e9e723202d153698affff4eb53 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Thu, 5 Sep 2024 20:29:44 +0100 Subject: [PATCH 112/113] HADOOP-19269. Upgrade maven-shade-plugin 3.6.0 (#7029) Contributed by PJ Fanning --- hadoop-project/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 32a140a9de867..caa89e65f7e2d 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -176,7 +176,7 @@ 3.1.0 2.5.1 2.6 - 3.4.1 + 3.6.0 2.5 3.1.0 2.3 From 8c41fbcaf543b448ce7d446002ab739176b4c738 Mon Sep 17 00:00:00 2001 From: Benjamin Teke Date: Sat, 7 Sep 2024 08:48:38 +0200 Subject: [PATCH 113/113] =?UTF-8?q?Revert=20"YARN-11709.=20NodeManager=20s?= =?UTF-8?q?hould=20be=20shut=20down=20or=20blacklisted=20when=20it=20ca?= =?UTF-8?q?=E2=80=A6"=20(#7028)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f00094203bf40a8c3f2216cf22eaa5599e3b9b4d. 
--- .../nodemanager/LinuxContainerExecutor.java | 6 ++-- .../TestLinuxContainerExecutorWithMocks.java | 35 ++----------------- 2 files changed, 5 insertions(+), 36 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java index 19c06736035e9..19335045c865b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/LinuxContainerExecutor.java @@ -451,10 +451,8 @@ public void startLocalizer(LocalizerStartContext ctx) } catch (PrivilegedOperationException e) { int exitCode = e.getExitCode(); - LOG.error("Unrecoverable issue occurred. Marking the node as unhealthy to prevent " - + "further containers to get scheduled on the node and cause application failures. " + - "Exit code from the container " + locId + "startLocalizer is : " + exitCode, e); - nmContext.getNodeStatusUpdater().reportException(e); + LOG.warn("Exit code from container {} startLocalizer is : {}", + locId, exitCode, e); throw new IOException("Application " + appId + " initialization failed" + " (exitCode=" + exitCode + ") with output: " + e.getOutput(), e); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java index 7d49cab4a86d2..3d9d33c5a10dd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestLinuxContainerExecutorWithMocks.java @@ -26,7 +26,6 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyBoolean; import static org.mockito.Mockito.doAnswer; -import static org.mockito.Mockito.doNothing; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.spy; @@ -38,7 +37,6 @@ import java.io.FileReader; import java.io.IOException; import java.io.LineNumberReader; -import java.lang.reflect.Field; import java.net.InetSocketAddress; import java.net.URI; import java.net.URISyntaxException; @@ -347,8 +345,7 @@ public void testStartLocalizer() throws IOException { @Test public void testContainerLaunchError() - throws IOException, ContainerExecutionException, URISyntaxException, IllegalAccessException, - NoSuchFieldException { + throws IOException, ContainerExecutionException, URISyntaxException { final String[] expecetedMessage = {"badcommand", "Exit code: 24"}; final String[] executor = { @@ -390,14 +387,6 @@ public Object answer(InvocationOnMock invocationOnMock) dirsHandler.init(conf); mockExec.setConf(conf); - //set the private nmContext field without initing the 
LinuxContainerExecutor - NodeManager nodeManager = new NodeManager(); - NodeManager.NMContext nmContext = - nodeManager.createNMContext(null, null, null, false, conf); - Field lceNmContext = LinuxContainerExecutor.class.getDeclaredField("nmContext"); - lceNmContext.setAccessible(true); - lceNmContext.set(mockExec, nmContext); - String appSubmitter = "nobody"; String cmd = String .valueOf(PrivilegedOperation.RunAsUserCommand.LAUNCH_CONTAINER. @@ -612,6 +601,8 @@ public void testNoExitCodeFromPrivilegedOperation() throws Exception { LinuxContainerRuntime runtime = new DefaultLinuxContainerRuntime( spyPrivilegedExecutor); runtime.initialize(conf, null); + mockExec = new LinuxContainerExecutor(runtime); + mockExec.setConf(conf); LinuxContainerExecutor lce = new LinuxContainerExecutor(runtime) { @Override protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() { @@ -619,23 +610,6 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() { } }; lce.setConf(conf); - - //set the private nmContext field without initing the LinuxContainerExecutor - NodeManager nodeManager = new NodeManager(); - NodeManager.NMContext nmContext = - nodeManager.createNMContext(null, null, null, false, conf); - NodeManager.NMContext spyNmContext = spy(nmContext); - - //initialize a mock NodeStatusUpdater - NodeStatusUpdaterImpl nodeStatusUpdater = mock(NodeStatusUpdaterImpl.class); - nmContext.setNodeStatusUpdater(nodeStatusUpdater); - //imitate a void method call on the NodeStatusUpdater when setting NM unhealthy. - doNothing().when(nodeStatusUpdater).reportException(any()); - - Field lceNmContext = LinuxContainerExecutor.class.getDeclaredField("nmContext"); - lceNmContext.setAccessible(true); - lceNmContext.set(lce, nmContext); - InetSocketAddress address = InetSocketAddress.createUnresolved( "localhost", 8040); Path nmPrivateCTokensPath= new Path("file:///bin/nmPrivateCTokensPath"); @@ -698,9 +672,6 @@ protected PrivilegedOperationExecutor getPrivilegedOperationExecutor() { assertTrue("Unexpected exception " + e, e.getMessage().contains("exit code")); } - - //verify that the NM was set unhealthy on PrivilegedOperationException - verify(nodeStatusUpdater, times(1)).reportException(any()); } @Test

        "Subsequent Bytes"
        CodeSubsequent Bytes
        0<32-bit signed integer> <as many bytes as indicated by the integer>
        1<signed byte>
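To make the typed bytes layout from the streaming hunk above concrete, here is a minimal, self-contained sketch of the wire format only. It is not the real org.apache.hadoop.typedbytes API; the class and method names are invented for illustration. It writes a type-0 record (a 32-bit length followed by that many bytes) and a type-1 record (a single signed byte), each preceded by its type code.

    import java.io.ByteArrayOutputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;
    import java.nio.charset.StandardCharsets;

    // Illustrative only: shows the byte layout (type code, then payload),
    // not the actual Hadoop typedbytes classes.
    public class TypedBytesSketch {

      // Type code 0: a sequence of bytes -> 32-bit signed length, then the bytes.
      static void writeBytes(DataOutputStream out, byte[] value) throws IOException {
        out.writeByte(0);              // type code
        out.writeInt(value.length);    // 32-bit signed integer, big-endian
        out.write(value);              // as many bytes as indicated by the integer
      }

      // Type code 1: a single signed byte.
      static void writeSignedByte(DataOutputStream out, byte value) throws IOException {
        out.writeByte(1);              // type code
        out.writeByte(value);          // the signed byte itself
      }

      public static void main(String[] args) throws IOException {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(buffer);
        writeBytes(out, "hello".getBytes(StandardCharsets.UTF_8));
        writeSignedByte(out, (byte) 42);
        out.flush();
        // 1 + 4 + 5 bytes for the sequence record, 1 + 1 bytes for the byte record.
        System.out.println("encoded length = " + buffer.size()); // prints 12
      }
    }

DataOutputStream writes multi-byte values big-endian with no padding, which is why it lines up with the "everything is big-endian and unpadded" note in the hunk above.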