From fe1f321ae25415cc4c0f15af291a6976a6180d63 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Fri, 11 Jun 2021 10:41:16 +0100 Subject: [PATCH 01/19] HADOOP-17139 Re-enable optimized copyFromLocal implementation in S3AFileSystem S3a `copyFromLocalFile` was not using the optimized `innerCopyFromLocalFile` because the optimized version was not handling directory copies appropriately. This change fixes `innerCopyFromLocalFile` to handle directories. --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 47 ++++++++++------ .../fs/s3a/ITestS3ACopyFromLocalFile.java | 56 ++++++++++++++++--- 2 files changed, 78 insertions(+), 25 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 439d52edc14f5..e016d66267064 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -79,6 +79,7 @@ import com.amazonaws.services.s3.transfer.model.UploadResult; import com.amazonaws.event.ProgressListener; +import org.apache.hadoop.fs.PathExistsException; import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; import org.apache.hadoop.fs.store.audit.ActiveThreadSpanSource; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; @@ -3784,8 +3785,7 @@ public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, checkNotClosed(); LOG.debug("Copying local file from {} to {}", src, dst); trackDurationAndSpan(INVOCATION_COPY_FROM_LOCAL_FILE, dst, () -> { - // innerCopyFromLocalFile(delSrc, overwrite, src, dst); - super.copyFromLocalFile(delSrc, overwrite, src, dst); + innerCopyFromLocalFile(delSrc, overwrite, src, dst); return null; }); } @@ -3820,31 +3820,44 @@ private void innerCopyFromLocalFile(boolean delSrc, boolean overwrite, // Since we have a local file, we don't need to stream into a temporary file LocalFileSystem local = getLocal(getConf()); - File srcfile = local.pathToFile(src); - if (!srcfile.exists()) { + File srcFile = local.pathToFile(src); + if (!srcFile.exists()) { throw new FileNotFoundException("No file: " + src); } - if (!srcfile.isFile()) { - throw new FileNotFoundException("Not a file: " + src); - } try { - FileStatus status = innerGetFileStatus(dst, false, StatusProbeEnum.ALL); - if (!status.isFile()) { - throw new FileAlreadyExistsException(dst + " exists and is not a file"); + S3AFileStatus dstStatus = innerGetFileStatus(dst, false, StatusProbeEnum.ALL); + if (srcFile.isFile() && dstStatus.isDirectory()) { + throw new PathExistsException("Source is file and destination '" + dst + "' is directory"); } + if (!overwrite) { - throw new FileAlreadyExistsException(dst + " already exists"); + throw new PathExistsException(dst + " already exists"); } } catch (FileNotFoundException e) { // no destination, all is well } - final String key = pathToKey(dst); - final ObjectMetadata om = newObjectMetadata(srcfile.length()); - Progressable progress = null; - PutObjectRequest putObjectRequest = newPutObjectRequest(key, om, srcfile); - invoker.retry("copyFromLocalFile(" + src + ")", dst.toString(), true, - () -> executePut(putObjectRequest, progress)); + + if (srcFile.isDirectory()) { + // make the directory in the destination + String key = pathToKey(dst); + createFakeDirectory(key); + FileStatus[] contents = local.listStatus(src); + + // copy all directory contents from dst to src: DFS + for (FileStatus status: contents) { + Path childDst = new Path(dst, status.getPath().getName()); + innerCopyFromLocalFile(delSrc, overwrite, status.getPath(), childDst); + } + } else { + final String key = pathToKey(dst); + final ObjectMetadata om = newObjectMetadata(srcFile.length()); + Progressable progress = null; + PutObjectRequest putObjectRequest = newPutObjectRequest(key, om, srcFile); + invoker.retry("copyFromLocalFile(" + src + ")", dst.toString(), true, + () -> executePut(putObjectRequest, progress)); + } + if (delSrc) { local.delete(src, false); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java index 668e129d57e4a..215d3d566bbe8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java @@ -23,6 +23,7 @@ import java.io.IOException; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; +import java.nio.file.Files; import org.junit.Ignore; import org.junit.Test; @@ -100,14 +101,13 @@ public void testCopyFileOverwrite() throws Throwable { } @Test - @Ignore("HADOOP-15932") public void testCopyFileNoOverwriteDirectory() throws Throwable { file = createTempFile("hello"); Path dest = upload(file, true); S3AFileSystem fs = getFileSystem(); fs.delete(dest, false); fs.mkdirs(dest); - intercept(FileAlreadyExistsException.class, + intercept(PathExistsException.class, () -> upload(file, true)); } @@ -120,15 +120,48 @@ public void testCopyMissingFile() throws Throwable { () -> upload(file, true)); } + /* + * The following path is being created on disk and copied over + * /parent/ (trailing slash to make it clear it's a directory + * /parent/test1.txt + * /parent/child/test.txt + */ @Test - @Ignore("HADOOP-15932") - public void testCopyDirectoryFile() throws Throwable { - file = File.createTempFile("test", ".txt"); - // first upload to create - intercept(FileNotFoundException.class, "Not a file", - () -> upload(file.getParentFile(), true)); + public void testCopyTreeDirectoryWithoutDelete() throws Throwable { + java.nio.file.Path srcDir = Files.createTempDirectory("parent"); + java.nio.file.Path childDir = Files.createTempDirectory(srcDir, "child"); + java.nio.file.Path parentFile = Files.createTempFile(srcDir, "test1", ".txt"); + java.nio.file.Path childFile = Files.createTempFile(childDir, "test2", ".txt"); + + Path src = new Path(srcDir.toUri()); + Path dst = path(srcDir.getFileName().toString()); + getFileSystem().copyFromLocalFile(false, true, src, dst); + + java.nio.file.Path parent = srcDir.getParent(); + + assertPathExists("Parent directory", srcDir, parent); + assertPathExists("Child directory", childDir, parent); + assertPathExists("Parent file", parentFile, parent); + assertPathExists("Child file", childFile, parent); + + if (!Files.exists(srcDir)) { + throw new Exception("Folder was deleted when it shouldn't have!"); + } } + @Test + public void testCopyDirectoryWithDelete() throws Throwable { + java.nio.file.Path srcDir = Files.createTempDirectory("parent"); + Files.createTempFile(srcDir, "test1", ".txt"); + + Path src = new Path(srcDir.toUri()); + Path dst = path(srcDir.getFileName().toString()); + getFileSystem().copyFromLocalFile(true, true, src, dst); + + if (Files.exists(srcDir)) { + throw new Exception("Source directory should've been deleted"); + } + } @Test public void testLocalFilesOnly() throws Throwable { @@ -158,4 +191,11 @@ public File createTempFile(String text) throws IOException { FileUtils.write(f, text, ASCII); return f; } + + private void assertPathExists(String message, + java.nio.file.Path toVerify, + java.nio.file.Path parent) throws IOException { + Path qualifiedPath = path(parent.relativize(toVerify).toString()); + assertPathExists(message, qualifiedPath); + } } From cd7e17e5d00466ad78531017cee9df19dd8286ad Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Fri, 11 Jun 2021 15:48:18 +0100 Subject: [PATCH 02/19] Tracking the copyFromLocalFile PUT Object execution Turns out `testCostOfCopyFromLocalFile` tests were failing in `ITestS3AFileOperationCost` because OBJECT_PUT_REQUESTS statistic wasn't getting incremented properly. This should keep track of that stat. --- .../java/org/apache/hadoop/fs/s3a/S3AFileSystem.java | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index e016d66267064..5c59217a08ff4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3815,7 +3815,7 @@ public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, @Retries.RetryTranslated private void innerCopyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst) - throws IOException, FileAlreadyExistsException, AmazonClientException { + throws IOException, PathExistsException, AmazonClientException { LOG.debug("Copying local file from {} to {}", src, dst); // Since we have a local file, we don't need to stream into a temporary file @@ -3854,8 +3854,11 @@ private void innerCopyFromLocalFile(boolean delSrc, boolean overwrite, final ObjectMetadata om = newObjectMetadata(srcFile.length()); Progressable progress = null; PutObjectRequest putObjectRequest = newPutObjectRequest(key, om, srcFile); - invoker.retry("copyFromLocalFile(" + src + ")", dst.toString(), true, - () -> executePut(putObjectRequest, progress)); + trackDurationOfInvocation(getDurationTrackerFactory(), OBJECT_PUT_REQUESTS.getSymbol(), + () -> invoker.retry( + "copyFromLocalFile(" + src + ")", dst.toString(), + true, + () -> executePut(putObjectRequest, progress))); } if (delSrc) { From 75fddaa07df1e2e3b0fb1e9f2e69e7d5e4dac3d9 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Thu, 17 Jun 2021 21:21:11 +0100 Subject: [PATCH 03/19] Checkpoint commit --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 60 ++++- .../fs/s3a/impl/CopyFromLocalOperation.java | 205 ++++++++++++++++++ 2 files changed, 261 insertions(+), 4 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 5c59217a08ff4..2279495de2726 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -81,6 +81,7 @@ import org.apache.hadoop.fs.PathExistsException; import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; +import org.apache.hadoop.fs.s3a.impl.CopyFromLocalOperation; import org.apache.hadoop.fs.store.audit.ActiveThreadSpanSource; import org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions; @@ -3784,10 +3785,61 @@ public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst) throws IOException { checkNotClosed(); LOG.debug("Copying local file from {} to {}", src, dst); - trackDurationAndSpan(INVOCATION_COPY_FROM_LOCAL_FILE, dst, () -> { - innerCopyFromLocalFile(delSrc, overwrite, src, dst); - return null; - }); + trackDurationAndSpan(INVOCATION_COPY_FROM_LOCAL_FILE, dst, + new CopyFromLocalOperation( + createStoreContext(), + delSrc, + overwrite, + src, + dst, + createCopyFromLocalCallbacks())); + } + + protected CopyFromLocalOperation.CopyFromLocalOperationCallbacks + createCopyFromLocalCallbacks() { + return new CopyFromLocalCallbacksImpl(); + } + + protected class CopyFromLocalCallbacksImpl implements + CopyFromLocalOperation.CopyFromLocalOperationCallbacks { + @Override + public RemoteIterator listStatusIterator( + final Path path, + final boolean recursive) throws IOException { + LocalFileSystem local = getLocal(getConf()); + return local.listFiles(path, recursive); + } + + @Override + public LocalFileSystem getLocalFS() throws IOException { + return getLocal(getConf()); + } + + @Override + public void copyFileFromTo(File file, Path from, Path to) throws IOException { + final String key = pathToKey(to); + final ObjectMetadata om = newObjectMetadata(file.length()); + Progressable progress = null; + PutObjectRequest putObjectRequest = newPutObjectRequest(key, om, file); + trackDurationOfInvocation( + getDurationTrackerFactory(), + OBJECT_PUT_REQUESTS.getSymbol(), + () -> S3AFileSystem.this.invoker.retry( + "copyFromLocalFile(" + "" + ")", to.toString(), + true, + () -> executePut(putObjectRequest, progress))); + } + + @Override + public S3AFileStatus getFileStatus( + Path f, + boolean needEmptyDirectoryFlag, + Set probes) throws IOException { + return S3AFileSystem.this.innerGetFileStatus( + f, + needEmptyDirectoryFlag, + probes); + } } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java new file mode 100644 index 0000000000000..cf26a30f5a7e9 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java @@ -0,0 +1,205 @@ +package org.apache.hadoop.fs.s3a.impl; + +import org.apache.commons.collections.comparators.ReverseComparator; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathExistsException; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.Retries; +import org.apache.hadoop.fs.s3a.S3AFileStatus; +import org.apache.hadoop.fs.statistics.IOStatisticsSource; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Set; + +/** + * TODO list: + * - Add abstract class + tests for LocalFS + * - Add tests for this class + * - Add documentation + * - This class + * - `filesystem.md` + * - Remove remaining TODO s + * - Clean old `innerCopyFromLocalFile` code up + */ +public class CopyFromLocalOperation extends ExecutingStoreOperation + implements IOStatisticsSource { + + private static final Logger LOG = LoggerFactory.getLogger( + CopyFromLocalOperation.class); + + private final CopyFromLocalOperationCallbacks callbacks; + private final boolean delSrc; + private final boolean overwrite; + private final Path src; + private final Path dst; + + public CopyFromLocalOperation( + final StoreContext storeContext, + boolean delSrc, + boolean overwrite, + Path src, + Path dst, + CopyFromLocalOperationCallbacks callbacks) { + super(storeContext); + this.callbacks = callbacks; + this.delSrc = delSrc; + this.overwrite = overwrite; + this.src = src; + this.dst = dst; + } + + @Override + @Retries.RetryTranslated + public Void execute() + throws IOException, PathExistsException { + LOG.debug("Copying local file from {} to {}", src, dst); + LocalFileSystem local = callbacks.getLocalFS(); + File sourceFile = local.pathToFile(src); + + checkSource(sourceFile); + prepareDestination(dst, sourceFile.isFile(), overwrite); + uploadSourceFromFS(local); + + if (delSrc) { + local.delete(src, true); + } + + return null; + } + + private void uploadSourceFromFS(LocalFileSystem local) + throws IOException, PathExistsException { + RemoteIterator localFiles = callbacks + .listStatusIterator(src, true); + + // Go through all files and create UploadEntries + List entries = new ArrayList<>(); + while (localFiles.hasNext()) { + LocatedFileStatus status = localFiles.next(); + Path destPath = getFinalPath(status.getPath()); + // UploadEntries: have a destination path, a file size + entries.add(new UploadEntry( + status.getPath(), + destPath, + status.getLen())); + } + + if (localFiles instanceof Closeable) { + ((Closeable) localFiles).close(); + } + + // Sort all upload entries based on size + entries.sort(new ReverseComparator(new UploadEntry.SizeComparator())); + + int LARGEST_N_FILES = 5; + final int sortedUploadsCount = Math.min(LARGEST_N_FILES, entries.size()); + List uploaded = new ArrayList<>(); + + // Take only top most X entries and upload + for (int uploadNo = 0; uploadNo < sortedUploadsCount; uploadNo++) { + UploadEntry uploadEntry = entries.get(uploadNo); + File file = local.pathToFile(uploadEntry.source); + callbacks.copyFileFromTo( + file, + uploadEntry.source, + uploadEntry.destination); + + uploaded.add(uploadEntry); + } + + // Shuffle all remaining entries and upload them + // TODO: Should directories be handled differently? + entries.removeAll(uploaded); + Collections.shuffle(entries); + for (UploadEntry uploadEntry : entries) { + File file = local.pathToFile(uploadEntry.source); + callbacks.copyFileFromTo( + file, + uploadEntry.source, + uploadEntry.destination); + } + } + + private void checkSource(File sourceFile) + throws FileNotFoundException { + if (!sourceFile.exists()) { + throw new FileNotFoundException("No file: " + src); + } + } + + + private void prepareDestination( + Path dst, + boolean isSrcFile, + boolean overwrite) throws PathExistsException, IOException { + try { + S3AFileStatus dstStatus = callbacks.getFileStatus( + dst, + false, + StatusProbeEnum.ALL); + + if (isSrcFile && dstStatus.isDirectory()) { + throw new PathExistsException("Source is file and destination '" + dst + "' is directory"); + } + + if (!overwrite) { + throw new PathExistsException(dst + " already exists"); + } + } catch (FileNotFoundException e) { + // no destination, all is well + } + } + + private Path getFinalPath(Path srcFile) throws IOException { + // TODO: Implement this bad boy + return null; + } + + private static final class UploadEntry { + private final Path source; + private final Path destination; + private final long size; + + private UploadEntry(Path source, Path destination, long size) { + this.source = source; + this.destination = destination; + this.size = size; + } + + static class SizeComparator implements Comparator { + @Override + public int compare(UploadEntry entry1, UploadEntry entry2) { + return Long.compare(entry1.size, entry2.size); + } + } + } + + public interface CopyFromLocalOperationCallbacks { + RemoteIterator listStatusIterator( + Path f, + boolean recursive) throws IOException; + + S3AFileStatus getFileStatus( + final Path f, + final boolean needEmptyDirectoryFlag, + final Set probes) throws IOException; + + LocalFileSystem getLocalFS() throws IOException; + + void copyFileFromTo( + File file, + Path source, + Path destination) throws IOException; + } +} From 6b612899733ee483ef560b1fcf60b7f15d723e0a Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Fri, 25 Jun 2021 11:23:08 +0100 Subject: [PATCH 04/19] First working implementation Have to admit I've spent more time than I would've liked trying to make sure all operations have valid audit spans. `trackDurationAndSpan` on an operation doesn't do what I think it does with regards to the validity of the span. That's something to look at. --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 74 ++++++----- .../fs/s3a/impl/CopyFromLocalOperation.java | 120 ++++++++++-------- .../fs/s3a/ITestS3ACopyFromLocalFile.java | 29 +++++ 3 files changed, 144 insertions(+), 79 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 2279495de2726..51ffa570088d8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -224,6 +224,7 @@ import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtLevel; import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_CONTINUE_LIST_REQUEST; import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_LIST_REQUEST; +import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_PUT_REQUEST; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.pairedTrackerFactory; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfInvocation; @@ -3786,59 +3787,74 @@ public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, checkNotClosed(); LOG.debug("Copying local file from {} to {}", src, dst); trackDurationAndSpan(INVOCATION_COPY_FROM_LOCAL_FILE, dst, - new CopyFromLocalOperation( + () -> new CopyFromLocalOperation( createStoreContext(), - delSrc, - overwrite, src, dst, - createCopyFromLocalCallbacks())); + delSrc, + overwrite, + createCopyFromLocalCallbacks()).execute()); } protected CopyFromLocalOperation.CopyFromLocalOperationCallbacks - createCopyFromLocalCallbacks() { - return new CopyFromLocalCallbacksImpl(); + createCopyFromLocalCallbacks() throws IOException { + LocalFileSystem local = getLocal(getConf()); + return new CopyFromLocalCallbacksImpl(local); } protected class CopyFromLocalCallbacksImpl implements CopyFromLocalOperation.CopyFromLocalOperationCallbacks { + private final LocalFileSystem local; + + private CopyFromLocalCallbacksImpl(LocalFileSystem local) { + this.local = local; + } + @Override public RemoteIterator listStatusIterator( final Path path, final boolean recursive) throws IOException { - LocalFileSystem local = getLocal(getConf()); - return local.listFiles(path, recursive); + return local.listFilesAndDirs(path, true); + } + + @Override + public File pathToFile(Path path) { + return local.pathToFile(path); } @Override - public LocalFileSystem getLocalFS() throws IOException { - return getLocal(getConf()); + public boolean delete(Path path, boolean recursive) throws IOException { + return local.delete(path, recursive); } @Override public void copyFileFromTo(File file, Path from, Path to) throws IOException { - final String key = pathToKey(to); - final ObjectMetadata om = newObjectMetadata(file.length()); - Progressable progress = null; - PutObjectRequest putObjectRequest = newPutObjectRequest(key, om, file); - trackDurationOfInvocation( - getDurationTrackerFactory(), - OBJECT_PUT_REQUESTS.getSymbol(), - () -> S3AFileSystem.this.invoker.retry( - "copyFromLocalFile(" + "" + ")", to.toString(), - true, - () -> executePut(putObjectRequest, progress))); + trackDurationAndSpan( + OBJECT_PUT_REQUESTS, + to, + () -> { + + final String key = pathToKey(to); + final ObjectMetadata om = newObjectMetadata(file.length()); + Progressable progress = null; + PutObjectRequest putObjectRequest = newPutObjectRequest(key, om, file); + S3AFileSystem.this.invoker.retry( + "putObject(" + "" + ")", to.toString(), + true, + () -> executePut(putObjectRequest, progress)); + + return null; + }); + } + + @Override + public FileStatus getFileStatus(Path f) throws IOException { + return S3AFileSystem.this.getFileStatus(f); } @Override - public S3AFileStatus getFileStatus( - Path f, - boolean needEmptyDirectoryFlag, - Set probes) throws IOException { - return S3AFileSystem.this.innerGetFileStatus( - f, - needEmptyDirectoryFlag, - probes); + public boolean createEmptyDir(Path path) throws IOException { + return S3AFileSystem.this.mkdirs(path); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java index cf26a30f5a7e9..f468836cbab28 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java @@ -1,14 +1,12 @@ package org.apache.hadoop.fs.s3a.impl; import org.apache.commons.collections.comparators.ReverseComparator; -import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathExistsException; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.Retries; -import org.apache.hadoop.fs.s3a.S3AFileStatus; -import org.apache.hadoop.fs.statistics.IOStatisticsSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -16,9 +14,11 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.net.URI; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.HashSet; import java.util.List; import java.util.Set; @@ -32,67 +32,76 @@ * - Remove remaining TODO s * - Clean old `innerCopyFromLocalFile` code up */ -public class CopyFromLocalOperation extends ExecutingStoreOperation - implements IOStatisticsSource { +public class CopyFromLocalOperation extends ExecutingStoreOperation { private static final Logger LOG = LoggerFactory.getLogger( CopyFromLocalOperation.class); private final CopyFromLocalOperationCallbacks callbacks; - private final boolean delSrc; + private final boolean deleteSource; private final boolean overwrite; - private final Path src; - private final Path dst; + private final Path source; + private final Path destination; public CopyFromLocalOperation( final StoreContext storeContext, - boolean delSrc, + Path source, + Path destination, + boolean deleteSource, boolean overwrite, - Path src, - Path dst, CopyFromLocalOperationCallbacks callbacks) { super(storeContext); this.callbacks = callbacks; - this.delSrc = delSrc; + this.deleteSource = deleteSource; this.overwrite = overwrite; - this.src = src; - this.dst = dst; + this.source = source; + this.destination = destination; } @Override @Retries.RetryTranslated public Void execute() throws IOException, PathExistsException { - LOG.debug("Copying local file from {} to {}", src, dst); - LocalFileSystem local = callbacks.getLocalFS(); - File sourceFile = local.pathToFile(src); + LOG.debug("Copying local file from {} to {}", source, destination); + File sourceFile = callbacks.pathToFile(source); checkSource(sourceFile); - prepareDestination(dst, sourceFile.isFile(), overwrite); - uploadSourceFromFS(local); + prepareDestination(destination, sourceFile, overwrite); + uploadSourceFromFS(); - if (delSrc) { - local.delete(src, true); + if (deleteSource) { + callbacks.delete(source, true); } return null; } - private void uploadSourceFromFS(LocalFileSystem local) + private void uploadSourceFromFS() throws IOException, PathExistsException { RemoteIterator localFiles = callbacks - .listStatusIterator(src, true); + .listStatusIterator(source, true); - // Go through all files and create UploadEntries + // After all files are traversed, this set will contain only emptyDirs + Set emptyDirs = new HashSet<>(); List entries = new ArrayList<>(); while (localFiles.hasNext()) { - LocatedFileStatus status = localFiles.next(); - Path destPath = getFinalPath(status.getPath()); + LocatedFileStatus sourceFile = localFiles.next(); + Path sourceFilePath = sourceFile.getPath(); + + // Directory containing this file / directory isn't empty + emptyDirs.remove(sourceFilePath.getParent()); + + if (sourceFile.isDirectory()) { + emptyDirs.add(sourceFilePath); + continue; + } + + Path destPath = getFinalPath(sourceFilePath); // UploadEntries: have a destination path, a file size entries.add(new UploadEntry( - status.getPath(), + sourceFilePath, destPath, - status.getLen())); + sourceFile.getLen())); } if (localFiles instanceof Closeable) { @@ -109,7 +118,7 @@ private void uploadSourceFromFS(LocalFileSystem local) // Take only top most X entries and upload for (int uploadNo = 0; uploadNo < sortedUploadsCount; uploadNo++) { UploadEntry uploadEntry = entries.get(uploadNo); - File file = local.pathToFile(uploadEntry.source); + File file = callbacks.pathToFile(uploadEntry.source); callbacks.copyFileFromTo( file, uploadEntry.source, @@ -119,38 +128,39 @@ private void uploadSourceFromFS(LocalFileSystem local) } // Shuffle all remaining entries and upload them - // TODO: Should directories be handled differently? entries.removeAll(uploaded); Collections.shuffle(entries); for (UploadEntry uploadEntry : entries) { - File file = local.pathToFile(uploadEntry.source); + File file = callbacks.pathToFile(uploadEntry.source); callbacks.copyFileFromTo( file, uploadEntry.source, uploadEntry.destination); } + + for (Path emptyDir : emptyDirs) { + callbacks.createEmptyDir(getFinalPath(emptyDir)); + } } - private void checkSource(File sourceFile) + private void checkSource(File src) throws FileNotFoundException { - if (!sourceFile.exists()) { - throw new FileNotFoundException("No file: " + src); + if (!src.exists()) { + throw new FileNotFoundException("No file: " + src.getPath()); } } - private void prepareDestination( Path dst, - boolean isSrcFile, + File src, boolean overwrite) throws PathExistsException, IOException { try { - S3AFileStatus dstStatus = callbacks.getFileStatus( - dst, - false, - StatusProbeEnum.ALL); + FileStatus dstStatus = callbacks.getFileStatus(dst); - if (isSrcFile && dstStatus.isDirectory()) { - throw new PathExistsException("Source is file and destination '" + dst + "' is directory"); + if (src.isFile() && dstStatus.isDirectory()) { + throw new PathExistsException( + "Source '" + src.getPath() +"' is file and " + + "destination '" + dst + "' is directory"); } if (!overwrite) { @@ -161,9 +171,18 @@ private void prepareDestination( } } - private Path getFinalPath(Path srcFile) throws IOException { - // TODO: Implement this bad boy - return null; + private Path getFinalPath(Path src) throws IOException { + URI currentSrcUri = src.toUri(); + URI relativeSrcUri = source.toUri().relativize(currentSrcUri); + if (currentSrcUri == relativeSrcUri) { + throw new IOException("Cannot get relative path"); + } + + if (!relativeSrcUri.getPath().isEmpty()) { + return new Path(destination, relativeSrcUri.getPath()); + } else { + return new Path(destination, src.getName()); + } } private static final class UploadEntry { @@ -190,16 +209,17 @@ RemoteIterator listStatusIterator( Path f, boolean recursive) throws IOException; - S3AFileStatus getFileStatus( - final Path f, - final boolean needEmptyDirectoryFlag, - final Set probes) throws IOException; + FileStatus getFileStatus( final Path f) throws IOException; + + File pathToFile(Path path); - LocalFileSystem getLocalFS() throws IOException; + boolean delete(Path path, boolean recursive) throws IOException; void copyFileFromTo( File file, Path source, Path destination) throws IOException; + + boolean createEmptyDir(Path path) throws IOException; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java index 215d3d566bbe8..a2f34179269e2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java @@ -24,7 +24,13 @@ import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; +import java.util.Set; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.impl.CopyFromLocalOperation; +import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum; import org.junit.Ignore; import org.junit.Test; @@ -120,6 +126,29 @@ public void testCopyMissingFile() throws Throwable { () -> upload(file, true)); } + @Test + public void testImplementationTemporary() throws Throwable { + java.nio.file.Path srcDir = Files.createTempDirectory("parent"); + java.nio.file.Path childDir = Files.createTempDirectory(srcDir, "child"); + java.nio.file.Path secondChild = Files.createTempDirectory(srcDir, "secondChild"); + java.nio.file.Path parentFile = Files.createTempFile(srcDir, "test1", ".txt"); + java.nio.file.Path childFile = Files.createTempFile(childDir, "test2", ".txt"); + + Path src = new Path(srcDir.toUri()); + Path dst = path(srcDir.getFileName().toString()); + + S3AFileSystem fileSystem = getFileSystem(); + fileSystem.copyFromLocalFile(true, true, src, dst); + + java.nio.file.Path parent = srcDir.getParent(); + + assertPathExists("Parent directory", srcDir, parent); + assertPathExists("Child directory", childDir, parent); + assertPathExists("Parent file", parentFile, parent); + assertPathExists("Child file", childFile, parent); + + } + /* * The following path is being created on disk and copied over * /parent/ (trailing slash to make it clear it's a directory From 1a14abdb6fe3e46222be8332f497237692adbd8e Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Fri, 25 Jun 2021 14:24:50 +0100 Subject: [PATCH 05/19] Improvements in directory handling --- .../fs/s3a/impl/CopyFromLocalOperation.java | 44 +++++++++++++------ .../fs/s3a/ITestS3ACopyFromLocalFile.java | 9 ---- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java index f468836cbab28..df0d701626c98 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java @@ -20,16 +20,18 @@ import java.util.Comparator; import java.util.HashSet; import java.util.List; +import java.util.Optional; import java.util.Set; /** * TODO list: + * - Improve implementation to use Completable Futures + * - Better error handling * - Add abstract class + tests for LocalFS * - Add tests for this class * - Add documentation * - This class * - `filesystem.md` - * - Remove remaining TODO s * - Clean old `innerCopyFromLocalFile` code up */ public class CopyFromLocalOperation extends ExecutingStoreOperation { @@ -43,6 +45,8 @@ public class CopyFromLocalOperation extends ExecutingStoreOperation { private final Path source; private final Path destination; + private FileStatus dstStatus; + public CopyFromLocalOperation( final StoreContext storeContext, Path source, @@ -64,6 +68,11 @@ public Void execute() throws IOException, PathExistsException { LOG.debug("Copying local file from {} to {}", source, destination); File sourceFile = callbacks.pathToFile(source); + try { + dstStatus = callbacks.getFileStatus(destination); + } catch (FileNotFoundException e) { + dstStatus = null; + } checkSource(sourceFile); prepareDestination(destination, sourceFile, overwrite); @@ -154,20 +163,18 @@ private void prepareDestination( Path dst, File src, boolean overwrite) throws PathExistsException, IOException { - try { - FileStatus dstStatus = callbacks.getFileStatus(dst); + if (!getDstStatus().isPresent()) { + return; + } - if (src.isFile() && dstStatus.isDirectory()) { - throw new PathExistsException( - "Source '" + src.getPath() +"' is file and " + - "destination '" + dst + "' is directory"); - } + if (src.isFile() && getDstStatus().get().isDirectory()) { + throw new PathExistsException( + "Source '" + src.getPath() +"' is file and " + + "destination '" + dst + "' is directory"); + } - if (!overwrite) { - throw new PathExistsException(dst + " already exists"); - } - } catch (FileNotFoundException e) { - // no destination, all is well + if (!overwrite) { + throw new PathExistsException(dst + " already exists"); } } @@ -178,13 +185,22 @@ private Path getFinalPath(Path src) throws IOException { throw new IOException("Cannot get relative path"); } + Optional status = getDstStatus(); if (!relativeSrcUri.getPath().isEmpty()) { return new Path(destination, relativeSrcUri.getPath()); - } else { + } else if (status.isPresent() && status.get().isDirectory()) { + // file to dir return new Path(destination, src.getName()); + } else { + // file to file + return destination; } } + private Optional getDstStatus() { + return Optional.ofNullable(dstStatus); + } + private static final class UploadEntry { private final Path source; private final Path destination; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java index a2f34179269e2..0a2d4ddd63f9a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java @@ -24,19 +24,10 @@ import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; -import java.util.Set; - -import org.apache.hadoop.fs.LocalFileSystem; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.fs.s3a.impl.CopyFromLocalOperation; -import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum; -import org.junit.Ignore; import org.junit.Test; import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathExistsException; From 90adc2b29bc2401c0dd6d2c25a2652a199398257 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Fri, 25 Jun 2021 14:28:22 +0100 Subject: [PATCH 06/19] Adding hadoop-common changes --- .../java/org/apache/hadoop/fs/FileSystem.java | 18 ++++++++++++++- .../fs/s3a/ITestS3ACopyFromLocalFile.java | 23 ------------------- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index 057382bed9cde..f06b485a5a195 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -2281,10 +2281,22 @@ public RemoteIterator listStatusIterator(final Path p) public RemoteIterator listFiles( final Path f, final boolean recursive) throws FileNotFoundException, IOException { + return createRemoteIterator(f, recursive, false); + } + + public RemoteIterator listFilesAndDirs( + final Path f, final boolean recursive) + throws FileNotFoundException, IOException { + return createRemoteIterator(f, recursive, true); + } + + private RemoteIterator createRemoteIterator( + final Path f, final boolean recursive, boolean includeDir + ) throws FileNotFoundException, IOException { return new RemoteIterator() { private Stack> itors = new Stack<>(); private RemoteIterator curItor = - listLocatedStatus(f); + listLocatedStatus(f); private LocatedFileStatus curFile; @Override @@ -2313,6 +2325,10 @@ private void handleFileStat(LocatedFileStatus stat) throws IOException { if (stat.isFile()) { // file curFile = stat; } else if (recursive) { // directory + if (includeDir) { + curFile = stat; + } + itors.push(curItor); curItor = listLocatedStatus(stat.getPath()); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java index 0a2d4ddd63f9a..6e56657f29f54 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java @@ -117,29 +117,6 @@ public void testCopyMissingFile() throws Throwable { () -> upload(file, true)); } - @Test - public void testImplementationTemporary() throws Throwable { - java.nio.file.Path srcDir = Files.createTempDirectory("parent"); - java.nio.file.Path childDir = Files.createTempDirectory(srcDir, "child"); - java.nio.file.Path secondChild = Files.createTempDirectory(srcDir, "secondChild"); - java.nio.file.Path parentFile = Files.createTempFile(srcDir, "test1", ".txt"); - java.nio.file.Path childFile = Files.createTempFile(childDir, "test2", ".txt"); - - Path src = new Path(srcDir.toUri()); - Path dst = path(srcDir.getFileName().toString()); - - S3AFileSystem fileSystem = getFileSystem(); - fileSystem.copyFromLocalFile(true, true, src, dst); - - java.nio.file.Path parent = srcDir.getParent(); - - assertPathExists("Parent directory", srcDir, parent); - assertPathExists("Child directory", childDir, parent); - assertPathExists("Parent file", parentFile, parent); - assertPathExists("Child file", childFile, parent); - - } - /* * The following path is being created on disk and copied over * /parent/ (trailing slash to make it clear it's a directory From 081d3864c812d9c5c859c04612fe8e1c9fc0d352 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Fri, 25 Jun 2021 14:36:54 +0100 Subject: [PATCH 07/19] Remove old implementation --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 77 ------------------- 1 file changed, 77 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 51ffa570088d8..a2aed49fc9c3b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -224,7 +224,6 @@ import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.logIOStatisticsAtLevel; import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_CONTINUE_LIST_REQUEST; import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_LIST_REQUEST; -import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_PUT_REQUEST; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.pairedTrackerFactory; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDuration; import static org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding.trackDurationOfInvocation; @@ -3858,82 +3857,6 @@ public boolean createEmptyDir(Path path) throws IOException { } } - /** - * The src file is on the local disk. Add it to FS at - * the given dst name. - * - * This version doesn't need to create a temporary file to calculate the md5. - * Sadly this doesn't seem to be used by the shell cp :( - * - * HADOOP-15932: this method has been unwired from - * {@link #copyFromLocalFile(boolean, boolean, Path, Path)} until - * it is extended to list and copy whole directories. - * delSrc indicates if the source should be removed - * @param delSrc whether to delete the src - * @param overwrite whether to overwrite an existing file - * @param src Source path: must be on local filesystem - * @param dst path - * @throws IOException IO problem - * @throws FileAlreadyExistsException the destination file exists and - * overwrite==false, or if the destination is a directory. - * @throws FileNotFoundException if the source file does not exit - * @throws AmazonClientException failure in the AWS SDK - * @throws IllegalArgumentException if the source path is not on the local FS - */ - @Retries.RetryTranslated - private void innerCopyFromLocalFile(boolean delSrc, boolean overwrite, - Path src, Path dst) - throws IOException, PathExistsException, AmazonClientException { - LOG.debug("Copying local file from {} to {}", src, dst); - - // Since we have a local file, we don't need to stream into a temporary file - LocalFileSystem local = getLocal(getConf()); - File srcFile = local.pathToFile(src); - if (!srcFile.exists()) { - throw new FileNotFoundException("No file: " + src); - } - - try { - S3AFileStatus dstStatus = innerGetFileStatus(dst, false, StatusProbeEnum.ALL); - if (srcFile.isFile() && dstStatus.isDirectory()) { - throw new PathExistsException("Source is file and destination '" + dst + "' is directory"); - } - - if (!overwrite) { - throw new PathExistsException(dst + " already exists"); - } - } catch (FileNotFoundException e) { - // no destination, all is well - } - - if (srcFile.isDirectory()) { - // make the directory in the destination - String key = pathToKey(dst); - createFakeDirectory(key); - FileStatus[] contents = local.listStatus(src); - - // copy all directory contents from dst to src: DFS - for (FileStatus status: contents) { - Path childDst = new Path(dst, status.getPath().getName()); - innerCopyFromLocalFile(delSrc, overwrite, status.getPath(), childDst); - } - } else { - final String key = pathToKey(dst); - final ObjectMetadata om = newObjectMetadata(srcFile.length()); - Progressable progress = null; - PutObjectRequest putObjectRequest = newPutObjectRequest(key, om, srcFile); - trackDurationOfInvocation(getDurationTrackerFactory(), OBJECT_PUT_REQUESTS.getSymbol(), - () -> invoker.retry( - "copyFromLocalFile(" + src + ")", dst.toString(), - true, - () -> executePut(putObjectRequest, progress))); - } - - if (delSrc) { - local.delete(src, false); - } - } - /** * Execute a PUT via the transfer manager, blocking for completion, * updating the metastore afterwards. From 057db227552711e1e724beef846d8beaee7d45b4 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Mon, 28 Jun 2021 09:15:37 +0100 Subject: [PATCH 08/19] Reverting common changes --- .../java/org/apache/hadoop/fs/FileSystem.java | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java index f06b485a5a195..057382bed9cde 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java @@ -2281,22 +2281,10 @@ public RemoteIterator listStatusIterator(final Path p) public RemoteIterator listFiles( final Path f, final boolean recursive) throws FileNotFoundException, IOException { - return createRemoteIterator(f, recursive, false); - } - - public RemoteIterator listFilesAndDirs( - final Path f, final boolean recursive) - throws FileNotFoundException, IOException { - return createRemoteIterator(f, recursive, true); - } - - private RemoteIterator createRemoteIterator( - final Path f, final boolean recursive, boolean includeDir - ) throws FileNotFoundException, IOException { return new RemoteIterator() { private Stack> itors = new Stack<>(); private RemoteIterator curItor = - listLocatedStatus(f); + listLocatedStatus(f); private LocatedFileStatus curFile; @Override @@ -2325,10 +2313,6 @@ private void handleFileStat(LocatedFileStatus stat) throws IOException { if (stat.isFile()) { // file curFile = stat; } else if (recursive) { // directory - if (includeDir) { - curFile = stat; - } - itors.push(curItor); curItor = listLocatedStatus(stat.getPath()); } From 817acb6ec2b55527c39e563b2768fac00a804eb1 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Wed, 30 Jun 2021 09:26:36 +0100 Subject: [PATCH 09/19] Added executor for copyFromLocal + improvements A couple of things happening in this commit: - Added comments to CopyFromLocalOperation since I'm kind of done with experimenting with it - Added a thread executor that takes care of the uploads in the operation (uploading files + creating empty dirs) - Added `AbstractContractCopyFromLocalTest` test class with all the tests inside the ITestS3ACopyFromLocalFile file with implementation for S3a and Local FS - Turns out maybe some cases weren't handled properly in S3a when compared to local: copy from file to dir, which will get fixed in the next commit --- .../hadoop/fs/TestLocalFSCopyFromLocal.java | 13 + .../AbstractContractCopyFromLocalTest.java | 242 ++++++++++++ .../apache/hadoop/fs/s3a/S3AFileSystem.java | 5 +- .../fs/s3a/impl/CopyFromLocalOperation.java | 344 +++++++++++++++--- .../fs/s3a/ITestS3ACopyFromLocalFile.java | 174 +-------- 5 files changed, 566 insertions(+), 212 deletions(-) create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java new file mode 100644 index 0000000000000..f1c465d17fcbb --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java @@ -0,0 +1,13 @@ +package org.apache.hadoop.fs; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractCopyFromLocalTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.localfs.LocalFSContract; + +public class TestLocalFSCopyFromLocal extends AbstractContractCopyFromLocalTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new LocalFSContract(conf); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java new file mode 100644 index 0000000000000..4e0c500caa276 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java @@ -0,0 +1,242 @@ +package org.apache.hadoop.fs.contract; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathExistsException; +import org.junit.Ignore; +import org.junit.Test; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +public abstract class AbstractContractCopyFromLocalTest extends + AbstractFSContractTestBase { + + private static final Charset ASCII = StandardCharsets.US_ASCII; + private File file; + + @Override + public void teardown() throws Exception { + super.teardown(); + if (file != null) { + file.delete(); + } + } + + + private Path mkTestDir(String prefix) throws IOException { + FileSystem fs = getFileSystem(); + File file = File.createTempFile(prefix, ".dir"); + Path dest = path(file.getName()); + fs.delete(dest, false); + mkdirs(dest); + return dest; + } + + // file - dir: should be mismatch error + @Test + public void testSourceIsFileAndDestinationIsDirectory() throws Throwable { + describe("Source is a file and destination is a directory should fail"); + + file = File.createTempFile("test", ".txt"); + Path source = new Path(file.toURI()); + Path destination = mkTestDir("test"); + + intercept(PathExistsException.class, + () -> getFileSystem().copyFromLocalFile(source, destination)); + } + + // file - dir: no overwrite of directory even when flag is set + @Test + public void testCopyFileNoOverwriteDirectory() throws Throwable { + describe("Source is a file and destination is directory, overwrite" + + " flag should not change behaviour"); + + file = createTempFile("hello"); + Path dest = upload(file, true); + FileSystem fs = getFileSystem(); + fs.delete(dest, false); + fs.mkdirs(dest); + + intercept(PathExistsException.class, + () -> upload(file, true)); + } + + private Path createTempDirectory(String prefix) throws IOException { + return new Path(Files.createTempDirectory("srcDir").toUri()); + } + + // source is directory and dest is file failure + // dir - file: should be mismatch error + @Test + public void testSourceIsDirectoryAndDestinationIsFile() throws Throwable { + describe("Source is a directory and destination is a file should fail"); + + file = File.createTempFile("local", ""); + Path source = createTempDirectory("srcDir"); + Path destination = upload(file, false); + + intercept(FileAlreadyExistsException.class, + () -> getFileSystem().copyFromLocalFile( + false, true, source, destination)); + } + + + // are empty directories copied? + // source is directory and dest is directory too + // is the source cleaned up properly after deletion + // does the source stay the same when it should + // directory: is the destination NOT overwritten if the flag isn't there + + // source is directory and dest has a parent of a file + // dest is dir or file overwriting a file + // source is empty dir and dest is empty dir + // source is dir with empty dir underneath, dest is the same + + // source is file, destination is file + @Test + public void testCopyEmptyFile() throws Throwable { + file = File.createTempFile("test", ".txt"); + Path dest = upload(file, true); + assertPathExists("uploaded file", dest); + } + + // source is file, destination is file, contents are checked + @Test + public void testCopyFile() throws Throwable { + String message = "hello"; + file = createTempFile(message); + Path dest = upload(file, true); + assertPathExists("uploaded file not found", dest); + FileSystem fs = getFileSystem(); + FileStatus status = fs.getFileStatus(dest); + assertEquals("File length of " + status, + message.getBytes(ASCII).length, status.getLen()); + assertFileTextEquals(dest, message); + } + + // file: is the destination NOT overwritten if the flag isn't there + @Test + public void testCopyFileNoOverwrite() throws Throwable { + file = createTempFile("hello"); + Path dest = upload(file, true); + // HADOOP-15932: the exception type changes here + intercept(PathExistsException.class, + () -> upload(file, false)); + } + + // file - file: is the destination overwritten when flag + @Test + public void testCopyFileOverwrite() throws Throwable { + file = createTempFile("hello"); + Path dest = upload(file, true); + String updated = "updated"; + FileUtils.write(file, updated, ASCII); + upload(file, true); + assertFileTextEquals(dest, updated); + } + + @Test + public void testCopyMissingFile() throws Throwable { + file = File.createTempFile("test", ".txt"); + file.delete(); + // first upload to create + intercept(FileNotFoundException.class, "", + () -> upload(file, true)); + } + + /* + * The following path is being created on disk and copied over + * /parent/ (trailing slash to make it clear it's a directory + * /parent/test1.txt + * /parent/child/test.txt + */ + @Test + public void testCopyTreeDirectoryWithoutDelete() throws Throwable { + java.nio.file.Path srcDir = Files.createTempDirectory("parent"); + java.nio.file.Path childDir = Files.createTempDirectory(srcDir, "child"); + java.nio.file.Path secondChild = Files.createTempDirectory(srcDir, "secondChild"); + java.nio.file.Path parentFile = Files.createTempFile(srcDir, "test1", ".txt"); + java.nio.file.Path childFile = Files.createTempFile(childDir, "test2", ".txt"); + + Path src = new Path(srcDir.toUri()); + Path dst = path(srcDir.getFileName().toString()); + getFileSystem().copyFromLocalFile(false, true, src, dst); + + java.nio.file.Path parent = srcDir.getParent(); + + assertNioPathExists("Parent directory", srcDir, parent); + assertNioPathExists("Child directory", childDir, parent); + assertNioPathExists("Second Child directory", secondChild, parent); + assertNioPathExists("Parent file", parentFile, parent); + assertNioPathExists("Child file", childFile, parent); + + assertTrue("Folder was deleted when it shouldn't have!", + Files.exists(srcDir)); + } + + @Test + public void testCopyDirectoryWithDelete() throws Throwable { + java.nio.file.Path srcDir = Files.createTempDirectory("parent"); + Files.createTempFile(srcDir, "test1", ".txt"); + + Path src = new Path(srcDir.toUri()); + Path dst = path(srcDir.getFileName().toString()); + getFileSystem().copyFromLocalFile(true, true, src, dst); + + assertFalse("Source directory should've been deleted", + Files.exists(srcDir)); + } + + @Test + public void testLocalFilesOnly() throws Throwable { + Path dst = path("testLocalFilesOnly"); + intercept(IllegalArgumentException.class, + () -> { + getFileSystem().copyFromLocalFile(false, true, dst, dst); + return "copy successful"; + }); + } + + public Path upload(File srcFile, boolean overwrite) throws IOException { + Path src = new Path(srcFile.toURI()); + Path dst = path(srcFile.getName()); + getFileSystem().copyFromLocalFile(false, overwrite, src, dst); + return dst; + } + + public void assertFileTextEquals(Path path, String expected) + throws IOException { + assertEquals("Wrong data in " + path, + expected, IOUtils.toString(getFileSystem().open(path), ASCII)); + } + + /** + * Create a temp file with some text. + * @param text text for the file + * @return the file + * @throws IOException on a failure + */ + public File createTempFile(String text) throws IOException { + File f = File.createTempFile("test", ".txt"); + FileUtils.write(f, text, ASCII); + return f; + } + + private void assertNioPathExists(String message, + java.nio.file.Path toVerify, + java.nio.file.Path parent) throws IOException { + Path qualifiedPath = path(parent.relativize(toVerify).toString()); + assertPathExists(message, qualifiedPath); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index a2aed49fc9c3b..255875e5b2e64 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3811,9 +3811,8 @@ private CopyFromLocalCallbacksImpl(LocalFileSystem local) { @Override public RemoteIterator listStatusIterator( - final Path path, - final boolean recursive) throws IOException { - return local.listFilesAndDirs(path, true); + final Path path) throws IOException { + return local.listLocatedStatus(path); } @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java index df0d701626c98..566d3320ef682 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.fs.s3a.impl; import org.apache.commons.collections.comparators.ReverseComparator; @@ -5,8 +23,13 @@ import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathExistsException; +import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.Retries; +import org.apache.hadoop.fs.store.audit.AuditingFunctions; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -22,29 +45,77 @@ import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.Stack; +import java.util.concurrent.CompletableFuture; /** * TODO list: - * - Improve implementation to use Completable Futures - * - Better error handling * - Add abstract class + tests for LocalFS * - Add tests for this class * - Add documentation - * - This class * - `filesystem.md` - * - Clean old `innerCopyFromLocalFile` code up + * + *

Implementation of CopyFromLocalOperation

+ *

+ * This operation copies a file or directory (recursively) from a local + * FS to an object store. Initially, this operation has been developed for + * S3 (s3a) interaction, however, there's minimal work needed for it to + * work with other stores. + *

+ *

How the uploading of files works:

+ *
    + *
  • all source files and directories are scanned through;
  • + *
  • the LARGEST_N_FILES start uploading;
  • + *
  • the remaining files are shuffled and uploaded;
  • + *
  • + * any remaining empty directory is uploaded too to preserve local + * tree structure. + *
  • + *
*/ public class CopyFromLocalOperation extends ExecutingStoreOperation { + /** + * Largest N files to be uploaded first + */ + private static final int LARGEST_N_FILES = 5; + private static final Logger LOG = LoggerFactory.getLogger( CopyFromLocalOperation.class); + /** + * Callbacks to be used by this operation for external / IO actions + */ private final CopyFromLocalOperationCallbacks callbacks; + + /** + * Delete source after operation finishes + */ private final boolean deleteSource; + + /** + * Overwrite destination files / folders + */ private final boolean overwrite; + + /** + * Source path to file / directory + */ private final Path source; + + /** + * Destination path, expected to be + */ private final Path destination; + /** + * Async operations executor + */ + private final ListeningExecutorService executor; + + /** + * Destination file status + */ private FileStatus dstStatus; public CopyFromLocalOperation( @@ -60,8 +131,19 @@ public CopyFromLocalOperation( this.overwrite = overwrite; this.source = source; this.destination = destination; + this.executor = MoreExecutors.listeningDecorator( + storeContext.createThrottledExecutor(1) + ); } + /** + * Executes the {@link CopyFromLocalOperation}. + * + * @throws IOException - if there are any failures with upload or deletion + * of files. Check {@link CopyFromLocalOperationCallbacks} for specifics. + * @throws PathExistsException - if the path exists and no overwrite flag + * is set OR if the source is file and destination is a directory + */ @Override @Retries.RetryTranslated public Void execute() @@ -75,7 +157,7 @@ public Void execute() } checkSource(sourceFile); - prepareDestination(destination, sourceFile, overwrite); + checkDestination(destination, sourceFile, overwrite); uploadSourceFromFS(); if (deleteSource) { @@ -85,10 +167,18 @@ public Void execute() return null; } - private void uploadSourceFromFS() - throws IOException, PathExistsException { - RemoteIterator localFiles = callbacks - .listStatusIterator(source, true); + /** + * Starts async upload operations for files. Creating an empty directory + * classifies as a "file upload". + * + * Check {@link CopyFromLocalOperation} for details on the order of + * operations. + * + * @throws IOException - if listing or upload fail + */ + private void uploadSourceFromFS() throws IOException { + RemoteIterator localFiles = listFilesAndDirs(source); + List> activeOps = new ArrayList<>(); // After all files are traversed, this set will contain only emptyDirs Set emptyDirs = new HashSet<>(); @@ -114,44 +204,82 @@ private void uploadSourceFromFS() } if (localFiles instanceof Closeable) { - ((Closeable) localFiles).close(); + IOUtils.closeStream((Closeable) localFiles); } // Sort all upload entries based on size entries.sort(new ReverseComparator(new UploadEntry.SizeComparator())); - int LARGEST_N_FILES = 5; + // Take only top most N entries and upload final int sortedUploadsCount = Math.min(LARGEST_N_FILES, entries.size()); - List uploaded = new ArrayList<>(); + List markedForUpload = new ArrayList<>(); - // Take only top most X entries and upload for (int uploadNo = 0; uploadNo < sortedUploadsCount; uploadNo++) { UploadEntry uploadEntry = entries.get(uploadNo); File file = callbacks.pathToFile(uploadEntry.source); - callbacks.copyFileFromTo( - file, - uploadEntry.source, - uploadEntry.destination); - - uploaded.add(uploadEntry); + activeOps.add(submitUpload(file, uploadEntry)); + markedForUpload.add(uploadEntry); } // Shuffle all remaining entries and upload them - entries.removeAll(uploaded); + entries.removeAll(markedForUpload); Collections.shuffle(entries); for (UploadEntry uploadEntry : entries) { File file = callbacks.pathToFile(uploadEntry.source); - callbacks.copyFileFromTo( - file, - uploadEntry.source, - uploadEntry.destination); + activeOps.add(submitUpload(file, uploadEntry)); } + // TODO: Add test that checks the number of calls for empty dirs has been made for (Path emptyDir : emptyDirs) { - callbacks.createEmptyDir(getFinalPath(emptyDir)); + Path emptyDirPath = getFinalPath(emptyDir); + activeOps.add(submitCreateEmptyDir(emptyDirPath)); } + + CallableSupplier.waitForCompletion(activeOps); } + /** + * Async call to create an empty directory. + * + * @param dir directory path + * @return the submitted future + */ + private CompletableFuture submitCreateEmptyDir(Path dir) { + return CallableSupplier.submit(executor, AuditingFunctions.callableWithinAuditSpan( + getAuditSpan(), () -> { + callbacks.createEmptyDir(dir); + return null; + } + )); + } + + /** + * Async call to upload a file. + * + * @param file - File to be uploaded + * @param uploadEntry - Upload entry holding the source and destination + * @return the submitted future + */ + private CompletableFuture submitUpload( + File file, + UploadEntry uploadEntry) { + return CallableSupplier.submit(executor, AuditingFunctions.callableWithinAuditSpan( + getAuditSpan(), () -> { + callbacks.copyFileFromTo( + file, + uploadEntry.source, + uploadEntry.destination); + return null; + } + )); + } + + /** + * Checks the source before upload starts + * + * @param src - Source file + * @throws FileNotFoundException - if the file isn't found + */ private void checkSource(File src) throws FileNotFoundException { if (!src.exists()) { @@ -159,33 +287,49 @@ private void checkSource(File src) } } - private void prepareDestination( - Path dst, + /** + * Check the destination path and make sure it's compatible with the source, + * i.e. source and destination are both files / directories. + * + * @param dest - Destination path + * @param src - Source file + * @param overwrite - Should source overwrite destination + * @throws PathExistsException - If the destination path exists and no + * overwrite flag is set or source is file and destination is path + */ + private void checkDestination( + Path dest, File src, - boolean overwrite) throws PathExistsException, IOException { - if (!getDstStatus().isPresent()) { + boolean overwrite) throws PathExistsException { + if (!getDestStatus().isPresent()) { return; } - if (src.isFile() && getDstStatus().get().isDirectory()) { + if (src.isFile() && getDestStatus().get().isDirectory()) { throw new PathExistsException( "Source '" + src.getPath() +"' is file and " + - "destination '" + dst + "' is directory"); + "destination '" + dest + "' is directory"); } if (!overwrite) { - throw new PathExistsException(dst + " already exists"); + throw new PathExistsException(dest + " already exists"); } } - private Path getFinalPath(Path src) throws IOException { + /** + * Get the final path of a source file with regards to its destination + * @param src - source path + * @return - the final path for the source file to be uploaded to + * @throws PathIOException - if a relative path can't be created + */ + private Path getFinalPath(Path src) throws PathIOException { URI currentSrcUri = src.toUri(); URI relativeSrcUri = source.toUri().relativize(currentSrcUri); - if (currentSrcUri == relativeSrcUri) { - throw new IOException("Cannot get relative path"); + if (relativeSrcUri.equals(currentSrcUri)) { + throw new PathIOException("Cannot get relative path"); } - Optional status = getDstStatus(); + Optional status = getDestStatus(); if (!relativeSrcUri.getPath().isEmpty()) { return new Path(destination, relativeSrcUri.getPath()); } else if (status.isPresent() && status.get().isDirectory()) { @@ -197,10 +341,81 @@ private Path getFinalPath(Path src) throws IOException { } } - private Optional getDstStatus() { + private Optional getDestStatus() { return Optional.ofNullable(dstStatus); } + /** + * {@link RemoteIterator} which lists all of the files and directories for + * a given path. It's strikingly similar to + * {@link org.apache.hadoop.fs.LocalFileSystem#listFiles(Path, boolean)} + * however with the small addition that it includes directories. + * + * @param path - Path to list files and directories from + * @return - an iterator + * @throws IOException - if listing of a path file fails + */ + private RemoteIterator listFilesAndDirs(Path path) + throws IOException { + return new RemoteIterator() { + private final Stack> iterators = + new Stack<>(); + private RemoteIterator current = + callbacks.listStatusIterator(path); + private LocatedFileStatus curFile; + + @Override + public boolean hasNext() throws IOException { + while (curFile == null) { + if (current.hasNext()) { + handleFileStat(current.next()); + } else if (!iterators.empty()) { + current = iterators.pop(); + } else { + return false; + } + } + return true; + } + + /** + * Process the input stat. + * If it is a file or directory return the file stat. + * If it is a directory, traverse the directory; + * @param stat input status + * @throws IOException if any IO error occurs + */ + private void handleFileStat(LocatedFileStatus stat) + throws IOException { + if (stat.isFile()) { // file + curFile = stat; + } else { // directory + curFile = stat; + iterators.push(current); + current = callbacks.listStatusIterator(stat.getPath()); + } + } + + @Override + public LocatedFileStatus next() throws IOException { + if (hasNext()) { + LocatedFileStatus result = curFile; + curFile = null; + return result; + } + throw new java.util.NoSuchElementException("No more entry in " + + path); + } + }; + } + + /** + *

Represents an entry for a file to be uploaded.

+ *

+ * Helpful with sorting files by their size and keeping track of path + * information for the upload. + *

+ */ private static final class UploadEntry { private final Path source; private final Path destination; @@ -212,6 +427,9 @@ private UploadEntry(Path source, Path destination, long size) { this.size = size; } + /** + * Compares {@link UploadEntry} objects and produces DESC ordering + */ static class SizeComparator implements Comparator { @Override public int compare(UploadEntry entry1, UploadEntry entry2) { @@ -220,22 +438,62 @@ public int compare(UploadEntry entry1, UploadEntry entry2) { } } + /** + * Define the contract for {@link CopyFromLocalOperation} to interact + * with any external resources. + */ public interface CopyFromLocalOperationCallbacks { - RemoteIterator listStatusIterator( - Path f, - boolean recursive) throws IOException; - - FileStatus getFileStatus( final Path f) throws IOException; - + /** + * List all entries (files AND directories) for a path. + * @param path - path to list + * @return an iterator for all entries + * @throws IOException - for any failure + */ + RemoteIterator listStatusIterator(Path path) + throws IOException; + + /** + * Get the file status for a path + * @param path - target path + * @return FileStatus + * @throws IOException - for any failure + */ + FileStatus getFileStatus(Path path) throws IOException; + + /** + * Get the file from a path + * @param path - target path + * @return file at path + */ File pathToFile(Path path); + /** + * Delete file / directory at path + * @param path - target path + * @param recursive - recursive deletion + * @return boolean result of operation + * @throws IOException for any failure + */ boolean delete(Path path, boolean recursive) throws IOException; + /** + * Copy / Upload a file from a source path to a destination path + * @param file - target file + * @param source - source path + * @param destination - destination path + * @throws IOException for any failure + */ void copyFileFromTo( File file, Path source, Path destination) throws IOException; + /** + * Create empty directory at path. Most likely an upload operation + * @param path - target path + * @return boolean result of operation + * @throws IOException for any failure + */ boolean createEmptyDir(Path path) throws IOException; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java index 6e56657f29f54..589d79d6a5d2c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java @@ -18,181 +18,23 @@ package org.apache.hadoop.fs.s3a; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import org.junit.Test; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractCopyFromLocalTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.s3a.S3AContract; -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathExistsException; - -import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** * Test {@link S3AFileSystem#copyFromLocalFile(boolean, boolean, Path, Path)}. * Some of the tests have been disabled pending a fix for HADOOP-15932 and * recursive directory copying; the test cases themselves may be obsolete. */ -public class ITestS3ACopyFromLocalFile extends AbstractS3ATestBase { - private static final Charset ASCII = StandardCharsets.US_ASCII; - - private File file; +public class ITestS3ACopyFromLocalFile extends + AbstractContractCopyFromLocalTest { @Override - public void teardown() throws Exception { - super.teardown(); - if (file != null) { - file.delete(); - } - } - - @Test - public void testCopyEmptyFile() throws Throwable { - file = File.createTempFile("test", ".txt"); - Path dest = upload(file, true); - assertPathExists("uploaded file", dest); - } - - @Test - public void testCopyFile() throws Throwable { - String message = "hello"; - file = createTempFile(message); - Path dest = upload(file, true); - assertPathExists("uploaded file not found", dest); - S3AFileSystem fs = getFileSystem(); - FileStatus status = fs.getFileStatus(dest); - assertEquals("File length of " + status, - message.getBytes(ASCII).length, status.getLen()); - assertFileTextEquals(dest, message); - } - - public void assertFileTextEquals(Path path, String expected) - throws IOException { - assertEquals("Wrong data in " + path, - expected, IOUtils.toString(getFileSystem().open(path), ASCII)); - } - - @Test - public void testCopyFileNoOverwrite() throws Throwable { - file = createTempFile("hello"); - Path dest = upload(file, true); - // HADOOP-15932: the exception type changes here - intercept(PathExistsException.class, - () -> upload(file, false)); - } - - @Test - public void testCopyFileOverwrite() throws Throwable { - file = createTempFile("hello"); - Path dest = upload(file, true); - String updated = "updated"; - FileUtils.write(file, updated, ASCII); - upload(file, true); - assertFileTextEquals(dest, updated); - } - - @Test - public void testCopyFileNoOverwriteDirectory() throws Throwable { - file = createTempFile("hello"); - Path dest = upload(file, true); - S3AFileSystem fs = getFileSystem(); - fs.delete(dest, false); - fs.mkdirs(dest); - intercept(PathExistsException.class, - () -> upload(file, true)); - } - - @Test - public void testCopyMissingFile() throws Throwable { - file = File.createTempFile("test", ".txt"); - file.delete(); - // first upload to create - intercept(FileNotFoundException.class, "", - () -> upload(file, true)); - } - - /* - * The following path is being created on disk and copied over - * /parent/ (trailing slash to make it clear it's a directory - * /parent/test1.txt - * /parent/child/test.txt - */ - @Test - public void testCopyTreeDirectoryWithoutDelete() throws Throwable { - java.nio.file.Path srcDir = Files.createTempDirectory("parent"); - java.nio.file.Path childDir = Files.createTempDirectory(srcDir, "child"); - java.nio.file.Path parentFile = Files.createTempFile(srcDir, "test1", ".txt"); - java.nio.file.Path childFile = Files.createTempFile(childDir, "test2", ".txt"); - - Path src = new Path(srcDir.toUri()); - Path dst = path(srcDir.getFileName().toString()); - getFileSystem().copyFromLocalFile(false, true, src, dst); - - java.nio.file.Path parent = srcDir.getParent(); - - assertPathExists("Parent directory", srcDir, parent); - assertPathExists("Child directory", childDir, parent); - assertPathExists("Parent file", parentFile, parent); - assertPathExists("Child file", childFile, parent); - - if (!Files.exists(srcDir)) { - throw new Exception("Folder was deleted when it shouldn't have!"); - } - } - - @Test - public void testCopyDirectoryWithDelete() throws Throwable { - java.nio.file.Path srcDir = Files.createTempDirectory("parent"); - Files.createTempFile(srcDir, "test1", ".txt"); - - Path src = new Path(srcDir.toUri()); - Path dst = path(srcDir.getFileName().toString()); - getFileSystem().copyFromLocalFile(true, true, src, dst); - - if (Files.exists(srcDir)) { - throw new Exception("Source directory should've been deleted"); - } - } - - @Test - public void testLocalFilesOnly() throws Throwable { - Path dst = path("testLocalFilesOnly"); - intercept(IllegalArgumentException.class, - () -> { - getFileSystem().copyFromLocalFile(false, true, dst, dst); - return "copy successful"; - }); - } - - public Path upload(File srcFile, boolean overwrite) throws IOException { - Path src = new Path(srcFile.toURI()); - Path dst = path(srcFile.getName()); - getFileSystem().copyFromLocalFile(false, overwrite, src, dst); - return dst; - } - - /** - * Create a temp file with some text. - * @param text text for the file - * @return the file - * @throws IOException on a failure - */ - public File createTempFile(String text) throws IOException { - File f = File.createTempFile("test", ".txt"); - FileUtils.write(f, text, ASCII); - return f; - } - - private void assertPathExists(String message, - java.nio.file.Path toVerify, - java.nio.file.Path parent) throws IOException { - Path qualifiedPath = path(parent.relativize(toVerify).toString()); - assertPathExists(message, qualifiedPath); + protected AbstractFSContract createContract(Configuration conf) { + return new S3AContract(conf); } } From dd1644f357eaf5fd42ca97720cb3dae3c7011ca6 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Wed, 30 Jun 2021 15:23:34 +0100 Subject: [PATCH 10/19] Cleaning up and organizing tests Went through tests and polished some rough bits here and there by abstracting duplicate code and moving a test case to be handled differently for the 2 file systems, check: `testSourceIsDirectoryAndDestinationIsFile` for both S3a and LocalFS. New bug fix: - Empty source directory is being handled for s3a; --- .../hadoop/fs/TestLocalFSCopyFromLocal.java | 19 ++ .../AbstractContractCopyFromLocalTest.java | 281 ++++++++++-------- .../fs/s3a/impl/CopyFromLocalOperation.java | 14 +- .../fs/s3a/ITestS3ACopyFromLocalFile.java | 29 ++ 4 files changed, 207 insertions(+), 136 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java index f1c465d17fcbb..4ec3dbba0d927 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java @@ -4,10 +4,29 @@ import org.apache.hadoop.fs.contract.AbstractContractCopyFromLocalTest; import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.contract.localfs.LocalFSContract; +import org.junit.Test; + +import java.io.File; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; public class TestLocalFSCopyFromLocal extends AbstractContractCopyFromLocalTest { @Override protected AbstractFSContract createContract(Configuration conf) { return new LocalFSContract(conf); } + + @Test + public void testSourceIsDirectoryAndDestinationIsFile() throws Throwable { + describe("Source is a directory and destination is a file should fail"); + + File file = createTempFile("local"); + File source = createTempDirectory("srcDir"); + Path destination = copyFromLocal(file, false); + Path sourcePath = new Path(source.toURI()); + + intercept(FileAlreadyExistsException.class, + () -> getFileSystem().copyFromLocalFile(false, true, + sourcePath, destination)); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java index 4e0c500caa276..d1591df4a7bb0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java @@ -2,12 +2,10 @@ import org.apache.commons.io.FileUtils; import org.apache.commons.io.IOUtils; -import org.apache.hadoop.fs.FileAlreadyExistsException; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathExistsException; -import org.junit.Ignore; import org.junit.Test; import java.io.File; @@ -33,91 +31,23 @@ public void teardown() throws Exception { } } - - private Path mkTestDir(String prefix) throws IOException { - FileSystem fs = getFileSystem(); - File file = File.createTempFile(prefix, ".dir"); - Path dest = path(file.getName()); - fs.delete(dest, false); - mkdirs(dest); - return dest; - } - - // file - dir: should be mismatch error - @Test - public void testSourceIsFileAndDestinationIsDirectory() throws Throwable { - describe("Source is a file and destination is a directory should fail"); - - file = File.createTempFile("test", ".txt"); - Path source = new Path(file.toURI()); - Path destination = mkTestDir("test"); - - intercept(PathExistsException.class, - () -> getFileSystem().copyFromLocalFile(source, destination)); - } - - // file - dir: no overwrite of directory even when flag is set - @Test - public void testCopyFileNoOverwriteDirectory() throws Throwable { - describe("Source is a file and destination is directory, overwrite" + - " flag should not change behaviour"); - - file = createTempFile("hello"); - Path dest = upload(file, true); - FileSystem fs = getFileSystem(); - fs.delete(dest, false); - fs.mkdirs(dest); - - intercept(PathExistsException.class, - () -> upload(file, true)); - } - - private Path createTempDirectory(String prefix) throws IOException { - return new Path(Files.createTempDirectory("srcDir").toUri()); - } - - // source is directory and dest is file failure - // dir - file: should be mismatch error - @Test - public void testSourceIsDirectoryAndDestinationIsFile() throws Throwable { - describe("Source is a directory and destination is a file should fail"); - - file = File.createTempFile("local", ""); - Path source = createTempDirectory("srcDir"); - Path destination = upload(file, false); - - intercept(FileAlreadyExistsException.class, - () -> getFileSystem().copyFromLocalFile( - false, true, source, destination)); - } - - - // are empty directories copied? - // source is directory and dest is directory too - // is the source cleaned up properly after deletion - // does the source stay the same when it should - // directory: is the destination NOT overwritten if the flag isn't there - - // source is directory and dest has a parent of a file - // dest is dir or file overwriting a file - // source is empty dir and dest is empty dir - // source is dir with empty dir underneath, dest is the same - - // source is file, destination is file @Test public void testCopyEmptyFile() throws Throwable { file = File.createTempFile("test", ".txt"); - Path dest = upload(file, true); + Path dest = copyFromLocal(file, true); assertPathExists("uploaded file", dest); } - // source is file, destination is file, contents are checked @Test public void testCopyFile() throws Throwable { String message = "hello"; file = createTempFile(message); - Path dest = upload(file, true); + Path dest = copyFromLocal(file, true); + assertPathExists("uploaded file not found", dest); + // TODO: Should this be assertFileExists? + assertTrue("source file deleted", Files.exists(file.toPath())); + FileSystem fs = getFileSystem(); FileStatus status = fs.getFileStatus(dest); assertEquals("File length of " + status, @@ -125,64 +55,145 @@ public void testCopyFile() throws Throwable { assertFileTextEquals(dest, message); } - // file: is the destination NOT overwritten if the flag isn't there @Test public void testCopyFileNoOverwrite() throws Throwable { file = createTempFile("hello"); - Path dest = upload(file, true); - // HADOOP-15932: the exception type changes here + copyFromLocal(file, true); intercept(PathExistsException.class, - () -> upload(file, false)); + () -> copyFromLocal(file, false)); } - // file - file: is the destination overwritten when flag @Test public void testCopyFileOverwrite() throws Throwable { file = createTempFile("hello"); - Path dest = upload(file, true); + Path dest = copyFromLocal(file, true); String updated = "updated"; FileUtils.write(file, updated, ASCII); - upload(file, true); + copyFromLocal(file, true); assertFileTextEquals(dest, updated); } @Test public void testCopyMissingFile() throws Throwable { - file = File.createTempFile("test", ".txt"); + file = createTempFile("test"); file.delete(); // first upload to create intercept(FileNotFoundException.class, "", - () -> upload(file, true)); + () -> copyFromLocal(file, true)); + } + + @Test + public void testSourceIsFileAndDelSrcTrue() throws Throwable { + describe("Source is a file delSrc flag is set to true"); + + file = createTempFile("test"); + copyFromLocal(file, false, true); + + assertFalse("uploaded file", Files.exists(file.toPath())); + } + + @Test + public void testSourceIsFileAndDestinationIsDirectory() throws Throwable { + describe("Source is a file and destination is a directory. File" + + "should be copied inside the directory."); + + file = createTempFile("test"); + Path source = new Path(file.toURI()); + FileSystem fs = getFileSystem(); + + File dir = createTempDirectory("test"); + Path destination = fileToPath(dir); + fs.delete(destination, false); + mkdirs(destination); + + fs.copyFromLocalFile(source, destination); + System.out.println("Did this work?"); + } + + @Test + public void testSrcIsDirWithFilesAndCopySuccessful() throws Throwable { + describe("Source is a directory with files, copy should copy all" + + " dir contents to source"); + String firstChild = "childOne"; + String secondChild = "childTwo"; + File parent = createTempDirectory("parent"); + File root = parent.getParentFile(); + File childFile = createTempFile(parent, firstChild, firstChild); + File secondChildFile = createTempFile(parent, secondChild, secondChild); + + copyFromLocal(parent, false); + + assertPathExists("Parent directory not copied", fileToPath(parent)); + assertFileTextEquals(fileToPath(childFile, root), firstChild); + assertFileTextEquals(fileToPath(secondChildFile, root), secondChild); + } + + @Test + public void testSrcIsEmptyDirWithCopySuccessful() throws Throwable { + describe("Source is an empty directory, copy should succeed"); + File source = createTempDirectory("source"); + Path dest = copyFromLocal(source, false); + + assertPathExists("Empty directory not copied", dest); + } + + @Test + public void testSrcIsDirWithOverwriteOptions() throws Throwable { + describe("Source is a directory, destination exists and" + + "should be overwritten."); + File source = createTempDirectory("source"); + String contents = "child file"; + File child = createTempFile(source, "child", contents); + + copyFromLocal(source, false); + // TODO: Fix local FS + intercept(PathExistsException.class, + () -> copyFromLocal(source, false)); + + copyFromLocal(source, true); + assertPathExists("Parent directory not copied", fileToPath(source)); + assertFileTextEquals(fileToPath(child, source.getParentFile()), + contents); + } + + @Test + public void testSrcIsDirWithDelSrcOptions() throws Throwable { + describe("Source is a directory, destination exists and" + + "should be overwritten."); + File source = createTempDirectory("source"); + String contents = "child file"; + File child = createTempFile(source, "child", contents); + + copyFromLocal(source, false, true); + Path dest = fileToPath(child, source.getParentFile()); + + assertFalse("Directory not deleted", Files.exists(source.toPath())); + assertFileTextEquals(dest, contents); } /* * The following path is being created on disk and copied over - * /parent/ (trailing slash to make it clear it's a directory + * /parent/ (directory) * /parent/test1.txt * /parent/child/test.txt + * /parent/secondChild/ (directory) */ @Test public void testCopyTreeDirectoryWithoutDelete() throws Throwable { - java.nio.file.Path srcDir = Files.createTempDirectory("parent"); - java.nio.file.Path childDir = Files.createTempDirectory(srcDir, "child"); - java.nio.file.Path secondChild = Files.createTempDirectory(srcDir, "secondChild"); - java.nio.file.Path parentFile = Files.createTempFile(srcDir, "test1", ".txt"); - java.nio.file.Path childFile = Files.createTempFile(childDir, "test2", ".txt"); - - Path src = new Path(srcDir.toUri()); - Path dst = path(srcDir.getFileName().toString()); - getFileSystem().copyFromLocalFile(false, true, src, dst); - - java.nio.file.Path parent = srcDir.getParent(); - - assertNioPathExists("Parent directory", srcDir, parent); - assertNioPathExists("Child directory", childDir, parent); - assertNioPathExists("Second Child directory", secondChild, parent); - assertNioPathExists("Parent file", parentFile, parent); - assertNioPathExists("Child file", childFile, parent); - - assertTrue("Folder was deleted when it shouldn't have!", - Files.exists(srcDir)); + File srcDir = createTempDirectory("parent"); + File childDir = createTempDirectory(srcDir, "child"); + File secondChild = createTempDirectory(srcDir, "secondChild"); + File parentFile = createTempFile(srcDir, "test1", ".txt"); + File childFile = createTempFile(childDir, "test2", ".txt"); + + copyFromLocal(srcDir, false, false); + File root = srcDir.getParentFile(); + + assertPathExists("Parent directory", fileToPath(srcDir)); + assertPathExists("Child directory", fileToPath(childDir, root)); + assertPathExists("Second Child directory", fileToPath(secondChild, root)); + assertPathExists("Parent file", fileToPath(parentFile, root)); + assertPathExists("Child file", fileToPath(childFile, root)); } @Test @@ -198,27 +209,32 @@ public void testCopyDirectoryWithDelete() throws Throwable { Files.exists(srcDir)); } - @Test - public void testLocalFilesOnly() throws Throwable { - Path dst = path("testLocalFilesOnly"); - intercept(IllegalArgumentException.class, - () -> { - getFileSystem().copyFromLocalFile(false, true, dst, dst); - return "copy successful"; - }); + protected Path fileToPath(File file) throws IOException { + return path(file.getName()); } - public Path upload(File srcFile, boolean overwrite) throws IOException { - Path src = new Path(srcFile.toURI()); - Path dst = path(srcFile.getName()); - getFileSystem().copyFromLocalFile(false, overwrite, src, dst); - return dst; + protected Path fileToPath(File file, File parent) throws IOException { + return path(parent + .toPath() + .relativize(file.toPath()) + .toString()); + } + + protected File createTempDirectory(String name) throws IOException { + return Files.createTempDirectory(name).toFile(); + } + + protected Path copyFromLocal(File srcFile, boolean overwrite) throws + IOException { + return copyFromLocal(srcFile, overwrite, false); } - public void assertFileTextEquals(Path path, String expected) + protected Path copyFromLocal(File srcFile, boolean overwrite, boolean delSrc) throws IOException { - assertEquals("Wrong data in " + path, - expected, IOUtils.toString(getFileSystem().open(path), ASCII)); + Path src = new Path(srcFile.toURI()); + Path dst = path(srcFile.getName()); + getFileSystem().copyFromLocalFile(delSrc, overwrite, src, dst); + return dst; } /** @@ -227,16 +243,25 @@ public void assertFileTextEquals(Path path, String expected) * @return the file * @throws IOException on a failure */ - public File createTempFile(String text) throws IOException { + protected File createTempFile(String text) throws IOException { File f = File.createTempFile("test", ".txt"); FileUtils.write(f, text, ASCII); return f; } - private void assertNioPathExists(String message, - java.nio.file.Path toVerify, - java.nio.file.Path parent) throws IOException { - Path qualifiedPath = path(parent.relativize(toVerify).toString()); - assertPathExists(message, qualifiedPath); + protected File createTempFile(File parent, String name, String text) throws IOException { + File f = File.createTempFile(name, ".txt", parent); + FileUtils.write(f, text, ASCII); + return f; + } + + private File createTempDirectory(File parent, String name) throws IOException { + return Files.createTempDirectory(parent.toPath(), name).toFile(); + } + + private void assertFileTextEquals(Path path, String expected) + throws IOException { + assertEquals("Wrong data in " + path, + expected, IOUtils.toString(getFileSystem().open(path), ASCII)); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java index 566d3320ef682..3c98c8f300f66 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java @@ -49,12 +49,6 @@ import java.util.concurrent.CompletableFuture; /** - * TODO list: - * - Add abstract class + tests for LocalFS - * - Add tests for this class - * - Add documentation - * - `filesystem.md` - * *

Implementation of CopyFromLocalOperation

*

* This operation copies a file or directory (recursively) from a local @@ -221,6 +215,11 @@ private void uploadSourceFromFS() throws IOException { markedForUpload.add(uploadEntry); } + // No files found, it's empty source directory + if (entries.isEmpty()) { + emptyDirs.add(source); + } + // Shuffle all remaining entries and upload them entries.removeAll(markedForUpload); Collections.shuffle(entries); @@ -229,7 +228,6 @@ private void uploadSourceFromFS() throws IOException { activeOps.add(submitUpload(file, uploadEntry)); } - // TODO: Add test that checks the number of calls for empty dirs has been made for (Path emptyDir : emptyDirs) { Path emptyDirPath = getFinalPath(emptyDir); activeOps.add(submitCreateEmptyDir(emptyDirPath)); @@ -305,7 +303,7 @@ private void checkDestination( return; } - if (src.isFile() && getDestStatus().get().isDirectory()) { + if (src.isDirectory() && getDestStatus().get().isFile()) { throw new PathExistsException( "Source '" + src.getPath() +"' is file and " + "destination '" + dest + "' is directory"); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java index 589d79d6a5d2c..cecfa2e50f4e4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java @@ -19,11 +19,17 @@ package org.apache.hadoop.fs.s3a; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.PathExistsException; import org.apache.hadoop.fs.contract.AbstractContractCopyFromLocalTest; import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.contract.s3a.S3AContract; import org.apache.hadoop.fs.Path; +import org.junit.Test; + +import java.io.File; + +import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** * Test {@link S3AFileSystem#copyFromLocalFile(boolean, boolean, Path, Path)}. @@ -37,4 +43,27 @@ public class ITestS3ACopyFromLocalFile extends protected AbstractFSContract createContract(Configuration conf) { return new S3AContract(conf); } + + @Test + public void testLocalFilesOnly() throws Throwable { + Path dst = fileToPath(createTempDirectory("someDir")); + + intercept(IllegalArgumentException.class, + () -> getFileSystem().copyFromLocalFile( + false, true, dst, dst)); + } + + @Test + public void testSourceIsDirectoryAndDestinationIsFile() throws Throwable { + describe("Source is a directory and destination is a file should fail"); + + File file = createTempFile("local"); + File source = createTempDirectory("srcDir"); + Path destination = copyFromLocal(file, false); + Path sourcePath = new Path(source.toURI()); + + intercept(PathExistsException.class, + () -> getFileSystem().copyFromLocalFile(false, true, + sourcePath, destination)); + } } From eb4b314f7746474c3469bcdb794beb27e3cc5b8d Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Wed, 30 Jun 2021 15:44:31 +0100 Subject: [PATCH 11/19] Adding license headers --- .../hadoop/fs/TestLocalFSCopyFromLocal.java | 18 ++++++++++++++++++ .../AbstractContractCopyFromLocalTest.java | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java index 4ec3dbba0d927..3b808f3bbd0f9 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.fs; import org.apache.hadoop.conf.Configuration; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java index d1591df4a7bb0..9ab4bee3020d2 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java @@ -1,3 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.hadoop.fs.contract; import org.apache.commons.io.FileUtils; From 357727923378fdd9acc8b9185dae044e700d52b8 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Tue, 6 Jul 2021 13:23:36 +0100 Subject: [PATCH 12/19] Directory to directory copy improvements Changes: - LocalFS directory to directory copy with overwrite flag is now enabled. Conceptually it should, let's see if it's right or wrong; - S3a directory to directory should behave in the following way now (consistent with LocalFS): If destination exists: - bar/ -> foo/ => foo/bar/ - bar/ -> foo/bar/ => foo/bar/bar/ If destination doesn't exist: - bar/ -> foo/ => foo/ - bar/ -> foo/bar/ => foo/bar --- .../java/org/apache/hadoop/fs/FileUtil.java | 3 ++ .../AbstractContractCopyFromLocalTest.java | 24 ++++++++---- .../fs/s3a/impl/CopyFromLocalOperation.java | 38 ++++++++++++++----- 3 files changed, 48 insertions(+), 17 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java index 63cbd6212b322..6671bf1e50e02 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java @@ -524,6 +524,9 @@ private static Path checkDest(String srcName, FileSystem dstFS, Path dst, if (null != sdst) { if (sdst.isDirectory()) { if (null == srcName) { + if (overwrite) { + return dst; + } throw new PathIsDirectoryException(dst.toString()); } return checkDest(null, dstFS, new Path(dst, srcName), overwrite); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java index 9ab4bee3020d2..9988d1735b759 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java @@ -63,7 +63,6 @@ public void testCopyFile() throws Throwable { Path dest = copyFromLocal(file, true); assertPathExists("uploaded file not found", dest); - // TODO: Should this be assertFileExists? assertTrue("source file deleted", Files.exists(file.toPath())); FileSystem fs = getFileSystem(); @@ -159,19 +158,30 @@ public void testSrcIsEmptyDirWithCopySuccessful() throws Throwable { public void testSrcIsDirWithOverwriteOptions() throws Throwable { describe("Source is a directory, destination exists and" + "should be overwritten."); + // Disabling checksum because overwriting directories does not + // overwrite checksums + FileSystem fs = getFileSystem(); + fs.setVerifyChecksum(false); + File source = createTempDirectory("source"); - String contents = "child file"; + Path sourcePath = new Path(source.toURI()); + String contents = "test file"; File child = createTempFile(source, "child", contents); - copyFromLocal(source, false); - // TODO: Fix local FS + Path dest = path(source.getName()).getParent(); + fs.copyFromLocalFile(sourcePath, dest); intercept(PathExistsException.class, - () -> copyFromLocal(source, false)); + () -> fs.copyFromLocalFile(false, false, + sourcePath, dest)); + + String updated = "updated contents"; + FileUtils.write(child, updated, ASCII); + fs.copyFromLocalFile(false, true, sourcePath, dest); - copyFromLocal(source, true); assertPathExists("Parent directory not copied", fileToPath(source)); assertFileTextEquals(fileToPath(child, source.getParentFile()), - contents); + updated); + getFileSystem().setVerifyChecksum(true); } @Test diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java index 3c98c8f300f66..7a8395638adc9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java @@ -98,19 +98,19 @@ public class CopyFromLocalOperation extends ExecutingStoreOperation { private final Path source; /** - * Destination path, expected to be + * Async operations executor */ - private final Path destination; + private final ListeningExecutorService executor; /** - * Async operations executor + * Destination path */ - private final ListeningExecutorService executor; + private Path destination; /** * Destination file status */ - private FileStatus dstStatus; + private FileStatus destStatus; public CopyFromLocalOperation( final StoreContext storeContext, @@ -144,10 +144,13 @@ public Void execute() throws IOException, PathExistsException { LOG.debug("Copying local file from {} to {}", source, destination); File sourceFile = callbacks.pathToFile(source); - try { - dstStatus = callbacks.getFileStatus(destination); - } catch (FileNotFoundException e) { - dstStatus = null; + updateDestStatus(destination); + + // Handles bar/ -> foo/ => foo/bar and bar/ -> foo/bar/ => foo/bar/bar + if (getDestStatus().isPresent() && getDestStatus().get().isDirectory() + && sourceFile.isDirectory()) { + destination = new Path(destination, sourceFile.getName()); + updateDestStatus(destination); } checkSource(sourceFile); @@ -161,6 +164,21 @@ public Void execute() return null; } + /** + * Does a {@link CopyFromLocalOperationCallbacks#getFileStatus(Path)} + * operation on the provided destination and updates the internal status of + * destPath property + * + * @throws IOException if getFileStatus fails + */ + private void updateDestStatus(Path dest) throws IOException { + try { + destStatus = callbacks.getFileStatus(dest); + } catch (FileNotFoundException e) { + destStatus = null; + } + } + /** * Starts async upload operations for files. Creating an empty directory * classifies as a "file upload". @@ -340,7 +358,7 @@ private Path getFinalPath(Path src) throws PathIOException { } private Optional getDestStatus() { - return Optional.ofNullable(dstStatus); + return Optional.ofNullable(destStatus); } /** From 0b3b197ff0a1cf3c002f0dfbc0972cf0fffc9e97 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Thu, 8 Jul 2021 09:47:04 +0100 Subject: [PATCH 13/19] Fixing identation and spot bugs error --- .../hadoop/fs/TestLocalFSCopyFromLocal.java | 36 +- .../AbstractContractCopyFromLocalTest.java | 508 +++++------ .../apache/hadoop/fs/s3a/S3AFileSystem.java | 54 +- .../fs/s3a/impl/CopyFromLocalOperation.java | 801 +++++++++--------- .../fs/s3a/ITestS3ACopyFromLocalFile.java | 7 +- 5 files changed, 705 insertions(+), 701 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java index 3b808f3bbd0f9..a92a35f3be136 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java @@ -29,22 +29,22 @@ import static org.apache.hadoop.test.LambdaTestUtils.intercept; public class TestLocalFSCopyFromLocal extends AbstractContractCopyFromLocalTest { - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new LocalFSContract(conf); - } - - @Test - public void testSourceIsDirectoryAndDestinationIsFile() throws Throwable { - describe("Source is a directory and destination is a file should fail"); - - File file = createTempFile("local"); - File source = createTempDirectory("srcDir"); - Path destination = copyFromLocal(file, false); - Path sourcePath = new Path(source.toURI()); - - intercept(FileAlreadyExistsException.class, - () -> getFileSystem().copyFromLocalFile(false, true, - sourcePath, destination)); - } + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new LocalFSContract(conf); + } + + @Test + public void testSourceIsDirectoryAndDestinationIsFile() throws Throwable { + describe("Source is a directory and destination is a file should fail"); + + File file = createTempFile("local"); + File source = createTempDirectory("srcDir"); + Path destination = copyFromLocal(file, false); + Path sourcePath = new Path(source.toURI()); + + intercept(FileAlreadyExistsException.class, + () -> getFileSystem().copyFromLocalFile(false, true, + sourcePath, destination)); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java index 9988d1735b759..3ba0bd11b7053 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java @@ -36,260 +36,262 @@ import static org.apache.hadoop.test.LambdaTestUtils.intercept; public abstract class AbstractContractCopyFromLocalTest extends - AbstractFSContractTestBase { + AbstractFSContractTestBase { - private static final Charset ASCII = StandardCharsets.US_ASCII; - private File file; + private static final Charset ASCII = StandardCharsets.US_ASCII; + private File file; - @Override - public void teardown() throws Exception { - super.teardown(); - if (file != null) { - file.delete(); - } - } - - @Test - public void testCopyEmptyFile() throws Throwable { - file = File.createTempFile("test", ".txt"); - Path dest = copyFromLocal(file, true); - assertPathExists("uploaded file", dest); - } - - @Test - public void testCopyFile() throws Throwable { - String message = "hello"; - file = createTempFile(message); - Path dest = copyFromLocal(file, true); - - assertPathExists("uploaded file not found", dest); - assertTrue("source file deleted", Files.exists(file.toPath())); - - FileSystem fs = getFileSystem(); - FileStatus status = fs.getFileStatus(dest); - assertEquals("File length of " + status, - message.getBytes(ASCII).length, status.getLen()); - assertFileTextEquals(dest, message); - } - - @Test - public void testCopyFileNoOverwrite() throws Throwable { - file = createTempFile("hello"); - copyFromLocal(file, true); - intercept(PathExistsException.class, - () -> copyFromLocal(file, false)); - } - - @Test - public void testCopyFileOverwrite() throws Throwable { - file = createTempFile("hello"); - Path dest = copyFromLocal(file, true); - String updated = "updated"; - FileUtils.write(file, updated, ASCII); - copyFromLocal(file, true); - assertFileTextEquals(dest, updated); - } - - @Test - public void testCopyMissingFile() throws Throwable { - file = createTempFile("test"); - file.delete(); - // first upload to create - intercept(FileNotFoundException.class, "", - () -> copyFromLocal(file, true)); - } - - @Test - public void testSourceIsFileAndDelSrcTrue() throws Throwable { - describe("Source is a file delSrc flag is set to true"); - - file = createTempFile("test"); - copyFromLocal(file, false, true); - - assertFalse("uploaded file", Files.exists(file.toPath())); - } - - @Test - public void testSourceIsFileAndDestinationIsDirectory() throws Throwable { - describe("Source is a file and destination is a directory. File" + - "should be copied inside the directory."); - - file = createTempFile("test"); - Path source = new Path(file.toURI()); - FileSystem fs = getFileSystem(); - - File dir = createTempDirectory("test"); - Path destination = fileToPath(dir); - fs.delete(destination, false); - mkdirs(destination); - - fs.copyFromLocalFile(source, destination); - System.out.println("Did this work?"); - } - - @Test - public void testSrcIsDirWithFilesAndCopySuccessful() throws Throwable { - describe("Source is a directory with files, copy should copy all" + - " dir contents to source"); - String firstChild = "childOne"; - String secondChild = "childTwo"; - File parent = createTempDirectory("parent"); - File root = parent.getParentFile(); - File childFile = createTempFile(parent, firstChild, firstChild); - File secondChildFile = createTempFile(parent, secondChild, secondChild); - - copyFromLocal(parent, false); - - assertPathExists("Parent directory not copied", fileToPath(parent)); - assertFileTextEquals(fileToPath(childFile, root), firstChild); - assertFileTextEquals(fileToPath(secondChildFile, root), secondChild); - } - - @Test - public void testSrcIsEmptyDirWithCopySuccessful() throws Throwable { - describe("Source is an empty directory, copy should succeed"); - File source = createTempDirectory("source"); - Path dest = copyFromLocal(source, false); - - assertPathExists("Empty directory not copied", dest); - } - - @Test - public void testSrcIsDirWithOverwriteOptions() throws Throwable { - describe("Source is a directory, destination exists and" + - "should be overwritten."); - // Disabling checksum because overwriting directories does not - // overwrite checksums - FileSystem fs = getFileSystem(); - fs.setVerifyChecksum(false); - - File source = createTempDirectory("source"); - Path sourcePath = new Path(source.toURI()); - String contents = "test file"; - File child = createTempFile(source, "child", contents); - - Path dest = path(source.getName()).getParent(); - fs.copyFromLocalFile(sourcePath, dest); - intercept(PathExistsException.class, - () -> fs.copyFromLocalFile(false, false, - sourcePath, dest)); - - String updated = "updated contents"; - FileUtils.write(child, updated, ASCII); - fs.copyFromLocalFile(false, true, sourcePath, dest); - - assertPathExists("Parent directory not copied", fileToPath(source)); - assertFileTextEquals(fileToPath(child, source.getParentFile()), - updated); - getFileSystem().setVerifyChecksum(true); - } - - @Test - public void testSrcIsDirWithDelSrcOptions() throws Throwable { - describe("Source is a directory, destination exists and" + - "should be overwritten."); - File source = createTempDirectory("source"); - String contents = "child file"; - File child = createTempFile(source, "child", contents); - - copyFromLocal(source, false, true); - Path dest = fileToPath(child, source.getParentFile()); - - assertFalse("Directory not deleted", Files.exists(source.toPath())); - assertFileTextEquals(dest, contents); - } - - /* - * The following path is being created on disk and copied over - * /parent/ (directory) - * /parent/test1.txt - * /parent/child/test.txt - * /parent/secondChild/ (directory) - */ - @Test - public void testCopyTreeDirectoryWithoutDelete() throws Throwable { - File srcDir = createTempDirectory("parent"); - File childDir = createTempDirectory(srcDir, "child"); - File secondChild = createTempDirectory(srcDir, "secondChild"); - File parentFile = createTempFile(srcDir, "test1", ".txt"); - File childFile = createTempFile(childDir, "test2", ".txt"); - - copyFromLocal(srcDir, false, false); - File root = srcDir.getParentFile(); - - assertPathExists("Parent directory", fileToPath(srcDir)); - assertPathExists("Child directory", fileToPath(childDir, root)); - assertPathExists("Second Child directory", fileToPath(secondChild, root)); - assertPathExists("Parent file", fileToPath(parentFile, root)); - assertPathExists("Child file", fileToPath(childFile, root)); - } - - @Test - public void testCopyDirectoryWithDelete() throws Throwable { - java.nio.file.Path srcDir = Files.createTempDirectory("parent"); - Files.createTempFile(srcDir, "test1", ".txt"); - - Path src = new Path(srcDir.toUri()); - Path dst = path(srcDir.getFileName().toString()); - getFileSystem().copyFromLocalFile(true, true, src, dst); - - assertFalse("Source directory should've been deleted", - Files.exists(srcDir)); - } - - protected Path fileToPath(File file) throws IOException { - return path(file.getName()); - } - - protected Path fileToPath(File file, File parent) throws IOException { - return path(parent - .toPath() - .relativize(file.toPath()) - .toString()); - } - - protected File createTempDirectory(String name) throws IOException { - return Files.createTempDirectory(name).toFile(); - } - - protected Path copyFromLocal(File srcFile, boolean overwrite) throws - IOException { - return copyFromLocal(srcFile, overwrite, false); - } - - protected Path copyFromLocal(File srcFile, boolean overwrite, boolean delSrc) - throws IOException { - Path src = new Path(srcFile.toURI()); - Path dst = path(srcFile.getName()); - getFileSystem().copyFromLocalFile(delSrc, overwrite, src, dst); - return dst; - } - - /** - * Create a temp file with some text. - * @param text text for the file - * @return the file - * @throws IOException on a failure - */ - protected File createTempFile(String text) throws IOException { - File f = File.createTempFile("test", ".txt"); - FileUtils.write(f, text, ASCII); - return f; - } - - protected File createTempFile(File parent, String name, String text) throws IOException { - File f = File.createTempFile(name, ".txt", parent); - FileUtils.write(f, text, ASCII); - return f; - } - - private File createTempDirectory(File parent, String name) throws IOException { - return Files.createTempDirectory(parent.toPath(), name).toFile(); - } - - private void assertFileTextEquals(Path path, String expected) - throws IOException { - assertEquals("Wrong data in " + path, - expected, IOUtils.toString(getFileSystem().open(path), ASCII)); + @Override + public void teardown() throws Exception { + super.teardown(); + if (file != null) { + file.delete(); } + } + + @Test + public void testCopyEmptyFile() throws Throwable { + file = File.createTempFile("test", ".txt"); + Path dest = copyFromLocal(file, true); + assertPathExists("uploaded file", dest); + } + + @Test + public void testCopyFile() throws Throwable { + String message = "hello"; + file = createTempFile(message); + Path dest = copyFromLocal(file, true); + + assertPathExists("uploaded file not found", dest); + assertTrue("source file deleted", Files.exists(file.toPath())); + + FileSystem fs = getFileSystem(); + FileStatus status = fs.getFileStatus(dest); + assertEquals("File length of " + status, + message.getBytes(ASCII).length, status.getLen()); + assertFileTextEquals(dest, message); + } + + @Test + public void testCopyFileNoOverwrite() throws Throwable { + file = createTempFile("hello"); + copyFromLocal(file, true); + intercept(PathExistsException.class, + () -> copyFromLocal(file, false)); + } + + @Test + public void testCopyFileOverwrite() throws Throwable { + file = createTempFile("hello"); + Path dest = copyFromLocal(file, true); + String updated = "updated"; + FileUtils.write(file, updated, ASCII); + copyFromLocal(file, true); + assertFileTextEquals(dest, updated); + } + + @Test + public void testCopyMissingFile() throws Throwable { + file = createTempFile("test"); + file.delete(); + // first upload to create + intercept(FileNotFoundException.class, "", + () -> copyFromLocal(file, true)); + } + + @Test + public void testSourceIsFileAndDelSrcTrue() throws Throwable { + describe("Source is a file delSrc flag is set to true"); + + file = createTempFile("test"); + copyFromLocal(file, false, true); + + assertFalse("uploaded file", Files.exists(file.toPath())); + } + + @Test + public void testSourceIsFileAndDestinationIsDirectory() throws Throwable { + describe("Source is a file and destination is a directory. File" + + "should be copied inside the directory."); + + file = createTempFile("test"); + Path source = new Path(file.toURI()); + FileSystem fs = getFileSystem(); + + File dir = createTempDirectory("test"); + Path destination = fileToPath(dir); + fs.delete(destination, false); + mkdirs(destination); + + fs.copyFromLocalFile(source, destination); + System.out.println("Did this work?"); + } + + @Test + public void testSrcIsDirWithFilesAndCopySuccessful() throws Throwable { + describe("Source is a directory with files, copy should copy all" + + " dir contents to source"); + String firstChild = "childOne"; + String secondChild = "childTwo"; + File parent = createTempDirectory("parent"); + File root = parent.getParentFile(); + File childFile = createTempFile(parent, firstChild, firstChild); + File secondChildFile = createTempFile(parent, secondChild, secondChild); + + copyFromLocal(parent, false); + + assertPathExists("Parent directory not copied", fileToPath(parent)); + assertFileTextEquals(fileToPath(childFile, root), firstChild); + assertFileTextEquals(fileToPath(secondChildFile, root), secondChild); + } + + @Test + public void testSrcIsEmptyDirWithCopySuccessful() throws Throwable { + describe("Source is an empty directory, copy should succeed"); + File source = createTempDirectory("source"); + Path dest = copyFromLocal(source, false); + + assertPathExists("Empty directory not copied", dest); + } + + @Test + public void testSrcIsDirWithOverwriteOptions() throws Throwable { + describe("Source is a directory, destination exists and" + + "should be overwritten."); + // Disabling checksum because overwriting directories does not + // overwrite checksums + FileSystem fs = getFileSystem(); + fs.setVerifyChecksum(false); + + File source = createTempDirectory("source"); + Path sourcePath = new Path(source.toURI()); + String contents = "test file"; + File child = createTempFile(source, "child", contents); + + Path dest = path(source.getName()).getParent(); + fs.copyFromLocalFile(sourcePath, dest); + intercept(PathExistsException.class, + () -> fs.copyFromLocalFile(false, false, + sourcePath, dest)); + + String updated = "updated contents"; + FileUtils.write(child, updated, ASCII); + fs.copyFromLocalFile(false, true, sourcePath, dest); + + assertPathExists("Parent directory not copied", fileToPath(source)); + assertFileTextEquals(fileToPath(child, source.getParentFile()), + updated); + getFileSystem().setVerifyChecksum(true); + } + + @Test + public void testSrcIsDirWithDelSrcOptions() throws Throwable { + describe("Source is a directory, destination exists and" + + "should be overwritten."); + File source = createTempDirectory("source"); + String contents = "child file"; + File child = createTempFile(source, "child", contents); + + copyFromLocal(source, false, true); + Path dest = fileToPath(child, source.getParentFile()); + + assertFalse("Directory not deleted", Files.exists(source.toPath())); + assertFileTextEquals(dest, contents); + } + + /* + * The following path is being created on disk and copied over + * /parent/ (directory) + * /parent/test1.txt + * /parent/child/test.txt + * /parent/secondChild/ (directory) + */ + @Test + public void testCopyTreeDirectoryWithoutDelete() throws Throwable { + File srcDir = createTempDirectory("parent"); + File childDir = createTempDirectory(srcDir, "child"); + File secondChild = createTempDirectory(srcDir, "secondChild"); + File parentFile = createTempFile(srcDir, "test1", ".txt"); + File childFile = createTempFile(childDir, "test2", ".txt"); + + copyFromLocal(srcDir, false, false); + File root = srcDir.getParentFile(); + + assertPathExists("Parent directory", fileToPath(srcDir)); + assertPathExists("Child directory", fileToPath(childDir, root)); + assertPathExists("Second Child directory", fileToPath(secondChild, root)); + assertPathExists("Parent file", fileToPath(parentFile, root)); + assertPathExists("Child file", fileToPath(childFile, root)); + } + + @Test + public void testCopyDirectoryWithDelete() throws Throwable { + java.nio.file.Path srcDir = Files.createTempDirectory("parent"); + Files.createTempFile(srcDir, "test1", ".txt"); + + Path src = new Path(srcDir.toUri()); + Path dst = path(srcDir.getFileName().toString()); + getFileSystem().copyFromLocalFile(true, true, src, dst); + + assertFalse("Source directory should've been deleted", + Files.exists(srcDir)); + } + + protected Path fileToPath(File file) throws IOException { + return path(file.getName()); + } + + protected Path fileToPath(File file, File parent) throws IOException { + return path(parent + .toPath() + .relativize(file.toPath()) + .toString()); + } + + protected File createTempDirectory(String name) throws IOException { + return Files.createTempDirectory(name).toFile(); + } + + protected Path copyFromLocal(File srcFile, boolean overwrite) throws + IOException { + return copyFromLocal(srcFile, overwrite, false); + } + + protected Path copyFromLocal(File srcFile, boolean overwrite, boolean delSrc) + throws IOException { + Path src = new Path(srcFile.toURI()); + Path dst = path(srcFile.getName()); + getFileSystem().copyFromLocalFile(delSrc, overwrite, src, dst); + return dst; + } + + /** + * Create a temp file with some text. + * @param text text for the file + * @return the file + * @throws IOException on a failure + */ + protected File createTempFile(String text) throws IOException { + File f = File.createTempFile("test", ".txt"); + FileUtils.write(f, text, ASCII); + return f; + } + + protected File createTempFile(File parent, String name, String text) + throws IOException { + File f = File.createTempFile(name, ".txt", parent); + FileUtils.write(f, text, ASCII); + return f; + } + + private File createTempDirectory(File parent, String name) + throws IOException { + return Files.createTempDirectory(parent.toPath(), name).toFile(); + } + + private void assertFileTextEquals(Path path, String expected) + throws IOException { + assertEquals("Wrong data in " + path, + expected, IOUtils.toString(getFileSystem().open(path), ASCII)); + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 255875e5b2e64..e61ce24778298 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -79,7 +79,6 @@ import com.amazonaws.services.s3.transfer.model.UploadResult; import com.amazonaws.event.ProgressListener; -import org.apache.hadoop.fs.PathExistsException; import org.apache.hadoop.fs.s3a.audit.AuditSpanS3A; import org.apache.hadoop.fs.s3a.impl.CopyFromLocalOperation; import org.apache.hadoop.fs.store.audit.ActiveThreadSpanSource; @@ -3782,36 +3781,36 @@ private boolean s3Exists(final Path path, final Set probes) @Override @AuditEntryPoint public void copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, - Path dst) throws IOException { + Path dst) throws IOException { checkNotClosed(); LOG.debug("Copying local file from {} to {}", src, dst); trackDurationAndSpan(INVOCATION_COPY_FROM_LOCAL_FILE, dst, - () -> new CopyFromLocalOperation( - createStoreContext(), - src, - dst, - delSrc, - overwrite, - createCopyFromLocalCallbacks()).execute()); + () -> new CopyFromLocalOperation( + createStoreContext(), + src, + dst, + delSrc, + overwrite, + createCopyFromLocalCallbacks()).execute()); } protected CopyFromLocalOperation.CopyFromLocalOperationCallbacks - createCopyFromLocalCallbacks() throws IOException { + createCopyFromLocalCallbacks() throws IOException { LocalFileSystem local = getLocal(getConf()); return new CopyFromLocalCallbacksImpl(local); } protected class CopyFromLocalCallbacksImpl implements - CopyFromLocalOperation.CopyFromLocalOperationCallbacks { + CopyFromLocalOperation.CopyFromLocalOperationCallbacks { private final LocalFileSystem local; private CopyFromLocalCallbacksImpl(LocalFileSystem local) { - this.local = local; + this.local = local; } @Override public RemoteIterator listStatusIterator( - final Path path) throws IOException { + final Path path) throws IOException { return local.listLocatedStatus(path); } @@ -3828,21 +3827,20 @@ public boolean delete(Path path, boolean recursive) throws IOException { @Override public void copyFileFromTo(File file, Path from, Path to) throws IOException { trackDurationAndSpan( - OBJECT_PUT_REQUESTS, - to, - () -> { - - final String key = pathToKey(to); - final ObjectMetadata om = newObjectMetadata(file.length()); - Progressable progress = null; - PutObjectRequest putObjectRequest = newPutObjectRequest(key, om, file); - S3AFileSystem.this.invoker.retry( - "putObject(" + "" + ")", to.toString(), - true, - () -> executePut(putObjectRequest, progress)); + OBJECT_PUT_REQUESTS, + to, + () -> { + final String key = pathToKey(to); + final ObjectMetadata om = newObjectMetadata(file.length()); + Progressable progress = null; + PutObjectRequest putObjectRequest = newPutObjectRequest(key, om, file); + S3AFileSystem.this.invoker.retry( + "putObject(" + "" + ")", to.toString(), + true, + () -> executePut(putObjectRequest, progress)); - return null; - }); + return null; + }); } @Override @@ -3852,7 +3850,7 @@ public FileStatus getFileStatus(Path f) throws IOException { @Override public boolean createEmptyDir(Path path) throws IOException { - return S3AFileSystem.this.mkdirs(path); + return S3AFileSystem.this.mkdirs(path); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java index 7a8395638adc9..42e86f2d24a51 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java @@ -26,7 +26,6 @@ import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.s3a.Retries; -import org.apache.hadoop.fs.store.audit.AuditingFunctions; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; @@ -37,6 +36,7 @@ import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.Serializable; import java.net.URI; import java.util.ArrayList; import java.util.Collections; @@ -48,6 +48,10 @@ import java.util.Stack; import java.util.concurrent.CompletableFuture; +import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.submit; +import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletion; +import static org.apache.hadoop.fs.store.audit.AuditingFunctions.callableWithinAuditSpan; + /** *

Implementation of CopyFromLocalOperation

*

@@ -69,447 +73,448 @@ */ public class CopyFromLocalOperation extends ExecutingStoreOperation { - /** - * Largest N files to be uploaded first - */ - private static final int LARGEST_N_FILES = 5; + /** + * Largest N files to be uploaded first + */ + private static final int LARGEST_N_FILES = 5; + + private static final Logger LOG = LoggerFactory.getLogger( + CopyFromLocalOperation.class); + + /** + * Callbacks to be used by this operation for external / IO actions + */ + private final CopyFromLocalOperationCallbacks callbacks; + + /** + * Delete source after operation finishes + */ + private final boolean deleteSource; + + /** + * Overwrite destination files / folders + */ + private final boolean overwrite; + + /** + * Source path to file / directory + */ + private final Path source; + + /** + * Async operations executor + */ + private final ListeningExecutorService executor; + + /** + * Destination path + */ + private Path destination; + + /** + * Destination file status + */ + private FileStatus destStatus; + + public CopyFromLocalOperation( + final StoreContext storeContext, + Path source, + Path destination, + boolean deleteSource, + boolean overwrite, + CopyFromLocalOperationCallbacks callbacks) { + super(storeContext); + this.callbacks = callbacks; + this.deleteSource = deleteSource; + this.overwrite = overwrite; + this.source = source; + this.destination = destination; + this.executor = MoreExecutors.listeningDecorator( + storeContext.createThrottledExecutor(1) + ); + } + + /** + * Executes the {@link CopyFromLocalOperation}. + * + * @throws IOException - if there are any failures with upload or deletion + * of files. Check {@link CopyFromLocalOperationCallbacks} for specifics. + * @throws PathExistsException - if the path exists and no overwrite flag + * is set OR if the source is file and destination is a directory + */ + @Override + @Retries.RetryTranslated + public Void execute() + throws IOException, PathExistsException { + LOG.debug("Copying local file from {} to {}", source, destination); + File sourceFile = callbacks.pathToFile(source); + updateDestStatus(destination); + + // Handles bar/ -> foo/ => foo/bar and bar/ -> foo/bar/ => foo/bar/bar + if (getDestStatus().isPresent() && getDestStatus().get().isDirectory() + && sourceFile.isDirectory()) { + destination = new Path(destination, sourceFile.getName()); + updateDestStatus(destination); + } - private static final Logger LOG = LoggerFactory.getLogger( - CopyFromLocalOperation.class); + checkSource(sourceFile); + checkDestination(destination, sourceFile, overwrite); + uploadSourceFromFS(); - /** - * Callbacks to be used by this operation for external / IO actions - */ - private final CopyFromLocalOperationCallbacks callbacks; - - /** - * Delete source after operation finishes - */ - private final boolean deleteSource; + if (deleteSource) { + callbacks.delete(source, true); + } - /** - * Overwrite destination files / folders - */ - private final boolean overwrite; + return null; + } + + /** + * Does a {@link CopyFromLocalOperationCallbacks#getFileStatus(Path)} + * operation on the provided destination and updates the internal status of + * destPath property + * + * @throws IOException if getFileStatus fails + */ + private void updateDestStatus(Path dest) throws IOException { + try { + destStatus = callbacks.getFileStatus(dest); + } catch (FileNotFoundException e) { + destStatus = null; + } + } + + /** + * Starts async upload operations for files. Creating an empty directory + * classifies as a "file upload". + * + * Check {@link CopyFromLocalOperation} for details on the order of + * operations. + * + * @throws IOException - if listing or upload fail + */ + private void uploadSourceFromFS() throws IOException { + RemoteIterator localFiles = listFilesAndDirs(source); + List> activeOps = new ArrayList<>(); + + // After all files are traversed, this set will contain only emptyDirs + Set emptyDirs = new HashSet<>(); + List entries = new ArrayList<>(); + while (localFiles.hasNext()) { + LocatedFileStatus sourceFile = localFiles.next(); + Path sourceFilePath = sourceFile.getPath(); + + // Directory containing this file / directory isn't empty + emptyDirs.remove(sourceFilePath.getParent()); + + if (sourceFile.isDirectory()) { + emptyDirs.add(sourceFilePath); + continue; + } + + Path destPath = getFinalPath(sourceFilePath); + // UploadEntries: have a destination path, a file size + entries.add(new UploadEntry( + sourceFilePath, + destPath, + sourceFile.getLen())); + } - /** - * Source path to file / directory - */ - private final Path source; + if (localFiles instanceof Closeable) { + IOUtils.closeStream((Closeable) localFiles); + } - /** - * Async operations executor - */ - private final ListeningExecutorService executor; + // Sort all upload entries based on size + entries.sort(new ReverseComparator(new UploadEntry.SizeComparator())); - /** - * Destination path - */ - private Path destination; + // Take only top most N entries and upload + final int sortedUploadsCount = Math.min(LARGEST_N_FILES, entries.size()); + List markedForUpload = new ArrayList<>(); - /** - * Destination file status - */ - private FileStatus destStatus; - - public CopyFromLocalOperation( - final StoreContext storeContext, - Path source, - Path destination, - boolean deleteSource, - boolean overwrite, - CopyFromLocalOperationCallbacks callbacks) { - super(storeContext); - this.callbacks = callbacks; - this.deleteSource = deleteSource; - this.overwrite = overwrite; - this.source = source; - this.destination = destination; - this.executor = MoreExecutors.listeningDecorator( - storeContext.createThrottledExecutor(1) - ); + for (int uploadNo = 0; uploadNo < sortedUploadsCount; uploadNo++) { + UploadEntry uploadEntry = entries.get(uploadNo); + File file = callbacks.pathToFile(uploadEntry.source); + activeOps.add(submitUpload(file, uploadEntry)); + markedForUpload.add(uploadEntry); } - /** - * Executes the {@link CopyFromLocalOperation}. - * - * @throws IOException - if there are any failures with upload or deletion - * of files. Check {@link CopyFromLocalOperationCallbacks} for specifics. - * @throws PathExistsException - if the path exists and no overwrite flag - * is set OR if the source is file and destination is a directory - */ - @Override - @Retries.RetryTranslated - public Void execute() - throws IOException, PathExistsException { - LOG.debug("Copying local file from {} to {}", source, destination); - File sourceFile = callbacks.pathToFile(source); - updateDestStatus(destination); - - // Handles bar/ -> foo/ => foo/bar and bar/ -> foo/bar/ => foo/bar/bar - if (getDestStatus().isPresent() && getDestStatus().get().isDirectory() - && sourceFile.isDirectory()) { - destination = new Path(destination, sourceFile.getName()); - updateDestStatus(destination); - } - - checkSource(sourceFile); - checkDestination(destination, sourceFile, overwrite); - uploadSourceFromFS(); - - if (deleteSource) { - callbacks.delete(source, true); - } + // No files found, it's empty source directory + if (entries.isEmpty()) { + emptyDirs.add(source); + } - return null; + // Shuffle all remaining entries and upload them + entries.removeAll(markedForUpload); + Collections.shuffle(entries); + for (UploadEntry uploadEntry : entries) { + File file = callbacks.pathToFile(uploadEntry.source); + activeOps.add(submitUpload(file, uploadEntry)); } - /** - * Does a {@link CopyFromLocalOperationCallbacks#getFileStatus(Path)} - * operation on the provided destination and updates the internal status of - * destPath property - * - * @throws IOException if getFileStatus fails - */ - private void updateDestStatus(Path dest) throws IOException { - try { - destStatus = callbacks.getFileStatus(dest); - } catch (FileNotFoundException e) { - destStatus = null; - } + for (Path emptyDir : emptyDirs) { + Path emptyDirPath = getFinalPath(emptyDir); + activeOps.add(submitCreateEmptyDir(emptyDirPath)); } - /** - * Starts async upload operations for files. Creating an empty directory - * classifies as a "file upload". - * - * Check {@link CopyFromLocalOperation} for details on the order of - * operations. - * - * @throws IOException - if listing or upload fail - */ - private void uploadSourceFromFS() throws IOException { - RemoteIterator localFiles = listFilesAndDirs(source); - List> activeOps = new ArrayList<>(); - - // After all files are traversed, this set will contain only emptyDirs - Set emptyDirs = new HashSet<>(); - List entries = new ArrayList<>(); - while (localFiles.hasNext()) { - LocatedFileStatus sourceFile = localFiles.next(); - Path sourceFilePath = sourceFile.getPath(); - - // Directory containing this file / directory isn't empty - emptyDirs.remove(sourceFilePath.getParent()); - - if (sourceFile.isDirectory()) { - emptyDirs.add(sourceFilePath); - continue; - } - - Path destPath = getFinalPath(sourceFilePath); - // UploadEntries: have a destination path, a file size - entries.add(new UploadEntry( - sourceFilePath, - destPath, - sourceFile.getLen())); + waitForCompletion(activeOps); + } + + /** + * Async call to create an empty directory. + * + * @param dir directory path + * @return the submitted future + */ + private CompletableFuture submitCreateEmptyDir(Path dir) { + return submit(executor, callableWithinAuditSpan( + getAuditSpan(), () -> { + callbacks.createEmptyDir(dir); + return null; } - - if (localFiles instanceof Closeable) { - IOUtils.closeStream((Closeable) localFiles); + )); + } + + /** + * Async call to upload a file. + * + * @param file - File to be uploaded + * @param uploadEntry - Upload entry holding the source and destination + * @return the submitted future + */ + private CompletableFuture submitUpload( + File file, + UploadEntry uploadEntry) { + return submit(executor, callableWithinAuditSpan( + getAuditSpan(), () -> { + callbacks.copyFileFromTo( + file, + uploadEntry.source, + uploadEntry.destination); + return null; } + )); + } + + /** + * Checks the source before upload starts + * + * @param src - Source file + * @throws FileNotFoundException - if the file isn't found + */ + private void checkSource(File src) + throws FileNotFoundException { + if (!src.exists()) { + throw new FileNotFoundException("No file: " + src.getPath()); + } + } + + /** + * Check the destination path and make sure it's compatible with the source, + * i.e. source and destination are both files / directories. + * + * @param dest - Destination path + * @param src - Source file + * @param overwrite - Should source overwrite destination + * @throws PathExistsException - If the destination path exists and no + * overwrite flag is set or source is file and destination is path + */ + private void checkDestination( + Path dest, + File src, + boolean overwrite) throws PathExistsException { + if (!getDestStatus().isPresent()) { + return; + } - // Sort all upload entries based on size - entries.sort(new ReverseComparator(new UploadEntry.SizeComparator())); - - // Take only top most N entries and upload - final int sortedUploadsCount = Math.min(LARGEST_N_FILES, entries.size()); - List markedForUpload = new ArrayList<>(); + if (src.isDirectory() && getDestStatus().get().isFile()) { + throw new PathExistsException( + "Source '" + src.getPath() +"' is file and " + + "destination '" + dest + "' is directory"); + } - for (int uploadNo = 0; uploadNo < sortedUploadsCount; uploadNo++) { - UploadEntry uploadEntry = entries.get(uploadNo); - File file = callbacks.pathToFile(uploadEntry.source); - activeOps.add(submitUpload(file, uploadEntry)); - markedForUpload.add(uploadEntry); - } + if (!overwrite) { + throw new PathExistsException(dest + " already exists"); + } + } + + /** + * Get the final path of a source file with regards to its destination + * @param src - source path + * @return - the final path for the source file to be uploaded to + * @throws PathIOException - if a relative path can't be created + */ + private Path getFinalPath(Path src) throws PathIOException { + URI currentSrcUri = src.toUri(); + URI relativeSrcUri = source.toUri().relativize(currentSrcUri); + if (relativeSrcUri.equals(currentSrcUri)) { + throw new PathIOException("Cannot get relative path"); + } - // No files found, it's empty source directory - if (entries.isEmpty()) { - emptyDirs.add(source); + Optional status = getDestStatus(); + if (!relativeSrcUri.getPath().isEmpty()) { + return new Path(destination, relativeSrcUri.getPath()); + } else if (status.isPresent() && status.get().isDirectory()) { + // file to dir + return new Path(destination, src.getName()); + } else { + // file to file + return destination; + } + } + + private Optional getDestStatus() { + return Optional.ofNullable(destStatus); + } + + /** + * {@link RemoteIterator} which lists all of the files and directories for + * a given path. It's strikingly similar to + * {@link org.apache.hadoop.fs.LocalFileSystem#listFiles(Path, boolean)} + * however with the small addition that it includes directories. + * + * @param path - Path to list files and directories from + * @return - an iterator + * @throws IOException - if listing of a path file fails + */ + private RemoteIterator listFilesAndDirs(Path path) + throws IOException { + return new RemoteIterator() { + private final Stack> iterators = + new Stack<>(); + private RemoteIterator current = + callbacks.listStatusIterator(path); + private LocatedFileStatus curFile; + + @Override + public boolean hasNext() throws IOException { + while (curFile == null) { + if (current.hasNext()) { + handleFileStat(current.next()); + } else if (!iterators.empty()) { + current = iterators.pop(); + } else { + return false; + } } - - // Shuffle all remaining entries and upload them - entries.removeAll(markedForUpload); - Collections.shuffle(entries); - for (UploadEntry uploadEntry : entries) { - File file = callbacks.pathToFile(uploadEntry.source); - activeOps.add(submitUpload(file, uploadEntry)); + return true; + } + + /** + * Process the input stat. + * If it is a file or directory return the file stat. + * If it is a directory, traverse the directory; + * @param stat input status + * @throws IOException if any IO error occurs + */ + private void handleFileStat(LocatedFileStatus stat) + throws IOException { + if (stat.isFile()) { // file + curFile = stat; + } else { // directory + curFile = stat; + iterators.push(current); + current = callbacks.listStatusIterator(stat.getPath()); } - - for (Path emptyDir : emptyDirs) { - Path emptyDirPath = getFinalPath(emptyDir); - activeOps.add(submitCreateEmptyDir(emptyDirPath)); + } + + @Override + public LocatedFileStatus next() throws IOException { + if (hasNext()) { + LocatedFileStatus result = curFile; + curFile = null; + return result; } + throw new java.util.NoSuchElementException("No more entry in " + + path); + } + }; + } + + /** + *

Represents an entry for a file to be uploaded.

+ *

+ * Helpful with sorting files by their size and keeping track of path + * information for the upload. + *

+ */ + private static final class UploadEntry { + private final Path source; + private final Path destination; + private final long size; - CallableSupplier.waitForCompletion(activeOps); + private UploadEntry(Path source, Path destination, long size) { + this.source = source; + this.destination = destination; + this.size = size; } /** - * Async call to create an empty directory. - * - * @param dir directory path - * @return the submitted future + * Compares {@link UploadEntry} objects and produces DESC ordering */ - private CompletableFuture submitCreateEmptyDir(Path dir) { - return CallableSupplier.submit(executor, AuditingFunctions.callableWithinAuditSpan( - getAuditSpan(), () -> { - callbacks.createEmptyDir(dir); - return null; - } - )); + static class SizeComparator implements Comparator, + Serializable { + @Override + public int compare(UploadEntry entry1, UploadEntry entry2) { + return Long.compare(entry1.size, entry2.size); + } } + } + /** + * Define the contract for {@link CopyFromLocalOperation} to interact + * with any external resources. + */ + public interface CopyFromLocalOperationCallbacks { /** - * Async call to upload a file. - * - * @param file - File to be uploaded - * @param uploadEntry - Upload entry holding the source and destination - * @return the submitted future + * List all entries (files AND directories) for a path. + * @param path - path to list + * @return an iterator for all entries + * @throws IOException - for any failure */ - private CompletableFuture submitUpload( - File file, - UploadEntry uploadEntry) { - return CallableSupplier.submit(executor, AuditingFunctions.callableWithinAuditSpan( - getAuditSpan(), () -> { - callbacks.copyFileFromTo( - file, - uploadEntry.source, - uploadEntry.destination); - return null; - } - )); - } - - /** - * Checks the source before upload starts - * - * @param src - Source file - * @throws FileNotFoundException - if the file isn't found - */ - private void checkSource(File src) - throws FileNotFoundException { - if (!src.exists()) { - throw new FileNotFoundException("No file: " + src.getPath()); - } - } + RemoteIterator listStatusIterator(Path path) + throws IOException; /** - * Check the destination path and make sure it's compatible with the source, - * i.e. source and destination are both files / directories. - * - * @param dest - Destination path - * @param src - Source file - * @param overwrite - Should source overwrite destination - * @throws PathExistsException - If the destination path exists and no - * overwrite flag is set or source is file and destination is path + * Get the file status for a path + * @param path - target path + * @return FileStatus + * @throws IOException - for any failure */ - private void checkDestination( - Path dest, - File src, - boolean overwrite) throws PathExistsException { - if (!getDestStatus().isPresent()) { - return; - } - - if (src.isDirectory() && getDestStatus().get().isFile()) { - throw new PathExistsException( - "Source '" + src.getPath() +"' is file and " + - "destination '" + dest + "' is directory"); - } - - if (!overwrite) { - throw new PathExistsException(dest + " already exists"); - } - } + FileStatus getFileStatus(Path path) throws IOException; /** - * Get the final path of a source file with regards to its destination - * @param src - source path - * @return - the final path for the source file to be uploaded to - * @throws PathIOException - if a relative path can't be created + * Get the file from a path + * @param path - target path + * @return file at path */ - private Path getFinalPath(Path src) throws PathIOException { - URI currentSrcUri = src.toUri(); - URI relativeSrcUri = source.toUri().relativize(currentSrcUri); - if (relativeSrcUri.equals(currentSrcUri)) { - throw new PathIOException("Cannot get relative path"); - } - - Optional status = getDestStatus(); - if (!relativeSrcUri.getPath().isEmpty()) { - return new Path(destination, relativeSrcUri.getPath()); - } else if (status.isPresent() && status.get().isDirectory()) { - // file to dir - return new Path(destination, src.getName()); - } else { - // file to file - return destination; - } - } - - private Optional getDestStatus() { - return Optional.ofNullable(destStatus); - } + File pathToFile(Path path); /** - * {@link RemoteIterator} which lists all of the files and directories for - * a given path. It's strikingly similar to - * {@link org.apache.hadoop.fs.LocalFileSystem#listFiles(Path, boolean)} - * however with the small addition that it includes directories. - * - * @param path - Path to list files and directories from - * @return - an iterator - * @throws IOException - if listing of a path file fails + * Delete file / directory at path + * @param path - target path + * @param recursive - recursive deletion + * @return boolean result of operation + * @throws IOException for any failure */ - private RemoteIterator listFilesAndDirs(Path path) - throws IOException { - return new RemoteIterator() { - private final Stack> iterators = - new Stack<>(); - private RemoteIterator current = - callbacks.listStatusIterator(path); - private LocatedFileStatus curFile; - - @Override - public boolean hasNext() throws IOException { - while (curFile == null) { - if (current.hasNext()) { - handleFileStat(current.next()); - } else if (!iterators.empty()) { - current = iterators.pop(); - } else { - return false; - } - } - return true; - } - - /** - * Process the input stat. - * If it is a file or directory return the file stat. - * If it is a directory, traverse the directory; - * @param stat input status - * @throws IOException if any IO error occurs - */ - private void handleFileStat(LocatedFileStatus stat) - throws IOException { - if (stat.isFile()) { // file - curFile = stat; - } else { // directory - curFile = stat; - iterators.push(current); - current = callbacks.listStatusIterator(stat.getPath()); - } - } - - @Override - public LocatedFileStatus next() throws IOException { - if (hasNext()) { - LocatedFileStatus result = curFile; - curFile = null; - return result; - } - throw new java.util.NoSuchElementException("No more entry in " - + path); - } - }; - } + boolean delete(Path path, boolean recursive) throws IOException; /** - *

Represents an entry for a file to be uploaded.

- *

- * Helpful with sorting files by their size and keeping track of path - * information for the upload. - *

+ * Copy / Upload a file from a source path to a destination path + * @param file - target file + * @param source - source path + * @param destination - destination path + * @throws IOException for any failure */ - private static final class UploadEntry { - private final Path source; - private final Path destination; - private final long size; - - private UploadEntry(Path source, Path destination, long size) { - this.source = source; - this.destination = destination; - this.size = size; - } - - /** - * Compares {@link UploadEntry} objects and produces DESC ordering - */ - static class SizeComparator implements Comparator { - @Override - public int compare(UploadEntry entry1, UploadEntry entry2) { - return Long.compare(entry1.size, entry2.size); - } - } - } + void copyFileFromTo( + File file, + Path source, + Path destination) throws IOException; /** - * Define the contract for {@link CopyFromLocalOperation} to interact - * with any external resources. + * Create empty directory at path. Most likely an upload operation + * @param path - target path + * @return boolean result of operation + * @throws IOException for any failure */ - public interface CopyFromLocalOperationCallbacks { - /** - * List all entries (files AND directories) for a path. - * @param path - path to list - * @return an iterator for all entries - * @throws IOException - for any failure - */ - RemoteIterator listStatusIterator(Path path) - throws IOException; - - /** - * Get the file status for a path - * @param path - target path - * @return FileStatus - * @throws IOException - for any failure - */ - FileStatus getFileStatus(Path path) throws IOException; - - /** - * Get the file from a path - * @param path - target path - * @return file at path - */ - File pathToFile(Path path); - - /** - * Delete file / directory at path - * @param path - target path - * @param recursive - recursive deletion - * @return boolean result of operation - * @throws IOException for any failure - */ - boolean delete(Path path, boolean recursive) throws IOException; - - /** - * Copy / Upload a file from a source path to a destination path - * @param file - target file - * @param source - source path - * @param destination - destination path - * @throws IOException for any failure - */ - void copyFileFromTo( - File file, - Path source, - Path destination) throws IOException; - - /** - * Create empty directory at path. Most likely an upload operation - * @param path - target path - * @return boolean result of operation - * @throws IOException for any failure - */ - boolean createEmptyDir(Path path) throws IOException; - } + boolean createEmptyDir(Path path) throws IOException; + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java index cecfa2e50f4e4..8fca3ab4f6abe 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java @@ -49,8 +49,7 @@ public void testLocalFilesOnly() throws Throwable { Path dst = fileToPath(createTempDirectory("someDir")); intercept(IllegalArgumentException.class, - () -> getFileSystem().copyFromLocalFile( - false, true, dst, dst)); + () -> getFileSystem().copyFromLocalFile(false, true, dst, dst)); } @Test @@ -63,7 +62,7 @@ public void testSourceIsDirectoryAndDestinationIsFile() throws Throwable { Path sourcePath = new Path(source.toURI()); intercept(PathExistsException.class, - () -> getFileSystem().copyFromLocalFile(false, true, - sourcePath, destination)); + () -> getFileSystem().copyFromLocalFile(false, true, + sourcePath, destination)); } } From 3ef0e9938d6fa0f72ab81544dbd8d14594fc71e5 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Thu, 8 Jul 2021 10:02:24 +0100 Subject: [PATCH 14/19] Trying to reboot yetus --- .../hadoop/fs/contract/AbstractContractCopyFromLocalTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java index 3ba0bd11b7053..d64f51f50e3f7 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java @@ -92,6 +92,7 @@ public void testCopyFileOverwrite() throws Throwable { @Test public void testCopyMissingFile() throws Throwable { + describe("Copying a file that's not there should fail."); file = createTempFile("test"); file.delete(); // first upload to create From 0b09425b2760a3d01ed90e124598e3249559e6db Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Thu, 8 Jul 2021 14:12:33 +0100 Subject: [PATCH 15/19] Updating `filesystem.md` documentation - Updates to `filesystem.md`; - Cleanup code; - Add missed test case; --- .../site/markdown/filesystem/filesystem.md | 69 +++++++++++++++++++ .../AbstractContractCopyFromLocalTest.java | 19 ++++- 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md index a5a35df30c0b5..96f9b62306057 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md @@ -1419,6 +1419,75 @@ operations related to the part of the file being truncated is undefined. +### `boolean copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst)` + +The source file or directory at `src` is on the local disk and is copied into the file system at +destination `dst`. If the source should be deleted after the move then `delSrc` flag needs to be +set to TRUE. If destination already exists, and the destination contents should be overwritten +then `overwrite` flag should be set to TRUE. + +#### Preconditions + +The source file or directory must exist: + + if not exists(FS, src) : raise FileNotFoundException + +Directories cannot be copied into files regardless to what the overwrite flag is set to: + + if isDir(FS, src) && isFile(FS, dst) : raise PathExistsException + +If destination exists and the above precondition holds then the overwrite flag must be set to TRUE +for the operation to succeed. This will also overwrite any files / directories at the destination: + + if exists(FS, dst) && not overwrite : raise PathExistsException + +#### Postconditions +Copying a file into an existing directory at destination with a non-existing file at destination + + if isFile(fs, src) and not exists(FS, dst) => success + +Copying a file into an existing directory at destination with an existing file at destination and +overwrite set to TRUE + + if isFile(FS, src) and overwrite and exists(FS, dst) => success + + +Copying a file into a non-existent directory. POSIX file systems would fail this operation, HDFS +allows this to happen creating all the directories in the destination path. + + if isFile(FS, src) and not exists(FS, parent(dst)) => success + +Copying directory into destination directory - last part of the destination path doesn't exist e.g. +`/src/bar/ -> /dst/foo/ => /dst/foo/` with the precondition that `/dst/` exists but `/dst/foo/` +doesn't: + + if isDir(FS, src) and not exists(FS, dst) => success + +Copying directory into destination directory - last part of the destination path exists e.g. +`/src/bar/ -> /dst/foo/ => /dst/foo/bar/` with the precondition that `/dst/foo/` exists but +`/dst/foo/bar/` doesn't: + + if isDir(FS, src) and exists(FS, dst) => success + +Copying a directory into a destination directory - last part of destination path and source directory +name exist e.g. `/src/foo/ -> /dst/` with the precondition that `/dst/foo/` exists. This operation +will only succeed if the overwrite flag is set to TRUE + + if isDir(FS, src) and exists(FS, dst) and overwrite => success + +For all operations if the `delSrc` flag is set to TRUE then the source will be deleted. If source +is a directory then it will be recursively deleted. + +#### Implementation + +The default HDFS implementation, is to recurse through each file and folder, found at `src`, and +copy them sequentially to their final destination (relative to `dst`). + +Object store based file systems should be mindful of what limitations arise from the above +implementation and could take advantage of parallel uploads and possible re-ordering of files copied +into the store to maximize throughput. + + ## interface `RemoteIterator` The `RemoteIterator` interface is used as a remote-access equivalent diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java index d64f51f50e3f7..9b0b527e0cff3 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java @@ -125,7 +125,24 @@ public void testSourceIsFileAndDestinationIsDirectory() throws Throwable { mkdirs(destination); fs.copyFromLocalFile(source, destination); - System.out.println("Did this work?"); + } + + @Test + public void testSourceIsFileAndDestinationIsNonExistentDirectory() + throws Throwable { + describe("Source is a file and destination directory does not exist. " + + "Copy operation should still work."); + + file = createTempFile("test"); + Path source = new Path(file.toURI()); + FileSystem fs = getFileSystem(); + + File dir = createTempDirectory("test"); + Path destination = fileToPath(dir); + fs.delete(destination, false); + + fs.copyFromLocalFile(source, destination); + assertPathExists("Destination should exist.", destination); } @Test From bff3d889d5218bfe69a5d3c17bcd1db68e37eef1 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Fri, 16 Jul 2021 18:25:07 +0100 Subject: [PATCH 16/19] Addressing PR comments Mostly changes to documentation and test cases. Next PR will be more focused on updating the `filesystem.md` spec and adding the remaining test cases --- .../site/markdown/filesystem/filesystem.md | 13 +-- .../hadoop/fs/TestLocalFSCopyFromLocal.java | 14 --- .../AbstractContractCopyFromLocalTest.java | 91 ++++++++++------ .../apache/hadoop/fs/s3a/S3AFileSystem.java | 2 +- .../fs/s3a/impl/CopyFromLocalOperation.java | 100 ++++++++++-------- .../fs/s3a/ITestS3ACopyFromLocalFile.java | 17 --- 6 files changed, 122 insertions(+), 115 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md index 96f9b62306057..bd52c6add2aad 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md @@ -1422,9 +1422,9 @@ operations related to the part of the file being truncated is undefined. ### `boolean copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst)` The source file or directory at `src` is on the local disk and is copied into the file system at -destination `dst`. If the source should be deleted after the move then `delSrc` flag needs to be -set to TRUE. If destination already exists, and the destination contents should be overwritten -then `overwrite` flag should be set to TRUE. +destination `dst`. If the source must be deleted after the move then `delSrc` flag must be +set to TRUE. If destination already exists, and the destination contents must be overwritten +then `overwrite` flag must be set to TRUE. #### Preconditions @@ -1434,10 +1434,11 @@ The source file or directory must exist: Directories cannot be copied into files regardless to what the overwrite flag is set to: - if isDir(FS, src) && isFile(FS, dst) : raise PathExistsException + if isDir(FS, src) and isFile(FS, dst) : raise PathExistsException -If destination exists and the above precondition holds then the overwrite flag must be set to TRUE -for the operation to succeed. This will also overwrite any files / directories at the destination: +For all cases, except the one for which the above precondition throws, the overwrite flag must be +set to TRUE for the operation to succeed. This will also overwrite any files / directories at the +destination: if exists(FS, dst) && not overwrite : raise PathExistsException diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java index a92a35f3be136..242c0b7815a89 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java @@ -33,18 +33,4 @@ public class TestLocalFSCopyFromLocal extends AbstractContractCopyFromLocalTest protected AbstractFSContract createContract(Configuration conf) { return new LocalFSContract(conf); } - - @Test - public void testSourceIsDirectoryAndDestinationIsFile() throws Throwable { - describe("Source is a directory and destination is a file should fail"); - - File file = createTempFile("local"); - File source = createTempDirectory("srcDir"); - Path destination = copyFromLocal(file, false); - Path sourcePath = new Path(source.toURI()); - - intercept(FileAlreadyExistsException.class, - () -> getFileSystem().copyFromLocalFile(false, true, - sourcePath, destination)); - } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java index 9b0b527e0cff3..6d8c70b236b19 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java @@ -18,14 +18,6 @@ package org.apache.hadoop.fs.contract; -import org.apache.commons.io.FileUtils; -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathExistsException; -import org.junit.Test; - import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; @@ -33,6 +25,16 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; +import org.junit.Test; + +import org.apache.commons.io.FileUtils; +import org.apache.commons.io.IOUtils; +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathExistsException; + import static org.apache.hadoop.test.LambdaTestUtils.intercept; public abstract class AbstractContractCopyFromLocalTest extends @@ -53,7 +55,7 @@ public void teardown() throws Exception { public void testCopyEmptyFile() throws Throwable { file = File.createTempFile("test", ".txt"); Path dest = copyFromLocal(file, true); - assertPathExists("uploaded file", dest); + assertPathExists("uploaded file not found", dest); } @Test @@ -67,7 +69,7 @@ public void testCopyFile() throws Throwable { FileSystem fs = getFileSystem(); FileStatus status = fs.getFileStatus(dest); - assertEquals("File length of " + status, + assertEquals("File length not equal " + status, message.getBytes(ASCII).length, status.getLen()); assertFileTextEquals(dest, message); } @@ -92,7 +94,7 @@ public void testCopyFileOverwrite() throws Throwable { @Test public void testCopyMissingFile() throws Throwable { - describe("Copying a file that's not there should fail."); + describe("Copying a file that's not there must fail."); file = createTempFile("test"); file.delete(); // first upload to create @@ -107,31 +109,34 @@ public void testSourceIsFileAndDelSrcTrue() throws Throwable { file = createTempFile("test"); copyFromLocal(file, false, true); - assertFalse("uploaded file", Files.exists(file.toPath())); + assertFalse("Source file not deleted", Files.exists(file.toPath())); } @Test public void testSourceIsFileAndDestinationIsDirectory() throws Throwable { describe("Source is a file and destination is a directory. File" + - "should be copied inside the directory."); + "must be copied inside the directory."); file = createTempFile("test"); Path source = new Path(file.toURI()); FileSystem fs = getFileSystem(); - File dir = createTempDirectory("test"); Path destination = fileToPath(dir); + + // Make sure there's nothing already existing at destination fs.delete(destination, false); mkdirs(destination); - fs.copyFromLocalFile(source, destination); + + Path expectedFile = path(dir.getName() + "/" + source.getName()); + assertPathExists("File not copied into directory", expectedFile); } @Test public void testSourceIsFileAndDestinationIsNonExistentDirectory() throws Throwable { describe("Source is a file and destination directory does not exist. " + - "Copy operation should still work."); + "Copy operation must still work."); file = createTempFile("test"); Path source = new Path(file.toURI()); @@ -140,15 +145,16 @@ public void testSourceIsFileAndDestinationIsNonExistentDirectory() File dir = createTempDirectory("test"); Path destination = fileToPath(dir); fs.delete(destination, false); + assertPathDoesNotExist("Destination not deleted", destination); fs.copyFromLocalFile(source, destination); - assertPathExists("Destination should exist.", destination); + assertPathExists("Destination doesn't exist.", destination); } @Test public void testSrcIsDirWithFilesAndCopySuccessful() throws Throwable { - describe("Source is a directory with files, copy should copy all" + - " dir contents to source"); + describe("Source is a directory with files, copy must copy all" + + " dir contents to destination"); String firstChild = "childOne"; String secondChild = "childTwo"; File parent = createTempDirectory("parent"); @@ -165,7 +171,7 @@ public void testSrcIsDirWithFilesAndCopySuccessful() throws Throwable { @Test public void testSrcIsEmptyDirWithCopySuccessful() throws Throwable { - describe("Source is an empty directory, copy should succeed"); + describe("Source is an empty directory, copy must succeed"); File source = createTempDirectory("source"); Path dest = copyFromLocal(source, false); @@ -175,12 +181,9 @@ public void testSrcIsEmptyDirWithCopySuccessful() throws Throwable { @Test public void testSrcIsDirWithOverwriteOptions() throws Throwable { describe("Source is a directory, destination exists and" + - "should be overwritten."); - // Disabling checksum because overwriting directories does not - // overwrite checksums - FileSystem fs = getFileSystem(); - fs.setVerifyChecksum(false); + "must be overwritten."); + FileSystem fs = getFileSystem(); File source = createTempDirectory("source"); Path sourcePath = new Path(source.toURI()); String contents = "test file"; @@ -194,18 +197,17 @@ public void testSrcIsDirWithOverwriteOptions() throws Throwable { String updated = "updated contents"; FileUtils.write(child, updated, ASCII); - fs.copyFromLocalFile(false, true, sourcePath, dest); + fs.copyFromLocalFile(sourcePath, dest); assertPathExists("Parent directory not copied", fileToPath(source)); assertFileTextEquals(fileToPath(child, source.getParentFile()), updated); - getFileSystem().setVerifyChecksum(true); } @Test public void testSrcIsDirWithDelSrcOptions() throws Throwable { - describe("Source is a directory, destination exists and" + - "should be overwritten."); + describe("Source is a directory containing a file and delSrc flag is set" + + ", this must delete the source after the copy."); File source = createTempDirectory("source"); String contents = "child file"; File child = createTempFile(source, "child", contents); @@ -235,11 +237,16 @@ public void testCopyTreeDirectoryWithoutDelete() throws Throwable { copyFromLocal(srcDir, false, false); File root = srcDir.getParentFile(); - assertPathExists("Parent directory", fileToPath(srcDir)); - assertPathExists("Child directory", fileToPath(childDir, root)); - assertPathExists("Second Child directory", fileToPath(secondChild, root)); - assertPathExists("Parent file", fileToPath(parentFile, root)); - assertPathExists("Child file", fileToPath(childFile, root)); + assertPathExists("Parent directory not found", + fileToPath(srcDir)); + assertPathExists("Child directory not found", + fileToPath(childDir, root)); + assertPathExists("Second child directory not found", + fileToPath(secondChild, root)); + assertPathExists("Parent file not found", + fileToPath(parentFile, root)); + assertPathExists("Child file not found", + fileToPath(childFile, root)); } @Test @@ -251,10 +258,24 @@ public void testCopyDirectoryWithDelete() throws Throwable { Path dst = path(srcDir.getFileName().toString()); getFileSystem().copyFromLocalFile(true, true, src, dst); - assertFalse("Source directory should've been deleted", + assertFalse("Source directory was not deleted", Files.exists(srcDir)); } + @Test + public void testSourceIsDirectoryAndDestinationIsFile() throws Throwable { + describe("Source is a directory and destination is a file must fail"); + + File file = createTempFile("local"); + File source = createTempDirectory("srcDir"); + Path destination = copyFromLocal(file, false); + Path sourcePath = new Path(source.toURI()); + + intercept(FileAlreadyExistsException.class, + () -> getFileSystem().copyFromLocalFile(false, true, + sourcePath, destination)); + } + protected Path fileToPath(File file) throws IOException { return path(file.getName()); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index e61ce24778298..08e07b1e410eb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3825,7 +3825,7 @@ public boolean delete(Path path, boolean recursive) throws IOException { } @Override - public void copyFileFromTo(File file, Path from, Path to) throws IOException { + public void copyLocalFileFromTo(File file, Path from, Path to) throws IOException { trackDurationAndSpan( OBJECT_PUT_REQUESTS, to, diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java index 42e86f2d24a51..ba916ebcfc188 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java @@ -18,20 +18,6 @@ package org.apache.hadoop.fs.s3a.impl; -import org.apache.commons.collections.comparators.ReverseComparator; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathExistsException; -import org.apache.hadoop.fs.PathIOException; -import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.fs.s3a.Retries; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; -import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - import java.io.Closeable; import java.io.File; import java.io.FileNotFoundException; @@ -48,17 +34,33 @@ import java.util.Stack; import java.util.concurrent.CompletableFuture; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.commons.collections.comparators.ReverseComparator; +import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathExistsException; +import org.apache.hadoop.fs.PathIOException; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.Retries; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService; +import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.MoreExecutors; + import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.submit; import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletion; import static org.apache.hadoop.fs.store.audit.AuditingFunctions.callableWithinAuditSpan; /** - *

Implementation of CopyFromLocalOperation

+ * Implementation of CopyFromLocalOperation. *

- * This operation copies a file or directory (recursively) from a local - * FS to an object store. Initially, this operation has been developed for - * S3 (s3a) interaction, however, there's minimal work needed for it to - * work with other stores. + * This operation copies a file or directory (recursively) from a local + * FS to an object store. Initially, this operation has been developed for + * S3 (s3a) interaction, however, there's minimal work needed for it to + * work with other stores. *

*

How the uploading of files works:

*
    @@ -74,7 +76,7 @@ public class CopyFromLocalOperation extends ExecutingStoreOperation { /** - * Largest N files to be uploaded first + * Largest N files to be uploaded first. */ private static final int LARGEST_N_FILES = 5; @@ -129,6 +131,9 @@ public CopyFromLocalOperation( this.overwrite = overwrite; this.source = source; this.destination = destination; + + // Capacity of 1 is a safe default for now since transfer manager can also + // spawn threads when uploading bigger files. this.executor = MoreExecutors.listeningDecorator( storeContext.createThrottledExecutor(1) ); @@ -137,10 +142,10 @@ public CopyFromLocalOperation( /** * Executes the {@link CopyFromLocalOperation}. * - * @throws IOException - if there are any failures with upload or deletion - * of files. Check {@link CopyFromLocalOperationCallbacks} for specifics. + * @throws IOException - if there are any failures with upload or deletion + * of files. Check {@link CopyFromLocalOperationCallbacks} for specifics. * @throws PathExistsException - if the path exists and no overwrite flag - * is set OR if the source is file and destination is a directory + * is set OR if the source is file and destination is a directory */ @Override @Retries.RetryTranslated @@ -154,6 +159,7 @@ public Void execute() if (getDestStatus().isPresent() && getDestStatus().get().isDirectory() && sourceFile.isDirectory()) { destination = new Path(destination, sourceFile.getName()); + LOG.debug("Destination updated to: {}", destination); updateDestStatus(destination); } @@ -276,7 +282,7 @@ private CompletableFuture submitCreateEmptyDir(Path dir) { /** * Async call to upload a file. * - * @param file - File to be uploaded + * @param file - File to be uploaded * @param uploadEntry - Upload entry holding the source and destination * @return the submitted future */ @@ -285,7 +291,7 @@ private CompletableFuture submitUpload( UploadEntry uploadEntry) { return submit(executor, callableWithinAuditSpan( getAuditSpan(), () -> { - callbacks.copyFileFromTo( + callbacks.copyLocalFileFromTo( file, uploadEntry.source, uploadEntry.destination); @@ -295,7 +301,7 @@ private CompletableFuture submitUpload( } /** - * Checks the source before upload starts + * Checks the source before upload starts. * * @param src - Source file * @throws FileNotFoundException - if the file isn't found @@ -311,24 +317,26 @@ private void checkSource(File src) * Check the destination path and make sure it's compatible with the source, * i.e. source and destination are both files / directories. * - * @param dest - Destination path - * @param src - Source file + * @param dest - Destination path + * @param src - Source file * @param overwrite - Should source overwrite destination * @throws PathExistsException - If the destination path exists and no - * overwrite flag is set or source is file and destination is path + * overwrite flag is set + * @throws FileAlreadyExistsException - If source is file and destination is path */ private void checkDestination( Path dest, File src, - boolean overwrite) throws PathExistsException { + boolean overwrite) throws PathExistsException, + FileAlreadyExistsException { if (!getDestStatus().isPresent()) { return; } if (src.isDirectory() && getDestStatus().get().isFile()) { - throw new PathExistsException( - "Source '" + src.getPath() +"' is file and " + - "destination '" + dest + "' is directory"); + throw new FileAlreadyExistsException( + "Source '" + src.getPath() + "' is directory and " + + "destination '" + dest + "' is file"); } if (!overwrite) { @@ -337,7 +345,8 @@ private void checkDestination( } /** - * Get the final path of a source file with regards to its destination + * Get the final path of a source file with regards to its destination. + * * @param src - source path * @return - the final path for the source file to be uploaded to * @throws PathIOException - if a relative path can't be created @@ -346,7 +355,8 @@ private Path getFinalPath(Path src) throws PathIOException { URI currentSrcUri = src.toUri(); URI relativeSrcUri = source.toUri().relativize(currentSrcUri); if (relativeSrcUri.equals(currentSrcUri)) { - throw new PathIOException("Cannot get relative path"); + throw new PathIOException("Cannot get relative path for URI:" + + relativeSrcUri); } Optional status = getDestStatus(); @@ -430,10 +440,10 @@ public LocatedFileStatus next() throws IOException { } /** - *

    Represents an entry for a file to be uploaded.

    + *

    Represents an entry for a file to be moved.

    *

    - * Helpful with sorting files by their size and keeping track of path - * information for the upload. + * Helpful with sorting files by their size and keeping track of path + * information for the upload. *

    */ private static final class UploadEntry { @@ -466,6 +476,7 @@ public int compare(UploadEntry entry1, UploadEntry entry2) { public interface CopyFromLocalOperationCallbacks { /** * List all entries (files AND directories) for a path. + * * @param path - path to list * @return an iterator for all entries * @throws IOException - for any failure @@ -475,6 +486,7 @@ RemoteIterator listStatusIterator(Path path) /** * Get the file status for a path + * * @param path - target path * @return FileStatus * @throws IOException - for any failure @@ -483,6 +495,7 @@ RemoteIterator listStatusIterator(Path path) /** * Get the file from a path + * * @param path - target path * @return file at path */ @@ -490,7 +503,8 @@ RemoteIterator listStatusIterator(Path path) /** * Delete file / directory at path - * @param path - target path + * + * @param path - target path * @param recursive - recursive deletion * @return boolean result of operation * @throws IOException for any failure @@ -499,18 +513,20 @@ RemoteIterator listStatusIterator(Path path) /** * Copy / Upload a file from a source path to a destination path - * @param file - target file - * @param source - source path + * + * @param file - target file + * @param source - source path * @param destination - destination path * @throws IOException for any failure */ - void copyFileFromTo( + void copyLocalFileFromTo( File file, Path source, Path destination) throws IOException; /** * Create empty directory at path. Most likely an upload operation + * * @param path - target path * @return boolean result of operation * @throws IOException for any failure diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java index 8fca3ab4f6abe..c8a6abf8df1bc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java @@ -19,7 +19,6 @@ package org.apache.hadoop.fs.s3a; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.PathExistsException; import org.apache.hadoop.fs.contract.AbstractContractCopyFromLocalTest; import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.contract.s3a.S3AContract; @@ -27,8 +26,6 @@ import org.apache.hadoop.fs.Path; import org.junit.Test; -import java.io.File; - import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -51,18 +48,4 @@ public void testLocalFilesOnly() throws Throwable { intercept(IllegalArgumentException.class, () -> getFileSystem().copyFromLocalFile(false, true, dst, dst)); } - - @Test - public void testSourceIsDirectoryAndDestinationIsFile() throws Throwable { - describe("Source is a directory and destination is a file should fail"); - - File file = createTempFile("local"); - File source = createTempDirectory("srcDir"); - Path destination = copyFromLocal(file, false); - Path sourcePath = new Path(source.toURI()); - - intercept(PathExistsException.class, - () -> getFileSystem().copyFromLocalFile(false, true, - sourcePath, destination)); - } } From 286b7f6b58bfc01383085e66a55ae83c301f80b0 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Wed, 21 Jul 2021 16:26:43 +0100 Subject: [PATCH 17/19] Addressing comments Alright, this should be the last major commit. - `filesystem.md` has been updated appropriately; - java docs updated; - crazy fun test cases added; --- .../site/markdown/filesystem/filesystem.md | 114 ++++++++++++------ .../hadoop/fs/TestLocalFSCopyFromLocal.java | 68 ++++++++++- .../AbstractContractCopyFromLocalTest.java | 8 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 16 ++- .../fs/s3a/impl/CopyFromLocalOperation.java | 56 +++++---- .../fs/s3a/ITestS3ACopyFromLocalFile.java | 17 ++- 6 files changed, 199 insertions(+), 80 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md index bd52c6add2aad..6a3a69f6f38ea 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md @@ -1427,57 +1427,91 @@ set to TRUE. If destination already exists, and the destination contents must be then `overwrite` flag must be set to TRUE. #### Preconditions +Source and destination must be different +```python +if src = dest : raise FileExistsException +``` -The source file or directory must exist: +Destination and source must not be descendants one another +```python +if isDescendant(src, dest) or isDescendant(dest, src) : TODO +``` - if not exists(FS, src) : raise FileNotFoundException +The source file or directory must exist locally: +```python +if not exists(LocalFS, src) : raise FileNotFoundException +``` Directories cannot be copied into files regardless to what the overwrite flag is set to: - if isDir(FS, src) and isFile(FS, dst) : raise PathExistsException +```python +if isDir(LocalFS, src) and isFile(FS, dst) : raise PathExistsException +``` For all cases, except the one for which the above precondition throws, the overwrite flag must be -set to TRUE for the operation to succeed. This will also overwrite any files / directories at the -destination: - - if exists(FS, dst) && not overwrite : raise PathExistsException - -#### Postconditions -Copying a file into an existing directory at destination with a non-existing file at destination - - if isFile(fs, src) and not exists(FS, dst) => success - -Copying a file into an existing directory at destination with an existing file at destination and -overwrite set to TRUE - - if isFile(FS, src) and overwrite and exists(FS, dst) => success - +set to TRUE for the operation to succeed if destination exists. This will also overwrite any files + / directories at the destination: -Copying a file into a non-existent directory. POSIX file systems would fail this operation, HDFS -allows this to happen creating all the directories in the destination path. - - if isFile(FS, src) and not exists(FS, parent(dst)) => success - -Copying directory into destination directory - last part of the destination path doesn't exist e.g. -`/src/bar/ -> /dst/foo/ => /dst/foo/` with the precondition that `/dst/` exists but `/dst/foo/` -doesn't: - - if isDir(FS, src) and not exists(FS, dst) => success - -Copying directory into destination directory - last part of the destination path exists e.g. -`/src/bar/ -> /dst/foo/ => /dst/foo/bar/` with the precondition that `/dst/foo/` exists but -`/dst/foo/bar/` doesn't: - - if isDir(FS, src) and exists(FS, dst) => success +```python +if exists(FS, dst) and not overwrite : raise PathExistsException +``` -Copying a directory into a destination directory - last part of destination path and source directory -name exist e.g. `/src/foo/ -> /dst/` with the precondition that `/dst/foo/` exists. This operation -will only succeed if the overwrite flag is set to TRUE +#### Determining the final name of the copy +Given a base path on the source `base` and a child path `child` where `base` is in +`ancestors(child) + child`: - if isDir(FS, src) and exists(FS, dst) and overwrite => success +```python +def final_name(base, child, dest): + is base = child: + return dest + else: + return dest + childElements(base, child) +``` -For all operations if the `delSrc` flag is set to TRUE then the source will be deleted. If source -is a directory then it will be recursively deleted. +#### Outcome where source is a file `isFile(LocalFS, src)` +For a file, data at destination becomes that of the source. All ancestors are directories. +```python +if isFile(LocalFS, src) and (not exists(FS, dest) or (exists(FS, dest) and overwrite)): + FS' = FS where: + FS'.Files[dest] = LocalFS.Files[src] + FS'.Directories = FS.Directories + ancestors(FS, dest) + LocalFS' = LocalFS where + not delSrc or (delSrc = true and delete(LocalFS, src, false)) +else if isFile(LocalFS, src) and isDir(FS, dest): + FS' = FS where: + let d = final_name(src, dest) + FS'.Files[d] = LocalFS.Files[src] + LocalFS' = LocalFS where: + not delSrc or (delSrc = true and delete(LocalFS, src, false)) +``` +There are no expectations that the file changes are atomic for both local `LocalFS` and remote `FS`. + +#### Outcome where source is a directory `isDir(LocalFS, src)` +```python +if is Dir(LocalFS, src) and (isFile(FS, dest) or isFile(FS, dest + childElements(src))): + raise FileAlreadyExistsException +else if isDir(LocalFS, src): + dest' = dest + if exists(FS, dest) + dest' = dest + childElements(src) + if exists(FS, dest') and not overwrite: + raise PathExistsException + + FS' = FS where: + forall c in descendants(LocalFS, src): + not exists(FS', final_name(c)) or overwrite + and forall c in descendants(LocalFS, src) where isDir(LocalFS, c): + FS'.Directories = FS'.Directories + (dest' + childElements(src, c)) + and forall c in descendants(LocalFS, src) where isFile(LocalFS, c): + FS'.Files[final_name(c, dest')] = LocalFS.Files[c] + LocalFS' = LocalFS where + not delSrc or (delSrc = true and delete(LocalFS, src, true)) +``` +There are no expectations of operation isolation / atomicity. This means files can change +in source or destination while the operation is executing. No guarantees are made for the +final state of the file or directory after a copy other than it is best effort. E.g.: when +copying a directory, one file can be moved from source to destination but there's nothing +stopping the new file at destination being updated while the copy operation is still in place. #### Implementation diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java index 242c0b7815a89..7acb39c6e17db 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/TestLocalFSCopyFromLocal.java @@ -18,13 +18,14 @@ package org.apache.hadoop.fs; +import java.io.File; + +import org.junit.Test; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.contract.AbstractContractCopyFromLocalTest; import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.contract.localfs.LocalFSContract; -import org.junit.Test; - -import java.io.File; import static org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -33,4 +34,65 @@ public class TestLocalFSCopyFromLocal extends AbstractContractCopyFromLocalTest protected AbstractFSContract createContract(Configuration conf) { return new LocalFSContract(conf); } + + @Test + public void testDestinationFileIsToParentDirectory() throws Throwable { + describe("Source is a file and destination is its own parent directory"); + + File file = createTempFile("local"); + Path dest = new Path(file.getParentFile().toURI()); + Path src = new Path(file.toURI()); + + intercept(PathOperationException.class, + () -> getFileSystem().copyFromLocalFile( true, true, src, dest)); + } + + @Test + public void testDestinationDirectoryToSelf() throws Throwable { + describe("Source is a directory and it is copied into itself with " + + "delSrc flag set, destination must not exist"); + + File source = createTempDirectory("srcDir"); + Path dest = new Path(source.toURI()); + getFileSystem().copyFromLocalFile( true, true, dest, dest); + + assertPathDoesNotExist("Source found", dest); + } + + @Test + public void testSourceIntoDestinationSubDirectoryWithDelSrc() throws Throwable { + describe("Copying a parent folder inside a child folder with" + + " delSrc=TRUE"); + File parent = createTempDirectory("parent"); + File child = createTempDirectory(parent, "child"); + + Path src = new Path(parent.toURI()); + Path dest = new Path(child.toURI()); + getFileSystem().copyFromLocalFile(true, true, src, dest); + + assertPathDoesNotExist("Source found", src); + assertPathDoesNotExist("Destination found", dest); + } + + @Test + public void testSourceIntoDestinationSubDirectory() throws Throwable { + describe("Copying a parent folder inside a child folder with" + + " delSrc=FALSE"); + File parent = createTempDirectory("parent"); + File child = createTempDirectory(parent, "child"); + + Path src = new Path(parent.toURI()); + Path dest = new Path(child.toURI()); + getFileSystem().copyFromLocalFile(false, true, src, dest); + + Path recursiveParent = new Path(dest, parent.getName()); + Path recursiveChild = new Path(recursiveParent, child.getName()); + + // This definitely counts as interesting behaviour which needs documented + // Depending on the underlying system this can recurse 15+ times + recursiveParent = new Path(recursiveChild, parent.getName()); + recursiveChild = new Path(recursiveParent, child.getName()); + assertPathExists("Recursive parent not found", recursiveParent); + assertPathExists("Recursive child not found", recursiveChild); + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java index 6d8c70b236b19..e24eb7181ec9f 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/AbstractContractCopyFromLocalTest.java @@ -114,8 +114,8 @@ public void testSourceIsFileAndDelSrcTrue() throws Throwable { @Test public void testSourceIsFileAndDestinationIsDirectory() throws Throwable { - describe("Source is a file and destination is a directory. File" + - "must be copied inside the directory."); + describe("Source is a file and destination is a directory. " + + "File must be copied inside the directory."); file = createTempFile("test"); Path source = new Path(file.toURI()); @@ -180,7 +180,7 @@ public void testSrcIsEmptyDirWithCopySuccessful() throws Throwable { @Test public void testSrcIsDirWithOverwriteOptions() throws Throwable { - describe("Source is a directory, destination exists and" + + describe("Source is a directory, destination exists and " + "must be overwritten."); FileSystem fs = getFileSystem(); @@ -323,7 +323,7 @@ protected File createTempFile(File parent, String name, String text) return f; } - private File createTempDirectory(File parent, String name) + protected File createTempDirectory(File parent, String name) throws IOException { return Files.createTempDirectory(parent.toPath(), name).toFile(); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 08e07b1e410eb..439b8991fe761 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3809,18 +3809,18 @@ private CopyFromLocalCallbacksImpl(LocalFileSystem local) { } @Override - public RemoteIterator listStatusIterator( + public RemoteIterator listLocalStatusIterator( final Path path) throws IOException { return local.listLocatedStatus(path); } @Override - public File pathToFile(Path path) { + public File pathToLocalFile(Path path) { return local.pathToFile(path); } @Override - public boolean delete(Path path, boolean recursive) throws IOException { + public boolean deleteLocal(Path path, boolean recursive) throws IOException { return local.delete(path, recursive); } @@ -3849,8 +3849,14 @@ public FileStatus getFileStatus(Path f) throws IOException { } @Override - public boolean createEmptyDir(Path path) throws IOException { - return S3AFileSystem.this.mkdirs(path); + public boolean createEmptyDir(Path path, StoreContext storeContext) + throws IOException { + return trackDuration(getDurationTrackerFactory(), + INVOCATION_MKDIRS.getSymbol(), + new MkdirOperation( + storeContext, + path, + createMkdirOperationCallbacks())); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java index ba916ebcfc188..0a665cd33f280 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/CopyFromLocalOperation.java @@ -29,6 +29,7 @@ import java.util.Comparator; import java.util.HashSet; import java.util.List; +import java.util.NoSuchElementException; import java.util.Optional; import java.util.Set; import java.util.Stack; @@ -84,37 +85,37 @@ public class CopyFromLocalOperation extends ExecutingStoreOperation { CopyFromLocalOperation.class); /** - * Callbacks to be used by this operation for external / IO actions + * Callbacks to be used by this operation for external / IO actions. */ private final CopyFromLocalOperationCallbacks callbacks; /** - * Delete source after operation finishes + * Delete source after operation finishes. */ private final boolean deleteSource; /** - * Overwrite destination files / folders + * Overwrite destination files / folders. */ private final boolean overwrite; /** - * Source path to file / directory + * Source path to file / directory. */ private final Path source; /** - * Async operations executor + * Async operations executor. */ private final ListeningExecutorService executor; /** - * Destination path + * Destination path. */ private Path destination; /** - * Destination file status + * Destination file status. */ private FileStatus destStatus; @@ -152,7 +153,7 @@ public CopyFromLocalOperation( public Void execute() throws IOException, PathExistsException { LOG.debug("Copying local file from {} to {}", source, destination); - File sourceFile = callbacks.pathToFile(source); + File sourceFile = callbacks.pathToLocalFile(source); updateDestStatus(destination); // Handles bar/ -> foo/ => foo/bar and bar/ -> foo/bar/ => foo/bar/bar @@ -168,7 +169,7 @@ public Void execute() uploadSourceFromFS(); if (deleteSource) { - callbacks.delete(source, true); + callbacks.deleteLocal(source, true); } return null; @@ -177,8 +178,9 @@ public Void execute() /** * Does a {@link CopyFromLocalOperationCallbacks#getFileStatus(Path)} * operation on the provided destination and updates the internal status of - * destPath property + * destStatus field. * + * @param dest - destination Path * @throws IOException if getFileStatus fails */ private void updateDestStatus(Path dest) throws IOException { @@ -238,7 +240,7 @@ private void uploadSourceFromFS() throws IOException { for (int uploadNo = 0; uploadNo < sortedUploadsCount; uploadNo++) { UploadEntry uploadEntry = entries.get(uploadNo); - File file = callbacks.pathToFile(uploadEntry.source); + File file = callbacks.pathToLocalFile(uploadEntry.source); activeOps.add(submitUpload(file, uploadEntry)); markedForUpload.add(uploadEntry); } @@ -252,7 +254,7 @@ private void uploadSourceFromFS() throws IOException { entries.removeAll(markedForUpload); Collections.shuffle(entries); for (UploadEntry uploadEntry : entries) { - File file = callbacks.pathToFile(uploadEntry.source); + File file = callbacks.pathToLocalFile(uploadEntry.source); activeOps.add(submitUpload(file, uploadEntry)); } @@ -273,7 +275,7 @@ private void uploadSourceFromFS() throws IOException { private CompletableFuture submitCreateEmptyDir(Path dir) { return submit(executor, callableWithinAuditSpan( getAuditSpan(), () -> { - callbacks.createEmptyDir(dir); + callbacks.createEmptyDir(dir, getStoreContext()); return null; } )); @@ -391,7 +393,7 @@ private RemoteIterator listFilesAndDirs(Path path) private final Stack> iterators = new Stack<>(); private RemoteIterator current = - callbacks.listStatusIterator(path); + callbacks.listLocalStatusIterator(path); private LocatedFileStatus curFile; @Override @@ -422,7 +424,7 @@ private void handleFileStat(LocatedFileStatus stat) } else { // directory curFile = stat; iterators.push(current); - current = callbacks.listStatusIterator(stat.getPath()); + current = callbacks.listLocalStatusIterator(stat.getPath()); } } @@ -433,7 +435,7 @@ public LocatedFileStatus next() throws IOException { curFile = null; return result; } - throw new java.util.NoSuchElementException("No more entry in " + throw new NoSuchElementException("No more entry in " + path); } }; @@ -458,7 +460,7 @@ private UploadEntry(Path source, Path destination, long size) { } /** - * Compares {@link UploadEntry} objects and produces DESC ordering + * Compares {@link UploadEntry} objects and produces DESC ordering. */ static class SizeComparator implements Comparator, Serializable { @@ -481,11 +483,11 @@ public interface CopyFromLocalOperationCallbacks { * @return an iterator for all entries * @throws IOException - for any failure */ - RemoteIterator listStatusIterator(Path path) + RemoteIterator listLocalStatusIterator(Path path) throws IOException; /** - * Get the file status for a path + * Get the file status for a path. * * @param path - target path * @return FileStatus @@ -494,25 +496,25 @@ RemoteIterator listStatusIterator(Path path) FileStatus getFileStatus(Path path) throws IOException; /** - * Get the file from a path + * Get the file from a path. * * @param path - target path * @return file at path */ - File pathToFile(Path path); + File pathToLocalFile(Path path); /** - * Delete file / directory at path + * Delete file / directory at path. * * @param path - target path * @param recursive - recursive deletion * @return boolean result of operation * @throws IOException for any failure */ - boolean delete(Path path, boolean recursive) throws IOException; + boolean deleteLocal(Path path, boolean recursive) throws IOException; /** - * Copy / Upload a file from a source path to a destination path + * Copy / Upload a file from a source path to a destination path. * * @param file - target file * @param source - source path @@ -525,12 +527,14 @@ void copyLocalFileFromTo( Path destination) throws IOException; /** - * Create empty directory at path. Most likely an upload operation + * Create empty directory at path. Most likely an upload operation. * * @param path - target path + * @param storeContext - store context * @return boolean result of operation * @throws IOException for any failure */ - boolean createEmptyDir(Path path) throws IOException; + boolean createEmptyDir(Path path, StoreContext storeContext) + throws IOException; } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java index c8a6abf8df1bc..c34dc35a0f634 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java @@ -18,6 +18,8 @@ package org.apache.hadoop.fs.s3a; +import java.io.File; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.contract.AbstractContractCopyFromLocalTest; import org.apache.hadoop.fs.contract.AbstractFSContract; @@ -43,9 +45,20 @@ protected AbstractFSContract createContract(Configuration conf) { @Test public void testLocalFilesOnly() throws Throwable { - Path dst = fileToPath(createTempDirectory("someDir")); + describe("Copying into other file systems must fail"); + Path dest = fileToPath(createTempDirectory("someDir")); + + intercept(IllegalArgumentException.class, + () -> getFileSystem().copyFromLocalFile(false, true, dest, dest)); + } + + @Test + public void testOnlyFromLocal() throws Throwable { + describe("Copying must be from a local file system"); + File source = createTempFile("someFile"); + Path dest = copyFromLocal(source, true); intercept(IllegalArgumentException.class, - () -> getFileSystem().copyFromLocalFile(false, true, dst, dst)); + () -> getFileSystem().copyFromLocalFile(true, true, dest, dest)); } } From 35b9b08e463493bbb2cc87916dcfc754cbf44d32 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Thu, 22 Jul 2021 16:13:45 +0100 Subject: [PATCH 18/19] Heave yetus heave --- .../org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java index c34dc35a0f634..dfac771dd78dc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACopyFromLocalFile.java @@ -30,11 +30,6 @@ import static org.apache.hadoop.test.LambdaTestUtils.intercept; -/** - * Test {@link S3AFileSystem#copyFromLocalFile(boolean, boolean, Path, Path)}. - * Some of the tests have been disabled pending a fix for HADOOP-15932 and - * recursive directory copying; the test cases themselves may be obsolete. - */ public class ITestS3ACopyFromLocalFile extends AbstractContractCopyFromLocalTest { From 03c85c592bfdbe2f04e4912dc4efcadbf9ee70f1 Mon Sep 17 00:00:00 2001 From: Bogdan Stolojan Date: Thu, 29 Jul 2021 12:29:32 +0100 Subject: [PATCH 19/19] Updating spec --- .../site/markdown/filesystem/filesystem.md | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md index 6a3a69f6f38ea..2eb0bc07196d6 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/filesystem/filesystem.md @@ -1434,7 +1434,7 @@ if src = dest : raise FileExistsException Destination and source must not be descendants one another ```python -if isDescendant(src, dest) or isDescendant(dest, src) : TODO +if isDescendant(src, dest) or isDescendant(dest, src) : raise IOException ``` The source file or directory must exist locally: @@ -1488,14 +1488,15 @@ There are no expectations that the file changes are atomic for both local `Local #### Outcome where source is a directory `isDir(LocalFS, src)` ```python -if is Dir(LocalFS, src) and (isFile(FS, dest) or isFile(FS, dest + childElements(src))): +if isDir(LocalFS, src) and (isFile(FS, dest) or isFile(FS, dest + childElements(src))): raise FileAlreadyExistsException else if isDir(LocalFS, src): - dest' = dest - if exists(FS, dest) + if exists(FS, dest): dest' = dest + childElements(src) if exists(FS, dest') and not overwrite: raise PathExistsException + else: + dest' = dest FS' = FS where: forall c in descendants(LocalFS, src): @@ -1507,11 +1508,12 @@ else if isDir(LocalFS, src): LocalFS' = LocalFS where not delSrc or (delSrc = true and delete(LocalFS, src, true)) ``` -There are no expectations of operation isolation / atomicity. This means files can change -in source or destination while the operation is executing. No guarantees are made for the -final state of the file or directory after a copy other than it is best effort. E.g.: when -copying a directory, one file can be moved from source to destination but there's nothing -stopping the new file at destination being updated while the copy operation is still in place. +There are no expectations of operation isolation / atomicity. +This means files can change in source or destination while the operation is executing. +No guarantees are made for the final state of the file or directory after a copy other than it is +best effort. E.g.: when copying a directory, one file can be moved from source to destination but +there's nothing stopping the new file at destination being updated while the copy operation is still +in place. #### Implementation