From 82ea338beb6bab184c6e1fc966e34b276ee3e900 Mon Sep 17 00:00:00 2001 From: Arunkumar Chacko Date: Thu, 5 Sep 2024 11:07:18 +0530 Subject: [PATCH 1/3] Add open() API with FileStatus --- .../hadoop/fs/gcs/GoogleHadoopFileSystem.java | 18 ++++++++++ .../fs/gcs/HadoopFileSystemTestBase.java | 33 +++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java index e45534d8b2..4a89b80c1f 100644 --- a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java +++ b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java @@ -589,6 +589,24 @@ public FSDataInputStream open(Path hadoopPath, int bufferSize) throws IOExceptio }); } + public FSDataInputStream open(FileStatus status) throws IOException { + logger.atFine().log("openWithStatus(%s)", status); + + GoogleHadoopFileStatus fileStatus = (GoogleHadoopFileStatus) status; + + return trackDurationWithTracing( + instrumentation, + globalStorageStatistics, + GhfsStatistic.INVOCATION_OPEN, + status.getPath(), + this.traceFactory, + () -> { + checkOpen(); + return new FSDataInputStream( + GoogleHadoopFSInputStream.create(this, fileStatus.getFileInfo(), statistics)); + }); + } + @Override public FSDataOutputStream create( Path hadoopPath, diff --git a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/HadoopFileSystemTestBase.java b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/HadoopFileSystemTestBase.java index 7af465a4e9..7b25095a42 100644 --- a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/HadoopFileSystemTestBase.java +++ b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/HadoopFileSystemTestBase.java @@ -40,6 +40,7 @@ import java.io.EOFException; import java.io.FileNotFoundException; import java.io.IOException; +import java.lang.reflect.Method; import java.net.URI; import java.nio.ByteBuffer; import java.time.Instant; @@ -49,6 +50,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.UUID; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -1065,6 +1067,37 @@ public void testInvalidRangeRequest() throws Exception { } } + @Test + public void testGetOpenWithStatus() throws Exception { + String content = UUID.randomUUID().toString(); + Path hadoopPath = ghfsHelper.castAsHadoopPath(getTempFilePath()); + + ghfsHelper.writeFile(hadoopPath, content, 1, /* overwrite= */ false); + + FileStatus fileStatus = ghfs.getFileStatus(hadoopPath); + + Method openWithStatus = ghfs.getClass().getMethod("open", FileStatus.class); + try (FSDataInputStream readStream = + (FSDataInputStream) openWithStatus.invoke(ghfs, fileStatus)) { + String readContent = readContent(readStream); + + assertThat(readContent).isEqualTo(content); + } + } + + private static String readContent(FSDataInputStream readStream) throws IOException { + byte[] readBuffer = new byte[1024]; + StringBuilder returnBuffer = new StringBuilder(); + + int numBytesRead = readStream.read(readBuffer); + while (numBytesRead > 0) { + returnBuffer.append(new String(readBuffer, 0, numBytesRead, UTF_8)); + numBytesRead = readStream.read(readBuffer); + } + + return returnBuffer.toString(); + } + private void validateVectoredReadResult(List fileRanges, Path hadoopPath) throws Exception { for (FileRange range : fileRanges) { From 16ff443b1951fc4b39dd9e24b325d69b8afe8860 Mon Sep 17 00:00:00 2001 From: Arunkumar Chacko Date: Mon, 9 Sep 2024 10:41:15 +0530 Subject: [PATCH 2/3] Check path belong to the same bucket --- gcs/CHANGES.md | 1 + .../hadoop/fs/gcs/GoogleHadoopFileSystem.java | 2 + .../fs/gcs/GoogleHadoopFileSystemTest.java | 42 +++++++++++++++++++ 3 files changed, 45 insertions(+) diff --git a/gcs/CHANGES.md b/gcs/CHANGES.md index 6bd4783954..f350a9b362 100644 --- a/gcs/CHANGES.md +++ b/gcs/CHANGES.md @@ -1,6 +1,7 @@ # Release Notes ## Next +1. PR #1244 - Add open() API which takes FileStatus as parameter ## 3.0.2 - 2024-08-06 1. PR #1230 - Avoid registering subscriber class multiple times diff --git a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java index 4a89b80c1f..36466abfb7 100644 --- a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java +++ b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java @@ -594,6 +594,8 @@ public FSDataInputStream open(FileStatus status) throws IOException { GoogleHadoopFileStatus fileStatus = (GoogleHadoopFileStatus) status; + checkPath(status.getPath()); + return trackDurationWithTracing( instrumentation, globalStorageStatistics, diff --git a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTest.java b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTest.java index c62f3f423e..14e23bd39b 100644 --- a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTest.java +++ b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTest.java @@ -24,6 +24,7 @@ import static com.google.common.truth.Truth.assertThat; import static org.junit.Assert.assertThrows; +import com.google.cloud.hadoop.gcsio.FileInfo; import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem; import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemImpl; import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemOptions; @@ -31,10 +32,12 @@ import com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions; import com.google.cloud.hadoop.gcsio.GoogleCloudStorageReadOptions; import com.google.cloud.hadoop.gcsio.MethodOutcome; +import com.google.cloud.hadoop.gcsio.StorageResourceId; import com.google.cloud.hadoop.gcsio.testing.InMemoryGoogleCloudStorage; import com.google.cloud.hadoop.util.AccessTokenProvider; import com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.AuthenticationType; import com.google.cloud.hadoop.util.testing.TestingAccessTokenProvider; +import com.google.common.collect.ImmutableMap; import com.google.errorprone.annotations.CanIgnoreReturnValue; import java.io.FileNotFoundException; import java.io.IOException; @@ -49,6 +52,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; import org.junit.Before; import org.junit.Test; import org.junit.runner.RunWith; @@ -269,6 +273,44 @@ public void testTotalTimeStatistics() throws IOException { assertThat(stats.getLong(STREAM_WRITE_OPERATIONS.getSymbol() + "_duration")).isEqualTo(200); } + @Test + @SuppressWarnings("CheckReturnValue") + public void testStatus() throws Exception { + URI bucketName = new URI("gs://read-test-bucket/"); + URI failureBucketName = new URI("gs://read-test-bucket-other/"); + + FileInfo fileInfo = + FileInfo.fromItemInfo( + GoogleCloudStorageItemInfo.createObject( + new StorageResourceId(bucketName.getAuthority(), "bar/test/object"), + /* creationTime= */ 10L, + /* modificationTime= */ 15L, + /* size= */ 200L, + "text/plain", + /* contentEncoding= */ "lzma", + /* metadata= */ ImmutableMap.of("foo-meta", new byte[] {5, 66, 56}), + /* contentGeneration= */ 312432L, + /* metaGeneration= */ 2L, + /* verificationAttributes= */ null)); + + GoogleHadoopFileStatus fileStatus = + new GoogleHadoopFileStatus( + fileInfo, new Path(fileInfo.getPath()), 1, 2, FsPermission.getFileDefault(), "foo"); + new GoogleHadoopFileSystem(); + GoogleHadoopFileSystem fs = new GoogleHadoopFileSystem(); + fs.initialize(bucketName, new Configuration()); + fs.open(fileStatus); + + fs.initialize(failureBucketName, new Configuration()); + + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, () -> fs.open(fileStatus)); + assertThat(exception.getMessage()) + .isEqualTo( + "Wrong bucket: read-test-bucket, in path: gs://read-test-bucket/bar/test/object, expected bucket: read-test-bucket-other"); + fs.close(); + } + // ----------------------------------------------------------------- // Inherited tests that we suppress because their behavior differs // from the base class. From 2349976c9dda4c5172ab846d15337303e0fd2efa Mon Sep 17 00:00:00 2001 From: Arunkumar Chacko Date: Mon, 9 Sep 2024 12:08:02 +0530 Subject: [PATCH 3/3] Take CR comments --- .../hadoop/fs/gcs/GoogleHadoopFileSystem.java | 7 ++++ .../fs/gcs/GoogleHadoopFileSystemTest.java | 41 ++++++++++++------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java index 36466abfb7..a5f3db8042 100644 --- a/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java +++ b/gcs/src/main/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystem.java @@ -592,6 +592,13 @@ public FSDataInputStream open(Path hadoopPath, int bufferSize) throws IOExceptio public FSDataInputStream open(FileStatus status) throws IOException { logger.atFine().log("openWithStatus(%s)", status); + if (!GoogleHadoopFileStatus.class.isAssignableFrom(status.getClass())) { + throw new IllegalArgumentException( + String.format( + "Expected status to be of type GoogleHadoopFileStatus, but found %s", + status.getClass())); + } + GoogleHadoopFileStatus fileStatus = (GoogleHadoopFileStatus) status; checkPath(status.getPath()); diff --git a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTest.java b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTest.java index 14e23bd39b..6085fd9247 100644 --- a/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTest.java +++ b/gcs/src/test/java/com/google/cloud/hadoop/fs/gcs/GoogleHadoopFileSystemTest.java @@ -274,8 +274,7 @@ public void testTotalTimeStatistics() throws IOException { } @Test - @SuppressWarnings("CheckReturnValue") - public void testStatus() throws Exception { + public void testFileOpenWithStatus() throws Exception { URI bucketName = new URI("gs://read-test-bucket/"); URI failureBucketName = new URI("gs://read-test-bucket-other/"); @@ -296,19 +295,31 @@ public void testStatus() throws Exception { GoogleHadoopFileStatus fileStatus = new GoogleHadoopFileStatus( fileInfo, new Path(fileInfo.getPath()), 1, 2, FsPermission.getFileDefault(), "foo"); - new GoogleHadoopFileSystem(); - GoogleHadoopFileSystem fs = new GoogleHadoopFileSystem(); - fs.initialize(bucketName, new Configuration()); - fs.open(fileStatus); - - fs.initialize(failureBucketName, new Configuration()); - - IllegalArgumentException exception = - assertThrows(IllegalArgumentException.class, () -> fs.open(fileStatus)); - assertThat(exception.getMessage()) - .isEqualTo( - "Wrong bucket: read-test-bucket, in path: gs://read-test-bucket/bar/test/object, expected bucket: read-test-bucket-other"); - fs.close(); + try (GoogleHadoopFileSystem fs = new GoogleHadoopFileSystem()) { + fs.initialize(bucketName, new Configuration()); + fs.open(fileStatus); + + fs.initialize(failureBucketName, new Configuration()); + + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, () -> fs.open(fileStatus)); + assertThat(exception.getMessage()) + .isEqualTo( + "Wrong bucket: read-test-bucket, in path: gs://read-test-bucket/bar/test/object, expected bucket: read-test-bucket-other"); + } + } + + @Test + public void testFileOpenWithStatusInvalidType() throws Exception { + try (GoogleHadoopFileSystem fs = new GoogleHadoopFileSystem()) { + fs.initialize(new URI("gs://read-test-bucket/"), new Configuration()); + + IllegalArgumentException exception = + assertThrows(IllegalArgumentException.class, () -> fs.open(new FileStatus())); + assertThat(exception.getMessage()) + .isEqualTo( + "Expected status to be of type GoogleHadoopFileStatus, but found class org.apache.hadoop.fs.FileStatus"); + } } // -----------------------------------------------------------------