Add open() API with FileStatus #1244

Merged
1 change: 1 addition & 0 deletions gcs/CHANGES.md
@@ -1,6 +1,7 @@
# Release Notes

## Next
1. PR #1244 - Add open() API which takes FileStatus as parameter

## 3.0.2 - 2024-08-06
1. PR #1230 - Avoid registering subscriber class multiple times
@@ -589,6 +589,33 @@ public FSDataInputStream open(Path hadoopPath, int bufferSize) throws IOException {
});
}

public FSDataInputStream open(FileStatus status) throws IOException {
  logger.atFine().log("openWithStatus(%s)", status);

  if (!GoogleHadoopFileStatus.class.isAssignableFrom(status.getClass())) {
    throw new IllegalArgumentException(
        String.format(
            "Expected status to be of type GoogleHadoopFileStatus, but found %s",
            status.getClass()));
  }

Review comment (Contributor): isInstanceOf would have sufficed, no?
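
Presumably the reviewer means a plain instanceof test, which is equivalent to the isAssignableFrom check for a non-null reference; a minimal sketch of that alternative (not what the PR merged):

  // Hypothetical equivalent of the isAssignableFrom check above;
  // instanceof is the more idiomatic form for a non-null reference.
  if (!(status instanceof GoogleHadoopFileStatus)) {
    throw new IllegalArgumentException(
        String.format(
            "Expected status to be of type GoogleHadoopFileStatus, but found %s",
            status.getClass()));
  }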

  GoogleHadoopFileStatus fileStatus = (GoogleHadoopFileStatus) status;

Review thread:
Contributor: Wouldn't it be a casting issue?
Contributor (Author): Caller will have to handle the exception.
Contributor: Wouldn't it be better to validate and throw IllegalArgumentException instead of throwing a type cast exception from the API?
Contributor (Author): Done


  checkPath(status.getPath());

  return trackDurationWithTracing(
      instrumentation,
      globalStorageStatistics,
      GhfsStatistic.INVOCATION_OPEN,
      status.getPath(),
      this.traceFactory,
      () -> {
        checkOpen();
        return new FSDataInputStream(
            GoogleHadoopFSInputStream.create(this, fileStatus.getFileInfo(), statistics));
      });
}
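
For illustration, a caller-side sketch of the new overload; the bucket, object path, and wrapper class are hypothetical, not part of this PR. Presumably the point of open(FileStatus) is to reuse the metadata a prior getFileStatus or listing call already fetched, rather than looking it up again as a path-based open() must:

import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

public class OpenWithStatusExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical bucket and object; any GCS path readable with the
    // configured credentials would do.
    try (GoogleHadoopFileSystem fs = new GoogleHadoopFileSystem()) {
      fs.initialize(new URI("gs://example-bucket/"), new Configuration());

      // getFileStatus on GoogleHadoopFileSystem returns a GoogleHadoopFileStatus
      // carrying FileInfo (the integration test below relies on this), so the
      // status can be handed straight back to open() without another metadata fetch.
      FileStatus status = fs.getFileStatus(new Path("gs://example-bucket/dir/object"));
      try (FSDataInputStream in = fs.open(status)) {
        System.out.println(status.getPath() + " first byte: " + in.read());
      }
    }
  }
}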

@Override
public FSDataOutputStream create(
Path hadoopPath,
Expand Down
@@ -24,17 +24,20 @@
import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;

import com.google.cloud.hadoop.gcsio.FileInfo;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystem;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemImpl;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageFileSystemOptions;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageItemInfo;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageReadOptions;
import com.google.cloud.hadoop.gcsio.MethodOutcome;
import com.google.cloud.hadoop.gcsio.StorageResourceId;
import com.google.cloud.hadoop.gcsio.testing.InMemoryGoogleCloudStorage;
import com.google.cloud.hadoop.util.AccessTokenProvider;
import com.google.cloud.hadoop.util.HadoopCredentialsConfiguration.AuthenticationType;
import com.google.cloud.hadoop.util.testing.TestingAccessTokenProvider;
import com.google.common.collect.ImmutableMap;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.io.FileNotFoundException;
import java.io.IOException;
@@ -49,6 +52,7 @@
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
@@ -269,6 +273,55 @@ public void testTotalTimeStatistics() throws IOException {
assertThat(stats.getLong(STREAM_WRITE_OPERATIONS.getSymbol() + "_duration")).isEqualTo(200);
}

@Test
public void testFileOpenWithStatus() throws Exception {
  URI bucketName = new URI("gs://read-test-bucket/");
  URI failureBucketName = new URI("gs://read-test-bucket-other/");

  FileInfo fileInfo =
      FileInfo.fromItemInfo(
          GoogleCloudStorageItemInfo.createObject(
              new StorageResourceId(bucketName.getAuthority(), "bar/test/object"),
              /* creationTime= */ 10L,
              /* modificationTime= */ 15L,
              /* size= */ 200L,
              "text/plain",
              /* contentEncoding= */ "lzma",
              /* metadata= */ ImmutableMap.of("foo-meta", new byte[] {5, 66, 56}),
              /* contentGeneration= */ 312432L,
              /* metaGeneration= */ 2L,
              /* verificationAttributes= */ null));

  GoogleHadoopFileStatus fileStatus =
      new GoogleHadoopFileStatus(
          fileInfo, new Path(fileInfo.getPath()), 1, 2, FsPermission.getFileDefault(), "foo");
  try (GoogleHadoopFileSystem fs = new GoogleHadoopFileSystem()) {
    fs.initialize(bucketName, new Configuration());
    fs.open(fileStatus);

    fs.initialize(failureBucketName, new Configuration());

    IllegalArgumentException exception =
        assertThrows(IllegalArgumentException.class, () -> fs.open(fileStatus));
    assertThat(exception.getMessage())
        .isEqualTo(
            "Wrong bucket: read-test-bucket, in path: gs://read-test-bucket/bar/test/object, expected bucket: read-test-bucket-other");
  }
}

@Test
public void testFileOpenWithStatusInvalidType() throws Exception {
  try (GoogleHadoopFileSystem fs = new GoogleHadoopFileSystem()) {
    fs.initialize(new URI("gs://read-test-bucket/"), new Configuration());

    IllegalArgumentException exception =
        assertThrows(IllegalArgumentException.class, () -> fs.open(new FileStatus()));
    assertThat(exception.getMessage())
        .isEqualTo(
            "Expected status to be of type GoogleHadoopFileStatus, but found class org.apache.hadoop.fs.FileStatus");
  }
}

// -----------------------------------------------------------------
// Inherited tests that we suppress because their behavior differs
// from the base class.
@@ -40,6 +40,7 @@
import java.io.EOFException;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Method;
import java.net.URI;
import java.nio.ByteBuffer;
import java.time.Instant;
@@ -49,6 +50,7 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -1065,6 +1067,37 @@ public void testInvalidRangeRequest() throws Exception {
}
}

@Test
public void testGetOpenWithStatus() throws Exception {
  String content = UUID.randomUUID().toString();
  Path hadoopPath = ghfsHelper.castAsHadoopPath(getTempFilePath());

  ghfsHelper.writeFile(hadoopPath, content, 1, /* overwrite= */ false);

  FileStatus fileStatus = ghfs.getFileStatus(hadoopPath);

  Method openWithStatus = ghfs.getClass().getMethod("open", FileStatus.class);
  try (FSDataInputStream readStream =
      (FSDataInputStream) openWithStatus.invoke(ghfs, fileStatus)) {
    String readContent = readContent(readStream);

    assertThat(readContent).isEqualTo(content);
  }
}
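
The reflection here is presumably needed because ghfs is declared as the base org.apache.hadoop.fs.FileSystem type, which has no open(FileStatus) overload at this Hadoop version; a variable typed as GoogleHadoopFileSystem could make the call directly. A minimal sketch under that assumption:

  // Hypothetical direct call, assuming ghfs is backed by GoogleHadoopFileSystem:
  GoogleHadoopFileSystem googleGhfs = (GoogleHadoopFileSystem) ghfs;
  try (FSDataInputStream readStream = googleGhfs.open(fileStatus)) {
    assertThat(readContent(readStream)).isEqualTo(content);
  }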

private static String readContent(FSDataInputStream readStream) throws IOException {
  byte[] readBuffer = new byte[1024];
  StringBuilder returnBuffer = new StringBuilder();

  int numBytesRead = readStream.read(readBuffer);
  while (numBytesRead > 0) {
    returnBuffer.append(new String(readBuffer, 0, numBytesRead, UTF_8));
    numBytesRead = readStream.read(readBuffer);
  }

  return returnBuffer.toString();
}

private void validateVectoredReadResult(List<FileRange> fileRanges, Path hadoopPath)
    throws Exception {
  for (FileRange range : fileRanges) {