Skip to content

Commit

Permalink
Add support for WASB to native Axure file system implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
dain committed Sep 23, 2024
1 parent addda4a commit f553882
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 14 deletions.
6 changes: 6 additions & 0 deletions lib/trino-filesystem-azure/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,12 @@
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>slice</artifactId>
<scope>runtime</scope>
</dependency>

<dependency>
<groupId>io.airlift</groupId>
<artifactId>junit-extensions</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@

class AzureLocation
{
private static final String INVALID_LOCATION_MESSAGE = "Invalid Azure location. Expected form is 'abfs://[<containerName>@]<accountName>.dfs.<endpoint>/<filePath>': %s";
private static final String INVALID_ABFS_LOCATION_MESSAGE = "Invalid Azure ABFS location. Expected form is 'abfs://[<containerName>@]<accountName>.dfs.<endpoint>/<filePath>': %s";
private static final String INVALID_WASB_LOCATION_MESSAGE = "Invalid Azure WASB location. Expected form is 'wasb://[<containerName>@]<accountName>.blob.<endpoint>/<filePath>': %s";

// https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules
private static final CharMatcher CONTAINER_VALID_CHARACTERS = CharMatcher.inRange('a', 'z').or(CharMatcher.inRange('0', '9')).or(CharMatcher.is('-'));
Expand All @@ -44,13 +45,23 @@ class AzureLocation
public AzureLocation(Location location)
{
this.location = requireNonNull(location, "location is null");
// abfss is also supported but not documented
scheme = location.scheme().orElseThrow(() -> new IllegalArgumentException(String.format(INVALID_LOCATION_MESSAGE, location)));
checkArgument("abfs".equals(scheme) || "abfss".equals(scheme), INVALID_LOCATION_MESSAGE, location);
// abfss and wasb are also supported but not documented
scheme = location.scheme().orElseThrow(() -> new IllegalArgumentException(String.format(INVALID_ABFS_LOCATION_MESSAGE, location)));
String invalidLocationMessage;
if ("abfs".equals(scheme) || "abfss".equals(scheme)) {
invalidLocationMessage = INVALID_ABFS_LOCATION_MESSAGE;
}
else if ("wasb".equals(scheme)) {
invalidLocationMessage = INVALID_WASB_LOCATION_MESSAGE;
}
else {
// only mention abfs in error message as the other forms are deprecated
throw new IllegalArgumentException(String.format(INVALID_ABFS_LOCATION_MESSAGE, location));
}

// container is interpolated into the URL path, so perform extra checks
location.userInfo().ifPresent(container -> {
checkArgument(!container.isEmpty(), INVALID_LOCATION_MESSAGE, location);
checkArgument(!container.isEmpty(), invalidLocationMessage, location);
checkArgument(
CONTAINER_VALID_CHARACTERS.matchesAllOf(container),
"Invalid Azure storage container name. Valid characters are 'a-z', '0-9', and '-': %s",
Expand All @@ -66,21 +77,27 @@ public AzureLocation(Location location)
});

// storage account is the first label of the host
checkArgument(location.host().isPresent(), INVALID_LOCATION_MESSAGE, location);
checkArgument(location.host().isPresent(), invalidLocationMessage, location);
String host = location.host().get();
int accountSplit = host.indexOf('.');
checkArgument(
accountSplit > 0,
INVALID_LOCATION_MESSAGE,
invalidLocationMessage,
this.location);
this.account = host.substring(0, accountSplit);

// host must contain ".dfs." before endpoint
checkArgument(host.substring(accountSplit).startsWith(".dfs."), INVALID_LOCATION_MESSAGE, location);

// endpoint is the part after ".dfs."
this.endpoint = host.substring(accountSplit + ".dfs.".length());
checkArgument(!endpoint.isEmpty(), INVALID_LOCATION_MESSAGE, location);
// abfs[s] host must contain ".dfs.", and wasb host must contain ".blob." before endpoint
if (scheme.equals("abfs") || scheme.equals("abfss")) {
checkArgument(host.substring(accountSplit).startsWith(".dfs."), invalidLocationMessage, location);
// endpoint does not include dfs
this.endpoint = host.substring(accountSplit + ".dfs.".length());
}
else {
checkArgument(host.substring(accountSplit).startsWith(".blob."), invalidLocationMessage, location);
// endpoint does not include blob
this.endpoint = host.substring(accountSplit + ".blob.".length());
}
checkArgument(!endpoint.isEmpty(), invalidLocationMessage, location);

// storage account is interpolated into URL host name, so perform extra checks
checkArgument(STORAGE_ACCOUNT_VALID_CHARACTERS.matchesAllOf(account),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
import io.trino.filesystem.AbstractTestTrinoFileSystem;
import io.trino.filesystem.Location;
import io.trino.filesystem.TrinoFileSystem;
import io.trino.filesystem.TrinoInput;
import io.trino.filesystem.TrinoInputFile;
import io.trino.spi.security.ConnectorIdentity;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.AfterEach;
Expand Down Expand Up @@ -211,4 +213,28 @@ public void testPaths()
// Azure file paths are always hierarchical
testPathHierarchical();
}

@Test
void testWasb()
throws IOException
{
try (TempBlob tempBlob = new TempBlob(Location.of("wasb://%s@%s.blob.core.windows.net/wasb-test/%s".formatted(containerName, account, randomUUID())))) {
assertThat(tempBlob.exists()).isFalse();

TrinoInputFile inputFile = getFileSystem().newInputFile(tempBlob.location());
assertThat(inputFile.location()).isEqualTo(tempBlob.location());
assertThat(inputFile.exists()).isFalse();

// create file with data
tempBlob.createOrOverwrite("123456");
assertThat(inputFile.length()).isEqualTo(6);
try (TrinoInput input = inputFile.newInput()) {
assertThat(input.readFully(0, 6).toStringUtf8()).isEqualTo("123456");
}

// delete the file
tempBlob.close();
assertThat(inputFile.exists()).isFalse();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ void test()
{
assertValid("abfs://[email protected]/some/path/file", "account", "container", "some/path/file", "abfs", "core.windows.net");
assertValid("abfss://[email protected]/some/path/file", "account", "container", "some/path/file", "abfss", "core.windows.net");
assertValid("wasb://[email protected]/some/path/file", "account", "container", "some/path/file", "wasb", "core.windows.net");

assertValid("abfs://[email protected]/some/path/file", "account", "container-stuff", "some/path/file", "abfs", "core.windows.net");
assertValid("abfs://[email protected]/some/path/file", "account", "container2", "some/path/file", "abfs", "core.windows.net");
Expand All @@ -39,8 +40,17 @@ void test()
// other endpoints are allowed
assertValid("abfs://[email protected]/some/path/file", "account", "container", "some/path/file", "abfs", "core.usgovcloudapi.net");
assertValid("abfss://[email protected]/some/path/file", "account", "container", "some/path/file", "abfss", "core.usgovcloudapi.net");
assertValid("wasb://[email protected]/some/path/file", "account", "container", "some/path/file", "wasb", "core.usgovcloudapi.net");

// only abfs and abfss schemes allowed
// abfs[s] host must contain ".dfs.", and wasb host must contain ".blob." before endpoint
assertInvalid("abfs://[email protected]/some/path/file");
assertInvalid("abfss://[email protected]/some/path/file");
assertInvalid("wasb://[email protected]/some/path/file");
assertInvalid("abfs://[email protected]/some/path/file");
assertInvalid("abfss://[email protected]/some/path/file");
assertInvalid("wasb://[email protected]/some/path/file");

// only abfs, abfss, and wasb schemes allowed
assertInvalid("https://[email protected]/some/path/file");

// host must have at least to labels
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ protected void setup(Binder binder)
install(new AzureFileSystemModule());
factories.addBinding("abfs").to(AzureFileSystemFactory.class);
factories.addBinding("abfss").to(AzureFileSystemFactory.class);
factories.addBinding("wasb").to(AzureFileSystemFactory.class);
}

if (config.isNativeS3Enabled()) {
Expand Down

0 comments on commit f553882

Please sign in to comment.