Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor S3 path handling #9092

Merged
merged 24 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
private

from Standard.Base import all
from Standard.Base.System.File import file_as_java
import Standard.Base.Errors.File_Error.File_Error
Expand Down
104 changes: 104 additions & 0 deletions distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument

import project.Errors.S3_Error
import project.S3.S3

## PRIVATE
   An abstraction for S3 paths, holding the bucket name and the key.

   This helper implements resolving paths in a folder-like structure as
   described at https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html
type S3_Path
    ## PRIVATE
       Creates a path from its two components.

       Arguments:
       - bucket: The name of the bucket.
       - key: The key of the object inside the bucket. An empty key denotes the
         root of the bucket; a key ending with the delimiter denotes a
         directory.
    Value (bucket : Text) (key : Text)

    ## PRIVATE
       Parses a URI of the form `<prefix><bucket>/<key>` into an `S3_Path`,
       where `<prefix>` is `S3.uri_prefix`.

       Everything up to the first delimiter after the prefix is the bucket
       name and everything after that delimiter is the key. If there is no
       delimiter after the prefix, the key is empty.

       Fails with `Illegal_Argument` if the URI does not start with the prefix
       or if the delimiter directly follows the prefix (empty bucket name).
    parse (uri : Text) -> S3_Path ! Illegal_Argument =
        if uri.starts_with S3.uri_prefix . not then Error.throw (Illegal_Argument.Error "An S3 path must start with `"+S3.uri_prefix+"`.") else
            without_prefix = uri.drop S3.uri_prefix.length
            first_slash_index = without_prefix.index_of S3_Path.delimiter
            if first_slash_index == 0 then Error.throw (Illegal_Argument.Error "Invalid S3 path: empty bucket name.") else
                if first_slash_index.is_nothing then S3_Path.Value without_prefix "" else
                    S3_Path.Value (without_prefix.take first_slash_index) (without_prefix.drop first_slash_index+1)

    ## PRIVATE
       Renders the path back into its URI form: prefix, bucket, delimiter, key.
    to_text self -> Text = S3.uri_prefix + self.bucket + S3_Path.delimiter + self.key

    ## PRIVATE
    to_display_text self -> Text = self.to_text.to_display_text

    ## PRIVATE
       Checks if this path represents the root directory of a bucket.
    is_root self -> Boolean = self.key.is_empty

    ## PRIVATE
       Checks if this path represents a directory.

       A path is a directory if it is the bucket root or if its key ends with
       the delimiter.
    is_directory self -> Boolean = self.is_root || (self.key.ends_with S3_Path.delimiter)

    ## PRIVATE
       Resolves a subdirectory entry.
       This only makes logical sense for paths for which `path.is_directory == True`,
       but it is not forbidden to call it on other types of paths.

       A `subpath` starting with the delimiter is resolved from the root of the
       bucket instead of from this path. The segments `.` and `..` are
       interpreted as 'current directory' and 'parent directory' respectively.
       Trying to move above the root of the bucket fails with an `S3_Error`.

       Note that the newly returned path will be a directory or not depending on
       if `subpath` ends with the delimiter.
    resolve self (subpath : Text) -> S3_Path =
        trimmed = if subpath.starts_with S3_Path.delimiter then subpath.drop (First 1) else subpath
        parts = trimmed.split S3_Path.delimiter

        # Appends a segment, inserting a delimiter only if one is needed.
        extend current part =
            if current == "" then part else
                if current.ends_with S3_Path.delimiter then current + part else
                    current + S3_Path.delimiter + part

        # Removes the last segment of `current`. A trailing delimiter (i.e.
        # `current` being a directory) is ignored first, as otherwise only that
        # delimiter would get stripped and we would not move up at all. Removing
        # the only segment yields the root (empty key); only going up from the
        # root itself is an error.
        go_up current =
            if current == "" then Error.throw (S3_Error.Error "Cannot move above root folder.") else
                without_trailing = if current.ends_with S3_Path.delimiter then current.drop (Last 1) else current
                last_index = without_trailing.last_index_of S3_Path.delimiter
                if last_index == Nothing then "" else without_trailing.take last_index

        loop current remaining = if remaining.length == 0 then current else
            new_current = case remaining.first of
                ".." -> go_up current
                "." -> current
                x -> extend current x
            @Tail_Call loop new_current (remaining.drop 1)

        initial = if subpath.starts_with S3_Path.delimiter then "" else self.key
        new_path = loop initial parts
        S3_Path.Value self.bucket new_path

    ## PRIVATE
       Returns the parent directory.
       It will return `Nothing` for the root directory.
    parent self -> S3_Path | Nothing =
        if self.key == "" then Nothing else
            last_index = case self.is_directory of
                # For directories we drop the trailing slash and find the one before it:
                True -> (self.key.drop (Last 1)).last_index_of S3_Path.delimiter
                False -> self.key.last_index_of S3_Path.delimiter
            # We include the trailing slash in the path, as the parent is
            # always a directory and in S3 directories are distinguished only
            # by the presence of this slash.
            new_key = if last_index == Nothing then "" else self.key.take last_index+1
            S3_Path.Value self.bucket new_key

    ## PRIVATE
       Returns the 'file name' of this path.
       If the path is a regular file, it will be its name.
       If the path is a directory, it will be the name of that directory,
       without the trailing delimiter.
       For the root of the bucket, the delimiter itself is returned.
    file_name self -> Text =
        if self.is_root then S3_Path.delimiter else
            trimmed = if self.key.ends_with S3_Path.delimiter then self.key.drop (Last 1) else self.key
            last_index = trimmed.last_index_of S3_Path.delimiter
            if last_index == Nothing then trimmed else trimmed.drop (First last_index+1)

    ## PRIVATE
       Checks if this path is inside of the given other path.

       The `other` path must be a directory for the result to be `True`. The
       check is a prefix comparison of the textual forms, so a directory is
       also reported as a descendant of itself.
    is_descendant_of self (other : S3_Path) -> Boolean =
        other.is_directory && self.to_text.starts_with other.to_text

    ## PRIVATE
       The delimiter used for path emulation.

       S3 does not treat `/` in any special way, it allows using any character as a
       path delimiter. In the future we could allow customizing it.
    delimiter = "/"
15 changes: 2 additions & 13 deletions distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3.enso
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ raw_head bucket key credentials =
- credentials: AWS credentials. If not provided, the default credentials will
be used.
get_object : Text -> Text -> AWS_Credential | Nothing -> Response_Body ! S3_Error
get_object bucket key credentials:(AWS_Credential | Nothing)=Nothing = handle_s3_errors bucket=bucket key=key <|
get_object bucket key credentials:(AWS_Credential | Nothing)=Nothing delimiter="/" = handle_s3_errors bucket=bucket key=key <|
request = GetObjectRequest.builder.bucket bucket . key key . build

client = make_client credentials
Expand All @@ -133,7 +133,7 @@ get_object bucket key credentials:(AWS_Credential | Nothing)=Nothing = handle_s3
s3_uri = URI.parse (uri_prefix + bucket + "/") / key
content_type = inner_response.contentType
name = filename_from_content_disposition inner_response.contentDisposition . if_nothing <|
key.split "/" . last
key.split delimiter . last
metadata = File_Format_Metadata.Value path=key name=name content_type=content_type

input_stream = Input_Stream.new response (handle_io_errors s3_uri)
Expand Down Expand Up @@ -169,17 +169,6 @@ copy_object (source_bucket : Text) (source_key : Text) (target_bucket : Text) (t
. build
client.copyObject request . if_not_error Nothing

## PRIVATE
   Splits a S3 URI into bucket and key.

   Returns `Nothing` if the URI does not start with `uri_prefix` or if a `/`
   directly follows the prefix. If there is no `/` after the prefix, the whole
   remainder is the bucket and the key is empty.
parse_uri : Text -> Pair Text Text | Nothing
parse_uri uri = case uri.starts_with uri_prefix of
    False -> Nothing
    True ->
        remainder = uri.drop uri_prefix.length
        slash_position = remainder.index_of "/"
        if slash_position.is_nothing then Pair.new remainder "" else
            if slash_position == 0 then Nothing else
                bucket = remainder.take slash_position
                key = remainder.drop slash_position+1
                Pair.new bucket key

## PRIVATE
handle_s3_errors : Any -> Text -> Text -> Any ! S3_Error | AWS_SDK_Error
handle_s3_errors ~action bucket="" key="" =
Expand Down
Loading
Loading