From e16e3350f4eacc187370d38f164d5898dc224e8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Sat, 17 Feb 2024 23:52:36 +0100 Subject: [PATCH 01/20] encapsulate S3 path handling to separate module --- .../0.0.0-dev/src/Internal/Request_Body.enso | 1 + .../AWS/0.0.0-dev/src/Internal/S3_Path.enso | 65 +++++++++++++ .../lib/Standard/AWS/0.0.0-dev/src/S3/S3.enso | 11 --- .../AWS/0.0.0-dev/src/S3/S3_File.enso | 96 +++++++------------ test/AWS_Tests/src/S3_Spec.enso | 38 +++++--- 5 files changed, 123 insertions(+), 88 deletions(-) create mode 100644 distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/Request_Body.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/Request_Body.enso index 80da7e0f3384..c22d998f8ed1 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/Request_Body.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/Request_Body.enso @@ -1,4 +1,5 @@ private + from Standard.Base import all from Standard.Base.System.File import file_as_java import Standard.Base.Errors.File_Error.File_Error diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso new file mode 100644 index 000000000000..6e6c98472774 --- /dev/null +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso @@ -0,0 +1,65 @@ +from Standard.Base import all +import Standard.Base.Errors.Illegal_Argument.Illegal_Argument + +import project.Errors.S3_Error +import project.S3.S3 + +type S3_Path + Value (bucket : Text) (key : Text) + + parse (uri : Text) -> S3_Path ! Illegal_Argument = + if uri.starts_with S3.uri_prefix . not then Error.throw (Illegal_Argument.Error "An S3 path must start with `"+S3.uri_prefix+"`.") else + without_prefix = uri.drop S3.uri_prefix.length + first_slash_index = without_prefix.index_of "/" + if first_slash_index == 0 then Error.throw (Illegal_Argument.Error "Invalid S3 path: empty bucket name.") else + if first_slash_index.is_nothing then S3_Path.Value without_prefix "" else + Pair.new (without_prefix.take first_slash_index) (without_prefix.drop first_slash_index+1) + + to_text self -> Text = S3.uri_prefix + self.bucket + "/" + self.key + + is_root self -> Boolean = self.key.is_empty + + is_directory self -> Boolean = self.is_root || (self.key.ends_with "/") + + resolve self (subpath : Text) -> S3_Path = + trimmed = if subpath.starts_with "/" then subpath.drop (First 1) else subpath + parts = trimmed.split "/" + + extend current part = + if current == "" then part else + if current.ends_with "/" then current + part else + current + "/" + part + + loop current remaining = if remaining.length == 0 then current else + new_current = case remaining.first of + ".." -> + last_index = current.last_index_of "/" + if last_index == Nothing then Error.throw (S3_Error.Error "Cannot move above root folder.") else current.take last_index + "." -> current + x -> extend current x + @Tail_Call loop new_current (remaining.drop 1) + + initial = if subpath.starts_with "/" then "" else self.key + new_path = loop initial parts + S3_Path.Value self.bucket new_path + + parent self -> S3_Path | Nothing = + if self.key == "" then Nothing else + last_index = case self.is_directory of + # For directories we drop the trailing slash and find the one before it: + True -> (self.key.drop (Last 1)).last_index_of "/" + False -> self.key.last_index_of "/" + ## We include the trailing slash in the path, as the parent is + always a directory and in S3 directories are distinguished only + by the presence of this slash. + new_key = if last_index == Nothing then "" else self.key.take last_index+1 + S3_Path.Value self.bucket new_key + + file_name self -> Text = + if self.is_root then "/" else + trimmed = if self.key.ends_with "/" then self.key.drop (Last 1) else self.key + last_index = trimmed.last_index_of "/" + if last_index == Nothing then trimmed else trimmed.drop (First last_index+1) + + is_descendant_of self (other : S3_Path) -> Boolean = + other.is_directory && self.to_text.starts_with other.to_text diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3.enso index 64286c697f92..3b3ed8c02164 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3.enso @@ -169,17 +169,6 @@ copy_object (source_bucket : Text) (source_key : Text) (target_bucket : Text) (t . build client.copyObject request . if_not_error Nothing -## PRIVATE - Splits a S3 URI into bucket and key. -parse_uri : Text -> Pair Text Text | Nothing -parse_uri uri = - if uri.starts_with uri_prefix . not then Nothing else - no_prefix = uri.drop uri_prefix.length - index_of = no_prefix.index_of "/" - if index_of == 0 then Nothing else - if index_of.is_nothing then Pair.new no_prefix "" else - Pair.new (no_prefix.take index_of) (no_prefix.drop index_of+1) - ## PRIVATE handle_s3_errors : Any -> Text -> Text -> Any ! S3_Error | AWS_SDK_Error handle_s3_errors ~action bucket="" key="" = diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso index 7a8d1c0c3f4c..480ff60f000a 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso @@ -17,6 +17,7 @@ import project.AWS_Credential.AWS_Credential import project.Errors.S3_Error import project.Errors.S3_Key_Not_Found import project.Internal.S3_File_Write_Strategy +import project.Internal.S3_Path.S3_Path import project.S3.S3 ## Represents an S3 file or folder @@ -29,32 +30,31 @@ type S3_File The URI must be in the form `s3://bucket/path/to/file`. - credentials: The credentials to use when accessing the file. If not specified, the default credentials are used. - new : Text -> AWS_Credential | Nothing -> S3_File + new : Text -> AWS_Credential | Nothing -> S3_File ! Illegal_Argument new uri=S3.uri_prefix credentials=Nothing = - parts = S3.parse_uri uri - if parts.is_nothing then Error.throw (Syntax_Error.Error "Invalid S3 URI.") else - S3_File.Value parts.first parts.second credentials + parts = S3_Path.parse uri + S3_File.Value parts.first parts.second credentials ## PRIVATE - Value bucket:Text prefix:Text credentials:(AWS_Credential | Nothing) + Value path:S3_Path credentials:(AWS_Credential | Nothing) ## GROUP Standard.Base.Metadata Gets the URI of this file uri : Text - uri self = S3.uri_prefix + self.bucket + "/" + self.prefix + uri self = self.path.to_text ## GROUP Standard.Base.Metadata Checks if the folder or file exists exists : Boolean - exists self = if self.bucket == "" then True else - if self.prefix == "" then translate_file_errors self <| S3.head self.bucket "" self.credentials . is_error . not else - pair = translate_file_errors self <| S3.read_bucket self.bucket self.prefix self.credentials max_count=1 - pair.second.contains self.prefix + exists self = if self.path.bucket == "" then True else + if self.path.is_root then translate_file_errors self <| S3.head self.path.bucket "" self.credentials . is_error . not else + pair = translate_file_errors self <| S3.read_bucket self.path.bucket self.path.key self.credentials max_count=1 + pair.second.contains self.path.key ## GROUP Standard.Base.Metadata Checks if this is a folder. is_directory : Boolean - is_directory self = (self.prefix == "") || (self.prefix.ends_with "/") + is_directory self = self.path.is_directory ## GROUP Standard.Base.Metadata Checks if this is a regular file. @@ -66,7 +66,7 @@ type S3_File size : Integer size self = if self.is_directory then Error.throw (S3_Error.Error "size can only be called on files." self.uri) else - content_length = translate_file_errors self <| S3.raw_head self.bucket self.prefix self.credentials . contentLength + content_length = translate_file_errors self <| S3.raw_head self.path.bucket self.path.key self.credentials . contentLength if content_length.is_nothing then Error.throw (S3_Error.Error "ContentLength header is missing." self.uri) else content_length ## PRIVATE @@ -98,7 +98,7 @@ type S3_File result = tmp_file.with_output_stream [File_Access.Write] action # Only proceed if the write succeeded result.if_not_error <| - (translate_file_errors self <| S3.upload_file tmp_file self.bucket self.prefix self.credentials) . if_not_error <| + (translate_file_errors self <| S3.upload_file tmp_file self.path.bucket self.path.key self.credentials) . if_not_error <| result @@ -118,7 +118,7 @@ type S3_File with_input_stream : Vector File_Access -> (Input_Stream -> Any ! File_Error) -> Any ! S3_Error | Illegal_Argument with_input_stream self (open_options : Vector) action = if self.is_directory then Error.throw (Illegal_Argument.Error "S3 folders cannot be opened as a stream." self.uri) else if (open_options != [File_Access.Read]) then Error.throw (S3_Error.Error "S3 files can only be opened for reading." self.uri) else - response_body = translate_file_errors self <| S3.get_object self.bucket self.prefix self.credentials + response_body = translate_file_errors self <| S3.get_object self.path.bucket self.path.key self.credentials response_body.with_stream action ## ALIAS load, open @@ -142,7 +142,7 @@ type S3_File _ = on_problems case format of Auto_Detect -> if self.is_directory then format.read self on_problems else - response = translate_file_errors self <| S3.get_object self.bucket self.prefix self.credentials + response = translate_file_errors self <| S3.get_object self.path.bucket self.path.key self.credentials response.decode Auto_Detect _ -> metadata = File_Format_Metadata.Value path=self.path name=self.name @@ -181,7 +181,7 @@ type S3_File delete_if_exists : Nothing delete_if_exists self = if self.is_directory then Error.throw (S3_Error.Error "Deleting S3 folders is currently not implemented." self.uri) else if Context.Output.is_enabled.not then Error.throw (Forbidden_Operation.Error "Deleting an S3_File is forbidden as the Output context is disabled.") else - translate_file_errors self <| S3.delete_object self.bucket self.prefix self.credentials . if_not_error Nothing + translate_file_errors self <| S3.delete_object self.path.bucket self.path.key self.credentials . if_not_error Nothing ## Copies the file to the specified destination. @@ -197,7 +197,7 @@ type S3_File # Special shortcut for more efficient handling of S3 file copying (no need to move the data to our machine) s3_destination : S3_File -> if replace_existing.not && s3_destination.exists then Error.throw (File_Error.Already_Exists destination) else - translate_file_errors self <| S3.copy_object self.bucket self.prefix s3_destination.bucket s3_destination.prefix self.credentials . if_not_error <| s3_destination + translate_file_errors self <| S3.copy_object self.path.bucket self.path.key s3_destination.bucket s3_destination.prefix self.credentials . if_not_error <| s3_destination _ -> generic_copy self destination.file replace_existing ## Moves the file to the specified destination. @@ -221,63 +221,36 @@ type S3_File self.delete.if_not_error r ## GROUP Standard.Base.Operators - Join two path segments together. + Join two path segments together, normalizing the `..` and `.` subpaths. Arguments: - subpath: The path to join to the path of `self`. / : Text -> S3_File / self subpath = if self.is_directory.not then Error.throw (S3_Error.Error "Only folders can have children." self.uri) else - trimmed = if subpath.starts_with "/" then subpath.drop (First 1) else subpath - parts = trimmed.split "/" - - extend current part = - if current == "" then part else - if current.ends_with "/" then current + part else - current + "/" + part - - loop current remaining = if remaining.length == 0 then current else - new_current = case remaining.first of - ".." -> - last_index = current.last_index_of "/" - if last_index == Nothing then (S3_Error.Error "Cannot move above root folder.") else current.take last_index - "." -> current - x -> extend current x - @Tail_Call loop new_current (remaining.drop 1) - - initial = if subpath.starts_with "/" then "" else self.prefix - path = loop initial parts - S3_File.Value self.bucket path self.credentials + S3_File.Value (self.path.resolve subpath) self.credentials ## GROUP Standard.Base.Calculations Join two or more path segments together, normalizing the `..` and `.` subpaths. Arguments: - subpaths: The path segment or segments to join to the path of `self`. - join : (Text | Vector) -> S3_File - join self subpaths = case subpaths of - _ : Vector -> (subpaths.fold self c->p-> c / p) - _ -> self.join [subpaths] + join : (Vector | Text) -> S3_File + join self (subpaths : Vector | Text) = + vec = Vector.unify_vector_or_element subpaths + vec.fold self (/) ## GROUP Standard.Base.Metadata Resolves the parent of this file. parent : S3_File | Nothing parent self = - if self.prefix == "" then Nothing else - is_directory = self.prefix.ends_with "/" - last_index = case is_directory of - # For directories we drop the trailing slash and find the one before it: - True -> (self.prefix.drop (Last 1)).last_index_of "/" - False -> self.prefix.last_index_of "/" - ## We include the trailing slash in the path, as the parent is - always a directory and in S3 directories are distinguished only - by the presence of this slash. - new_prefix = if last_index == Nothing then "" else self.prefix.take last_index+1 - S3_File.Value self.bucket new_prefix self.credentials + parent_path = self.path.parent + parent_path.if_not_nothing <| + S3_File.Value parent_path self.credentials ## GROUP Standard.Base.Metadata Checks if `self` is a descendant of `other`. is_descendant_of : S3_File -> Boolean - is_descendant_of self other = other.is_directory && self.uri.starts_with other.uri + is_descendant_of self other = self.path.is_descendant_of other.path ## GROUP Standard.Base.Metadata Returns the path of this file. @@ -287,10 +260,7 @@ type S3_File ## GROUP Standard.Base.Metadata Returns the name of this file. name : Text - name self = if self.prefix == "" then self.bucket else - trimmed = if self.prefix.ends_with "/" then self.prefix.drop (Last 1) else self.prefix - last_index = trimmed.last_index_of "/" - if last_index == Nothing then trimmed else trimmed.drop (First last_index+1) + name self = self.path.name ## GROUP Standard.Base.Metadata Returns the extension of the file. @@ -309,7 +279,7 @@ type S3_File last_modified_time : Date_Time ! File_Error last_modified_time self = if self.is_directory then Error.throw (S3_Error.Error "`last_modified_time` can only be called on files." self.uri) else - instant = translate_file_errors self <| S3.raw_head self.bucket self.prefix self.credentials . lastModified + instant = translate_file_errors self <| S3.raw_head self.path.bucket self.path.key self.credentials . lastModified if instant.is_nothing then Error.throw (S3_Error.Error "Missing information for: lastModified" self.uri) else instant.at_zone Time_Zone.system @@ -335,10 +305,10 @@ type S3_File check_directory action = if self.is_directory.not then Error.throw (S3_Error.Error "Only folders can have children." self.uri) else action check_directory <| check_recursion <| check_name_filter <| - if self.bucket == "" then translate_file_errors self <| S3.list_buckets self.credentials . map bucket-> S3_File.Value bucket "" self.credentials else - pair = translate_file_errors self <| S3.read_bucket self.bucket self.prefix self.credentials - sub_folders = pair.first . map key-> S3_File.Value self.bucket key self.credentials - files = pair.second . map key-> S3_File.Value self.bucket key self.credentials + if self.path.bucket == "" then translate_file_errors self <| S3.list_buckets self.credentials . map bucket-> S3_File.Value bucket "" self.credentials else + pair = translate_file_errors self <| S3.read_bucket self.path.bucket self.path.key self.credentials + sub_folders = pair.first . map key-> S3_File.Value self.path.bucket key self.credentials + files = pair.second . map key-> S3_File.Value self.path.bucket key self.credentials sub_folders + files ## PRIVATE diff --git a/test/AWS_Tests/src/S3_Spec.enso b/test/AWS_Tests/src/S3_Spec.enso index f8f23b15a102..48d402ce5da6 100644 --- a/test/AWS_Tests/src/S3_Spec.enso +++ b/test/AWS_Tests/src/S3_Spec.enso @@ -8,6 +8,7 @@ import Standard.Base.Runtime.Ref.Ref from Standard.AWS import S3, S3_File, AWS_Credential from Standard.AWS.Errors import AWS_SDK_Error, More_Records_Available, S3_Error, S3_Bucket_Not_Found, S3_Key_Not_Found +import Standard.AWS.Internal.S3_Path # Needed for custom formats test from Standard.Table import Table, Excel, Worksheet @@ -29,20 +30,20 @@ add_specs suite_builder = api_pending = if Environment.get "AWS_ACCESS_KEY_ID" . is_nothing then "No Access Key found." else Nothing cloud_setup = Cloud_Tests_Setup.prepare - suite_builder.group "S3.parse_uri" group_builder-> + suite_builder.group "S3_Path.parse" group_builder-> group_builder.specify "parse bucket only uris" <| - S3.parse_uri "s3://" . should_equal (Pair.new "" "") - S3.parse_uri "s3://asda" . should_equal (Pair.new "asda" "") - S3.parse_uri "s3://banana/" . should_equal (Pair.new "banana" "") + S3_Path.parse "s3://" . should_equal (S3_Path.Value "" "") + S3_Path.parse "s3://asda" . should_equal (S3_Path.Value "asda" "") + S3_Path.parse "s3://banana/" . should_equal (S3_Path.Value "banana" "") group_builder.specify "parse full paths uris" <| - S3.parse_uri "s3://banana/apple" . should_equal (Pair.new "banana" "apple") - S3.parse_uri "s3://banana/apple/orange" . should_equal (Pair.new "banana" "apple/orange") + S3_Path.parse "s3://banana/apple" . should_equal (S3_Path.Value "banana" "apple") + S3_Path.parse "s3://banana/apple/orange" . should_equal (S3_Path.Value "banana" "apple/orange") group_builder.specify "reject invalid urils" <| - S3.parse_uri "asda" . should_equal Nothing - S3.parse_uri "s3:///" . should_equal Nothing - S3.parse_uri "s3:///apple/orange" . should_equal Nothing + S3_Path.parse "asda" . should_fail_with Illegal_Argument + S3_Path.parse "s3:///" . should_fail_with Illegal_Argument + S3_Path.parse "s3:///apple/orange" . should_fail_with Illegal_Argument suite_builder.group "S3.list_buckets" pending=api_pending group_builder-> group_builder.specify "should be able to list buckets" <| @@ -170,15 +171,18 @@ add_specs suite_builder = bytes2.should_equal bytes group_builder.specify "should support path traversal" <| - root.prefix . should_equal "" - (root / "foo") . prefix . should_equal "foo" + root.name . should_equal "/" (root / "foo") . path . should_equal "s3://"+bucket_name+"/foo" + (root / "foo") . name . should_equal "foo" (root / "foo/" / "bar") . prefix . should_equal "foo/bar" (root / "foo/" / "bar") . path . should_equal "s3://"+bucket_name+"/foo/bar" - # If `foo` lacks a `/` it is treated as a directory and cannot have children. + # If `foo` lacks a `/` it is _not_ treated as a directory and cannot have children. (root / "foo" / "bar") . should_fail_with S3_Error + (root / "foo/../././bar") . should_equal (root / "bar") + (root / "..") . should_fail_with S3_Error + hello_txt.parent.parent.parent . should_equal root hello_txt.parent . should_equal (root / "examples/" / "folder 2/") hello_txt.parent.is_directory . should_be_true @@ -198,12 +202,18 @@ add_specs suite_builder = group_builder.specify "should be able to read file metadata" <| root.exists . should_be_true - hello_txt.exists . should_be_true root.is_directory . should_be_true - hello_txt.is_directory . should_be_false root.is_regular_file . should_be_false + + hello_txt.exists . should_be_true + hello_txt.is_directory . should_be_false hello_txt.is_regular_file . should_be_true + parent = hello_txt.parent + parent.exists . should_be_true + parent.is_directory . should_be_true + parent.is_regular_file . should_be_false + root.extension . should_fail_with S3_Error hello_txt.extension . should_equal ".txt" From cb3946b53769164d1576173afdcef13b60ee4900 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 19 Feb 2024 11:57:05 +0100 Subject: [PATCH 02/20] better abstraction for delimiter --- .../AWS/0.0.0-dev/src/Internal/S3_Path.enso | 37 ++++++++------ .../lib/Standard/AWS/0.0.0-dev/src/S3/S3.enso | 4 +- .../AWS/0.0.0-dev/src/S3/S3_File.enso | 48 +++++++++---------- 3 files changed, 49 insertions(+), 40 deletions(-) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso index 6e6c98472774..2102f3d2d8f2 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso @@ -10,36 +10,38 @@ type S3_Path parse (uri : Text) -> S3_Path ! Illegal_Argument = if uri.starts_with S3.uri_prefix . not then Error.throw (Illegal_Argument.Error "An S3 path must start with `"+S3.uri_prefix+"`.") else without_prefix = uri.drop S3.uri_prefix.length - first_slash_index = without_prefix.index_of "/" + first_slash_index = without_prefix.index_of S3_Path.delimiter if first_slash_index == 0 then Error.throw (Illegal_Argument.Error "Invalid S3 path: empty bucket name.") else if first_slash_index.is_nothing then S3_Path.Value without_prefix "" else Pair.new (without_prefix.take first_slash_index) (without_prefix.drop first_slash_index+1) - to_text self -> Text = S3.uri_prefix + self.bucket + "/" + self.key + to_text self -> Text = S3.uri_prefix + self.bucket + S3_Path.delimiter + self.key + + to_display_text self -> Text = self.to_text.to_display_text is_root self -> Boolean = self.key.is_empty - is_directory self -> Boolean = self.is_root || (self.key.ends_with "/") + is_directory self -> Boolean = self.is_root || (self.key.ends_with S3_Path.delimiter) resolve self (subpath : Text) -> S3_Path = - trimmed = if subpath.starts_with "/" then subpath.drop (First 1) else subpath - parts = trimmed.split "/" + trimmed = if subpath.starts_with S3_Path.delimiter then subpath.drop (First 1) else subpath + parts = trimmed.split S3_Path.delimiter extend current part = if current == "" then part else - if current.ends_with "/" then current + part else - current + "/" + part + if current.ends_with S3_Path.delimiter then current + part else + current + S3_Path.delimiter + part loop current remaining = if remaining.length == 0 then current else new_current = case remaining.first of ".." -> - last_index = current.last_index_of "/" + last_index = current.last_index_of S3_Path.delimiter if last_index == Nothing then Error.throw (S3_Error.Error "Cannot move above root folder.") else current.take last_index "." -> current x -> extend current x @Tail_Call loop new_current (remaining.drop 1) - initial = if subpath.starts_with "/" then "" else self.key + initial = if subpath.starts_with S3_Path.delimiter then "" else self.key new_path = loop initial parts S3_Path.Value self.bucket new_path @@ -47,8 +49,8 @@ type S3_Path if self.key == "" then Nothing else last_index = case self.is_directory of # For directories we drop the trailing slash and find the one before it: - True -> (self.key.drop (Last 1)).last_index_of "/" - False -> self.key.last_index_of "/" + True -> (self.key.drop (Last 1)).last_index_of S3_Path.delimiter + False -> self.key.last_index_of S3_Path.delimiter ## We include the trailing slash in the path, as the parent is always a directory and in S3 directories are distinguished only by the presence of this slash. @@ -56,10 +58,17 @@ type S3_Path S3_Path.Value self.bucket new_key file_name self -> Text = - if self.is_root then "/" else - trimmed = if self.key.ends_with "/" then self.key.drop (Last 1) else self.key - last_index = trimmed.last_index_of "/" + if self.is_root then S3_Path.delimiter else + trimmed = if self.key.ends_with S3_Path.delimiter then self.key.drop (Last 1) else self.key + last_index = trimmed.last_index_of S3_Path.delimiter if last_index == Nothing then trimmed else trimmed.drop (First last_index+1) is_descendant_of self (other : S3_Path) -> Boolean = other.is_directory && self.to_text.starts_with other.to_text + + ## PRIVATE + The delimiter used for path emulation. + + S3 does not treat `/` in any special way, it allows using any character as a + path delimiter. In the future we could allow customizing it. + delimiter = "/" diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3.enso index 3b3ed8c02164..c17416c4dced 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3.enso @@ -123,7 +123,7 @@ raw_head bucket key credentials = - credentials: AWS credentials. If not provided, the default credentials will be used. get_object : Text -> Text -> AWS_Credential | Nothing -> Response_Body ! S3_Error -get_object bucket key credentials:(AWS_Credential | Nothing)=Nothing = handle_s3_errors bucket=bucket key=key <| +get_object bucket key credentials:(AWS_Credential | Nothing)=Nothing delimiter="/" = handle_s3_errors bucket=bucket key=key <| request = GetObjectRequest.builder.bucket bucket . key key . build client = make_client credentials @@ -133,7 +133,7 @@ get_object bucket key credentials:(AWS_Credential | Nothing)=Nothing = handle_s3 s3_uri = URI.parse (uri_prefix + bucket + "/") / key content_type = inner_response.contentType name = filename_from_content_disposition inner_response.contentDisposition . if_nothing <| - key.split "/" . last + key.split delimiter . last metadata = File_Format_Metadata.Value path=key name=name content_type=content_type input_stream = Input_Stream.new response (handle_io_errors s3_uri) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso index 480ff60f000a..b94721e0707e 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso @@ -36,25 +36,25 @@ type S3_File S3_File.Value parts.first parts.second credentials ## PRIVATE - Value path:S3_Path credentials:(AWS_Credential | Nothing) + Value s3_path:S3_Path credentials:(AWS_Credential | Nothing) ## GROUP Standard.Base.Metadata Gets the URI of this file uri : Text - uri self = self.path.to_text + uri self = self.s3_path.to_text ## GROUP Standard.Base.Metadata Checks if the folder or file exists exists : Boolean - exists self = if self.path.bucket == "" then True else - if self.path.is_root then translate_file_errors self <| S3.head self.path.bucket "" self.credentials . is_error . not else - pair = translate_file_errors self <| S3.read_bucket self.path.bucket self.path.key self.credentials max_count=1 - pair.second.contains self.path.key + exists self = if self.s3_path.bucket == "" then True else + if self.s3_path.is_root then translate_file_errors self <| S3.head self.s3_path.bucket "" self.credentials . is_error . not else + pair = translate_file_errors self <| S3.read_bucket self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter max_count=1 + pair.second.contains self.s3_path.key ## GROUP Standard.Base.Metadata Checks if this is a folder. is_directory : Boolean - is_directory self = self.path.is_directory + is_directory self = self.s3_path.is_directory ## GROUP Standard.Base.Metadata Checks if this is a regular file. @@ -66,7 +66,7 @@ type S3_File size : Integer size self = if self.is_directory then Error.throw (S3_Error.Error "size can only be called on files." self.uri) else - content_length = translate_file_errors self <| S3.raw_head self.path.bucket self.path.key self.credentials . contentLength + content_length = translate_file_errors self <| S3.raw_head self.s3_path.bucket self.s3_path.key self.credentials . contentLength if content_length.is_nothing then Error.throw (S3_Error.Error "ContentLength header is missing." self.uri) else content_length ## PRIVATE @@ -98,7 +98,7 @@ type S3_File result = tmp_file.with_output_stream [File_Access.Write] action # Only proceed if the write succeeded result.if_not_error <| - (translate_file_errors self <| S3.upload_file tmp_file self.path.bucket self.path.key self.credentials) . if_not_error <| + (translate_file_errors self <| S3.upload_file tmp_file self.s3_path.bucket self.s3_path.key self.credentials) . if_not_error <| result @@ -118,7 +118,7 @@ type S3_File with_input_stream : Vector File_Access -> (Input_Stream -> Any ! File_Error) -> Any ! S3_Error | Illegal_Argument with_input_stream self (open_options : Vector) action = if self.is_directory then Error.throw (Illegal_Argument.Error "S3 folders cannot be opened as a stream." self.uri) else if (open_options != [File_Access.Read]) then Error.throw (S3_Error.Error "S3 files can only be opened for reading." self.uri) else - response_body = translate_file_errors self <| S3.get_object self.path.bucket self.path.key self.credentials + response_body = translate_file_errors self <| S3.get_object self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter response_body.with_stream action ## ALIAS load, open @@ -142,10 +142,10 @@ type S3_File _ = on_problems case format of Auto_Detect -> if self.is_directory then format.read self on_problems else - response = translate_file_errors self <| S3.get_object self.path.bucket self.path.key self.credentials + response = translate_file_errors self <| S3.get_object self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter response.decode Auto_Detect _ -> - metadata = File_Format_Metadata.Value path=self.path name=self.name + metadata = File_Format_Metadata.Value path=self.s3_path name=self.name self.with_input_stream [File_Access.Read] (stream-> format.read_stream stream metadata) ## ALIAS load bytes, open bytes @@ -181,7 +181,7 @@ type S3_File delete_if_exists : Nothing delete_if_exists self = if self.is_directory then Error.throw (S3_Error.Error "Deleting S3 folders is currently not implemented." self.uri) else if Context.Output.is_enabled.not then Error.throw (Forbidden_Operation.Error "Deleting an S3_File is forbidden as the Output context is disabled.") else - translate_file_errors self <| S3.delete_object self.path.bucket self.path.key self.credentials . if_not_error Nothing + translate_file_errors self <| S3.delete_object self.s3_path.bucket self.s3_path.key self.credentials . if_not_error Nothing ## Copies the file to the specified destination. @@ -197,7 +197,7 @@ type S3_File # Special shortcut for more efficient handling of S3 file copying (no need to move the data to our machine) s3_destination : S3_File -> if replace_existing.not && s3_destination.exists then Error.throw (File_Error.Already_Exists destination) else - translate_file_errors self <| S3.copy_object self.path.bucket self.path.key s3_destination.bucket s3_destination.prefix self.credentials . if_not_error <| s3_destination + translate_file_errors self <| S3.copy_object self.s3_path.bucket self.s3_path.key s3_destination.bucket s3_destination.prefix self.credentials . if_not_error <| s3_destination _ -> generic_copy self destination.file replace_existing ## Moves the file to the specified destination. @@ -227,7 +227,7 @@ type S3_File - subpath: The path to join to the path of `self`. / : Text -> S3_File / self subpath = if self.is_directory.not then Error.throw (S3_Error.Error "Only folders can have children." self.uri) else - S3_File.Value (self.path.resolve subpath) self.credentials + S3_File.Value (self.s3_path.resolve subpath) self.credentials ## GROUP Standard.Base.Calculations Join two or more path segments together, normalizing the `..` and `.` subpaths. @@ -243,24 +243,24 @@ type S3_File Resolves the parent of this file. parent : S3_File | Nothing parent self = - parent_path = self.path.parent + parent_path = self.s3_path.parent parent_path.if_not_nothing <| S3_File.Value parent_path self.credentials ## GROUP Standard.Base.Metadata Checks if `self` is a descendant of `other`. is_descendant_of : S3_File -> Boolean - is_descendant_of self other = self.path.is_descendant_of other.path + is_descendant_of self other = self.s3_path.is_descendant_of other.path ## GROUP Standard.Base.Metadata Returns the path of this file. path : Text - path self = self.uri + path self = self.s3_path.to_text ## GROUP Standard.Base.Metadata Returns the name of this file. name : Text - name self = self.path.name + name self = self.s3_path.name ## GROUP Standard.Base.Metadata Returns the extension of the file. @@ -279,7 +279,7 @@ type S3_File last_modified_time : Date_Time ! File_Error last_modified_time self = if self.is_directory then Error.throw (S3_Error.Error "`last_modified_time` can only be called on files." self.uri) else - instant = translate_file_errors self <| S3.raw_head self.path.bucket self.path.key self.credentials . lastModified + instant = translate_file_errors self <| S3.raw_head self.s3_path.bucket self.s3_path.key self.credentials . lastModified if instant.is_nothing then Error.throw (S3_Error.Error "Missing information for: lastModified" self.uri) else instant.at_zone Time_Zone.system @@ -305,10 +305,10 @@ type S3_File check_directory action = if self.is_directory.not then Error.throw (S3_Error.Error "Only folders can have children." self.uri) else action check_directory <| check_recursion <| check_name_filter <| - if self.path.bucket == "" then translate_file_errors self <| S3.list_buckets self.credentials . map bucket-> S3_File.Value bucket "" self.credentials else - pair = translate_file_errors self <| S3.read_bucket self.path.bucket self.path.key self.credentials - sub_folders = pair.first . map key-> S3_File.Value self.path.bucket key self.credentials - files = pair.second . map key-> S3_File.Value self.path.bucket key self.credentials + if self.s3_path.bucket == "" then translate_file_errors self <| S3.list_buckets self.credentials . map bucket-> S3_File.Value bucket "" self.credentials else + pair = translate_file_errors self <| S3.read_bucket self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter + sub_folders = pair.first . map key-> S3_File.Value self.s3_path.bucket key self.credentials + files = pair.second . map key-> S3_File.Value self.s3_path.bucket key self.credentials sub_folders + files ## PRIVATE From 1681eaec1d52f4464e8ef5f46ea45ce3c4e1fd94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 19 Feb 2024 12:04:33 +0100 Subject: [PATCH 03/20] docs --- .../AWS/0.0.0-dev/src/Internal/S3_Path.enso | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso index 2102f3d2d8f2..7f8978b52c57 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso @@ -4,9 +4,16 @@ import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import project.Errors.S3_Error import project.S3.S3 +## PRIVATE + An abstraction for S3 paths, holding the bucket name and the key. + + This helper implements resolving paths in a folder-like structure as + described at https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html type S3_Path + ## PRIVATE Value (bucket : Text) (key : Text) + ## PRIVATE parse (uri : Text) -> S3_Path ! Illegal_Argument = if uri.starts_with S3.uri_prefix . not then Error.throw (Illegal_Argument.Error "An S3 path must start with `"+S3.uri_prefix+"`.") else without_prefix = uri.drop S3.uri_prefix.length @@ -15,14 +22,27 @@ type S3_Path if first_slash_index.is_nothing then S3_Path.Value without_prefix "" else Pair.new (without_prefix.take first_slash_index) (without_prefix.drop first_slash_index+1) + ## PRIVATE to_text self -> Text = S3.uri_prefix + self.bucket + S3_Path.delimiter + self.key + ## PRIVATE to_display_text self -> Text = self.to_text.to_display_text + ## PRIVATE + Checks if this path represents the root directory of a bucket. is_root self -> Boolean = self.key.is_empty + ## PRIVATE + Checks if this path represents a directory. is_directory self -> Boolean = self.is_root || (self.key.ends_with S3_Path.delimiter) + ## PRIVATE + Resolves a subdirectory entry. + This only makes logical sense for paths for which `path.is_directory == True`, + but it is not forbidden to call it on other types of paths. + + Note that the newly returned path will be a directory or not depending on + if `subpath` ends with the delimiter. resolve self (subpath : Text) -> S3_Path = trimmed = if subpath.starts_with S3_Path.delimiter then subpath.drop (First 1) else subpath parts = trimmed.split S3_Path.delimiter @@ -45,6 +65,9 @@ type S3_Path new_path = loop initial parts S3_Path.Value self.bucket new_path + ## PRIVATE + Returns the parent directory. + It will return `Nothing` for the root directory. parent self -> S3_Path | Nothing = if self.key == "" then Nothing else last_index = case self.is_directory of @@ -57,12 +80,19 @@ type S3_Path new_key = if last_index == Nothing then "" else self.key.take last_index+1 S3_Path.Value self.bucket new_key + ## PRIVATE + Returns the 'file name' of this path. + If the path is a regular file, it will be its name. + If the path is a directory, it will be the name of that directory, + without the trailing delimiter. file_name self -> Text = if self.is_root then S3_Path.delimiter else trimmed = if self.key.ends_with S3_Path.delimiter then self.key.drop (Last 1) else self.key last_index = trimmed.last_index_of S3_Path.delimiter if last_index == Nothing then trimmed else trimmed.drop (First last_index+1) + ## PRIVATE + Checks if the given other path is inside of this path. is_descendant_of self (other : S3_Path) -> Boolean = other.is_directory && self.to_text.starts_with other.to_text From 52b1ba2362a9ea473a1276ab9c65fe0369b4e077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 19 Feb 2024 12:07:59 +0100 Subject: [PATCH 04/20] note about dirs --- .../Standard/AWS/0.0.0-dev/src/S3/S3_File.enso | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso index b94721e0707e..6330d02b8437 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso @@ -225,6 +225,23 @@ type S3_File Arguments: - subpath: The path to join to the path of `self`. + + ! S3 Directory Handling + + Note that regular S3 buckets do not have a 'native' notion of + directories, instead they are emulated using prefixes and a delimiter + (in Enso, the delimiter is set to "/"). + + The trailing slash determines if the given path is treated as a + directory or as a regular file. + + Because of that, `root / "foo" / "bar"` will fail, as the entry denoted + by "foo" will be treated as a regular file which cannot have children. + Instead, you'd have to ensure that "foo" remains a directory by + remembering about the slash: `root / "foo/" / "bar"`. This can be + written shorter using just one operation: `root / "foo/bar"`. + + See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html / : Text -> S3_File / self subpath = if self.is_directory.not then Error.throw (S3_Error.Error "Only folders can have children." self.uri) else S3_File.Value (self.s3_path.resolve subpath) self.credentials From b3c5bcbb13fe25999e0496250093d955d46e61d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 19 Feb 2024 12:34:45 +0100 Subject: [PATCH 05/20] fix --- distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso index 6330d02b8437..1667783b214d 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso @@ -31,9 +31,8 @@ type S3_File - credentials: The credentials to use when accessing the file. If not specified, the default credentials are used. new : Text -> AWS_Credential | Nothing -> S3_File ! Illegal_Argument - new uri=S3.uri_prefix credentials=Nothing = - parts = S3_Path.parse uri - S3_File.Value parts.first parts.second credentials + new (uri : Text = S3.uri_prefix) (credentials : AWS_Credential | Nothing = Nothing) = + S3_File.Value (S3_Path.parse uri) credentials ## PRIVATE Value s3_path:S3_Path credentials:(AWS_Credential | Nothing) From 3e6e49908358e24a9af684785a06988660b0e3c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 19 Feb 2024 12:38:27 +0100 Subject: [PATCH 06/20] fmt --- distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso index 1667783b214d..dd16fc459464 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso @@ -35,7 +35,7 @@ type S3_File S3_File.Value (S3_Path.parse uri) credentials ## PRIVATE - Value s3_path:S3_Path credentials:(AWS_Credential | Nothing) + Value (s3_path : S3_Path) (credentials : AWS_Credential | Nothing) ## GROUP Standard.Base.Metadata Gets the URI of this file From 7bea1b2a0a886e74b87018b9803be5c3a3a78b4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 19 Feb 2024 12:38:58 +0100 Subject: [PATCH 07/20] fix --- .../lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso | 2 +- test/AWS_Tests/src/S3_Spec.enso | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso index 7f8978b52c57..ce4973272f9d 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso @@ -20,7 +20,7 @@ type S3_Path first_slash_index = without_prefix.index_of S3_Path.delimiter if first_slash_index == 0 then Error.throw (Illegal_Argument.Error "Invalid S3 path: empty bucket name.") else if first_slash_index.is_nothing then S3_Path.Value without_prefix "" else - Pair.new (without_prefix.take first_slash_index) (without_prefix.drop first_slash_index+1) + S3_Path.Value (without_prefix.take first_slash_index) (without_prefix.drop first_slash_index+1) ## PRIVATE to_text self -> Text = S3.uri_prefix + self.bucket + S3_Path.delimiter + self.key diff --git a/test/AWS_Tests/src/S3_Spec.enso b/test/AWS_Tests/src/S3_Spec.enso index 48d402ce5da6..8b21d50a8459 100644 --- a/test/AWS_Tests/src/S3_Spec.enso +++ b/test/AWS_Tests/src/S3_Spec.enso @@ -8,7 +8,7 @@ import Standard.Base.Runtime.Ref.Ref from Standard.AWS import S3, S3_File, AWS_Credential from Standard.AWS.Errors import AWS_SDK_Error, More_Records_Available, S3_Error, S3_Bucket_Not_Found, S3_Key_Not_Found -import Standard.AWS.Internal.S3_Path +import Standard.AWS.Internal.S3_Path.S3_Path # Needed for custom formats test from Standard.Table import Table, Excel, Worksheet From 5d61ec3bd0a1c67edb020cef1283f1dd1f89a885 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 20 Feb 2024 12:48:44 +0100 Subject: [PATCH 08/20] avoid folder being nested in test --- test/AWS_Tests/src/S3_Spec.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/AWS_Tests/src/S3_Spec.enso b/test/AWS_Tests/src/S3_Spec.enso index 8b21d50a8459..b7db832465b6 100644 --- a/test/AWS_Tests/src/S3_Spec.enso +++ b/test/AWS_Tests/src/S3_Spec.enso @@ -226,7 +226,7 @@ add_specs suite_builder = hello_txt.creation_time . should_fail_with S3_Error writable_root = S3_File.new "s3://"+writable_bucket_name+"/" - my_writable_dir = writable_root / "test-run-"+(Date_Time.now.format "yyyy-MM-dd_HHmmss.fV")+"/" + my_writable_dir = writable_root / "test-run-"+(Date_Time.now.format "yyyy-MM-dd_HHmmss.fV" . replace "/" "|")+"/" delete_on_panic file ~action = handler caught_panic = file.delete From 7fc8c58a6e6cb0ae9d0fcabef36d54491342227d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 20 Feb 2024 12:49:29 +0100 Subject: [PATCH 09/20] fix ref --- .../AWS/0.0.0-dev/src/Internal/S3_File_Write_Strategy.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_File_Write_Strategy.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_File_Write_Strategy.enso index f7171fe38759..dc320b09f227 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_File_Write_Strategy.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_File_Write_Strategy.enso @@ -52,7 +52,7 @@ s3_backup file action = recover_errors <| ## PRIVATE copy_from_local (source : File) (destination : S3_File) (replace_existing : Boolean) = if replace_existing.not && destination.exists then Error.throw (File_Error.Already_Exists destination) else - S3.upload_file source destination.bucket destination.prefix destination.credentials . if_not_error <| + S3.upload_file source destination.s3_path.bucket destination.s3_path.key destination.credentials . if_not_error <| destination ## PRIVATE From 91dd7e79bed27d2c6af483576d6d734389830443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 20 Feb 2024 13:49:46 +0100 Subject: [PATCH 10/20] update tests --- test/AWS_Tests/src/S3_Spec.enso | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/test/AWS_Tests/src/S3_Spec.enso b/test/AWS_Tests/src/S3_Spec.enso index b7db832465b6..8951ac553d83 100644 --- a/test/AWS_Tests/src/S3_Spec.enso +++ b/test/AWS_Tests/src/S3_Spec.enso @@ -174,7 +174,6 @@ add_specs suite_builder = root.name . should_equal "/" (root / "foo") . path . should_equal "s3://"+bucket_name+"/foo" (root / "foo") . name . should_equal "foo" - (root / "foo/" / "bar") . prefix . should_equal "foo/bar" (root / "foo/" / "bar") . path . should_equal "s3://"+bucket_name+"/foo/bar" # If `foo` lacks a `/` it is _not_ treated as a directory and cannot have children. @@ -192,8 +191,7 @@ add_specs suite_builder = group_builder.specify "returns a valid name for files and 'directories'" <| hello_txt.name . should_equal "hello.txt" - # For the root, the name is the bucket name: - root.name . should_equal "enso-data-samples" + root.name . should_equal "/" (root / "foo/" / "bar") . name . should_equal "bar" From b368d10548c13213d3d0cd467d0762e5eb3a4296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 20 Feb 2024 13:50:00 +0100 Subject: [PATCH 11/20] fixes --- .../AWS/0.0.0-dev/src/Internal/S3_Path.enso | 5 +++-- .../AWS/0.0.0-dev/src/S3/S3_File.enso | 19 ++++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso index ce4973272f9d..ec1941750034 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso @@ -23,7 +23,8 @@ type S3_Path S3_Path.Value (without_prefix.take first_slash_index) (without_prefix.drop first_slash_index+1) ## PRIVATE - to_text self -> Text = S3.uri_prefix + self.bucket + S3_Path.delimiter + self.key + to_text self -> Text = + S3.uri_prefix + self.bucket + S3_Path.delimiter + self.key ## PRIVATE to_display_text self -> Text = self.to_text.to_display_text @@ -56,7 +57,7 @@ type S3_Path new_current = case remaining.first of ".." -> last_index = current.last_index_of S3_Path.delimiter - if last_index == Nothing then Error.throw (S3_Error.Error "Cannot move above root folder.") else current.take last_index + if last_index == Nothing then Error.throw (S3_Error.Error "Cannot move above root folder." self.to_text) else current.take last_index "." -> current x -> extend current x @Tail_Call loop new_current (remaining.drop 1) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso index 03aaee233760..9e74f50ef4a4 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso @@ -39,7 +39,7 @@ type S3_File ## GROUP Standard.Base.Metadata Gets the URI of this file uri : Text - uri self = self.s3_path.to_text + uri self -> Text = self.s3_path.to_text ## GROUP Standard.Base.Metadata Checks if the folder or file exists @@ -143,7 +143,7 @@ type S3_File response = translate_file_errors self <| S3.get_object self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter response.decode Auto_Detect _ -> - metadata = File_Format_Metadata.Value path=self.s3_path name=self.name + metadata = File_Format_Metadata.Value path=self.path name=self.name self.with_input_stream [File_Access.Read] (stream-> format.read_stream stream metadata) ## ALIAS load bytes, open bytes @@ -195,7 +195,8 @@ type S3_File # Special shortcut for more efficient handling of S3 file copying (no need to move the data to our machine) s3_destination : S3_File -> if replace_existing.not && s3_destination.exists then Error.throw (File_Error.Already_Exists destination) else - translate_file_errors self <| S3.copy_object self.s3_path.bucket self.s3_path.key s3_destination.bucket s3_destination.prefix self.credentials . if_not_error <| s3_destination + destination_path = s3_destination.s3_path + translate_file_errors self <| S3.copy_object self.s3_path.bucket self.s3_path.key destination_path.bucket destination_path.key self.credentials . if_not_error <| s3_destination _ -> generic_copy self destination.file replace_existing ## Moves the file to the specified destination. @@ -275,7 +276,7 @@ type S3_File ## GROUP Standard.Base.Metadata Returns the name of this file. name : Text - name self = self.s3_path.name + name self = self.s3_path.file_name ## GROUP Standard.Base.Metadata Returns the extension of the file. @@ -322,8 +323,11 @@ type S3_File check_directory <| check_recursion <| check_name_filter <| if self.s3_path.bucket == "" then translate_file_errors self <| S3.list_buckets self.credentials . map bucket-> S3_File.Value bucket "" self.credentials else pair = translate_file_errors self <| S3.read_bucket self.s3_path.bucket self.s3_path.key self.credentials delimiter=S3_Path.delimiter - sub_folders = pair.first . map key-> S3_File.Value self.s3_path.bucket key self.credentials - files = pair.second . map key-> S3_File.Value self.s3_path.bucket key self.credentials + bucket = self.s3_path.bucket + sub_folders = pair.first . map key-> + S3_File.Value (S3_Path.Value bucket key) self.credentials + files = pair.second . map key-> + S3_File.Value (S3_Path.Value bucket key) self.credentials sub_folders + files ## PRIVATE @@ -340,5 +344,6 @@ Writable_File.from (that : S3_File) = A helper that translates lower level S3 errors to file-system errors. translate_file_errors related_file result = result.catch S3_Key_Not_Found error-> - s3_file = S3_File.Value error.bucket error.key related_file.credentials + s3_path = S3_Path.Value error.bucket error.key + s3_file = S3_File.Value s3_path related_file.credentials Error.throw (File_Error.Not_Found s3_file) From aebad17e72e8ba9c1ea1891d456c9b50812ed0e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 20 Feb 2024 14:44:00 +0100 Subject: [PATCH 12/20] better tests for path handling, update doc --- .../AWS/0.0.0-dev/src/S3/S3_File.enso | 15 ++-- test/AWS_Tests/src/S3_Spec.enso | 71 +++++++++++++------ 2 files changed, 57 insertions(+), 29 deletions(-) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso index 9e74f50ef4a4..dd6aa171a98b 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso @@ -234,15 +234,14 @@ type S3_File The trailing slash determines if the given path is treated as a directory or as a regular file. - Because of that, `root / "foo" / "bar"` will fail, as the entry denoted - by "foo" will be treated as a regular file which cannot have children. - Instead, you'd have to ensure that "foo" remains a directory by - remembering about the slash: `root / "foo/" / "bar"`. This can be - written shorter using just one operation: `root / "foo/bar"`. + However, for ease-of-use, if a path without a trailing slash is used + with the `/` operator it will be accepted and the sub paths will be + resolved, even though such a path would not be treated as a directory + by any other operations. See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-folders.html / : Text -> S3_File - / self subpath = if self.is_directory.not then Error.throw (S3_Error.Error "Only folders can have children." self.uri) else + / self subpath = S3_File.Value (self.s3_path.resolve subpath) self.credentials ## GROUP Standard.Base.Calculations @@ -250,6 +249,8 @@ type S3_File Arguments: - subpaths: The path segment or segments to join to the path of `self`. + + See `/` for more information about S3 directory handling. join : (Vector | Text) -> S3_File join self (subpaths : Vector | Text) = vec = Vector.unify_vector_or_element subpaths @@ -318,7 +319,7 @@ type S3_File list self name_filter:Text="" recursive:Boolean=False = check_name_filter action = if name_filter != "" then Unimplemented.throw "S3 listing with name filter is not currently implemented." else action check_recursion action = if recursive then Unimplemented.throw "S3 listing with recursion is not currently implemented." else action - check_directory action = if self.is_directory.not then Error.throw (S3_Error.Error "Only folders can have children." self.uri) else action + check_directory action = if self.is_directory.not then Error.throw (S3_Error.Error "Only folders can be listed." self.uri) else action check_directory <| check_recursion <| check_name_filter <| if self.s3_path.bucket == "" then translate_file_errors self <| S3.list_buckets self.credentials . map bucket-> S3_File.Value bucket "" self.credentials else diff --git a/test/AWS_Tests/src/S3_Spec.enso b/test/AWS_Tests/src/S3_Spec.enso index 8951ac553d83..8169f5098351 100644 --- a/test/AWS_Tests/src/S3_Spec.enso +++ b/test/AWS_Tests/src/S3_Spec.enso @@ -30,7 +30,10 @@ add_specs suite_builder = api_pending = if Environment.get "AWS_ACCESS_KEY_ID" . is_nothing then "No Access Key found." else Nothing cloud_setup = Cloud_Tests_Setup.prepare - suite_builder.group "S3_Path.parse" group_builder-> + root = S3_File.new "s3://"+bucket_name+"/" + hello_txt = S3_File.new "s3://"+bucket_name+"/examples/folder 2/hello.txt" + + suite_builder.group "S3 Path handling" group_builder-> group_builder.specify "parse bucket only uris" <| S3_Path.parse "s3://" . should_equal (S3_Path.Value "" "") S3_Path.parse "s3://asda" . should_equal (S3_Path.Value "asda" "") @@ -45,6 +48,46 @@ add_specs suite_builder = S3_Path.parse "s3:///" . should_fail_with Illegal_Argument S3_Path.parse "s3:///apple/orange" . should_fail_with Illegal_Argument + group_builder.specify "should support path traversal using `/` and `parent`" <| + root.name . should_equal "/" + (root / "foo") . path . should_equal "s3://"+bucket_name+"/foo" + (root / "foo") . name . should_equal "foo" + + (root / "foo").is_directory . should_be_true + (root / "foo/" / "bar") . path . should_equal "s3://"+bucket_name+"/foo/bar" + # It also works if the trailing slash is missing: + (root / "foo" / "bar") . path . should_equal "s3://"+bucket_name+"/foo/bar" + # Even though it is not normally treated as directory: + (root / "foo").is_directory . should_be_false + + (root / "foo/../././bar") . should_equal (root / "bar") + err = (root / "..") + err.should_fail_with S3_Error + err.catch.to_display_text . should_contain "Cannot move above root" + + hello_txt.parent.parent.parent . should_equal root + hello_txt / "../../.." . should_equal root + hello_txt.parent . should_equal (root / "examples/" / "folder 2/") + hello_txt.parent.is_directory . should_be_true + + group_builder.specify "should support path traversal using `join`" <| + root.join ["foo", "bar"] . path . should_equal "s3://"+bucket_name+"/foo/bar" + root.join ["a/b/", "c/d", "e"] . path . should_equal "s3://"+bucket_name+"/a/b/c/d/e" + hello_txt.join ["../..", ".."] . should_equal root + hello_txt.join ["..", "a", ".."] . should_equal hello_txt + + group_builder.specify "should allow to check if one path is inside of another" <| + hello_txt.is_descendant_of root . should_be_true + root.is_descendant_of hello_txt . should_be_false + + hello_txt.is_descendant_of hello_txt . should_be_false + hello_txt.is_descendant_of hello_txt.parent . should_be_false + + (root / "foo").is_descendant_of root . should_be_true + (root / "foo/bar").is_descendant_of (root / "foo/") . should_be_true + (root / "foo/bar").is_descendant_of (root / "fo/") . should_be_false + (root / "foo/bar").is_descendant_of (root / "fo") . should_be_false + suite_builder.group "S3.list_buckets" pending=api_pending group_builder-> group_builder.specify "should be able to list buckets" <| bucket_list = S3.list_buckets . should_succeed @@ -143,8 +186,6 @@ add_specs suite_builder = S3.get_object bucket_name object_name credentials=(AWS_Credential.Profile "NoSuchProfile") . should_fail_with AWS_SDK_Error suite_builder.group "S3_File reading" pending=api_pending group_builder-> - root = S3_File.new "s3://"+bucket_name+"/" - hello_txt = S3_File.new "s3://"+bucket_name+"/examples/folder 2/hello.txt" group_builder.specify "should be able to list the bucket's root directory" <| r = root.list r.should_succeed @@ -170,25 +211,6 @@ add_specs suite_builder = bytes2.should_equal bytes - group_builder.specify "should support path traversal" <| - root.name . should_equal "/" - (root / "foo") . path . should_equal "s3://"+bucket_name+"/foo" - (root / "foo") . name . should_equal "foo" - (root / "foo/" / "bar") . path . should_equal "s3://"+bucket_name+"/foo/bar" - - # If `foo` lacks a `/` it is _not_ treated as a directory and cannot have children. - (root / "foo" / "bar") . should_fail_with S3_Error - - (root / "foo/../././bar") . should_equal (root / "bar") - (root / "..") . should_fail_with S3_Error - - hello_txt.parent.parent.parent . should_equal root - hello_txt.parent . should_equal (root / "examples/" / "folder 2/") - hello_txt.parent.is_directory . should_be_true - - hello_txt.is_descendant_of root . should_be_true - root.is_descendant_of hello_txt . should_be_false - group_builder.specify "returns a valid name for files and 'directories'" <| hello_txt.name . should_equal "hello.txt" root.name . should_equal "/" @@ -247,6 +269,11 @@ add_specs suite_builder = my_writable_dir.list . should_contain new_file + # But the file cannot be listed: + err = new_file.list + err.should_fail_with S3_Error + err.catch.to_display_text . should_contain "Only folders can be listed" + new_file.delete . should_succeed with_retries <| From 9e789fe55975bb109e9c9b283afafa4524b5c3e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 20 Feb 2024 17:13:18 +0100 Subject: [PATCH 13/20] some fixes --- distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso | 2 +- test/AWS_Tests/src/S3_Spec.enso | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso index dd6aa171a98b..65ba7eacda46 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso @@ -267,7 +267,7 @@ type S3_File ## GROUP Standard.Base.Metadata Checks if `self` is a descendant of `other`. is_descendant_of : S3_File -> Boolean - is_descendant_of self other = self.s3_path.is_descendant_of other.path + is_descendant_of self other = self.s3_path.is_descendant_of other.s3_path ## GROUP Standard.Base.Metadata Returns the path of this file. diff --git a/test/AWS_Tests/src/S3_Spec.enso b/test/AWS_Tests/src/S3_Spec.enso index 8169f5098351..8f9a7eca2d84 100644 --- a/test/AWS_Tests/src/S3_Spec.enso +++ b/test/AWS_Tests/src/S3_Spec.enso @@ -53,7 +53,7 @@ add_specs suite_builder = (root / "foo") . path . should_equal "s3://"+bucket_name+"/foo" (root / "foo") . name . should_equal "foo" - (root / "foo").is_directory . should_be_true + (root / "foo/").is_directory . should_be_true (root / "foo/" / "bar") . path . should_equal "s3://"+bucket_name+"/foo/bar" # It also works if the trailing slash is missing: (root / "foo" / "bar") . path . should_equal "s3://"+bucket_name+"/foo/bar" @@ -81,7 +81,7 @@ add_specs suite_builder = root.is_descendant_of hello_txt . should_be_false hello_txt.is_descendant_of hello_txt . should_be_false - hello_txt.is_descendant_of hello_txt.parent . should_be_false + hello_txt.is_descendant_of hello_txt.parent . should_be_true (root / "foo").is_descendant_of root . should_be_true (root / "foo/bar").is_descendant_of (root / "foo/") . should_be_true From 45f241840127cc23f6f04ba3e537444d65c1c8c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 20 Feb 2024 17:18:16 +0100 Subject: [PATCH 14/20] test --- test/AWS_Tests/src/S3_Spec.enso | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/AWS_Tests/src/S3_Spec.enso b/test/AWS_Tests/src/S3_Spec.enso index 8f9a7eca2d84..21aa0f285fb0 100644 --- a/test/AWS_Tests/src/S3_Spec.enso +++ b/test/AWS_Tests/src/S3_Spec.enso @@ -70,6 +70,9 @@ add_specs suite_builder = hello_txt.parent . should_equal (root / "examples/" / "folder 2/") hello_txt.parent.is_directory . should_be_true + # Leading slash will mean starting back from bucket root: + hello_txt / "/foo/bar" . should_equal (root / "foo/bar") + group_builder.specify "should support path traversal using `join`" <| root.join ["foo", "bar"] . path . should_equal "s3://"+bucket_name+"/foo/bar" root.join ["a/b/", "c/d", "e"] . path . should_equal "s3://"+bucket_name+"/a/b/c/d/e" From c81c816b7be546f3e5c954dcb978b1a7e7a74cc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 20 Feb 2024 17:35:50 +0100 Subject: [PATCH 15/20] more test cases --- test/AWS_Tests/src/S3_Spec.enso | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/AWS_Tests/src/S3_Spec.enso b/test/AWS_Tests/src/S3_Spec.enso index 21aa0f285fb0..81afa747ca3e 100644 --- a/test/AWS_Tests/src/S3_Spec.enso +++ b/test/AWS_Tests/src/S3_Spec.enso @@ -83,14 +83,20 @@ add_specs suite_builder = hello_txt.is_descendant_of root . should_be_true root.is_descendant_of hello_txt . should_be_false + # A file or directory is not a descendant of itself: hello_txt.is_descendant_of hello_txt . should_be_false - hello_txt.is_descendant_of hello_txt.parent . should_be_true + root.is_descendant_of root . should_be_false + hello_txt.parent.is_descendant_of hello_txt.parent . should_be_false + hello_txt.is_descendant_of hello_txt.parent . should_be_true (root / "foo").is_descendant_of root . should_be_true (root / "foo/bar").is_descendant_of (root / "foo/") . should_be_true (root / "foo/bar").is_descendant_of (root / "fo/") . should_be_false (root / "foo/bar").is_descendant_of (root / "fo") . should_be_false + # Correct path but different bucket will yield false: + (S3_File.new "s3://bucketA/foo/bar").is_descendant_of (S3_File.new "s3://bucketB/foo/") . should_be_false + suite_builder.group "S3.list_buckets" pending=api_pending group_builder-> group_builder.specify "should be able to list buckets" <| bucket_list = S3.list_buckets . should_succeed From 86b14a588945c7780feff7b8f4784aa6b6de6d6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 20 Feb 2024 20:22:30 +0100 Subject: [PATCH 16/20] refactor path handling --- .../AWS/0.0.0-dev/src/Internal/S3_Path.enso | 111 +++++++++++++----- .../AWS/0.0.0-dev/src/S3/S3_File.enso | 4 +- test/AWS_Tests/src/S3_Spec.enso | 9 +- 3 files changed, 92 insertions(+), 32 deletions(-) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso index ec1941750034..de32035ef8ab 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso @@ -20,7 +20,10 @@ type S3_Path first_slash_index = without_prefix.index_of S3_Path.delimiter if first_slash_index == 0 then Error.throw (Illegal_Argument.Error "Invalid S3 path: empty bucket name.") else if first_slash_index.is_nothing then S3_Path.Value without_prefix "" else - S3_Path.Value (without_prefix.take first_slash_index) (without_prefix.drop first_slash_index+1) + bucket = (without_prefix.take first_slash_index) + key = (without_prefix.drop first_slash_index+1) + normalized = Decomposed_S3_Path.parse key . normalize . key + S3_Path.Value bucket normalized ## PRIVATE to_text self -> Text = @@ -45,41 +48,24 @@ type S3_Path Note that the newly returned path will be a directory or not depending on if `subpath` ends with the delimiter. resolve self (subpath : Text) -> S3_Path = - trimmed = if subpath.starts_with S3_Path.delimiter then subpath.drop (First 1) else subpath - parts = trimmed.split S3_Path.delimiter + joined = Decomposed_S3_Path.join [Decomposed_S3_Path.parse self.key, Decomposed_S3_Path.parse subpath] + new_key = joined.normalize.key + S3_Path.Value self.bucket new_key - extend current part = - if current == "" then part else - if current.ends_with S3_Path.delimiter then current + part else - current + S3_Path.delimiter + part - - loop current remaining = if remaining.length == 0 then current else - new_current = case remaining.first of - ".." -> - last_index = current.last_index_of S3_Path.delimiter - if last_index == Nothing then Error.throw (S3_Error.Error "Cannot move above root folder." self.to_text) else current.take last_index - "." -> current - x -> extend current x - @Tail_Call loop new_current (remaining.drop 1) + ## PRIVATE + join self (subpaths : Vector) -> S3_Path = + joined = Decomposed_S3_Path.join (([self.key]+subpaths).map Decomposed_S3_Path.parse) + new_key = joined.normalize.key + S3_Path.Value self.bucket new_key - initial = if subpath.starts_with S3_Path.delimiter then "" else self.key - new_path = loop initial parts - S3_Path.Value self.bucket new_path ## PRIVATE Returns the parent directory. It will return `Nothing` for the root directory. parent self -> S3_Path | Nothing = - if self.key == "" then Nothing else - last_index = case self.is_directory of - # For directories we drop the trailing slash and find the one before it: - True -> (self.key.drop (Last 1)).last_index_of S3_Path.delimiter - False -> self.key.last_index_of S3_Path.delimiter - ## We include the trailing slash in the path, as the parent is - always a directory and in S3 directories are distinguished only - by the presence of this slash. - new_key = if last_index == Nothing then "" else self.key.take last_index+1 - S3_Path.Value self.bucket new_key + if self.is_root then Nothing else + decomposed = Decomposed_S3_Path.parse self.key + S3_Path.Value self.bucket decomposed.parent.key ## PRIVATE Returns the 'file name' of this path. @@ -94,8 +80,9 @@ type S3_Path ## PRIVATE Checks if the given other path is inside of this path. + A directory is not a descendant of itself. is_descendant_of self (other : S3_Path) -> Boolean = - other.is_directory && self.to_text.starts_with other.to_text + (self != other) && other.is_directory && self.to_text.starts_with other.to_text ## PRIVATE The delimiter used for path emulation. @@ -103,3 +90,67 @@ type S3_Path S3 does not treat `/` in any special way, it allows using any character as a path delimiter. In the future we could allow customizing it. delimiter = "/" + +## PRIVATE +type Path_Entry + ## PRIVATE + Directory (name : Text) + + ## PRIVATE + File (name : Text) + +## PRIVATE +type Decomposed_S3_Path + ## PRIVATE + Value (parts : Vector Path_Entry) (go_to_root : Boolean) + + ## PRIVATE + Reconstructs the original path. + key self -> Text = + add_directory_suffix = self.parts.not_empty && self.parts.last.is_a Path_Entry.Directory + suffix = if add_directory_suffix then S3_Path.delimiter else "" + self.parts.map .name . join separator=S3_Path.delimiter suffix=suffix + + ## PRIVATE + parse (key : Text) -> Decomposed_S3_Path = + has_directory_suffix = key.ends_with S3_Path.delimiter + has_root_prefix = key.starts_with S3_Path.delimiter + parts = key.split S3_Path.delimiter . filter (p-> p.is_empty.not) + entries = case has_directory_suffix of + True -> parts.map Path_Entry.Directory + False -> + if parts.is_empty then [] else + (parts.drop (Last 1) . map Path_Entry.Directory) + [Path_Entry.File parts.last] + Decomposed_S3_Path.Value entries has_root_prefix + + ## PRIVATE + join (paths : Vector Decomposed_S3_Path) -> Decomposed_S3_Path = + if paths.is_empty then Error.throw (Illegal_Argument.Error "Cannot join an empty list of paths.") else + last_root_ix = paths.last_index_of (.go_to_root) + without_ignored_paths = if last_root_ix.is_nothing then paths else + paths.drop last_root_ix + flattened = without_ignored_paths.flat_map .parts + # Any `File` parts from the middle are now transformed to `Directory`: + aligned = flattened.map_with_index ix-> part-> case part of + Path_Entry.Directory _ -> part + Path_Entry.File name -> + is_last = ix == flattened.length-1 + if is_last then part else Path_Entry.Directory name + Decomposed_S3_Path.Value aligned (last_root_ix.is_nothing.not) + + ## PRIVATE + normalize self -> Decomposed_S3_Path ! S3_Error = + new_stack = self.parts.fold List.Nil stack-> part-> case part.name of + "." -> stack + ".." -> case stack of + List.Nil -> Error.throw (S3_Error.Error "Cannot move above root folder.") + List.Cons _ tail -> tail + _ -> List.Cons part stack + new_parts = new_stack.to_vector.reverse + Decomposed_S3_Path.Value new_parts self.go_to_root + + ## PRIVATE + parent self -> Decomposed_S3_Path | Nothing = + if self.parts.is_empty then Nothing else + new_parts = self.parts.drop (Last 1) + Decomposed_S3_Path.Value new_parts self.go_to_root diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso index 65ba7eacda46..d896afc97dcc 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/S3/S3_File.enso @@ -27,6 +27,7 @@ type S3_File Arguments: - uri: The URI of the file. The URI must be in the form `s3://bucket/path/to/file`. + If the path contains `.` or `..` segments, they will be normalized. - credentials: The credentials to use when accessing the file. If not specified, the default credentials are used. new : Text -> AWS_Credential | Nothing -> S3_File ! Illegal_Argument @@ -254,7 +255,8 @@ type S3_File join : (Vector | Text) -> S3_File join self (subpaths : Vector | Text) = vec = Vector.unify_vector_or_element subpaths - vec.fold self (/) + vec_as_texts = vec.map subpath-> (subpath : Text) + S3_File.Value (self.s3_path.join vec_as_texts) self.credentials ## GROUP Standard.Base.Metadata Resolves the parent of this file. diff --git a/test/AWS_Tests/src/S3_Spec.enso b/test/AWS_Tests/src/S3_Spec.enso index 81afa747ca3e..b5ebca9a8e14 100644 --- a/test/AWS_Tests/src/S3_Spec.enso +++ b/test/AWS_Tests/src/S3_Spec.enso @@ -72,12 +72,17 @@ add_specs suite_builder = # Leading slash will mean starting back from bucket root: hello_txt / "/foo/bar" . should_equal (root / "foo/bar") + hello_txt / "/" . should_equal root + hello_txt / "////" . should_equal root group_builder.specify "should support path traversal using `join`" <| root.join ["foo", "bar"] . path . should_equal "s3://"+bucket_name+"/foo/bar" root.join ["a/b/", "c/d", "e"] . path . should_equal "s3://"+bucket_name+"/a/b/c/d/e" hello_txt.join ["../..", ".."] . should_equal root - hello_txt.join ["..", "a", ".."] . should_equal hello_txt + hello_txt.join ["..", "a", ".."] . should_equal hello_txt.parent + + group_builder.specify "will normalize paths upon parsing" <| + S3_File.new "s3://bucketA/a/b/c/../././d/../e" . path . should_equal "s3://bucketA/a/b/e" group_builder.specify "should allow to check if one path is inside of another" <| hello_txt.is_descendant_of root . should_be_true @@ -88,6 +93,8 @@ add_specs suite_builder = root.is_descendant_of root . should_be_false hello_txt.parent.is_descendant_of hello_txt.parent . should_be_false + IO.println hello_txt + IO.println hello_txt.parent hello_txt.is_descendant_of hello_txt.parent . should_be_true (root / "foo").is_descendant_of root . should_be_true (root / "foo/bar").is_descendant_of (root / "foo/") . should_be_true From ecc489d4c0ead3196b57189ec1611be7565a9bf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 21 Feb 2024 13:32:25 +0100 Subject: [PATCH 17/20] fix a condition that was always false because I forgot how `is_a` works --- .../lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso index de32035ef8ab..fa3d7946287f 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso @@ -99,6 +99,11 @@ type Path_Entry ## PRIVATE File (name : Text) + ## PRIVATE + is_directory self -> Boolean = case self of + Path_Entry.Directory _ -> True + Path_Entry.File _ -> False + ## PRIVATE type Decomposed_S3_Path ## PRIVATE @@ -107,7 +112,7 @@ type Decomposed_S3_Path ## PRIVATE Reconstructs the original path. key self -> Text = - add_directory_suffix = self.parts.not_empty && self.parts.last.is_a Path_Entry.Directory + add_directory_suffix = self.parts.not_empty && self.parts.last.is_directory suffix = if add_directory_suffix then S3_Path.delimiter else "" self.parts.map .name . join separator=S3_Path.delimiter suffix=suffix From 5d105f4005d93c1a2322b93e7ff4615af32a9616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 21 Feb 2024 13:33:00 +0100 Subject: [PATCH 18/20] missing arg --- .../lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso index fa3d7946287f..9c2a5afb15b7 100644 --- a/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso +++ b/distribution/lib/Standard/AWS/0.0.0-dev/src/Internal/S3_Path.enso @@ -148,7 +148,7 @@ type Decomposed_S3_Path new_stack = self.parts.fold List.Nil stack-> part-> case part.name of "." -> stack ".." -> case stack of - List.Nil -> Error.throw (S3_Error.Error "Cannot move above root folder.") + List.Nil -> Error.throw (S3_Error.Error "Cannot move above root folder." Nothing) List.Cons _ tail -> tail _ -> List.Cons part stack new_parts = new_stack.to_vector.reverse From 1cb1e8cabafd086a2ea8dd4007b1509e94600ba8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 21 Feb 2024 13:33:50 +0100 Subject: [PATCH 19/20] parentheses are important --- test/AWS_Tests/src/S3_Spec.enso | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/AWS_Tests/src/S3_Spec.enso b/test/AWS_Tests/src/S3_Spec.enso index b5ebca9a8e14..456ca95d4d93 100644 --- a/test/AWS_Tests/src/S3_Spec.enso +++ b/test/AWS_Tests/src/S3_Spec.enso @@ -66,14 +66,14 @@ add_specs suite_builder = err.catch.to_display_text . should_contain "Cannot move above root" hello_txt.parent.parent.parent . should_equal root - hello_txt / "../../.." . should_equal root + (hello_txt / "../../..") . should_equal root hello_txt.parent . should_equal (root / "examples/" / "folder 2/") hello_txt.parent.is_directory . should_be_true # Leading slash will mean starting back from bucket root: - hello_txt / "/foo/bar" . should_equal (root / "foo/bar") - hello_txt / "/" . should_equal root - hello_txt / "////" . should_equal root + (hello_txt / "/foo/bar") . should_equal (root / "foo/bar") + (hello_txt / "/") . should_equal root + (hello_txt / "////") . should_equal root group_builder.specify "should support path traversal using `join`" <| root.join ["foo", "bar"] . path . should_equal "s3://"+bucket_name+"/foo/bar" From 3afab4c65af641929ce1d8bf1b2aaa1dd4b48a10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 21 Feb 2024 13:34:02 +0100 Subject: [PATCH 20/20] remove debug prints --- test/AWS_Tests/src/S3_Spec.enso | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/AWS_Tests/src/S3_Spec.enso b/test/AWS_Tests/src/S3_Spec.enso index 456ca95d4d93..d0f63db85320 100644 --- a/test/AWS_Tests/src/S3_Spec.enso +++ b/test/AWS_Tests/src/S3_Spec.enso @@ -93,8 +93,6 @@ add_specs suite_builder = root.is_descendant_of root . should_be_false hello_txt.parent.is_descendant_of hello_txt.parent . should_be_false - IO.println hello_txt - IO.println hello_txt.parent hello_txt.is_descendant_of hello_txt.parent . should_be_true (root / "foo").is_descendant_of root . should_be_true (root / "foo/bar").is_descendant_of (root / "foo/") . should_be_true