diff --git a/CHANGELOG.md b/CHANGELOG.md index e3b07a8b..39c2f89d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fixed bug where `S3Bucket.list_objects` was truncating prefix paths ending with slashes. - Fixed bug where incorrect credentials model was selected when `MinIOCredentials` was used with `S3Bucket` - [#254](https://github.com/PrefectHQ/prefect-aws/pull/254) ### 0.3.1 diff --git a/README.md b/README.md index bf0b5359..88c6caa7 100644 --- a/README.md +++ b/README.md @@ -34,4 +34,4 @@ pip install prefect-aws ### Contributing -Thanks for thinking about chipping in! Check out this [step-by-step guide](https://prefecthq.github.io/prefect-gcp/#installation) on how to get started. \ No newline at end of file +Thanks for thinking about chipping in! Check out this [step-by-step guide](https://prefecthq.github.io/prefect-aws/#installation) on how to get started. \ No newline at end of file diff --git a/prefect_aws/s3.py b/prefect_aws/s3.py index 4eb504d5..eebd96df 100644 --- a/prefect_aws/s3.py +++ b/prefect_aws/s3.py @@ -545,7 +545,10 @@ def _join_bucket_folder(self, bucket_path: str = "") -> str: f"Bucket path {bucket_path!r} is already prefixed with " f"bucket folder {self.bucket_folder!r}; is this intentional?" 
) - return (Path(self.bucket_folder) / bucket_path).as_posix() + + return (Path(self.bucket_folder) / bucket_path).as_posix() + ( + "" if not bucket_path.endswith("/") else "/" + ) @sync_compatible async def list_objects( diff --git a/tests/test_s3.py b/tests/test_s3.py index f7eb3a48..342f46e8 100644 --- a/tests/test_s3.py +++ b/tests/test_s3.py @@ -63,6 +63,25 @@ def object_in_folder(bucket, tmp_path): return bucket.upload_fileobj(f, "folder/object") +@pytest.fixture +def objects_in_folder(bucket, tmp_path): + objects = [] + for filename in [ + "folderobject/foo.txt", + "folderobject/bar.txt", + "folder/object/foo.txt", + "folder/object/bar.txt", + ]: + file = tmp_path / filename + file.parent.mkdir(parents=True, exist_ok=True) + file.write_text("TEST OBJECTS IN FOLDER") + with open(file, "rb") as f: + filename = Path(filename) + obj = bucket.upload_fileobj(f, (filename.parent / filename.stem).as_posix()) + objects.append(obj) + return objects + + @pytest.fixture def a_lot_of_objects(bucket, tmp_path): objects = [] @@ -241,6 +260,36 @@ async def test_flow(): assert [object["Key"] for object in objects] == ["folder/object"] +@pytest.mark.parametrize("client_parameters", aws_clients, indirect=True) +async def test_s3_list_objects_prefix_slashes( + object, client_parameters, objects_in_folder, aws_credentials +): + @flow + async def test_flow(slash=False): + return await s3_list_objects( + bucket="bucket", + prefix="folder" + ("/" if slash else ""), + aws_credentials=aws_credentials, + aws_client_parameters=client_parameters, + ) + + objects = await test_flow(slash=True) + assert len(objects) == 2 + assert [object["Key"] for object in objects] == [ + "folder/object/bar", + "folder/object/foo", + ] + + objects = await test_flow(slash=False) + assert len(objects) == 4 + assert [object["Key"] for object in objects] == [ + "folder/object/bar", + "folder/object/foo", + "folderobject/bar", + "folderobject/foo", + ] + + @pytest.mark.parametrize("client_parameters", 
aws_clients, indirect=True) async def test_s3_list_objects_filter( object, client_parameters, object_in_folder, aws_credentials @@ -585,6 +634,13 @@ def s3_bucket_with_objects(self, s3_bucket_with_object, object_in_folder): ) return _s3_bucket_with_objects + @pytest.fixture + def s3_bucket_with_similar_objects(self, s3_bucket_with_objects, objects_in_folder): + _s3_bucket_with_multiple_objects = ( + s3_bucket_with_objects # objects in folder will be added + ) + return _s3_bucket_with_multiple_objects + def test_credentials_are_correct_type(self, credentials): s3_bucket = S3Bucket(bucket_name="bucket", credentials=credentials) assert isinstance(s3_bucket.credentials, type(credentials)) @@ -605,6 +661,27 @@ def test_list_objects(self, s3_bucket_with_objects, client_parameters): assert len(objects) == 2 assert [object["Key"] for object in objects] == ["folder/object", "object"] + @pytest.mark.parametrize("client_parameters", aws_clients[-1:], indirect=True) + def test_list_objects_with_params( + self, s3_bucket_with_similar_objects, client_parameters + ): + objects = s3_bucket_with_similar_objects.list_objects("folder/object/") + assert len(objects) == 2 + assert [object["Key"] for object in objects] == [ + "folder/object/bar", + "folder/object/foo", + ] + + objects = s3_bucket_with_similar_objects.list_objects("folder") + assert len(objects) == 5 + assert [object["Key"] for object in objects] == [ + "folder/object", + "folder/object/bar", + "folder/object/foo", + "folderobject/bar", + "folderobject/foo", + ] + @pytest.mark.parametrize("to_path", [Path("to_path"), "to_path", None]) @pytest.mark.parametrize("client_parameters", aws_clients[-1:], indirect=True) def test_download_object_to_path(