From d344803e9c624af5ff577e8fab73cdcb7957cb7f Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 30 Jan 2024 00:10:48 -0800 Subject: [PATCH 1/8] Remove upper version bound from fsspec Signed-off-by: Kevin Su --- pyproject.toml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ce911e63ce..d1e9838a0c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.8,<3.13" dependencies = [ # Please maintain an alphabetical order in the following list - "adlfs>=2023.3.0,<=2023.9.2", + "adlfs>=2023.3.0", "click>=6.6,<9.0", "cloudpickle>=2.0.0", "cookiecutter>=1.7.3", @@ -22,8 +22,8 @@ dependencies = [ "docker>=4.0.0,<7.0.0", "docstring-parser>=0.9.0", "flyteidl>=1.10.0", - "fsspec>=2023.3.0,<=2023.9.2", - "gcsfs>=2023.3.0,<=2023.9.2", + "fsspec>=2023.3.0", + "gcsfs>=2023.3.0", "googleapis-common-protos>=1.57", "grpcio", "grpcio-status", @@ -44,7 +44,7 @@ dependencies = [ "requests>=2.18.4,<3.0.0", "rich", "rich_click", - "s3fs>=2023.3.0,<=2023.9.2", + "s3fs>=2023.3.0", "statsd>=3.0.0,<4.0.0", "typing_extensions", "urllib3>=1.22,<2.0.0", From 16f5471b58b1a4c8c745eaa41d8455258290b2a5 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 30 Jan 2024 00:37:23 -0800 Subject: [PATCH 2/8] fix tests Signed-off-by: Kevin Su --- flytekit/types/directory/types.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flytekit/types/directory/types.py b/flytekit/types/directory/types.py index 169e9d0a6d..7a33f958ef 100644 --- a/flytekit/types/directory/types.py +++ b/flytekit/types/directory/types.py @@ -16,6 +16,7 @@ from flytekit.core.context_manager import FlyteContext, FlyteContextManager from flytekit.core.type_engine import TypeEngine, TypeTransformer, get_batch_size +from flytekit.exceptions.user import FlyteAssertion from flytekit.models import types as _type_models from flytekit.models.core import types as _core_types from 
flytekit.models.literals import Blob, BlobMetadata, Literal, Scalar @@ -431,6 +432,8 @@ def to_literal( if should_upload: if remote_directory is None: remote_directory = ctx.file_access.get_random_remote_directory() + if not os.path.isdir(source_path): + raise FlyteAssertion("Expected a directory. {} is not a directory".format(source_path)) ctx.file_access.put_data(source_path, remote_directory, is_multipart=True, batch_size=batch_size) return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_directory))) From 021026ce0d7c40c72403006772a8583cf7eea6ea Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Tue, 30 Jan 2024 01:13:16 -0800 Subject: [PATCH 3/8] test Signed-off-by: Kevin Su --- tests/flytekit/unit/core/test_flyte_directory.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/flytekit/unit/core/test_flyte_directory.py b/tests/flytekit/unit/core/test_flyte_directory.py index 206b58ec5d..35e73558c2 100644 --- a/tests/flytekit/unit/core/test_flyte_directory.py +++ b/tests/flytekit/unit/core/test_flyte_directory.py @@ -80,6 +80,9 @@ def test_transformer_to_literal_local(): fh.write("Hello world\n") literal = tf.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, lt) + print(literal.scalar.blob.uri) + p = os.path.join(literal.scalar.blob.uri, "xyz") + assert os.path.exists(p) mock_remote_files = os.listdir(literal.scalar.blob.uri) assert mock_remote_files == ["xyz"] From 9311d7744a9973a1828740dde8603433eb55d0cf Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 5 Feb 2024 15:17:55 -0800 Subject: [PATCH 4/8] test Signed-off-by: Kevin Su --- flytekit/types/directory/types.py | 6 +----- tests/flytekit/unit/core/test_flyte_directory.py | 5 ++--- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/flytekit/types/directory/types.py b/flytekit/types/directory/types.py index 7a33f958ef..bcea5152a1 100644 --- a/flytekit/types/directory/types.py +++ b/flytekit/types/directory/types.py @@ -404,11 +404,7 @@ def to_literal( if not 
isinstance(python_val.remote_directory, (pathlib.Path, str)) and ( python_val.remote_directory is False or ctx.file_access.is_remote(source_path) - or ctx.execution_state.mode - in { - ctx.execution_state.Mode.LOCAL_WORKFLOW_EXECUTION, - ctx.execution_state.Mode.LOCAL_TASK_EXECUTION, - } + or ctx.execution_state.is_local_execution() ): should_upload = False diff --git a/tests/flytekit/unit/core/test_flyte_directory.py b/tests/flytekit/unit/core/test_flyte_directory.py index 35e73558c2..946201b8ee 100644 --- a/tests/flytekit/unit/core/test_flyte_directory.py +++ b/tests/flytekit/unit/core/test_flyte_directory.py @@ -54,9 +54,7 @@ def test_transformer_to_literal_local(): fs = FileAccessProvider(local_sandbox_dir=random_dir, raw_output_prefix=os.path.join(random_dir, "raw")) ctx = context_manager.FlyteContext.current_context() with context_manager.FlyteContextManager.with_context(ctx.with_file_access(fs)) as ctx: - # Use a separate directory that we know won't be the same as anything generated by flytekit itself, lest we - # accidentally try to cp -R /some/folder /some/folder/sub which causes exceptions obviously. 
- p = "/tmp/flyte/test_fd_transformer" + p = tempfile.mkdtemp(prefix="temp_example_") # Create an empty directory and call to literal on it if os.path.exists(p): @@ -78,6 +76,7 @@ def test_transformer_to_literal_local(): pathlib.Path(p).mkdir(parents=True) with open(os.path.join(p, "xyz"), "w") as fh: fh.write("Hello world\n") + print(p) literal = tf.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, lt) print(literal.scalar.blob.uri) From 6f4d57387c164a92655c77294c058815f60efc07 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 5 Feb 2024 15:55:05 -0800 Subject: [PATCH 5/8] fix tests Signed-off-by: Kevin Su --- flytekit/types/directory/types.py | 3 --- tests/flytekit/unit/core/test_data.py | 2 +- tests/flytekit/unit/core/test_flyte_directory.py | 4 ---- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/flytekit/types/directory/types.py b/flytekit/types/directory/types.py index bcea5152a1..17bab94954 100644 --- a/flytekit/types/directory/types.py +++ b/flytekit/types/directory/types.py @@ -16,7 +16,6 @@ from flytekit.core.context_manager import FlyteContext, FlyteContextManager from flytekit.core.type_engine import TypeEngine, TypeTransformer, get_batch_size -from flytekit.exceptions.user import FlyteAssertion from flytekit.models import types as _type_models from flytekit.models.core import types as _core_types from flytekit.models.literals import Blob, BlobMetadata, Literal, Scalar @@ -428,8 +427,6 @@ def to_literal( if should_upload: if remote_directory is None: remote_directory = ctx.file_access.get_random_remote_directory() - if not os.path.isdir(source_path): - raise FlyteAssertion("Expected a directory. 
{} is not a directory".format(source_path)) ctx.file_access.put_data(source_path, remote_directory, is_multipart=True, batch_size=batch_size) return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_directory))) diff --git a/tests/flytekit/unit/core/test_data.py b/tests/flytekit/unit/core/test_data.py index 792a466978..f3bc2d261b 100644 --- a/tests/flytekit/unit/core/test_data.py +++ b/tests/flytekit/unit/core/test_data.py @@ -83,7 +83,7 @@ def test_path_getting(mock_uuid_class, mock_gcs): @pytest.fixture def source_folder(): # Set up source directory for testing - parent_temp = tempfile.mkdtemp() + parent_temp = tempfile.mkdtemp(prefix="test-") src_dir = os.path.join(parent_temp, "source", "") nested_dir = os.path.join(src_dir, "nested") local.mkdir(nested_dir) diff --git a/tests/flytekit/unit/core/test_flyte_directory.py b/tests/flytekit/unit/core/test_flyte_directory.py index 946201b8ee..b11f316bcc 100644 --- a/tests/flytekit/unit/core/test_flyte_directory.py +++ b/tests/flytekit/unit/core/test_flyte_directory.py @@ -76,12 +76,8 @@ def test_transformer_to_literal_local(): pathlib.Path(p).mkdir(parents=True) with open(os.path.join(p, "xyz"), "w") as fh: fh.write("Hello world\n") - print(p) literal = tf.to_literal(ctx, FlyteDirectory(p), FlyteDirectory, lt) - print(literal.scalar.blob.uri) - p = os.path.join(literal.scalar.blob.uri, "xyz") - assert os.path.exists(p) mock_remote_files = os.listdir(literal.scalar.blob.uri) assert mock_remote_files == ["xyz"] From b536bd1bb8fe8c1b20873668465b77b389361a4c Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 5 Feb 2024 16:19:03 -0800 Subject: [PATCH 6/8] fix tests Signed-off-by: Kevin Su --- flytekit/types/directory/types.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/flytekit/types/directory/types.py b/flytekit/types/directory/types.py index 17bab94954..bcea5152a1 100644 --- a/flytekit/types/directory/types.py +++ b/flytekit/types/directory/types.py @@ -16,6 +16,7 @@ from flytekit.core.context_manager 
import FlyteContext, FlyteContextManager from flytekit.core.type_engine import TypeEngine, TypeTransformer, get_batch_size +from flytekit.exceptions.user import FlyteAssertion from flytekit.models import types as _type_models from flytekit.models.core import types as _core_types from flytekit.models.literals import Blob, BlobMetadata, Literal, Scalar @@ -427,6 +428,8 @@ def to_literal( if should_upload: if remote_directory is None: remote_directory = ctx.file_access.get_random_remote_directory() + if not os.path.isdir(source_path): + raise FlyteAssertion("Expected a directory. {} is not a directory".format(source_path)) ctx.file_access.put_data(source_path, remote_directory, is_multipart=True, batch_size=batch_size) return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_directory))) From 6af17b27d4c827646b0c2bec512a049b6d6b3812 Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 5 Feb 2024 16:31:15 -0800 Subject: [PATCH 7/8] fix tests Signed-off-by: Kevin Su --- tests/flytekit/unit/core/test_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/flytekit/unit/core/test_data.py b/tests/flytekit/unit/core/test_data.py index f3bc2d261b..99963621a7 100644 --- a/tests/flytekit/unit/core/test_data.py +++ b/tests/flytekit/unit/core/test_data.py @@ -83,7 +83,7 @@ def test_path_getting(mock_uuid_class, mock_gcs): @pytest.fixture def source_folder(): # Set up source directory for testing - parent_temp = tempfile.mkdtemp(prefix="test-") + parent_temp = tempfile.mkdtemp() src_dir = os.path.join(parent_temp, "source", "") nested_dir = os.path.join(src_dir, "nested") local.mkdir(nested_dir) @@ -356,7 +356,7 @@ def test_crawl_local_non_nt(source_folder): assert set(files) == expected # Test crawling a single file - fd = FlyteDirectory(path=os.path.join(source_folder, "original.txt")) + fd = FlyteDirectory(path=os.path.join(source_folder, "original1.txt")) res = fd.crawl() files = [os.path.join(x, y) for x, y in res] assert len(files) == 0 
From 87b276c6c01be8783a0a98a85d2decfef925661a Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Mon, 5 Feb 2024 20:53:41 -0800 Subject: [PATCH 8/8] use pathlib.Path().is_dir Signed-off-by: Kevin Su --- flytekit/types/directory/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flytekit/types/directory/types.py b/flytekit/types/directory/types.py index bcea5152a1..09607ea2e3 100644 --- a/flytekit/types/directory/types.py +++ b/flytekit/types/directory/types.py @@ -428,7 +428,7 @@ def to_literal( if should_upload: if remote_directory is None: remote_directory = ctx.file_access.get_random_remote_directory() - if not os.path.isdir(source_path): + if not pathlib.Path(source_path).is_dir(): raise FlyteAssertion("Expected a directory. {} is not a directory".format(source_path)) ctx.file_access.put_data(source_path, remote_directory, is_multipart=True, batch_size=batch_size) return Literal(scalar=Scalar(blob=Blob(metadata=meta, uri=remote_directory)))