From c65baba40bfc4537622ac26f1949a5425fb32225 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Mon, 17 Jun 2024 12:31:24 -0400 Subject: [PATCH 01/14] remove numpy dep, conditionally register numpy type transformer Signed-off-by: Niels Bantilan --- flytekit/types/numpy/__init__.py | 16 ++++++ flytekit/types/schema/types.py | 53 +++++++++++-------- .../types/structured/structured_dataset.py | 35 +++++++----- pyproject.toml | 1 - 4 files changed, 70 insertions(+), 35 deletions(-) diff --git a/flytekit/types/numpy/__init__.py b/flytekit/types/numpy/__init__.py index ec20e87970..a869662837 100644 --- a/flytekit/types/numpy/__init__.py +++ b/flytekit/types/numpy/__init__.py @@ -1 +1,17 @@ +from flytekit.loggers import logger + from .ndarray import NumpyArrayTransformer + +try: + # isolate the exception to the numpy import + import numpy + + _numpy_installed = True +except ImportError: + _numpy_installed = False + + +if _numpy_installed: + from .ndarray import NumpyArrayTransformer +else: + logger.info("We won't register NumpyArrayTransformer because numpy is not installed.") diff --git a/flytekit/types/schema/types.py b/flytekit/types/schema/types.py index 75a54292c5..d9f859a83e 100644 --- a/flytekit/types/schema/types.py +++ b/flytekit/types/schema/types.py @@ -9,7 +9,6 @@ from pathlib import Path from typing import Type -import numpy as _np from dataclasses_json import config from marshmallow import fields from mashumaro.mixins.json import DataClassJSONMixin @@ -19,10 +18,41 @@ from flytekit.loggers import logger from flytekit.models.literals import Literal, Scalar, Schema from flytekit.models.types import LiteralType, SchemaType +from flytekit.types.numpy import _numpy_installed T = typing.TypeVar("T") +SUPPORTED_SCHEMA_TYPES = { + int: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, + float: SchemaType.SchemaColumn.SchemaColumnType.FLOAT, + bool: SchemaType.SchemaColumn.SchemaColumnType.BOOLEAN, + datetime.datetime: SchemaType.SchemaColumn.SchemaColumnType.DATETIME, + datetime.timedelta: SchemaType.SchemaColumn.SchemaColumnType.DURATION, + str: SchemaType.SchemaColumn.SchemaColumnType.STRING, +} + +if _numpy_installed: + import numpy as np + + SUPPORTED_SCHEMA_TYPES.update( + { + np.int32: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, + np.int64: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, + np.uint32: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, + np.uint64: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, + np.float32: SchemaType.SchemaColumn.SchemaColumnType.FLOAT, + np.float64: SchemaType.SchemaColumn.SchemaColumnType.FLOAT, + np.bool_: SchemaType.SchemaColumn.SchemaColumnType.BOOLEAN, # type: ignore + np.datetime64: SchemaType.SchemaColumn.SchemaColumnType.DATETIME, + np.timedelta64: SchemaType.SchemaColumn.SchemaColumnType.DURATION, + np.bytes_: SchemaType.SchemaColumn.SchemaColumnType.STRING, + np.str_: SchemaType.SchemaColumn.SchemaColumnType.STRING, + np.object_: SchemaType.SchemaColumn.SchemaColumnType.STRING, + } + ) + + class SchemaFormat(Enum): """ Represents the schema storage format (at rest). @@ -319,26 +349,7 @@ def as_readonly(self) -> FlyteSchema: class FlyteSchemaTransformer(TypeTransformer[FlyteSchema]): - _SUPPORTED_TYPES: typing.Dict[Type, SchemaType.SchemaColumn.SchemaColumnType] = { - _np.int32: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, - _np.int64: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, - _np.uint32: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, - _np.uint64: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, - int: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, - _np.float32: SchemaType.SchemaColumn.SchemaColumnType.FLOAT, - _np.float64: SchemaType.SchemaColumn.SchemaColumnType.FLOAT, - float: SchemaType.SchemaColumn.SchemaColumnType.FLOAT, - _np.bool_: SchemaType.SchemaColumn.SchemaColumnType.BOOLEAN, # type: ignore - bool: SchemaType.SchemaColumn.SchemaColumnType.BOOLEAN, - _np.datetime64: SchemaType.SchemaColumn.SchemaColumnType.DATETIME, - datetime.datetime: SchemaType.SchemaColumn.SchemaColumnType.DATETIME, - _np.timedelta64: SchemaType.SchemaColumn.SchemaColumnType.DURATION, - datetime.timedelta: SchemaType.SchemaColumn.SchemaColumnType.DURATION, - _np.bytes_: SchemaType.SchemaColumn.SchemaColumnType.STRING, - _np.str_: SchemaType.SchemaColumn.SchemaColumnType.STRING, - _np.object_: SchemaType.SchemaColumn.SchemaColumnType.STRING, - str: SchemaType.SchemaColumn.SchemaColumnType.STRING, - } + _SUPPORTED_TYPES: typing.Dict[Type, SchemaType.SchemaColumn.SchemaColumnType] = SUPPORTED_SCHEMA_TYPES def __init__(self): super().__init__("FlyteSchema Transformer", FlyteSchema) diff --git a/flytekit/types/structured/structured_dataset.py b/flytekit/types/structured/structured_dataset.py index 18e9eeb09a..545bc41933 100644 --- a/flytekit/types/structured/structured_dataset.py +++ b/flytekit/types/structured/structured_dataset.py @@ -323,28 +323,37 @@ def convert_schema_type_to_structured_dataset_type( def get_supported_types(): - import numpy as _np + from flytekit.types.numpy import _numpy_installed _SUPPORTED_TYPES: typing.Dict[Type, LiteralType] = { # type: ignore - _np.int32: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), - _np.int64: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), - _np.uint32: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), - _np.uint64: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), int: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), - _np.float32: type_models.LiteralType(simple=type_models.SimpleType.FLOAT), - _np.float64: type_models.LiteralType(simple=type_models.SimpleType.FLOAT), float: type_models.LiteralType(simple=type_models.SimpleType.FLOAT), - _np.bool_: type_models.LiteralType(simple=type_models.SimpleType.BOOLEAN), # type: ignore bool: type_models.LiteralType(simple=type_models.SimpleType.BOOLEAN), - _np.datetime64: type_models.LiteralType(simple=type_models.SimpleType.DATETIME), _datetime.datetime: type_models.LiteralType(simple=type_models.SimpleType.DATETIME), - _np.timedelta64: type_models.LiteralType(simple=type_models.SimpleType.DURATION), _datetime.timedelta: type_models.LiteralType(simple=type_models.SimpleType.DURATION), - _np.bytes_: type_models.LiteralType(simple=type_models.SimpleType.STRING), - _np.str_: type_models.LiteralType(simple=type_models.SimpleType.STRING), - _np.object_: type_models.LiteralType(simple=type_models.SimpleType.STRING), str: type_models.LiteralType(simple=type_models.SimpleType.STRING), } + + if _numpy_installed: + import numpy as _np + + _SUPPORTED_TYPES.update( + { # type: ignore + _np.int32: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), + _np.int64: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), + _np.uint32: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), + _np.uint64: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), + _np.float32: type_models.LiteralType(simple=type_models.SimpleType.FLOAT), + _np.float64: type_models.LiteralType(simple=type_models.SimpleType.FLOAT), + _np.bool_: type_models.LiteralType(simple=type_models.SimpleType.BOOLEAN), # type: ignore + _np.datetime64: type_models.LiteralType(simple=type_models.SimpleType.DATETIME), + _np.timedelta64: type_models.LiteralType(simple=type_models.SimpleType.DURATION), + _np.bytes_: type_models.LiteralType(simple=type_models.SimpleType.STRING), + _np.str_: type_models.LiteralType(simple=type_models.SimpleType.STRING), + _np.object_: type_models.LiteralType(simple=type_models.SimpleType.STRING), + } + ) + return _SUPPORTED_TYPES diff --git a/pyproject.toml b/pyproject.toml index 74a41ae422..5aece34595 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,6 @@ dependencies = [ "marshmallow-enum", "marshmallow-jsonschema>=0.12.0", "mashumaro>=3.11", - "numpy<2", "protobuf!=4.25.0", "pyarrow", "pygments", From 4889adca40cc000c96f777b6b8d15a96dc7620f9 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Mon, 17 Jun 2024 12:42:17 -0400 Subject: [PATCH 02/14] pin numpy version <2 in dev requirements Signed-off-by: Niels Bantilan --- dev-requirements.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-requirements.in b/dev-requirements.in index ca37177df7..d45a633135 100644 --- a/dev-requirements.in +++ b/dev-requirements.in @@ -49,7 +49,7 @@ types-mock autoflake pillow -numpy +numpy<2 pandas scikit-learn types-requests From e49357f3846b08b3c2de19082d6c746bbbd4d501 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Mon, 17 Jun 2024 13:17:21 -0400 Subject: [PATCH 03/14] update numpy dep in build-with-pandas Signed-off-by: Niels Bantilan --- .github/workflows/pythonbuild.yml | 8 +++++++- dev-requirements.in | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index f56caffc0d..258eaa6f40 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -122,6 +122,12 @@ jobs: os: [ubuntu-latest] python-version: ${{fromJson(needs.detect-python-versions.outputs.python-versions)}} pandas: ["pandas<2.0.0", "pandas>=2.0.0"] + include: + - numpy: "numpy<2.0.0" + pandas: "pandas<2.0.0" + - numpy: "numpy>=2.0.0" + pandas: "pandas>=2.0.0" + steps: - uses: actions/checkout@v4 - name: 'Clear action cache' @@ -141,7 +147,7 @@ jobs: run: | pip install uv make setup-global-uv - uv pip install --system --force-reinstall "${{ matrix.pandas }}" + uv pip install --system --force-reinstall "${{ matrix.pandas }}" "${{ matrix.numpy }}" uv pip freeze - name: Test with coverage run: | diff --git a/dev-requirements.in b/dev-requirements.in index d45a633135..ca37177df7 100644 --- a/dev-requirements.in +++ b/dev-requirements.in @@ -49,7 +49,7 @@ types-mock autoflake pillow -numpy<2 +numpy pandas scikit-learn types-requests From 7f78203a30aa384b776589afcd8469ce88b4c6f7 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Mon, 17 Jun 2024 13:43:54 -0400 Subject: [PATCH 04/14] numpy matrix Signed-off-by: Niels Bantilan --- .github/workflows/pythonbuild.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 258eaa6f40..75395d0881 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -122,10 +122,11 @@ jobs: os: [ubuntu-latest] python-version: ${{fromJson(needs.detect-python-versions.outputs.python-versions)}} pandas: ["pandas<2.0.0", "pandas>=2.0.0"] - include: - - numpy: "numpy<2.0.0" - pandas: "pandas<2.0.0" + numpy: ["numpy<2.0.0", "numpy>=2.0.0"] + exclude: - numpy: "numpy>=2.0.0" + pandas: "pandas<2.0.0" + - numpy: "numpy<2.0.0" pandas: "pandas>=2.0.0" steps: From 5a7e5a80140fd3e67c83255536ecfb99f5d8b6aa Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Mon, 17 Jun 2024 16:44:58 -0400 Subject: [PATCH 05/14] pin numpy<2 in onnx plugins Signed-off-by: Niels Bantilan --- plugins/flytekit-onnx-pytorch/dev-requirements.in | 1 + plugins/flytekit-onnx-scikitlearn/dev-requirements.in | 1 + 2 files changed, 2 insertions(+) diff --git a/plugins/flytekit-onnx-pytorch/dev-requirements.in b/plugins/flytekit-onnx-pytorch/dev-requirements.in index ed8f130c85..808806fc24 100644 --- a/plugins/flytekit-onnx-pytorch/dev-requirements.in +++ b/plugins/flytekit-onnx-pytorch/dev-requirements.in @@ -1,3 +1,4 @@ onnxruntime pillow torchvision>=0.12.0 +numpy<2 \ No newline at end of file diff --git a/plugins/flytekit-onnx-scikitlearn/dev-requirements.in b/plugins/flytekit-onnx-scikitlearn/dev-requirements.in index 391b72eb3e..c797056113 100644 --- a/plugins/flytekit-onnx-scikitlearn/dev-requirements.in +++ b/plugins/flytekit-onnx-scikitlearn/dev-requirements.in @@ -1 +1,2 @@ onnxruntime +numpy<2 From c258fc3a335b572f3358be6f687f0dba33080e0f Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Mon, 17 Jun 2024 18:00:44 -0400 Subject: [PATCH 06/14] lint Signed-off-by: Niels Bantilan --- plugins/flytekit-onnx-pytorch/dev-requirements.in | 2 +- plugins/flytekit-onnx-scikitlearn/dev-requirements.in | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/flytekit-onnx-pytorch/dev-requirements.in b/plugins/flytekit-onnx-pytorch/dev-requirements.in index 808806fc24..638d377668 100644 --- a/plugins/flytekit-onnx-pytorch/dev-requirements.in +++ b/plugins/flytekit-onnx-pytorch/dev-requirements.in @@ -1,4 +1,4 @@ +numpy<2 onnxruntime pillow torchvision>=0.12.0 -numpy<2 \ No newline at end of file diff --git a/plugins/flytekit-onnx-scikitlearn/dev-requirements.in b/plugins/flytekit-onnx-scikitlearn/dev-requirements.in index c797056113..5d5bf2f3e8 100644 --- a/plugins/flytekit-onnx-scikitlearn/dev-requirements.in +++ b/plugins/flytekit-onnx-scikitlearn/dev-requirements.in @@ -1,2 +1,2 @@ -onnxruntime numpy<2 +onnxruntime From fe45d1f3cdd6102b030e61f1654367ac1755ba87 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Mon, 17 Jun 2024 18:11:50 -0400 Subject: [PATCH 07/14] pin numpy in ci Signed-off-by: Niels Bantilan --- .github/workflows/pythonbuild.yml | 4 ++++ plugins/flytekit-onnx-pytorch/dev-requirements.in | 1 - plugins/flytekit-onnx-scikitlearn/dev-requirements.in | 1 - 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 75395d0881..8a1b9cdd76 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -426,6 +426,10 @@ jobs: if [ -f dev-requirements.in ]; then uv pip install --system -r dev-requirements.in; fi # TODO: move to protobuf>=5. Github issue: https://github.com/flyteorg/flyte/issues/5448 uv pip install --system -U $GITHUB_WORKSPACE "protobuf<5" + # TODO: remove this when numpy v2 in onnx has been resolved + if [[ ${{ matrix.plugin-names }} == *"onnx"* ]]; then + uv pip install --system numpy<2.0.0 + fi uv pip freeze - name: Test with coverage run: | diff --git a/plugins/flytekit-onnx-pytorch/dev-requirements.in b/plugins/flytekit-onnx-pytorch/dev-requirements.in index 638d377668..ed8f130c85 100644 --- a/plugins/flytekit-onnx-pytorch/dev-requirements.in +++ b/plugins/flytekit-onnx-pytorch/dev-requirements.in @@ -1,4 +1,3 @@ -numpy<2 onnxruntime pillow torchvision>=0.12.0 diff --git a/plugins/flytekit-onnx-scikitlearn/dev-requirements.in b/plugins/flytekit-onnx-scikitlearn/dev-requirements.in index 5d5bf2f3e8..391b72eb3e 100644 --- a/plugins/flytekit-onnx-scikitlearn/dev-requirements.in +++ b/plugins/flytekit-onnx-scikitlearn/dev-requirements.in @@ -1,2 +1 @@ -numpy<2 onnxruntime From 52bb5c4f92f65eb2d5c1793e39f578e316944c6a Mon Sep 17 00:00:00 2001 From: Eduardo Apolinario <653394+eapolinario@users.noreply.github.com> Date: Mon, 17 Jun 2024 16:42:33 -0700 Subject: [PATCH 08/14] Update pythonbuild.yml --- .github/workflows/pythonbuild.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 8a1b9cdd76..51e6f3894f 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -428,7 +428,7 @@ jobs: uv pip install --system -U $GITHUB_WORKSPACE "protobuf<5" # TODO: remove this when numpy v2 in onnx has been resolved if [[ ${{ matrix.plugin-names }} == *"onnx"* ]]; then - uv pip install --system numpy<2.0.0 + uv pip install --system "numpy<2.0.0" fi uv pip freeze - name: Test with coverage From 4f3a22bf153260b56da4a2f69e45f4ff362d71d1 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Tue, 18 Jun 2024 08:34:13 -0400 Subject: [PATCH 09/14] remove numpy import from top-level of types.numpy.__init__ Signed-off-by: Niels Bantilan --- flytekit/types/numpy/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flytekit/types/numpy/__init__.py b/flytekit/types/numpy/__init__.py index a869662837..74495d193a 100644 --- a/flytekit/types/numpy/__init__.py +++ b/flytekit/types/numpy/__init__.py @@ -1,6 +1,5 @@ from flytekit.loggers import logger -from .ndarray import NumpyArrayTransformer try: # isolate the exception to the numpy import From da1611efc19eb4897ec9fae29c3a1994c9d1163d Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Tue, 18 Jun 2024 08:38:41 -0400 Subject: [PATCH 10/14] lint Signed-off-by: Niels Bantilan --- flytekit/types/numpy/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flytekit/types/numpy/__init__.py b/flytekit/types/numpy/__init__.py index 74495d193a..bf690d3ddb 100644 --- a/flytekit/types/numpy/__init__.py +++ b/flytekit/types/numpy/__init__.py @@ -1,6 +1,5 @@ from flytekit.loggers import logger - try: # isolate the exception to the numpy import import numpy From fe04a02d8ad57847c362c6f471bcdc55a8e0e0b7 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Tue, 18 Jun 2024 08:42:32 -0400 Subject: [PATCH 11/14] revert changes to schema and structured dataset since these are lazily loaded anyway Signed-off-by: Niels Bantilan --- flytekit/types/schema/types.py | 53 ++++++++----------- .../types/structured/structured_dataset.py | 35 +++++------- 2 files changed, 34 insertions(+), 54 deletions(-) diff --git a/flytekit/types/schema/types.py b/flytekit/types/schema/types.py index d9f859a83e..75a54292c5 100644 --- a/flytekit/types/schema/types.py +++ b/flytekit/types/schema/types.py @@ -9,6 +9,7 @@ from pathlib import Path from typing import Type +import numpy as _np from dataclasses_json import config from marshmallow import fields from mashumaro.mixins.json import DataClassJSONMixin @@ -18,41 +19,10 @@ from flytekit.loggers import logger from flytekit.models.literals import Literal, Scalar, Schema from flytekit.models.types import LiteralType, SchemaType -from flytekit.types.numpy import _numpy_installed T = typing.TypeVar("T") -SUPPORTED_SCHEMA_TYPES = { - int: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, - float: SchemaType.SchemaColumn.SchemaColumnType.FLOAT, - bool: SchemaType.SchemaColumn.SchemaColumnType.BOOLEAN, - datetime.datetime: SchemaType.SchemaColumn.SchemaColumnType.DATETIME, - datetime.timedelta: SchemaType.SchemaColumn.SchemaColumnType.DURATION, - str: SchemaType.SchemaColumn.SchemaColumnType.STRING, -} - -if _numpy_installed: - import numpy as np - - SUPPORTED_SCHEMA_TYPES.update( - { - np.int32: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, - np.int64: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, - np.uint32: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, - np.uint64: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, - np.float32: SchemaType.SchemaColumn.SchemaColumnType.FLOAT, - np.float64: SchemaType.SchemaColumn.SchemaColumnType.FLOAT, - np.bool_: SchemaType.SchemaColumn.SchemaColumnType.BOOLEAN, # type: ignore - np.datetime64: SchemaType.SchemaColumn.SchemaColumnType.DATETIME, - np.timedelta64: SchemaType.SchemaColumn.SchemaColumnType.DURATION, - np.bytes_: SchemaType.SchemaColumn.SchemaColumnType.STRING, - np.str_: SchemaType.SchemaColumn.SchemaColumnType.STRING, - np.object_: SchemaType.SchemaColumn.SchemaColumnType.STRING, - } - ) - - class SchemaFormat(Enum): """ Represents the schema storage format (at rest). @@ -349,7 +319,26 @@ def as_readonly(self) -> FlyteSchema: class FlyteSchemaTransformer(TypeTransformer[FlyteSchema]): - _SUPPORTED_TYPES: typing.Dict[Type, SchemaType.SchemaColumn.SchemaColumnType] = SUPPORTED_SCHEMA_TYPES + _SUPPORTED_TYPES: typing.Dict[Type, SchemaType.SchemaColumn.SchemaColumnType] = { + _np.int32: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, + _np.int64: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, + _np.uint32: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, + _np.uint64: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, + int: SchemaType.SchemaColumn.SchemaColumnType.INTEGER, + _np.float32: SchemaType.SchemaColumn.SchemaColumnType.FLOAT, + _np.float64: SchemaType.SchemaColumn.SchemaColumnType.FLOAT, + float: SchemaType.SchemaColumn.SchemaColumnType.FLOAT, + _np.bool_: SchemaType.SchemaColumn.SchemaColumnType.BOOLEAN, # type: ignore + bool: SchemaType.SchemaColumn.SchemaColumnType.BOOLEAN, + _np.datetime64: SchemaType.SchemaColumn.SchemaColumnType.DATETIME, + datetime.datetime: SchemaType.SchemaColumn.SchemaColumnType.DATETIME, + _np.timedelta64: SchemaType.SchemaColumn.SchemaColumnType.DURATION, + datetime.timedelta: SchemaType.SchemaColumn.SchemaColumnType.DURATION, + _np.bytes_: SchemaType.SchemaColumn.SchemaColumnType.STRING, + _np.str_: SchemaType.SchemaColumn.SchemaColumnType.STRING, + _np.object_: SchemaType.SchemaColumn.SchemaColumnType.STRING, + str: SchemaType.SchemaColumn.SchemaColumnType.STRING, + } def __init__(self): super().__init__("FlyteSchema Transformer", FlyteSchema) diff --git a/flytekit/types/structured/structured_dataset.py b/flytekit/types/structured/structured_dataset.py index 545bc41933..18e9eeb09a 100644 --- a/flytekit/types/structured/structured_dataset.py +++ b/flytekit/types/structured/structured_dataset.py @@ -323,37 +323,28 @@ def convert_schema_type_to_structured_dataset_type( def get_supported_types(): - from flytekit.types.numpy import _numpy_installed + import numpy as _np _SUPPORTED_TYPES: typing.Dict[Type, LiteralType] = { # type: ignore + _np.int32: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), + _np.int64: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), + _np.uint32: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), + _np.uint64: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), int: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), + _np.float32: type_models.LiteralType(simple=type_models.SimpleType.FLOAT), + _np.float64: type_models.LiteralType(simple=type_models.SimpleType.FLOAT), float: type_models.LiteralType(simple=type_models.SimpleType.FLOAT), + _np.bool_: type_models.LiteralType(simple=type_models.SimpleType.BOOLEAN), # type: ignore bool: type_models.LiteralType(simple=type_models.SimpleType.BOOLEAN), + _np.datetime64: type_models.LiteralType(simple=type_models.SimpleType.DATETIME), _datetime.datetime: type_models.LiteralType(simple=type_models.SimpleType.DATETIME), + _np.timedelta64: type_models.LiteralType(simple=type_models.SimpleType.DURATION), _datetime.timedelta: type_models.LiteralType(simple=type_models.SimpleType.DURATION), + _np.bytes_: type_models.LiteralType(simple=type_models.SimpleType.STRING), + _np.str_: type_models.LiteralType(simple=type_models.SimpleType.STRING), + _np.object_: type_models.LiteralType(simple=type_models.SimpleType.STRING), str: type_models.LiteralType(simple=type_models.SimpleType.STRING), } - - if _numpy_installed: - import numpy as _np - - _SUPPORTED_TYPES.update( - { # type: ignore - _np.int32: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), - _np.int64: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), - _np.uint32: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), - _np.uint64: type_models.LiteralType(simple=type_models.SimpleType.INTEGER), - _np.float32: type_models.LiteralType(simple=type_models.SimpleType.FLOAT), - _np.float64: type_models.LiteralType(simple=type_models.SimpleType.FLOAT), - _np.bool_: type_models.LiteralType(simple=type_models.SimpleType.BOOLEAN), # type: ignore - _np.datetime64: type_models.LiteralType(simple=type_models.SimpleType.DATETIME), - _np.timedelta64: type_models.LiteralType(simple=type_models.SimpleType.DURATION), - _np.bytes_: type_models.LiteralType(simple=type_models.SimpleType.STRING), - _np.str_: type_models.LiteralType(simple=type_models.SimpleType.STRING), - _np.object_: type_models.LiteralType(simple=type_models.SimpleType.STRING), - } - ) - return _SUPPORTED_TYPES From 8c00681df0a9c976876d4d4ad45b50b96dd7aa25 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Tue, 18 Jun 2024 08:52:54 -0400 Subject: [PATCH 12/14] debug Signed-off-by: Niels Bantilan --- flytekit/types/numpy/__init__.py | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/flytekit/types/numpy/__init__.py b/flytekit/types/numpy/__init__.py index bf690d3ddb..ec20e87970 100644 --- a/flytekit/types/numpy/__init__.py +++ b/flytekit/types/numpy/__init__.py @@ -1,15 +1 @@ -from flytekit.loggers import logger - -try: - # isolate the exception to the numpy import - import numpy - - _numpy_installed = True -except ImportError: - _numpy_installed = False - - -if _numpy_installed: - from .ndarray import NumpyArrayTransformer -else: - logger.info("We won't register NumpyArrayTransformer because numpy is not installed.") +from .ndarray import NumpyArrayTransformer From d9705e6fcea1ebad2932bfbc62e3df31bf3507a6 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Tue, 18 Jun 2024 09:31:18 -0400 Subject: [PATCH 13/14] numpy version pin on sqlalchemy Signed-off-by: Niels Bantilan --- .github/workflows/pythonbuild.yml | 2 +- flytekit/types/numpy/__init__.py | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index 51e6f3894f..dfa83fca20 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -427,7 +427,7 @@ jobs: # TODO: move to protobuf>=5. Github issue: https://github.com/flyteorg/flyte/issues/5448 uv pip install --system -U $GITHUB_WORKSPACE "protobuf<5" # TODO: remove this when numpy v2 in onnx has been resolved - if [[ ${{ matrix.plugin-names }} == *"onnx"* ]]; then + if [[ ${{ matrix.plugin-names }} == *"onnx"* || ${{ matrix.plugin-names }} == "flytekit-sqlalchemy" ]]; then uv pip install --system "numpy<2.0.0" fi uv pip freeze diff --git a/flytekit/types/numpy/__init__.py b/flytekit/types/numpy/__init__.py index ec20e87970..74495d193a 100644 --- a/flytekit/types/numpy/__init__.py +++ b/flytekit/types/numpy/__init__.py @@ -1 +1,16 @@ -from .ndarray import NumpyArrayTransformer +from flytekit.loggers import logger + + +try: + # isolate the exception to the numpy import + import numpy + + _numpy_installed = True +except ImportError: + _numpy_installed = False + + +if _numpy_installed: + from .ndarray import NumpyArrayTransformer +else: + logger.info("We won't register NumpyArrayTransformer because numpy is not installed.") From 9281870e1ed69e0e731d046d60ca6d83ee016109 Mon Sep 17 00:00:00 2001 From: Niels Bantilan Date: Tue, 18 Jun 2024 09:33:26 -0400 Subject: [PATCH 14/14] lint Signed-off-by: Niels Bantilan --- flytekit/types/numpy/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/flytekit/types/numpy/__init__.py b/flytekit/types/numpy/__init__.py index 74495d193a..bf690d3ddb 100644 --- a/flytekit/types/numpy/__init__.py +++ b/flytekit/types/numpy/__init__.py @@ -1,6 +1,5 @@ from flytekit.loggers import logger - try: # isolate the exception to the numpy import import numpy