From 676f544dd059ef1e8300991d51c4ce750da51766 Mon Sep 17 00:00:00 2001 From: fecet Date: Sat, 14 Sep 2024 17:02:05 +0800 Subject: [PATCH 1/3] feat: add support for pa.ExtensionType --- python/deltalake/schema.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/deltalake/schema.py b/python/deltalake/schema.py index 8bc5c7e155..2008c43de0 100644 --- a/python/deltalake/schema.py +++ b/python/deltalake/schema.py @@ -95,6 +95,8 @@ def dtype_to_delta_dtype(dtype: pa.DataType) -> pa.DataType: return pa.timestamp("us", "UTC") elif type(dtype) is pa.FixedSizeBinaryType: return pa.binary() + elif isinstance(dtype, pa.ExtensionType): + return dtype.storage_type try: return dtype_map[dtype] except KeyError: From 899170b94d23879abf2244f54ba4a28e2747c2a7 Mon Sep 17 00:00:00 2001 From: fecet Date: Sat, 14 Sep 2024 18:03:15 +0800 Subject: [PATCH 2/3] add test for ext type --- python/tests/test_schema.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py index a3ad6b62e1..34c833eb17 100644 --- a/python/tests/test_schema.py +++ b/python/tests/test_schema.py @@ -223,6 +223,26 @@ def test_delta_schema(): assert schema_without_metadata == Schema.from_pyarrow(pa_schema) +def _generate_test_type(): + class UuidType(pa.ExtensionType): + def __init__(self): + pa.ExtensionType.__init__(self, pa.binary(16), "my_package.uuid") + + def __arrow_ext_serialize__(self): + # since we don't have a parameterized type, we don't need extra + # metadata to be deserialized + return b"" + + @classmethod + def __arrow_ext_deserialize__(self, storage_type, serialized): + # return an instance of this subclass given the serialized + # metadata. + return UuidType() + + pa.register_extension_type(UuidType()) + return UuidType() + + def _generate_test_tuples(): test_tuples = [ ( @@ -515,6 +535,11 @@ def _generate_test_tuples(): ), ArrowSchemaConversionMode.NORMAL, ), + ( + pa.schema([("uuid", _generate_test_type())]), + pa.schema([("uuid", pa.binary(16))]), + ArrowSchemaConversionMode.NORMAL, + ), ] return test_tuples From 125f9bff10620f9b0ef04d1a9bd3465d5a3e2129 Mon Sep 17 00:00:00 2001 From: fecet Date: Sun, 15 Sep 2024 00:24:10 +0800 Subject: [PATCH 3/3] make mypy happy --- python/stubs/pyarrow/__init__.pyi | 1 + 1 file changed, 1 insertion(+) diff --git a/python/stubs/pyarrow/__init__.pyi b/python/stubs/pyarrow/__init__.pyi index e500d11191..31943db8b8 100644 --- a/python/stubs/pyarrow/__init__.pyi +++ b/python/stubs/pyarrow/__init__.pyi @@ -14,6 +14,7 @@ FixedSizeListType: Any LargeListViewType: Any ListViewType: Any FixedSizeBinaryType: Any +ExtensionType: Any schema: Any map_: Any list_: Any