Skip to content

Commit

Permalink
Python: Change Python API naming to the spec (#4992)
Browse files Browse the repository at this point in the history
See #4985
  • Loading branch information
Fokko authored Jun 9, 2022
1 parent 8499856 commit 77ac584
Show file tree
Hide file tree
Showing 8 changed files with 171 additions and 183 deletions.
10 changes: 5 additions & 5 deletions python/src/iceberg/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,16 +546,16 @@ class _BuildPositionAccessors(SchemaVisitor[Dict[Position, Accessor]]):
>>> from iceberg.schema import Schema
>>> from iceberg.types import *
>>> schema = Schema(
... NestedField(field_id=2, name="id", field_type=IntegerType(), is_optional=False),
... NestedField(field_id=1, name="data", field_type=StringType(), is_optional=True),
... NestedField(field_id=2, name="id", field_type=IntegerType(), required=False),
... NestedField(field_id=1, name="data", field_type=StringType(), required=True),
... NestedField(
... field_id=3,
... name="location",
... field_type=StructType(
... NestedField(field_id=5, name="latitude", field_type=FloatType(), is_optional=False),
... NestedField(field_id=6, name="longitude", field_type=FloatType(), is_optional=False),
... NestedField(field_id=5, name="latitude", field_type=FloatType(), required=False),
... NestedField(field_id=6, name="longitude", field_type=FloatType(), required=False),
... ),
... is_optional=True,
... required=True,
... ),
... schema_id=1,
... identifier_field_ids=[1],
Expand Down
44 changes: 21 additions & 23 deletions python/src/iceberg/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,14 +142,14 @@ class NestedField(IcebergType):
... field_id=1,
... name='foo',
... field_type=FixedType(22),
... is_optional=False,
... required=False,
... ))
'1: foo: required fixed[22]'
>>> str(NestedField(
... field_id=2,
... name='bar',
... field_type=LongType(),
... is_optional=False,
... required=False,
... doc="Just a long"
... ))
'2: bar: required long (Just a long)'
Expand All @@ -158,7 +158,7 @@ class NestedField(IcebergType):
field_id: int = field()
name: str = field()
field_type: IcebergType = field()
is_optional: bool = field(default=True)
required: bool = field(default=True)
doc: Optional[str] = field(default=None, repr=False)

_instances: ClassVar[Dict[Tuple[bool, int, str, IcebergType, Optional[str]], "NestedField"]] = {}
Expand All @@ -168,21 +168,21 @@ def __new__(
field_id: int,
name: str,
field_type: IcebergType,
is_optional: bool = True,
required: bool = True,
doc: Optional[str] = None,
):
key = (is_optional, field_id, name, field_type, doc)
key = (required, field_id, name, field_type, doc)
cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
return cls._instances[key]

@property
def is_required(self) -> bool:
return not self.is_optional
def optional(self) -> bool:
return not self.required

@property
def string_type(self) -> str:
doc = "" if not self.doc else f" ({self.doc})"
req = "optional" if self.is_optional else "required"
req = "optional" if self.required else "required"
return f"{self.field_id}: {self.name}: {req} {self.field_type}{doc}"


Expand Down Expand Up @@ -223,13 +223,13 @@ class ListType(IcebergType):
"""A list type in Iceberg
Example:
>>> ListType(element_id=3, element_type=StringType(), element_is_optional=True)
ListType(element_id=3, element_type=StringType(), element_is_optional=True)
>>> ListType(element_id=3, element_type=StringType(), element_required=True)
ListType(element_id=3, element_type=StringType(), element_required=True)
"""

element_id: int = field()
element_type: IcebergType = field()
element_is_optional: bool = field(default=True)
element_required: bool = field(default=True)
element: NestedField = field(init=False, repr=False)

_instances: ClassVar[Dict[Tuple[bool, int, IcebergType], "ListType"]] = {}
Expand All @@ -238,9 +238,9 @@ def __new__(
cls,
element_id: int,
element_type: IcebergType,
element_is_optional: bool = True,
element_required: bool = True,
):
key = (element_is_optional, element_id, element_type)
key = (element_required, element_id, element_type)
cls._instances[key] = cls._instances.get(key) or object.__new__(cls)
return cls._instances[key]

Expand All @@ -250,7 +250,7 @@ def __post_init__(self):
"element",
NestedField(
name="element",
is_optional=self.element_is_optional,
required=self.element_required,
field_id=self.element_id,
field_type=self.element_type,
),
Expand All @@ -266,15 +266,15 @@ class MapType(IcebergType):
"""A map type in Iceberg
Example:
>>> MapType(key_id=1, key_type=StringType(), value_id=2, value_type=IntegerType(), value_is_optional=True)
MapType(key_id=1, key_type=StringType(), value_id=2, value_type=IntegerType(), value_is_optional=True)
>>> MapType(key_id=1, key_type=StringType(), value_id=2, value_type=IntegerType(), value_required=True)
MapType(key_id=1, key_type=StringType(), value_id=2, value_type=IntegerType(), value_required=True)
"""

key_id: int = field()
key_type: IcebergType = field()
value_id: int = field()
value_type: IcebergType = field()
value_is_optional: bool = field(default=True)
value_required: bool = field(default=True)
key: NestedField = field(init=False, repr=False)
value: NestedField = field(init=False, repr=False)

Expand All @@ -287,24 +287,22 @@ def __new__(
key_type: IcebergType,
value_id: int,
value_type: IcebergType,
value_is_optional: bool = True,
value_required: bool = True,
):
impl_key = (key_id, key_type, value_id, value_type, value_is_optional)
impl_key = (key_id, key_type, value_id, value_type, value_required)
cls._instances[impl_key] = cls._instances.get(impl_key) or object.__new__(cls)
return cls._instances[impl_key]

def __post_init__(self):
object.__setattr__(
self, "key", NestedField(name="key", field_id=self.key_id, field_type=self.key_type, is_optional=False)
)
object.__setattr__(self, "key", NestedField(name="key", field_id=self.key_id, field_type=self.key_type, required=False))
object.__setattr__(
self,
"value",
NestedField(
name="value",
field_id=self.value_id,
field_type=self.value_type,
is_optional=self.value_is_optional,
required=self.value_required,
),
)

Expand Down
20 changes: 10 additions & 10 deletions python/src/iceberg/utils/schema_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,10 +96,10 @@ def avro_to_iceberg(self, avro_schema: dict[str, Any]) -> Schema:
... })
>>> iceberg_schema = Schema(
... NestedField(
... field_id=500, name="manifest_path", field_type=StringType(), is_optional=False, doc="Location URI with FS scheme"
... field_id=500, name="manifest_path", field_type=StringType(), required=False, doc="Location URI with FS scheme"
... ),
... NestedField(
... field_id=501, name="manifest_length", field_type=LongType(), is_optional=False, doc="Total file size in bytes"
... field_id=501, name="manifest_length", field_type=LongType(), required=False, doc="Total file size in bytes"
... ),
... schema_id=1
... )
Expand Down Expand Up @@ -211,7 +211,7 @@ def _convert_field(self, field: dict[str, Any]) -> NestedField:
field_id=field["field-id"],
name=field["name"],
field_type=self._convert_schema(plain_type),
is_optional=is_optional,
required=is_optional,
doc=field.get("doc"),
)

Expand Down Expand Up @@ -244,14 +244,14 @@ def _convert_record_type(self, record_type: dict[str, Any]) -> StructType:
... field_id=509,
... name="contains_null",
... field_type=BooleanType(),
... is_optional=False,
... required=False,
... doc="True if any file has a null partition value",
... ),
... NestedField(
... field_id=518,
... name="contains_nan",
... field_type=BooleanType(),
... is_optional=True,
... required=True,
... doc="True if any file has a nan partition value",
... ),
... )
Expand All @@ -278,7 +278,7 @@ def _convert_array_type(self, array_type: dict[str, Any]) -> ListType:
return ListType(
element_id=array_type["element-id"],
element_type=self._convert_schema(plain_type),
element_is_optional=element_is_optional,
element_required=element_is_optional,
)

def _convert_map_type(self, map_type: dict[str, Any]) -> MapType:
Expand All @@ -300,7 +300,7 @@ def _convert_map_type(self, map_type: dict[str, Any]) -> MapType:
... key_type=StringType(),
... value_id=102,
... value_type=LongType(),
... value_is_optional=True
... value_required=True
... )
>>> actual == expected
True
Expand All @@ -314,7 +314,7 @@ def _convert_map_type(self, map_type: dict[str, Any]) -> MapType:
key_type=StringType(),
value_id=map_type["value-id"],
value_type=self._convert_schema(value_type),
value_is_optional=value_is_optional,
value_required=value_is_optional,
)

def _convert_logical_type(self, avro_logical_type: dict[str, Any]) -> IcebergType:
Expand Down Expand Up @@ -407,7 +407,7 @@ def _convert_logical_map_type(self, avro_type: dict[str, Any]) -> MapType:
... key_type=IntegerType(),
... value_id=102,
... value_type=StringType(),
... value_is_optional=False
... value_required=False
... )
>>> actual == expected
True
Expand All @@ -428,7 +428,7 @@ def _convert_logical_map_type(self, avro_type: dict[str, Any]) -> MapType:
key_type=key.field_type,
value_id=value.field_id,
value_type=value.field_type,
value_is_optional=value.is_optional,
value_required=value.required,
)

def _convert_fixed_type(self, avro_type: dict[str, Any]) -> FixedType:
Expand Down
38 changes: 18 additions & 20 deletions python/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ def set(self, pos: int, value) -> None:
@pytest.fixture(scope="session")
def table_schema_simple():
return schema.Schema(
NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False),
NestedField(field_id=2, name="bar", field_type=IntegerType(), is_optional=True),
NestedField(field_id=3, name="baz", field_type=BooleanType(), is_optional=False),
NestedField(field_id=1, name="foo", field_type=StringType(), required=False),
NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True),
NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False),
schema_id=1,
identifier_field_ids=[1],
)
Expand All @@ -60,14 +60,14 @@ def table_schema_simple():
@pytest.fixture(scope="session")
def table_schema_nested():
return schema.Schema(
NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False),
NestedField(field_id=2, name="bar", field_type=IntegerType(), is_optional=True),
NestedField(field_id=3, name="baz", field_type=BooleanType(), is_optional=False),
NestedField(field_id=1, name="foo", field_type=StringType(), required=False),
NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True),
NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False),
NestedField(
field_id=4,
name="qux",
field_type=ListType(element_id=5, element_type=StringType(), element_is_optional=True),
is_optional=True,
field_type=ListType(element_id=5, element_type=StringType(), element_required=True),
required=True,
),
NestedField(
field_id=6,
Expand All @@ -76,34 +76,32 @@ def table_schema_nested():
key_id=7,
key_type=StringType(),
value_id=8,
value_type=MapType(
key_id=9, key_type=StringType(), value_id=10, value_type=IntegerType(), value_is_optional=True
),
value_is_optional=True,
value_type=MapType(key_id=9, key_type=StringType(), value_id=10, value_type=IntegerType(), value_required=True),
value_required=True,
),
is_optional=True,
required=True,
),
NestedField(
field_id=11,
name="location",
field_type=ListType(
element_id=12,
element_type=StructType(
NestedField(field_id=13, name="latitude", field_type=FloatType(), is_optional=False),
NestedField(field_id=14, name="longitude", field_type=FloatType(), is_optional=False),
NestedField(field_id=13, name="latitude", field_type=FloatType(), required=False),
NestedField(field_id=14, name="longitude", field_type=FloatType(), required=False),
),
element_is_optional=True,
element_required=True,
),
is_optional=True,
required=True,
),
NestedField(
field_id=15,
name="person",
field_type=StructType(
NestedField(field_id=16, name="name", field_type=StringType(), is_optional=False),
NestedField(field_id=17, name="age", field_type=IntegerType(), is_optional=True),
NestedField(field_id=16, name="name", field_type=StringType(), required=False),
NestedField(field_id=17, name="age", field_type=IntegerType(), required=True),
),
is_optional=False,
required=False,
),
schema_id=1,
identifier_field_ids=[1],
Expand Down
6 changes: 3 additions & 3 deletions python/tests/expressions/test_expressions_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,7 @@ def test_accessor_base_class(foo_struct):

def test_bound_reference_str_and_repr():
"""Test str and repr of BoundReference"""
field = NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False)
field = NestedField(field_id=1, name="foo", field_type=StringType(), required=False)
position1_accessor = base.Accessor(position=1)
bound_ref = base.BoundReference(field=field, accessor=position1_accessor)
assert str(bound_ref) == f"BoundReference(field={repr(field)}, accessor={repr(position1_accessor)})"
Expand All @@ -290,10 +290,10 @@ def test_bound_reference_str_and_repr():

def test_bound_reference_field_property():
"""Test str and repr of BoundReference"""
field = NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False)
field = NestedField(field_id=1, name="foo", field_type=StringType(), required=False)
position1_accessor = base.Accessor(position=1)
bound_ref = base.BoundReference(field=field, accessor=position1_accessor)
assert bound_ref.field == NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False)
assert bound_ref.field == NestedField(field_id=1, name="foo", field_type=StringType(), required=False)


def test_bound_reference(table_schema_simple, foo_struct):
Expand Down
Loading

0 comments on commit 77ac584

Please sign in to comment.