diff --git a/python/src/iceberg/schema.py b/python/src/iceberg/schema.py index b0fd8427c51f..97b980150014 100644 --- a/python/src/iceberg/schema.py +++ b/python/src/iceberg/schema.py @@ -546,16 +546,16 @@ class _BuildPositionAccessors(SchemaVisitor[Dict[Position, Accessor]]): >>> from iceberg.schema import Schema >>> from iceberg.types import * >>> schema = Schema( - ... NestedField(field_id=2, name="id", field_type=IntegerType(), is_optional=False), - ... NestedField(field_id=1, name="data", field_type=StringType(), is_optional=True), + ... NestedField(field_id=2, name="id", field_type=IntegerType(), required=False), + ... NestedField(field_id=1, name="data", field_type=StringType(), required=True), ... NestedField( ... field_id=3, ... name="location", ... field_type=StructType( - ... NestedField(field_id=5, name="latitude", field_type=FloatType(), is_optional=False), - ... NestedField(field_id=6, name="longitude", field_type=FloatType(), is_optional=False), + ... NestedField(field_id=5, name="latitude", field_type=FloatType(), required=False), + ... NestedField(field_id=6, name="longitude", field_type=FloatType(), required=False), ... ), - ... is_optional=True, + ... required=True, ... ), ... schema_id=1, ... identifier_field_ids=[1], diff --git a/python/src/iceberg/types.py b/python/src/iceberg/types.py index 6f1ad701c474..46828e6d408e 100644 --- a/python/src/iceberg/types.py +++ b/python/src/iceberg/types.py @@ -142,14 +142,14 @@ class NestedField(IcebergType): ... field_id=1, ... name='foo', ... field_type=FixedType(22), - ... is_optional=False, + ... required=False, ... )) '1: foo: required fixed[22]' >>> str(NestedField( ... field_id=2, ... name='bar', ... field_type=LongType(), - ... is_optional=False, + ... required=False, ... doc="Just a long" ... )) '2: bar: required long (Just a long)' @@ -158,7 +158,7 @@ class NestedField(IcebergType): field_id: int = field() name: str = field() field_type: IcebergType = field() - is_optional: bool = field(default=True) + required: bool = field(default=True) doc: Optional[str] = field(default=None, repr=False) _instances: ClassVar[Dict[Tuple[bool, int, str, IcebergType, Optional[str]], "NestedField"]] = {} @@ -168,21 +168,21 @@ def __new__( field_id: int, name: str, field_type: IcebergType, - is_optional: bool = True, + required: bool = True, doc: Optional[str] = None, ): - key = (is_optional, field_id, name, field_type, doc) + key = (required, field_id, name, field_type, doc) cls._instances[key] = cls._instances.get(key) or object.__new__(cls) return cls._instances[key] @property - def is_required(self) -> bool: - return not self.is_optional + def optional(self) -> bool: + return not self.required @property def string_type(self) -> str: doc = "" if not self.doc else f" ({self.doc})" - req = "optional" if self.is_optional else "required" + req = "optional" if self.required else "required" return f"{self.field_id}: {self.name}: {req} {self.field_type}{doc}" @@ -223,13 +223,13 @@ class ListType(IcebergType): """A list type in Iceberg Example: - >>> ListType(element_id=3, element_type=StringType(), element_is_optional=True) - ListType(element_id=3, element_type=StringType(), element_is_optional=True) + >>> ListType(element_id=3, element_type=StringType(), element_required=True) + ListType(element_id=3, element_type=StringType(), element_required=True) """ element_id: int = field() element_type: IcebergType = field() - element_is_optional: bool = field(default=True) + element_required: bool = field(default=True) element: NestedField = field(init=False, repr=False) _instances: ClassVar[Dict[Tuple[bool, int, IcebergType], "ListType"]] = {} @@ -238,9 +238,9 @@ def __new__( cls, element_id: int, element_type: IcebergType, - element_is_optional: bool = True, + element_required: bool = True, ): - key = (element_is_optional, element_id, element_type) + key = (element_required, element_id, element_type) cls._instances[key] = cls._instances.get(key) or object.__new__(cls) return cls._instances[key] @@ -250,7 +250,7 @@ def __post_init__(self): "element", NestedField( name="element", - is_optional=self.element_is_optional, + required=self.element_required, field_id=self.element_id, field_type=self.element_type, ), @@ -266,15 +266,15 @@ class MapType(IcebergType): """A map type in Iceberg Example: - >>> MapType(key_id=1, key_type=StringType(), value_id=2, value_type=IntegerType(), value_is_optional=True) - MapType(key_id=1, key_type=StringType(), value_id=2, value_type=IntegerType(), value_is_optional=True) + >>> MapType(key_id=1, key_type=StringType(), value_id=2, value_type=IntegerType(), value_required=True) + MapType(key_id=1, key_type=StringType(), value_id=2, value_type=IntegerType(), value_required=True) """ key_id: int = field() key_type: IcebergType = field() value_id: int = field() value_type: IcebergType = field() - value_is_optional: bool = field(default=True) + value_required: bool = field(default=True) key: NestedField = field(init=False, repr=False) value: NestedField = field(init=False, repr=False) @@ -287,16 +287,14 @@ def __new__( key_type: IcebergType, value_id: int, value_type: IcebergType, - value_is_optional: bool = True, + value_required: bool = True, ): - impl_key = (key_id, key_type, value_id, value_type, value_is_optional) + impl_key = (key_id, key_type, value_id, value_type, value_required) cls._instances[impl_key] = cls._instances.get(impl_key) or object.__new__(cls) return cls._instances[impl_key] def __post_init__(self): - object.__setattr__( - self, "key", NestedField(name="key", field_id=self.key_id, field_type=self.key_type, is_optional=False) - ) + object.__setattr__(self, "key", NestedField(name="key", field_id=self.key_id, field_type=self.key_type, required=False)) object.__setattr__( self, "value", @@ -304,7 +302,7 @@ def __post_init__(self): name="value", field_id=self.value_id, field_type=self.value_type, - is_optional=self.value_is_optional, + required=self.value_required, ), ) diff --git a/python/src/iceberg/utils/schema_conversion.py b/python/src/iceberg/utils/schema_conversion.py index 6415fb836f17..4e57daa0314e 100644 --- a/python/src/iceberg/utils/schema_conversion.py +++ b/python/src/iceberg/utils/schema_conversion.py @@ -96,10 +96,10 @@ def avro_to_iceberg(self, avro_schema: dict[str, Any]) -> Schema: ... }) >>> iceberg_schema = Schema( ... NestedField( - ... field_id=500, name="manifest_path", field_type=StringType(), is_optional=False, doc="Location URI with FS scheme" + ... field_id=500, name="manifest_path", field_type=StringType(), required=False, doc="Location URI with FS scheme" ... ), ... NestedField( - ... field_id=501, name="manifest_length", field_type=LongType(), is_optional=False, doc="Total file size in bytes" + ... field_id=501, name="manifest_length", field_type=LongType(), required=False, doc="Total file size in bytes" ... ), ... schema_id=1 ... ) @@ -211,7 +211,7 @@ def _convert_field(self, field: dict[str, Any]) -> NestedField: field_id=field["field-id"], name=field["name"], field_type=self._convert_schema(plain_type), - is_optional=is_optional, + required=is_optional, doc=field.get("doc"), ) @@ -244,14 +244,14 @@ def _convert_record_type(self, record_type: dict[str, Any]) -> StructType: ... field_id=509, ... name="contains_null", ... field_type=BooleanType(), - ... is_optional=False, + ... required=False, ... doc="True if any file has a null partition value", ... ), ... NestedField( ... field_id=518, ... name="contains_nan", ... field_type=BooleanType(), - ... is_optional=True, + ... required=True, ... doc="True if any file has a nan partition value", ... ), ... ) @@ -278,7 +278,7 @@ def _convert_array_type(self, array_type: dict[str, Any]) -> ListType: return ListType( element_id=array_type["element-id"], element_type=self._convert_schema(plain_type), - element_is_optional=element_is_optional, + element_required=element_is_optional, ) def _convert_map_type(self, map_type: dict[str, Any]) -> MapType: @@ -300,7 +300,7 @@ def _convert_map_type(self, map_type: dict[str, Any]) -> MapType: ... key_type=StringType(), ... value_id=102, ... value_type=LongType(), - ... value_is_optional=True + ... value_required=True ... ) >>> actual == expected True @@ -314,7 +314,7 @@ def _convert_map_type(self, map_type: dict[str, Any]) -> MapType: key_type=StringType(), value_id=map_type["value-id"], value_type=self._convert_schema(value_type), - value_is_optional=value_is_optional, + value_required=value_is_optional, ) def _convert_logical_type(self, avro_logical_type: dict[str, Any]) -> IcebergType: @@ -407,7 +407,7 @@ def _convert_logical_map_type(self, avro_type: dict[str, Any]) -> MapType: ... key_type=IntegerType(), ... value_id=102, ... value_type=StringType(), - ... value_is_optional=False + ... value_required=False ... ) >>> actual == expected True @@ -428,7 +428,7 @@ def _convert_logical_map_type(self, avro_type: dict[str, Any]) -> MapType: key_type=key.field_type, value_id=value.field_id, value_type=value.field_type, - value_is_optional=value.is_optional, + value_required=value.required, ) def _convert_fixed_type(self, avro_type: dict[str, Any]) -> FixedType: diff --git a/python/tests/conftest.py b/python/tests/conftest.py index bf0161de03b7..b5dd18b8d80a 100644 --- a/python/tests/conftest.py +++ b/python/tests/conftest.py @@ -49,9 +49,9 @@ def set(self, pos: int, value) -> None: @pytest.fixture(scope="session") def table_schema_simple(): return schema.Schema( - NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False), - NestedField(field_id=2, name="bar", field_type=IntegerType(), is_optional=True), - NestedField(field_id=3, name="baz", field_type=BooleanType(), is_optional=False), + NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False), schema_id=1, identifier_field_ids=[1], ) @@ -60,14 +60,14 @@ def table_schema_simple(): @pytest.fixture(scope="session") def table_schema_nested(): return schema.Schema( - NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False), - NestedField(field_id=2, name="bar", field_type=IntegerType(), is_optional=True), - NestedField(field_id=3, name="baz", field_type=BooleanType(), is_optional=False), + NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False), NestedField( field_id=4, name="qux", - field_type=ListType(element_id=5, element_type=StringType(), element_is_optional=True), - is_optional=True, + field_type=ListType(element_id=5, element_type=StringType(), element_required=True), + required=True, ), NestedField( field_id=6, @@ -76,12 +76,10 @@ def table_schema_nested(): key_id=7, key_type=StringType(), value_id=8, - value_type=MapType( - key_id=9, key_type=StringType(), value_id=10, value_type=IntegerType(), value_is_optional=True - ), - value_is_optional=True, + value_type=MapType(key_id=9, key_type=StringType(), value_id=10, value_type=IntegerType(), value_required=True), + value_required=True, ), - is_optional=True, + required=True, ), NestedField( field_id=11, @@ -89,21 +87,21 @@ def table_schema_nested(): field_type=ListType( element_id=12, element_type=StructType( - NestedField(field_id=13, name="latitude", field_type=FloatType(), is_optional=False), - NestedField(field_id=14, name="longitude", field_type=FloatType(), is_optional=False), + NestedField(field_id=13, name="latitude", field_type=FloatType(), required=False), + NestedField(field_id=14, name="longitude", field_type=FloatType(), required=False), ), - element_is_optional=True, + element_required=True, ), - is_optional=True, + required=True, ), NestedField( field_id=15, name="person", field_type=StructType( - NestedField(field_id=16, name="name", field_type=StringType(), is_optional=False), - NestedField(field_id=17, name="age", field_type=IntegerType(), is_optional=True), + NestedField(field_id=16, name="name", field_type=StringType(), required=False), + NestedField(field_id=17, name="age", field_type=IntegerType(), required=True), ), - is_optional=False, + required=False, ), schema_id=1, identifier_field_ids=[1], diff --git a/python/tests/expressions/test_expressions_base.py b/python/tests/expressions/test_expressions_base.py index 43fde0ad1c7a..7fe3e9817e1a 100644 --- a/python/tests/expressions/test_expressions_base.py +++ b/python/tests/expressions/test_expressions_base.py @@ -281,7 +281,7 @@ def test_accessor_base_class(foo_struct): def test_bound_reference_str_and_repr(): """Test str and repr of BoundReference""" - field = NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False) + field = NestedField(field_id=1, name="foo", field_type=StringType(), required=False) position1_accessor = base.Accessor(position=1) bound_ref = base.BoundReference(field=field, accessor=position1_accessor) assert str(bound_ref) == f"BoundReference(field={repr(field)}, accessor={repr(position1_accessor)})" @@ -290,10 +290,10 @@ def test_bound_reference_str_and_repr(): def test_bound_reference_field_property(): """Test str and repr of BoundReference""" - field = NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False) + field = NestedField(field_id=1, name="foo", field_type=StringType(), required=False) position1_accessor = base.Accessor(position=1) bound_ref = base.BoundReference(field=field, accessor=position1_accessor) - assert bound_ref.field == NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False) + assert bound_ref.field == NestedField(field_id=1, name="foo", field_type=StringType(), required=False) def test_bound_reference(table_schema_simple, foo_struct): diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py index d48198edd9b4..e5afe5fb74bf 100644 --- a/python/tests/test_schema.py +++ b/python/tests/test_schema.py @@ -53,13 +53,11 @@ def test_schema_str(table_schema_simple): [ ( schema.Schema(NestedField(1, "foo", StringType()), schema_id=1), - "Schema(fields=(NestedField(field_id=1, name='foo', field_type=StringType(), is_optional=True),), schema_id=1, identifier_field_ids=[])", + "Schema(fields=(NestedField(field_id=1, name='foo', field_type=StringType(), required=True),), schema_id=1, identifier_field_ids=[])", ), ( - schema.Schema( - NestedField(1, "foo", StringType()), NestedField(2, "bar", IntegerType(), is_optional=False), schema_id=1 - ), - "Schema(fields=(NestedField(field_id=1, name='foo', field_type=StringType(), is_optional=True), NestedField(field_id=2, name='bar', field_type=IntegerType(), is_optional=False)), schema_id=1, identifier_field_ids=[])", + schema.Schema(NestedField(1, "foo", StringType()), NestedField(2, "bar", IntegerType(), required=False), schema_id=1), + "Schema(fields=(NestedField(field_id=1, name='foo', field_type=StringType(), required=True), NestedField(field_id=2, name='bar', field_type=IntegerType(), required=False)), schema_id=1, identifier_field_ids=[])", ), ], ) @@ -72,10 +70,10 @@ def test_schema_raise_on_duplicate_names(): """Test schema representation""" with pytest.raises(ValueError) as exc_info: schema.Schema( - NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False), - NestedField(field_id=2, name="bar", field_type=IntegerType(), is_optional=True), - NestedField(field_id=3, name="baz", field_type=BooleanType(), is_optional=False), - NestedField(field_id=4, name="baz", field_type=BooleanType(), is_optional=False), + NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True), + NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False), + NestedField(field_id=4, name="baz", field_type=BooleanType(), required=False), schema_id=1, identifier_field_ids=[1], ) @@ -87,16 +85,16 @@ def test_schema_index_by_id_visitor(table_schema_nested): """Test index_by_id visitor function""" index = schema.index_by_id(table_schema_nested) assert index == { - 1: NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False), - 2: NestedField(field_id=2, name="bar", field_type=IntegerType(), is_optional=True), - 3: NestedField(field_id=3, name="baz", field_type=BooleanType(), is_optional=False), + 1: NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + 2: NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True), + 3: NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False), 4: NestedField( field_id=4, name="qux", - field_type=ListType(element_id=5, element_type=StringType(), element_is_optional=True), - is_optional=True, + field_type=ListType(element_id=5, element_type=StringType(), element_required=True), + required=True, ), - 5: NestedField(field_id=5, name="element", field_type=StringType(), is_optional=True), + 5: NestedField(field_id=5, name="element", field_type=StringType(), required=True), 6: NestedField( field_id=6, name="quux", @@ -104,57 +102,55 @@ def test_schema_index_by_id_visitor(table_schema_nested): key_id=7, key_type=StringType(), value_id=8, - value_type=MapType( - key_id=9, key_type=StringType(), value_id=10, value_type=IntegerType(), value_is_optional=True - ), - value_is_optional=True, + value_type=MapType(key_id=9, key_type=StringType(), value_id=10, value_type=IntegerType(), value_required=True), + value_required=True, ), - is_optional=True, + required=True, ), - 7: NestedField(field_id=7, name="key", field_type=StringType(), is_optional=False), - 9: NestedField(field_id=9, name="key", field_type=StringType(), is_optional=False), + 7: NestedField(field_id=7, name="key", field_type=StringType(), required=False), + 9: NestedField(field_id=9, name="key", field_type=StringType(), required=False), 8: NestedField( field_id=8, name="value", - field_type=MapType(key_id=9, key_type=StringType(), value_id=10, value_type=IntegerType(), value_is_optional=True), - is_optional=True, + field_type=MapType(key_id=9, key_type=StringType(), value_id=10, value_type=IntegerType(), value_required=True), + required=True, ), - 10: NestedField(field_id=10, name="value", field_type=IntegerType(), is_optional=True), + 10: NestedField(field_id=10, name="value", field_type=IntegerType(), required=True), 11: NestedField( field_id=11, name="location", field_type=ListType( element_id=12, element_type=StructType( - NestedField(field_id=13, name="latitude", field_type=FloatType(), is_optional=False), - NestedField(field_id=14, name="longitude", field_type=FloatType(), is_optional=False), + NestedField(field_id=13, name="latitude", field_type=FloatType(), required=False), + NestedField(field_id=14, name="longitude", field_type=FloatType(), required=False), ), - element_is_optional=True, + element_required=True, ), - is_optional=True, + required=True, ), 12: NestedField( field_id=12, name="element", field_type=StructType( - NestedField(field_id=13, name="latitude", field_type=FloatType(), is_optional=False), - NestedField(field_id=14, name="longitude", field_type=FloatType(), is_optional=False), + NestedField(field_id=13, name="latitude", field_type=FloatType(), required=False), + NestedField(field_id=14, name="longitude", field_type=FloatType(), required=False), ), - is_optional=True, + required=True, ), - 13: NestedField(field_id=13, name="latitude", field_type=FloatType(), is_optional=False), - 14: NestedField(field_id=14, name="longitude", field_type=FloatType(), is_optional=False), + 13: NestedField(field_id=13, name="latitude", field_type=FloatType(), required=False), + 14: NestedField(field_id=14, name="longitude", field_type=FloatType(), required=False), 15: NestedField( field_id=15, name="person", field_type=StructType( - NestedField(field_id=16, name="name", field_type=StringType(), is_optional=False), - NestedField(field_id=17, name="age", field_type=IntegerType(), is_optional=True), + NestedField(field_id=16, name="name", field_type=StringType(), required=False), + NestedField(field_id=17, name="age", field_type=IntegerType(), required=True), ), - is_optional=False, + required=False, ), - 16: NestedField(field_id=16, name="name", field_type=StringType(), is_optional=False), - 17: NestedField(field_id=17, name="age", field_type=IntegerType(), is_optional=True), + 16: NestedField(field_id=16, name="name", field_type=StringType(), required=False), + 17: NestedField(field_id=17, name="age", field_type=IntegerType(), required=True), } @@ -222,19 +218,19 @@ def test_schema_find_field_by_id(table_schema_simple): assert isinstance(column1, NestedField) assert column1.field_id == 1 assert column1.field_type == StringType() - assert column1.is_optional == False + assert column1.required == False column2 = index[2] assert isinstance(column2, NestedField) assert column2.field_id == 2 assert column2.field_type == IntegerType() - assert column2.is_optional == True + assert column2.required == True column3 = index[3] assert isinstance(column3, NestedField) assert column3.field_id == 3 assert column3.field_type == BooleanType() - assert column3.is_optional == False + assert column3.required == False def test_schema_find_field_by_id_raise_on_unknown_field(table_schema_simple): @@ -248,24 +244,24 @@ def test_schema_find_field_by_id_raise_on_unknown_field(table_schema_simple): def test_schema_find_field_type_by_id(table_schema_simple): """Test retrieving a columns' type using its field ID""" index = schema.index_by_id(table_schema_simple) - assert index[1] == NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False) - assert index[2] == NestedField(field_id=2, name="bar", field_type=IntegerType(), is_optional=True) - assert index[3] == NestedField(field_id=3, name="baz", field_type=BooleanType(), is_optional=False) + assert index[1] == NestedField(field_id=1, name="foo", field_type=StringType(), required=False) + assert index[2] == NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True) + assert index[3] == NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False) def test_index_by_id_schema_visitor(table_schema_nested): """Test the index_by_id function that uses the IndexById schema visitor""" assert schema.index_by_id(table_schema_nested) == { - 1: NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False), - 2: NestedField(field_id=2, name="bar", field_type=IntegerType(), is_optional=True), - 3: NestedField(field_id=3, name="baz", field_type=BooleanType(), is_optional=False), + 1: NestedField(field_id=1, name="foo", field_type=StringType(), required=False), + 2: NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True), + 3: NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False), 4: NestedField( field_id=4, name="qux", - field_type=ListType(element_id=5, element_type=StringType(), element_is_optional=True), - is_optional=True, + field_type=ListType(element_id=5, element_type=StringType(), element_required=True), + required=True, ), - 5: NestedField(field_id=5, name="element", field_type=StringType(), is_optional=True), + 5: NestedField(field_id=5, name="element", field_type=StringType(), required=True), 6: NestedField( field_id=6, name="quux", @@ -273,57 +269,55 @@ def test_index_by_id_schema_visitor(table_schema_nested): key_id=7, key_type=StringType(), value_id=8, - value_type=MapType( - key_id=9, key_type=StringType(), value_id=10, value_type=IntegerType(), value_is_optional=True - ), - value_is_optional=True, + value_type=MapType(key_id=9, key_type=StringType(), value_id=10, value_type=IntegerType(), value_required=True), + value_required=True, ), - is_optional=True, + required=True, ), - 7: NestedField(field_id=7, name="key", field_type=StringType(), is_optional=False), + 7: NestedField(field_id=7, name="key", field_type=StringType(), required=False), 8: NestedField( field_id=8, name="value", - field_type=MapType(key_id=9, key_type=StringType(), value_id=10, value_type=IntegerType(), value_is_optional=True), - is_optional=True, + field_type=MapType(key_id=9, key_type=StringType(), value_id=10, value_type=IntegerType(), value_required=True), + required=True, ), - 9: NestedField(field_id=9, name="key", field_type=StringType(), is_optional=False), - 10: NestedField(field_id=10, name="value", field_type=IntegerType(), is_optional=True), + 9: NestedField(field_id=9, name="key", field_type=StringType(), required=False), + 10: NestedField(field_id=10, name="value", field_type=IntegerType(), required=True), 11: NestedField( field_id=11, name="location", field_type=ListType( element_id=12, element_type=StructType( - NestedField(field_id=13, name="latitude", field_type=FloatType(), is_optional=False), - NestedField(field_id=14, name="longitude", field_type=FloatType(), is_optional=False), + NestedField(field_id=13, name="latitude", field_type=FloatType(), required=False), + NestedField(field_id=14, name="longitude", field_type=FloatType(), required=False), ), - element_is_optional=True, + element_required=True, ), - is_optional=True, + required=True, ), 12: NestedField( field_id=12, name="element", field_type=StructType( - NestedField(field_id=13, name="latitude", field_type=FloatType(), is_optional=False), - NestedField(field_id=14, name="longitude", field_type=FloatType(), is_optional=False), + NestedField(field_id=13, name="latitude", field_type=FloatType(), required=False), + NestedField(field_id=14, name="longitude", field_type=FloatType(), required=False), ), - is_optional=True, + required=True, ), - 13: NestedField(field_id=13, name="latitude", field_type=FloatType(), is_optional=False), - 14: NestedField(field_id=14, name="longitude", field_type=FloatType(), is_optional=False), + 13: NestedField(field_id=13, name="latitude", field_type=FloatType(), required=False), + 14: NestedField(field_id=14, name="longitude", field_type=FloatType(), required=False), 15: NestedField( field_id=15, name="person", field_type=StructType( - NestedField(field_id=16, name="name", field_type=StringType(), is_optional=False), - NestedField(field_id=17, name="age", field_type=IntegerType(), is_optional=True), + NestedField(field_id=16, name="name", field_type=StringType(), required=False), + NestedField(field_id=17, name="age", field_type=IntegerType(), required=True), ), - is_optional=False, + required=False, ), - 16: NestedField(field_id=16, name="name", field_type=StringType(), is_optional=False), - 17: NestedField(field_id=17, name="age", field_type=IntegerType(), is_optional=True), + 16: NestedField(field_id=16, name="name", field_type=StringType(), required=False), + 17: NestedField(field_id=17, name="age", field_type=IntegerType(), required=True), } @@ -340,19 +334,19 @@ def test_schema_find_field(table_schema_simple): table_schema_simple.find_field(1) == table_schema_simple.find_field("foo") == table_schema_simple.find_field("FOO", case_sensitive=False) - == NestedField(field_id=1, name="foo", field_type=StringType(), is_optional=False) + == NestedField(field_id=1, name="foo", field_type=StringType(), required=False) ) assert ( table_schema_simple.find_field(2) == table_schema_simple.find_field("bar") == table_schema_simple.find_field("BAR", case_sensitive=False) - == NestedField(field_id=2, name="bar", field_type=IntegerType(), is_optional=True) + == NestedField(field_id=2, name="bar", field_type=IntegerType(), required=True) ) assert ( table_schema_simple.find_field(3) == table_schema_simple.find_field("baz") == table_schema_simple.find_field("BAZ", case_sensitive=False) - == NestedField(field_id=3, name="baz", field_type=BooleanType(), is_optional=False) + == NestedField(field_id=3, name="baz", field_type=BooleanType(), required=False) ) diff --git a/python/tests/test_types.py b/python/tests/test_types.py index 844e3ab7deef..26f6f421a7fc 100644 --- a/python/tests/test_types.py +++ b/python/tests/test_types.py @@ -84,12 +84,12 @@ def test_repr_primitive_types(input_index, input_type): ), ( StructType( - NestedField(1, "required_field", StringType(), is_optional=False), - NestedField(2, "optional_field", IntegerType(), is_optional=True), + NestedField(1, "required_field", StringType(), required=False), + NestedField(2, "optional_field", IntegerType(), required=True), ), False, ), - (NestedField(1, "required_field", StringType(), is_optional=False), False), + (NestedField(1, "required_field", StringType(), required=False), False), ], ) def test_is_primitive(input_type, result): @@ -119,30 +119,30 @@ def test_decimal_type(): def test_struct_type(): type_var = StructType( - NestedField(1, "optional_field", IntegerType(), is_optional=True), - NestedField(2, "required_field", FixedType(5), is_optional=False), + NestedField(1, "optional_field", IntegerType(), required=True), + NestedField(2, "required_field", FixedType(5), required=False), NestedField( 3, "required_field", StructType( - NestedField(4, "optional_field", DecimalType(8, 2), is_optional=True), - NestedField(5, "required_field", LongType(), is_optional=False), + NestedField(4, "optional_field", DecimalType(8, 2), required=True), + NestedField(5, "required_field", LongType(), required=False), ), - is_optional=False, + required=False, ), ) assert len(type_var.fields) == 3 assert str(type_var) == str(eval(repr(type_var))) assert type_var == eval(repr(type_var)) - assert type_var != StructType(NestedField(1, "optional_field", IntegerType(), is_optional=True)) + assert type_var != StructType(NestedField(1, "optional_field", IntegerType(), required=True)) def test_list_type(): type_var = ListType( 1, StructType( - NestedField(2, "optional_field", DecimalType(8, 2), is_optional=True), - NestedField(3, "required_field", LongType(), is_optional=False), + NestedField(2, "optional_field", DecimalType(8, 2), required=True), + NestedField(3, "required_field", LongType(), required=False), ), False, ) @@ -154,7 +154,7 @@ def test_list_type(): assert type_var != ListType( 1, StructType( - NestedField(2, "optional_field", DecimalType(8, 2), is_optional=True), + NestedField(2, "optional_field", DecimalType(8, 2), required=True), ), True, ) @@ -183,15 +183,15 @@ def test_nested_field(): ListType( 3, DoubleType(), - element_is_optional=False, + element_required=False, ), - is_optional=True, + required=True, ), ), - is_optional=True, + required=True, ) - assert field_var.is_optional - assert not field_var.is_required + assert field_var.required + assert not field_var.optional assert field_var.field_id == 1 assert isinstance(field_var.field_type, StructType) assert str(field_var) == str(eval(repr(field_var))) diff --git a/python/tests/utils/test_schema_conversion.py b/python/tests/utils/test_schema_conversion.py index 0af7e3478da8..8776e1a14ffc 100644 --- a/python/tests/utils/test_schema_conversion.py +++ b/python/tests/utils/test_schema_conversion.py @@ -38,29 +38,27 @@ def test_iceberg_to_avro(manifest_schema): iceberg_schema = AvroSchemaConversion().avro_to_iceberg(manifest_schema) expected_iceberg_schema = Schema( NestedField( - field_id=500, name="manifest_path", field_type=StringType(), is_optional=False, doc="Location URI with FS scheme" + field_id=500, name="manifest_path", field_type=StringType(), required=False, doc="Location URI with FS scheme" ), + NestedField(field_id=501, name="manifest_length", field_type=LongType(), required=False, doc="Total file size in bytes"), NestedField( - field_id=501, name="manifest_length", field_type=LongType(), is_optional=False, doc="Total file size in bytes" - ), - NestedField( - field_id=502, name="partition_spec_id", field_type=IntegerType(), is_optional=False, doc="Spec ID used to write" + field_id=502, name="partition_spec_id", field_type=IntegerType(), required=False, doc="Spec ID used to write" ), NestedField( field_id=503, name="added_snapshot_id", field_type=LongType(), - is_optional=True, + required=True, doc="Snapshot ID that added the manifest", ), NestedField( - field_id=504, name="added_data_files_count", field_type=IntegerType(), is_optional=True, doc="Added entry count" + field_id=504, name="added_data_files_count", field_type=IntegerType(), required=True, doc="Added entry count" ), NestedField( - field_id=505, name="existing_data_files_count", field_type=IntegerType(), is_optional=True, doc="Existing entry count" + field_id=505, name="existing_data_files_count", field_type=IntegerType(), required=True, doc="Existing entry count" ), NestedField( - field_id=506, name="deleted_data_files_count", field_type=IntegerType(), is_optional=True, doc="Deleted entry count" + field_id=506, name="deleted_data_files_count", field_type=IntegerType(), required=True, doc="Deleted entry count" ), NestedField( field_id=507, @@ -73,40 +71,40 @@ def test_iceberg_to_avro(manifest_schema): field_id=509, name="contains_null", field_type=BooleanType(), - is_optional=False, + required=False, doc="True if any file has a null partition value", ), NestedField( field_id=518, name="contains_nan", field_type=BooleanType(), - is_optional=True, + required=True, doc="True if any file has a nan partition value", ), NestedField( field_id=510, name="lower_bound", field_type=BinaryType(), - is_optional=True, + required=True, doc="Partition lower bound for all files", ), NestedField( field_id=511, name="upper_bound", field_type=BinaryType(), - is_optional=True, + required=True, doc="Partition upper bound for all files", ), ) ), - element_is_optional=False, + element_required=False, ), - is_optional=True, + required=True, doc="Summary for each partition", ), - NestedField(field_id=512, name="added_rows_count", field_type=LongType(), is_optional=True, doc="Added rows count"), - NestedField(field_id=513, name="existing_rows_count", field_type=LongType(), is_optional=True, doc="Existing rows count"), - NestedField(field_id=514, name="deleted_rows_count", field_type=LongType(), is_optional=True, doc="Deleted rows count"), + NestedField(field_id=512, name="added_rows_count", field_type=LongType(), required=True, doc="Added rows count"), + NestedField(field_id=513, name="existing_rows_count", field_type=LongType(), required=True, doc="Existing rows count"), + NestedField(field_id=514, name="deleted_rows_count", field_type=LongType(), required=True, doc="Deleted rows count"), schema_id=1, identifier_field_ids=[], ) @@ -135,8 +133,8 @@ def test_avro_list_required_primitive(): NestedField( field_id=100, name="array_with_string", - field_type=ListType(element_id=101, element_type=StringType(), element_is_optional=False), - is_optional=False, + field_type=ListType(element_id=101, element_type=StringType(), element_required=False), + required=False, ), schema_id=1, ) @@ -168,8 +166,8 @@ def test_avro_list_wrapped_primitive(): NestedField( field_id=100, name="array_with_string", - field_type=ListType(element_id=101, element_type=StringType(), element_is_optional=False), - is_optional=False, + field_type=ListType(element_id=101, element_type=StringType(), element_required=False), + required=False, ), schema_id=1, ) @@ -219,13 +217,13 @@ def test_avro_list_required_record(): element_id=101, element_type=StructType( fields=( - NestedField(field_id=102, name="contains_null", field_type=BooleanType(), is_optional=False), - NestedField(field_id=103, name="contains_nan", field_type=BooleanType(), is_optional=True), + NestedField(field_id=102, name="contains_null", field_type=BooleanType(), required=False), + NestedField(field_id=103, name="contains_nan", field_type=BooleanType(), required=True), ) ), - element_is_optional=False, + element_required=False, ), - is_optional=False, + required=False, ), schema_id=1, identifier_field_ids=[], @@ -256,7 +254,7 @@ def test_map_type(): "value-id": 102, } actual = AvroSchemaConversion()._convert_schema(avro_type) - expected = MapType(key_id=101, key_type=StringType(), value_id=102, value_type=LongType(), value_is_optional=True) + expected = MapType(key_id=101, key_type=StringType(), value_id=102, value_type=LongType(), value_required=True) assert actual == expected