Skip to content

Commit

Permalink
Fix Iceberg to Avro Schema Conversion (#53)
Browse files Browse the repository at this point in the history
  • Loading branch information
HonahX authored Oct 13, 2023
1 parent cb9b4f1 commit 0644664
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
14 changes: 11 additions & 3 deletions pyiceberg/utils/schema_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
TimeType,
UUIDType,
)
from pyiceberg.utils.decimal import decimal_required_bytes

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -565,10 +566,17 @@ def map(self, map_type: MapType, key_result: AvroType, value_result: AvroType) -
}

def visit_fixed(self, fixed_type: FixedType) -> AvroType:
return {"type": "fixed", "size": len(fixed_type)}
return {"type": "fixed", "size": len(fixed_type), "name": f"fixed_{len(fixed_type)}"}

def visit_decimal(self, decimal_type: DecimalType) -> AvroType:
return {"type": "bytes", "logicalType": "decimal", "precision": decimal_type.precision, "scale": decimal_type.scale}
return {
"type": "fixed",
"size": decimal_required_bytes(decimal_type.precision),
"logicalType": "decimal",
"precision": decimal_type.precision,
"scale": decimal_type.scale,
"name": f"decimal_{decimal_type.precision}_{decimal_type.scale}",
}

def visit_boolean(self, boolean_type: BooleanType) -> AvroType:
return "boolean"
Expand Down Expand Up @@ -603,7 +611,7 @@ def visit_string(self, string_type: StringType) -> AvroType:
return "string"

def visit_uuid(self, uuid_type: UUIDType) -> AvroType:
return {"type": "fixed", "size": "16", "logicalType": "uuid"}
return {"type": "fixed", "size": 16, "logicalType": "uuid", "name": "uuid_fixed"}

def visit_binary(self, binary_type: BinaryType) -> AvroType:
return "bytes"
7 changes: 7 additions & 0 deletions tests/avro/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,5 +357,12 @@ def __init__(self, *data: Any, **named_data: Any) -> None:
it = iter(avro_reader)
avro_entry = next(it)

# read with fastavro
with open(tmp_avro_file, "rb") as fo:
r = reader(fo=fo)
it_fastavro = iter(r)
avro_entry_read_with_fastavro = list(next(it_fastavro).values())

for idx, field in enumerate(all_primitives_schema.as_struct()):
assert record[idx] == avro_entry[idx], f"Invalid {field}"
assert record[idx] == avro_entry_read_with_fastavro[idx], f"Invalid {field} read with fastavro"

0 comments on commit 0644664

Please sign in to comment.