From 629a510d12d4341ca008bbb97b73f683af1c9f3b Mon Sep 17 00:00:00 2001 From: Larry Marburger Date: Mon, 14 Nov 2022 18:39:07 -0500 Subject: [PATCH] Read decimal column (#406) When reading a parquet file, a decimal column isn't loaded with its logical type information. This behavior was not implemented. `decimalType` is more complex than the other types because a parquet decimal can be backed by multiple different physical types. This PR loads logical type information for `DECIMAL` fields. Closes #365 --- column.go | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/column.go b/column.go index a376c7d..c030df4 100644 --- a/column.go +++ b/column.go @@ -381,8 +381,27 @@ func schemaElementTypeOf(s *format.SchemaElement) Type { case lt.Enum != nil: return (*enumType)(lt.Enum) case lt.Decimal != nil: - // TODO: - // return (*decimalType)(lt.Decimal) + // A parquet decimal can be one of several different physical types. + if t := s.Type; t != nil { + var typ Type + switch kind := Kind(*s.Type); kind { + case Int32: + typ = Int32Type + case Int64: + typ = Int64Type + case FixedLenByteArray: + if s.TypeLength == nil { + panic("DECIMAL using FIXED_LEN_BYTE_ARRAY must specify a length") + } + typ = FixedLenByteArrayType(int(*s.TypeLength)) + default: + panic("DECIMAL must be of type INT32, INT64, or FIXED_LEN_BYTE_ARRAY but got " + kind.String()) + } + return &decimalType{ + decimal: *lt.Decimal, + Type: typ, + } + } case lt.Date != nil: return (*dateType)(lt.Date) case lt.Time != nil: