From 5655181745556c5f3349f2ac2651c95ae559b085 Mon Sep 17 00:00:00 2001 From: Mark Wolfe Date: Fri, 21 Jul 2023 05:02:24 +1000 Subject: [PATCH] GH-36696: [Go] Improve the MapOf and ListOf helpers (#36697) ### Rationale for this change The aim is to improve the MapOf and ListOf helper functions without breaking anything. I have added a `ListOfWithName` which matches the `MapOf` function in that it takes a name, rather than deriving it from the elements name, which should actually be `element`. This just seems clearer to me as an interface, and makes construction a bit more obvious. ### What changes are included in this PR? * Removed references to panics I can't find * Updated error messages for list and map to be clearer with validation errors * Added a ListOfWithName to provide a clearer matching method to MapOf which takes a name Closes #36696 ### Are these changes tested? Yes, I added a test for the new `ListOfWithName` function. ### Are there any user-facing changes? * Closes: #36696 Authored-by: Mark Wolfe Signed-off-by: Matt Topol --- go/parquet/schema/helpers.go | 62 ++++++++++++++++++++----------- go/parquet/schema/helpers_test.go | 19 ++++++++++ 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/go/parquet/schema/helpers.go b/go/parquet/schema/helpers.go index 7cc89efca6e8e..1198b0b926ac8 100644 --- a/go/parquet/schema/helpers.go +++ b/go/parquet/schema/helpers.go @@ -24,43 +24,62 @@ import ( // ListOf is a convenience helper function to create a properly structured // list structure according to the Parquet Spec. // -// group (LIST) { -// repeated group list { -// element; -// } -// } +// group (LIST) { +// repeated group list { +// element; +// } +// } // -// can only be optional or required. panics if repeated. -// can only be optional or required. panics if repeated. +// can only be optional or required. +// can only be optional or required. func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) { - if rep == parquet.Repetitions.Repeated || n.RepetitionType() == parquet.Repetitions.Repeated { - return nil, xerrors.New("parquet: listof repetition and element repetition must not be repeated.") + return ListOfWithName(n.Name(), n, rep, fieldID) +} + +// ListOf is a convenience helper function to create a properly structured +// list structure according to the Parquet Spec. +// +// group (LIST) { +// repeated group list { +// element; +// } +// } +// +// can only be optional or required. +// can only be optional or required. +func ListOfWithName(listName string, element Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) { + if rep == parquet.Repetitions.Repeated { + return nil, xerrors.Errorf("parquet: listof repetition must not be repeated, got :%s", rep) } - listName := n.Name() - switch n := n.(type) { + if element.RepetitionType() == parquet.Repetitions.Repeated { + return nil, xerrors.Errorf("parquet: element repetition must not be repeated, got: %s", element.RepetitionType()) + } + + switch n := element.(type) { case *PrimitiveNode: n.name = "element" case *GroupNode: n.name = "element" } - list, err := NewGroupNode("list" /* name */, parquet.Repetitions.Repeated, FieldList{n}, -1 /* fieldID */) + list, err := NewGroupNode("list" /* name */, parquet.Repetitions.Repeated, FieldList{element}, -1 /* fieldID */) if err != nil { return nil, err } + return NewGroupNodeLogical(listName, rep, FieldList{list}, ListLogicalType{}, fieldID) } // MapOf is a convenience helper function to create a properly structured // parquet map node setup according to the Parquet Spec. // -// group (MAP) { -// repeated group key_value { -// required key; -// value; -// } -// } +// group (MAP) { +// repeated group key_value { +// required key; +// value; +// } +// } // // key node will be renamed to "key", value node if not nil will be renamed to "value" // @@ -69,14 +88,15 @@ func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) { // the key node *must* be required repetition. panics if optional or repeated // // value node can be nil (omitted) or have a repetition of required or optional *only*. -// panics if value node is not nil and has a repetition of repeated. func MapOf(name string, key Node, value Node, mapRep parquet.Repetition, fieldID int32) (*GroupNode, error) { if mapRep == parquet.Repetitions.Repeated { - return nil, xerrors.New("parquet: map repetition cannot be Repeated") + return nil, xerrors.Errorf("parquet: map repetition cannot be Repeated, got: %s", mapRep) } + if key.RepetitionType() != parquet.Repetitions.Required { - return nil, xerrors.New("parquet: map key repetition must be Required") + return nil, xerrors.Errorf("parquet: map key repetition must be Required, got: %s", key.RepetitionType()) } + if value != nil { if value.RepetitionType() == parquet.Repetitions.Repeated { return nil, xerrors.New("parquet: map value cannot have repetition Repeated") diff --git a/go/parquet/schema/helpers_test.go b/go/parquet/schema/helpers_test.go index 055fe7f46d127..b4f0b684003db 100644 --- a/go/parquet/schema/helpers_test.go +++ b/go/parquet/schema/helpers_test.go @@ -62,6 +62,25 @@ func TestListOfNested(t *testing.T) { }`, strings.TrimSpace(buf.String())) } +func TestListOfWithNameNested(t *testing.T) { + n, err := schema.ListOfWithName("arrays", schema.NewInt32Node("element", parquet.Repetitions.Required, -1), parquet.Repetitions.Required, -1) + assert.NoError(t, err) + final, err := schema.ListOf(n, parquet.Repetitions.Required, -1) + assert.NoError(t, err) + + var buf bytes.Buffer + schema.PrintSchema(final, &buf, 4) + assert.Equal(t, + `required group field_id=-1 arrays (List) { + repeated group field_id=-1 list { + required group field_id=-1 element (List) { + repeated group field_id=-1 list { + required int32 field_id=-1 element; + } + } + } +}`, strings.TrimSpace(buf.String())) +} func TestMapOfNestedTypes(t *testing.T) { n, err := schema.NewGroupNode("student", parquet.Repetitions.Required, schema.FieldList{ schema.NewByteArrayNode("name", parquet.Repetitions.Required, -1),