Skip to content

Commit

Permalink
apacheGH-36696: [Go] Improve the MapOf and ListOf helpers (apache#36697)
Browse files Browse the repository at this point in the history
### Rationale for this change

The aim is to improve the MapOf and ListOf helper functions without breaking anything. I have added a `ListOfWithName` which matches the `MapOf` function in that it takes a name, rather than deriving it from the elements name, which should actually be `element`.

This just seems clearer to me as an interface, and makes construction a bit more obvious.

### What changes are included in this PR?

* Removed references to panics I can't find
* Updated error messages for list and map to be clearer with validation errors
* Added a ListOfWithName to provide a clearer matching method to MapOf which takes a name

Closes apache#36696 

### Are these changes tested?

Yes, I added a test for the new `ListOfWithName` function.

### Are there any user-facing changes?

* Closes: apache#36696

Authored-by: Mark Wolfe <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
  • Loading branch information
wolfeidau authored and loicalleyne committed Nov 13, 2023
1 parent f56d0a9 commit 5655181
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 21 deletions.
62 changes: 41 additions & 21 deletions go/parquet/schema/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,43 +24,62 @@ import (
// ListOf is a convenience helper function to create a properly structured
// list structure according to the Parquet Spec.
//
// <list-repetition> group <name> (LIST) {
// repeated group list {
// <element-repetition> <element-type> element;
// }
// }
// <list-repetition> group <name> (LIST) {
// repeated group list {
// <element-repetition> <element-type> element;
// }
// }
//
// <list-repetition> can only be optional or required. panics if repeated.
// <element-repetition> can only be optional or required. panics if repeated.
// <list-repetition> can only be optional or required.
// <element-repetition> can only be optional or required.
func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) {
if rep == parquet.Repetitions.Repeated || n.RepetitionType() == parquet.Repetitions.Repeated {
return nil, xerrors.New("parquet: listof repetition and element repetition must not be repeated.")
return ListOfWithName(n.Name(), n, rep, fieldID)
}

// ListOf is a convenience helper function to create a properly structured
// list structure according to the Parquet Spec.
//
// <list-repetition> group <name> (LIST) {
// repeated group list {
// <element-repetition> <element-type> element;
// }
// }
//
// <list-repetition> can only be optional or required.
// <element-repetition> can only be optional or required.
func ListOfWithName(listName string, element Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) {
if rep == parquet.Repetitions.Repeated {
return nil, xerrors.Errorf("parquet: listof repetition must not be repeated, got :%s", rep)
}
listName := n.Name()

switch n := n.(type) {
if element.RepetitionType() == parquet.Repetitions.Repeated {
return nil, xerrors.Errorf("parquet: element repetition must not be repeated, got: %s", element.RepetitionType())
}

switch n := element.(type) {
case *PrimitiveNode:
n.name = "element"
case *GroupNode:
n.name = "element"
}

list, err := NewGroupNode("list" /* name */, parquet.Repetitions.Repeated, FieldList{n}, -1 /* fieldID */)
list, err := NewGroupNode("list" /* name */, parquet.Repetitions.Repeated, FieldList{element}, -1 /* fieldID */)
if err != nil {
return nil, err
}

return NewGroupNodeLogical(listName, rep, FieldList{list}, ListLogicalType{}, fieldID)
}

// MapOf is a convenience helper function to create a properly structured
// parquet map node setup according to the Parquet Spec.
//
// <map-repetition> group <name> (MAP) {
// repeated group key_value {
// required <key-type> key;
// <value-repetition> <value-type> value;
// }
// }
// <map-repetition> group <name> (MAP) {
// repeated group key_value {
// required <key-type> key;
// <value-repetition> <value-type> value;
// }
// }
//
// key node will be renamed to "key", value node if not nil will be renamed to "value"
//
Expand All @@ -69,14 +88,15 @@ func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) {
// the key node *must* be required repetition. panics if optional or repeated
//
// value node can be nil (omitted) or have a repetition of required or optional *only*.
// panics if value node is not nil and has a repetition of repeated.
func MapOf(name string, key Node, value Node, mapRep parquet.Repetition, fieldID int32) (*GroupNode, error) {
if mapRep == parquet.Repetitions.Repeated {
return nil, xerrors.New("parquet: map repetition cannot be Repeated")
return nil, xerrors.Errorf("parquet: map repetition cannot be Repeated, got: %s", mapRep)
}

if key.RepetitionType() != parquet.Repetitions.Required {
return nil, xerrors.New("parquet: map key repetition must be Required")
return nil, xerrors.Errorf("parquet: map key repetition must be Required, got: %s", key.RepetitionType())
}

if value != nil {
if value.RepetitionType() == parquet.Repetitions.Repeated {
return nil, xerrors.New("parquet: map value cannot have repetition Repeated")
Expand Down
19 changes: 19 additions & 0 deletions go/parquet/schema/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,25 @@ func TestListOfNested(t *testing.T) {
}`, strings.TrimSpace(buf.String()))
}

func TestListOfWithNameNested(t *testing.T) {
n, err := schema.ListOfWithName("arrays", schema.NewInt32Node("element", parquet.Repetitions.Required, -1), parquet.Repetitions.Required, -1)
assert.NoError(t, err)
final, err := schema.ListOf(n, parquet.Repetitions.Required, -1)
assert.NoError(t, err)

var buf bytes.Buffer
schema.PrintSchema(final, &buf, 4)
assert.Equal(t,
`required group field_id=-1 arrays (List) {
repeated group field_id=-1 list {
required group field_id=-1 element (List) {
repeated group field_id=-1 list {
required int32 field_id=-1 element;
}
}
}
}`, strings.TrimSpace(buf.String()))
}
func TestMapOfNestedTypes(t *testing.T) {
n, err := schema.NewGroupNode("student", parquet.Repetitions.Required, schema.FieldList{
schema.NewByteArrayNode("name", parquet.Repetitions.Required, -1),
Expand Down

0 comments on commit 5655181

Please sign in to comment.