Skip to content

Commit

Permalink
apacheGH-36696: [Go] Improve the MapOf and ListOf helpers
Browse files Browse the repository at this point in the history
* Removed references to panics I can't find
* Updated error messages for list and map to be clearer with validation errors
* Added a ListOfWithName to provide a clearer matching method to MapOf which takes a name
  • Loading branch information
wolfeidau committed Jul 14, 2023
1 parent d8982fe commit dab43fc
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 16 deletions.
66 changes: 50 additions & 16 deletions go/parquet/schema/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ import (
// ListOf is a convenience helper function to create a properly structured
// list structure according to the Parquet Spec.
//
// <list-repetition> group <name> (LIST) {
// repeated group list {
// <element-repetition> <element-type> element;
// }
// }
// <list-repetition> group <name> (LIST) {
// repeated group list {
// <element-repetition> <element-type> element;
// }
// }
//
// <list-repetition> can only be optional or required. panics if repeated.
// <element-repetition> can only be optional or required. panics if repeated.
// <list-repetition> can only be optional or required.
// <element-repetition> can only be optional or required.
func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) {
if rep == parquet.Repetitions.Repeated || n.RepetitionType() == parquet.Repetitions.Repeated {
return nil, xerrors.New("parquet: listof repetition and element repetition must not be repeated.")
Expand All @@ -52,15 +52,48 @@ func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) {
return NewGroupNodeLogical(listName, rep, FieldList{list}, ListLogicalType{}, fieldID)
}

// ListOf is a convenience helper function to create a properly structured
// list structure according to the Parquet Spec.
//
// <list-repetition> group <name> (LIST) {
// repeated group list {
// <element-repetition> <element-type> element;
// }
// }
//
// <list-repetition> can only be optional or required.
// <element-repetition> can only be optional or required.
// The element nodes name must match `element`.
func ListOfWithName(listName string, element Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) {
if rep == parquet.Repetitions.Repeated {
return nil, xerrors.Errorf("parquet: listof repetition must not be repeated, got :%s", rep)
}

if rep == parquet.Repetitions.Repeated || element.RepetitionType() == parquet.Repetitions.Repeated {
return nil, xerrors.Errorf("parquet: element repetition must not be repeated, got: %s", element.RepetitionType())
}

if element.Name() != "element" {
return nil, xerrors.Errorf("parquet: element node name must be `element`, got: %s", element.Name())
}

list, err := NewGroupNode("list" /* name */, parquet.Repetitions.Repeated, FieldList{element}, -1 /* fieldID */)
if err != nil {
return nil, err
}

return NewGroupNodeLogical(listName, rep, FieldList{list}, ListLogicalType{}, fieldID)
}

// MapOf is a convenience helper function to create a properly structured
// parquet map node setup according to the Parquet Spec.
//
// <map-repetition> group <name> (MAP) {
// repeated group key_value {
// required <key-type> key;
// <value-repetition> <value-type> value;
// }
// }
// <map-repetition> group <name> (MAP) {
// repeated group key_value {
// required <key-type> key;
// <value-repetition> <value-type> value;
// }
// }
//
// key node will be renamed to "key", value node if not nil will be renamed to "value"
//
Expand All @@ -69,14 +102,15 @@ func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) {
// the key node *must* be required repetition. panics if optional or repeated
//
// value node can be nil (omitted) or have a repetition of required or optional *only*.
// panics if value node is not nil and has a repetition of repeated.
func MapOf(name string, key Node, value Node, mapRep parquet.Repetition, fieldID int32) (*GroupNode, error) {
if mapRep == parquet.Repetitions.Repeated {
return nil, xerrors.New("parquet: map repetition cannot be Repeated")
return nil, xerrors.Errorf("parquet: map repetition cannot be Repeated, got: %s", mapRep)
}

if key.RepetitionType() != parquet.Repetitions.Required {
return nil, xerrors.New("parquet: map key repetition must be Required")
return nil, xerrors.Errorf("parquet: map key repetition must be Required, got: %s", key.RepetitionType())
}

if value != nil {
if value.RepetitionType() == parquet.Repetitions.Repeated {
return nil, xerrors.New("parquet: map value cannot have repetition Repeated")
Expand Down
19 changes: 19 additions & 0 deletions go/parquet/schema/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,25 @@ func TestListOfNested(t *testing.T) {
}`, strings.TrimSpace(buf.String()))
}

func TestListOfWithNameNested(t *testing.T) {
n, err := schema.ListOfWithName("arrays", schema.NewInt32Node("element", parquet.Repetitions.Required, -1), parquet.Repetitions.Required, -1)
assert.NoError(t, err)
final, err := schema.ListOf(n, parquet.Repetitions.Required, -1)
assert.NoError(t, err)

var buf bytes.Buffer
schema.PrintSchema(final, &buf, 4)
assert.Equal(t,
`required group field_id=-1 arrays (List) {
repeated group field_id=-1 list {
required group field_id=-1 element (List) {
repeated group field_id=-1 list {
required int32 field_id=-1 element;
}
}
}
}`, strings.TrimSpace(buf.String()))
}
func TestMapOfNestedTypes(t *testing.T) {
n, err := schema.NewGroupNode("student", parquet.Repetitions.Required, schema.FieldList{
schema.NewByteArrayNode("name", parquet.Repetitions.Required, -1),
Expand Down

0 comments on commit dab43fc

Please sign in to comment.