From bf8efbbecfb5c736674f942814417ea15f9d0a63 Mon Sep 17 00:00:00 2001 From: Mark Wolfe Date: Sat, 15 Jul 2023 07:57:22 +1000 Subject: [PATCH] GH-36696: [Go] Improve the MapOf and ListOf helpers * Removed references to panics I can't find * Updated error messages for list and map to be clearer with validation errors * Added a ListOfWithName to provide a clearer matching method to MapOf which takes a name --- go/parquet/schema/helpers.go | 68 +++++++++++++++++++++++-------- go/parquet/schema/helpers_test.go | 19 +++++++++ 2 files changed, 71 insertions(+), 16 deletions(-) diff --git a/go/parquet/schema/helpers.go b/go/parquet/schema/helpers.go index 7cc89efca6e8e..06a92e86b3afe 100644 --- a/go/parquet/schema/helpers.go +++ b/go/parquet/schema/helpers.go @@ -24,14 +24,14 @@ import ( // ListOf is a convenience helper function to create a properly structured // list structure according to the Parquet Spec. // -// group (LIST) { -// repeated group list { -// element; -// } -// } +// group (LIST) { +// repeated group list { +// element; +// } +// } // -// can only be optional or required. panics if repeated. -// can only be optional or required. panics if repeated. +// can only be optional or required. +// can only be optional or required. func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) { if rep == parquet.Repetitions.Repeated || n.RepetitionType() == parquet.Repetitions.Repeated { return nil, xerrors.New("parquet: listof repetition and element repetition must not be repeated.") @@ -52,15 +52,50 @@ func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) { return NewGroupNodeLogical(listName, rep, FieldList{list}, ListLogicalType{}, fieldID) } +// ListOf is a convenience helper function to create a properly structured +// list structure according to the Parquet Spec. +// +// group (LIST) { +// repeated group list { +// element; +// } +// } +// +// can only be optional or required. +// can only be optional or required. +func ListOfWithName(listName string, element Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) { + if rep == parquet.Repetitions.Repeated { + return nil, xerrors.Errorf("parquet: listof repetition must not be repeated, got :%s", rep) + } + + if rep == parquet.Repetitions.Repeated || element.RepetitionType() == parquet.Repetitions.Repeated { + return nil, xerrors.Errorf("parquet: element repetition must not be repeated, got: %s", element.RepetitionType()) + } + + switch n := element.(type) { + case *PrimitiveNode: + n.name = "element" + case *GroupNode: + n.name = "element" + } + + list, err := NewGroupNode("list" /* name */, parquet.Repetitions.Repeated, FieldList{element}, -1 /* fieldID */) + if err != nil { + return nil, err + } + + return NewGroupNodeLogical(listName, rep, FieldList{list}, ListLogicalType{}, fieldID) +} + // MapOf is a convenience helper function to create a properly structured // parquet map node setup according to the Parquet Spec. // -// group (MAP) { -// repeated group key_value { -// required key; -// value; -// } -// } +// group (MAP) { +// repeated group key_value { +// required key; +// value; +// } +// } // // key node will be renamed to "key", value node if not nil will be renamed to "value" // @@ -69,14 +104,15 @@ func ListOf(n Node, rep parquet.Repetition, fieldID int32) (*GroupNode, error) { // the key node *must* be required repetition. panics if optional or repeated // // value node can be nil (omitted) or have a repetition of required or optional *only*. -// panics if value node is not nil and has a repetition of repeated. func MapOf(name string, key Node, value Node, mapRep parquet.Repetition, fieldID int32) (*GroupNode, error) { if mapRep == parquet.Repetitions.Repeated { - return nil, xerrors.New("parquet: map repetition cannot be Repeated") + return nil, xerrors.Errorf("parquet: map repetition cannot be Repeated, got: %s", mapRep) } + if key.RepetitionType() != parquet.Repetitions.Required { - return nil, xerrors.New("parquet: map key repetition must be Required") + return nil, xerrors.Errorf("parquet: map key repetition must be Required, got: %s", key.RepetitionType()) } + if value != nil { if value.RepetitionType() == parquet.Repetitions.Repeated { return nil, xerrors.New("parquet: map value cannot have repetition Repeated") diff --git a/go/parquet/schema/helpers_test.go b/go/parquet/schema/helpers_test.go index 055fe7f46d127..b4f0b684003db 100644 --- a/go/parquet/schema/helpers_test.go +++ b/go/parquet/schema/helpers_test.go @@ -62,6 +62,25 @@ func TestListOfNested(t *testing.T) { }`, strings.TrimSpace(buf.String())) } +func TestListOfWithNameNested(t *testing.T) { + n, err := schema.ListOfWithName("arrays", schema.NewInt32Node("element", parquet.Repetitions.Required, -1), parquet.Repetitions.Required, -1) + assert.NoError(t, err) + final, err := schema.ListOf(n, parquet.Repetitions.Required, -1) + assert.NoError(t, err) + + var buf bytes.Buffer + schema.PrintSchema(final, &buf, 4) + assert.Equal(t, + `required group field_id=-1 arrays (List) { + repeated group field_id=-1 list { + required group field_id=-1 element (List) { + repeated group field_id=-1 list { + required int32 field_id=-1 element; + } + } + } +}`, strings.TrimSpace(buf.String())) +} func TestMapOfNestedTypes(t *testing.T) { n, err := schema.NewGroupNode("student", parquet.Repetitions.Required, schema.FieldList{ schema.NewByteArrayNode("name", parquet.Repetitions.Required, -1),