From bdd0efdb1074163d5c4c0cf53ae87ed19fc1d242 Mon Sep 17 00:00:00 2001 From: congqixia Date: Wed, 14 Aug 2024 18:04:59 +0800 Subject: [PATCH] enhance: Support dynamic field in SchemaHelper (#35461) Related to #35415 --------- Signed-off-by: Congqi Xia --- .../planparserv2/plan_parser_v2_test.go | 1 - pkg/util/typeutil/schema.go | 19 ++- pkg/util/typeutil/schema_test.go | 118 ++++++++++++++++++ 3 files changed, 136 insertions(+), 2 deletions(-) diff --git a/internal/parser/planparserv2/plan_parser_v2_test.go b/internal/parser/planparserv2/plan_parser_v2_test.go index 63ba7c506c192..e6da966893da5 100644 --- a/internal/parser/planparserv2/plan_parser_v2_test.go +++ b/internal/parser/planparserv2/plan_parser_v2_test.go @@ -37,7 +37,6 @@ func newTestSchema() *schemapb.CollectionSchema { FieldID: 131, Name: "StringArrayField", IsPrimaryKey: false, Description: "string array field", DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_VarChar, - IsDynamic: true, }) return &schemapb.CollectionSchema{ diff --git a/pkg/util/typeutil/schema.go b/pkg/util/typeutil/schema.go index 9d35f0fd95058..6a03474ac692b 100644 --- a/pkg/util/typeutil/schema.go +++ b/pkg/util/typeutil/schema.go @@ -256,6 +256,7 @@ type SchemaHelper struct { idOffset map[int64]int primaryKeyOffset int partitionKeyOffset int + dynamicFieldOffset int } // CreateSchemaHelper returns a new SchemaHelper object @@ -263,7 +264,7 @@ func CreateSchemaHelper(schema *schemapb.CollectionSchema) (*SchemaHelper, error if schema == nil { return nil, errors.New("schema is nil") } - schemaHelper := SchemaHelper{schema: schema, nameOffset: make(map[string]int), idOffset: make(map[int64]int), primaryKeyOffset: -1, partitionKeyOffset: -1} + schemaHelper := SchemaHelper{schema: schema, nameOffset: make(map[string]int), idOffset: make(map[int64]int), primaryKeyOffset: -1, partitionKeyOffset: -1, dynamicFieldOffset: -1} for offset, field := range schema.Fields { if _, ok := schemaHelper.nameOffset[field.Name]; ok { return nil, fmt.Errorf("duplicated fieldName: %s", field.Name) @@ -286,6 +287,13 @@ func CreateSchemaHelper(schema *schemapb.CollectionSchema) (*SchemaHelper, error } schemaHelper.partitionKeyOffset = offset } + + if field.IsDynamic { + if schemaHelper.dynamicFieldOffset != -1 { + return nil, errors.New("dynamic field is not unique") + } + schemaHelper.dynamicFieldOffset = offset + } } return &schemaHelper, nil } @@ -306,6 +314,15 @@ func (helper *SchemaHelper) GetPartitionKeyField() (*schemapb.FieldSchema, error return helper.schema.Fields[helper.partitionKeyOffset], nil } +// GetDynamicField returns the field schema of dynamic field if exists. +// if there is no dynamic field defined in schema, error will be returned. +func (helper *SchemaHelper) GetDynamicField() (*schemapb.FieldSchema, error) { + if helper.dynamicFieldOffset == -1 { + return nil, fmt.Errorf("failed to get dynamic field: no dynamic field in schema") + } + return helper.schema.Fields[helper.dynamicFieldOffset], nil +} + // GetFieldFromName is used to find the schema by field name func (helper *SchemaHelper) GetFieldFromName(fieldName string) (*schemapb.FieldSchema, error) { offset, ok := helper.nameOffset[fieldName] diff --git a/pkg/util/typeutil/schema_test.go b/pkg/util/typeutil/schema_test.go index b671fcbc2a5fd..a42e54d867bd1 100644 --- a/pkg/util/typeutil/schema_test.go +++ b/pkg/util/typeutil/schema_test.go @@ -356,6 +356,124 @@ func TestSchema_GetVectorFieldSchema(t *testing.T) { }) } +func TestSchemaHelper_GetDynamicField(t *testing.T) { + t.Run("with_dynamic_schema", func(t *testing.T) { + sch := &schemapb.CollectionSchema{ + Name: "testColl", + Description: "", + AutoID: false, + Fields: []*schemapb.FieldSchema{ + { + FieldID: 100, + Name: "field_int64", + IsPrimaryKey: true, + DataType: schemapb.DataType_Int64, + }, + { + FieldID: 101, + Name: "field_float_vector", + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "128", + }, + }, + }, + { + FieldID: 102, + Name: "$meta", + DataType: schemapb.DataType_JSON, + IsDynamic: true, + }, + }, + } + + helper, err := CreateSchemaHelper(sch) + require.NoError(t, err) + + f, err := helper.GetDynamicField() + assert.NoError(t, err) + assert.NotNil(t, f) + assert.EqualValues(t, 102, f.FieldID) + }) + + t.Run("without_dynamic_schema", func(t *testing.T) { + sch := &schemapb.CollectionSchema{ + Name: "testColl", + Description: "", + AutoID: false, + Fields: []*schemapb.FieldSchema{ + { + FieldID: 100, + Name: "field_int64", + IsPrimaryKey: true, + DataType: schemapb.DataType_Int64, + }, + { + FieldID: 101, + Name: "field_float_vector", + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "128", + }, + }, + }, + }, + } + + helper, err := CreateSchemaHelper(sch) + require.NoError(t, err) + + _, err = helper.GetDynamicField() + assert.Error(t, err) + }) + + t.Run("multiple_dynamic_fields", func(t *testing.T) { + sch := &schemapb.CollectionSchema{ + Name: "testColl", + Description: "", + AutoID: false, + Fields: []*schemapb.FieldSchema{ + { + FieldID: 100, + Name: "field_int64", + IsPrimaryKey: true, + DataType: schemapb.DataType_Int64, + }, + { + FieldID: 101, + Name: "field_float_vector", + DataType: schemapb.DataType_FloatVector, + TypeParams: []*commonpb.KeyValuePair{ + { + Key: common.DimKey, + Value: "128", + }, + }, + }, + { + FieldID: 102, + Name: "$meta", + DataType: schemapb.DataType_JSON, + IsDynamic: true, + }, + { + FieldID: 103, + Name: "other_json", + DataType: schemapb.DataType_JSON, + IsDynamic: true, + }, + }, + } + + _, err := CreateSchemaHelper(sch) + assert.Error(t, err) + }) +} + func TestSchema_invalid(t *testing.T) { t.Run("Duplicate field name", func(t *testing.T) { schema := &schemapb.CollectionSchema{