From 168fac6764fcac62fcece121e9970279560c3031 Mon Sep 17 00:00:00 2001 From: Abhinav Dangeti Date: Mon, 22 Apr 2024 14:34:19 -0600 Subject: [PATCH] MB-59846, MB-59616, MB-61009: Cluster version tracking for features + Cluster version tracking for features: - xattrs - vector_base64 - dims:4096 + This change separates out feature strings for xattrs and vector_base64 because xattrs is not restricted to vectors alone. + vector_base64 is combined with dims:4096 which are meant to go out together in v2 (7.6.2). + Also requires: https://github.com/blevesearch/bleve/pull/2015 Change-Id: I44daa225b5c397e9f8cae26ce3fcb36cab9b26d6 Reviewed-on: https://review.couchbase.org/c/cbft/+/208927 Well-Formed: Build Bot Well-Formed: Restriction Checker Reviewed-by: Likith B Tested-by: Abhi Dangeti --- cmd/cbft/main.go | 2 +- knn_nosup.go | 6 + knn_sup.go | 20 +++- pindex_bleve.go | 276 +++++++++++++++++++++++-------------------- pindex_bleve_doc.go | 4 +- pindex_bleve_test.go | 60 +++++++--- 6 files changed, 218 insertions(+), 150 deletions(-) diff --git a/cmd/cbft/main.go b/cmd/cbft/main.go index 3ec4fb7a..eec3c8b6 100644 --- a/cmd/cbft/main.go +++ b/cmd/cbft/main.go @@ -343,7 +343,7 @@ func mainStart(cfg cbgt.Cfg, uuid string, tags []string, container string, "," + cbft.FeatureFileTransferRebalance + "," + cbft.FeatureGeoSpatial + cbft.FeatureVectorSearchSupport() + - "," + cbft.FeatureXattrsAndBase64 + "," + cbft.FeatureXattrs extrasMap["version-cbft.app"] = version extrasMap["version-cbft.lib"] = cbft.VERSION diff --git a/knn_nosup.go b/knn_nosup.go index 19969b85..45db177f 100644 --- a/knn_nosup.go +++ b/knn_nosup.go @@ -17,10 +17,16 @@ import ( "github.com/blevesearch/bleve/v2" ) +const featuresVectorBase64Dims4096 = "" + func FeatureVectorSearchSupport() string { return "" } +func featureFlagForDims(int) string { + return "" +} + func interpretKNNForRequest(knn, knnOperator json.RawMessage, r *bleve.SearchRequest) ( *bleve.SearchRequest, error) { // Not supported diff --git 
a/knn_sup.go b/knn_sup.go index d43a7aa7..9aefa510 100644 --- a/knn_sup.go +++ b/knn_sup.go @@ -17,8 +17,26 @@ import ( "github.com/blevesearch/bleve/v2" ) +// v2: 7.6.2 +const featuresVectorBase64Dims4096 = "vector_base64_dims:4096" + func FeatureVectorSearchSupport() string { - return "," + FeatureVectorSearch + return "," + featureVectorSearch + + "," + featuresVectorBase64Dims4096 +} + +// returns the feature flag to check cluster wide for the given dims ("" when no +// check is needed); to be extended if & when dims' ceiling is raised in the future +func featureFlagForDims(dims int) string { + if dims <= 2048 { + return "" + } + + if dims <= 4096 { + return featuresVectorBase64Dims4096 + } + + return "" } func interpretKNNForRequest(knn, knnOperator json.RawMessage, r *bleve.SearchRequest) ( diff --git a/pindex_bleve.go b/pindex_bleve.go index 15c39bd0..dac7670e 100644 --- a/pindex_bleve.go +++ b/pindex_bleve.go @@ -41,72 +41,67 @@ import ( log "github.com/couchbase/clog" ) -// Use sync/atomic to access these stats -var BatchBytesAdded uint64 -var BatchBytesRemoved uint64 -var NumBatchesIntroduced uint64 - -var TotBatchesFlushedOnMaxOps uint64 -var TotBatchesFlushedOnTimer uint64 -var TotBatchesNew uint64 -var TotBatchesMerged uint64 - -var TotRollbackPartial uint64 -var TotRollbackFull uint64 - -var featureIndexType = "indexType" -var FeatureScorchIndex = featureIndexType + ":" + scorch.Name - -var FeatureCollections = cbgt.SOURCE_GOCBCORE + ":collections" - -var FeatureGeoSpatial = "geoSpatial" - -var FeatureVectorSearch = "vectors" - -var FeatureXattrsAndBase64 = "XattrsBase64" - -var FeatureBlevePreferredSegmentVersion = fmt.Sprintf("segmentVersion:%d", BlevePreferredZapVersion) - -var xAttrsMappingName = "_$xattrs" +const ( + featureIndexType = "indexType" + FeatureScorchIndex = featureIndexType + ":" + scorch.Name + + FeatureCollections = cbgt.SOURCE_GOCBCORE + ":collections" + FeatureGeoSpatial = "geoSpatial" + featureVectorSearch = "vectors" + FeatureXattrs = "xattrs" + + // BleveDefaultZapVersion
represents the default zap version. + // This version is expected to remain a constant as all the + // future indexes are going to have a default segment version. + // Only pre CC indexes are expected to have an empty segment version + // which would be treated like the default zap version. + BleveDefaultZapVersion = int(11) + + // BlevePreferredZapVersion is the recommended zap version for newer indexes. + // This version needs to be bumped to reflect the latest recommended zap + // version in any given release. + BlevePreferredZapVersion = int(15) + + // Preview ZapVersion for indexes that come with vector search support. + // FIXME: This is a temporary placeholder which will be removed once the + // BlevePreferredZapVersion is updated to this. See: MB-59918 + BleveVectorZapVersion = int(16) + + xattrsMappingName = "_$xattrs" + DefaultBleveMaxClauseCount = 1024 +) -var BleveMaxOpsPerBatch = 200 // Unlimited when <= 0. +var ( + FeatureBlevePreferredSegmentVersion = fmt.Sprintf("segmentVersion:%d", BlevePreferredZapVersion) -var BleveBatchFlushDuration = time.Duration(100 * time.Millisecond) + // Use sync/atomic to access these stats + BatchBytesAdded uint64 + BatchBytesRemoved uint64 + NumBatchesIntroduced uint64 -var BleveKVStoreMetricsAllow = false // Use metrics wrapper KVStore by default. + TotBatchesFlushedOnMaxOps uint64 + TotBatchesFlushedOnTimer uint64 + TotBatchesNew uint64 + TotBatchesMerged uint64 -const DefaultBleveMaxClauseCount = 1024 + TotRollbackPartial uint64 + TotRollbackFull uint64 -// represents the number of async batch workers per pindex -var asyncBatchWorkerCount = 4 // need to make it configurable, + BleveMaxOpsPerBatch = 200 // Unlimited when <= 0. -var TotBleveDestOpened uint64 -var TotBleveDestClosed uint64 + BleveBatchFlushDuration = time.Duration(100 * time.Millisecond) -// BleveDefaultZapVersion represents the default zap version. 
-// This version is expected to remain a constant as all the -// future indexes are going to have a default segment version. -// Only pre CC indexes are expected to have an empty segment version -// which would be treated like the default zap version. -const BleveDefaultZapVersion = int(11) + BleveKVStoreMetricsAllow = false // Use metrics wrapper KVStore by default. -// BlevePreferredZapVersion is the recommended zap version for newer indexes. -// This version needs to be bumped to reflect the latest recommended zap -// version in any given release. -const BlevePreferredZapVersion = int(15) -// Preview ZapVersion for indexes that come with vector search support. -// FIXME: This is a temporary placeholder which will be removed once the -// BlevePreferredZapVersion is updated to this. See: MB-59918 -const BleveVectorZapVersion = int(16) + // represents the number of async batch workers per pindex + asyncBatchWorkerCount = 4 // need to make it configurable, -var defaultLimitingMinTime = 500 -var defaultLimitingMaxTime = 120000 + TotBleveDestOpened uint64 + TotBleveDestClosed uint64 -const ( - noVectorFields int = iota - vectorFields // only vector fields (7.6.0+) - vectorAndBase64Fields // vector + vector_base64 fields (7.6.2+) + defaultLimitingMinTime = 500 + defaultLimitingMaxTime = 120000 ) // BleveParams represents the bleve index params. 
See also @@ -549,7 +544,7 @@ func PrepareIndexDef(mgr *cbgt.Manager, indexDef *cbgt.IndexDef) ( indexDef.SourceType = cbgt.SOURCE_GOCBCORE } - var vectorFieldsSpecifiedInMapping int + var indexVectorPicture vectorPicture bp := NewBleveParams() if len(indexDef.Params) > 0 { @@ -622,22 +617,22 @@ func PrepareIndexDef(mgr *cbgt.Manager, indexDef *cbgt.IndexDef) ( } } - vectorFieldsSpecifiedInMapping = vectorFieldsExistWithinIndexMapping(bp.Mapping) - if vectorFieldsSpecifiedInMapping != noVectorFields && + indexVectorPicture = vectorPictureFromIndexMapping(bp.Mapping) + if indexVectorPicture.fields != noVectorFields && (!isClusterCompatibleFor(FeatureVectorSearchSupportVersion) || - !cbgt.IsFeatureSupportedByCluster(FeatureVectorSearch, nodeDefs)) { + !cbgt.IsFeatureSupportedByCluster(featureVectorSearch, nodeDefs)) { // Vector indexing & search is NOT supported on this cluster // (lower version or mixed lower version) return nil, cbgt.NewBadRequestError("PrepareIndex, err: vector typed fields " + - "not supported in mixed version cluster") + "not supported in this cluster") } if mappingContainsXAttrs(bp) { - if !cbgt.IsFeatureSupportedByCluster(FeatureXattrsAndBase64, nodeDefs) { + if !cbgt.IsFeatureSupportedByCluster(FeatureXattrs, nodeDefs) { // XAttrs is NOT supported on this cluster // (lower version or mixed lower version) return nil, cbgt.NewBadRequestError("PrepareIndex, err: xattr fields " + - "and properties not supported in mixed version cluster") + "and properties not supported in this cluster") } sourceParams := make(map[string]interface{}) @@ -660,10 +655,18 @@ func PrepareIndexDef(mgr *cbgt.Manager, indexDef *cbgt.IndexDef) ( indexDef.SourceParams = string(updatedSourceParams) } - if vectorFieldsSpecifiedInMapping == vectorAndBase64Fields && - !cbgt.IsFeatureSupportedByCluster(FeatureXattrsAndBase64, nodeDefs) { - return nil, cbgt.NewBadRequestError("PrepareIndex, err: vector_base64 typed fields " + - "not supported in mixed version cluster") + if 
indexVectorPicture.fields == vectorAndBase64Fields && + (len(featuresVectorBase64Dims4096) == 0 || + !cbgt.IsFeatureSupportedByCluster(featuresVectorBase64Dims4096, nodeDefs)) { + return nil, cbgt.NewBadRequestError("PrepareIndex, err: vector_base64 typed fields " + + "not supported in this cluster") + } + + featureFlagToCheckForDims := featureFlagForDims(indexVectorPicture.maxDims) + if (len(featureFlagToCheckForDims) > 0 && + !cbgt.IsFeatureSupportedByCluster(featureFlagToCheckForDims, nodeDefs)) { + return nil, cbgt.NewBadRequestError(fmt.Sprintf("PrepareIndex, err: vector typed fields "+ + "with dims %v not supported in this cluster", indexVectorPicture.maxDims)) } } @@ -683,7 +686,7 @@ func PrepareIndexDef(mgr *cbgt.Manager, indexDef *cbgt.IndexDef) ( "supported", int(zv)) } - if vectorFieldsSpecifiedInMapping != noVectorFields && int(zv) < BleveVectorZapVersion { + if indexVectorPicture.fields != noVectorFields && int(zv) < BleveVectorZapVersion { // overrride segmentVersion to minimum version needed to support vector mappings bp.Store["segmentVersion"] = BleveVectorZapVersion } @@ -696,7 +699,7 @@ func PrepareIndexDef(mgr *cbgt.Manager, indexDef *cbgt.IndexDef) ( // zap version for newer indexes in a sufficiently advanced // cluster, else consider the default zap version. 
if segmentVersionSupported { - if vectorFieldsSpecifiedInMapping != noVectorFields { + if indexVectorPicture.fields != noVectorFields { bp.Store["segmentVersion"] = BleveVectorZapVersion } else { bp.Store["segmentVersion"] = BlevePreferredZapVersion @@ -723,64 +726,6 @@ func PrepareIndexDef(mgr *cbgt.Manager, indexDef *cbgt.IndexDef) ( return rv, nil } -// Utility function check if a "vector" typed field is present within -// the index mapping -func vectorFieldsExistWithinIndexMapping(m mapping.IndexMapping) int { - im, ok := m.(*mapping.IndexMappingImpl) - if !ok { - // cannot interpret index mapping - return noVectorFields - } - - var vectorFieldExistsWithinDocMapping func(*mapping.DocumentMapping) int - vectorFieldExistsWithinDocMapping = func(d *mapping.DocumentMapping) int { - rv := noVectorFields - if d != nil && d.Enabled { - for _, v := range d.Properties { - val := vectorFieldExistsWithinDocMapping(v) - if val == vectorAndBase64Fields { - return val - } else if val > rv { - rv = val - } - } - - for _, field := range d.Fields { - if field.Type == "vector" && vectorFields > rv { - rv = vectorFields - } - if field.Type == "vector_base64" { - return vectorAndBase64Fields - } - } - } - - return rv - } - - rv := noVectorFields - - // Check DefaultMapping - val := vectorFieldExistsWithinDocMapping(im.DefaultMapping) - if val == vectorAndBase64Fields { - return val - } else if val > rv { - rv = val - } - - // Iterate over TypeMapping(s) - for _, d := range im.TypeMapping { - val := vectorFieldExistsWithinDocMapping(d) - if val == vectorAndBase64Fields { - return val - } else if val > rv { - rv = val - } - } - - return rv -} - func ValidateBleve(indexType, indexName, indexParams string) error { if len(indexParams) <= 0 { return nil @@ -3609,14 +3554,14 @@ func mappingContainsXAttrs(bp *BleveParams) bool { if im, ok := bp.Mapping.(*mapping.IndexMappingImpl); ok { if im.DefaultMapping.Enabled { - if _, ok := im.DefaultMapping.Properties[xAttrsMappingName]; ok { + if 
_, ok := im.DefaultMapping.Properties[xattrsMappingName]; ok { return true } } for _, tm := range im.TypeMapping { if tm.Enabled { - if _, ok := tm.Properties[xAttrsMappingName]; ok { + if _, ok := tm.Properties[xattrsMappingName]; ok { return true } } @@ -3625,3 +3570,78 @@ func mappingContainsXAttrs(bp *BleveParams) bool { return false } + +const ( + noVectorFields int = iota + vectorFields // only vector fields (7.6.0+) + vectorAndBase64Fields // vector + vector_base64 fields (7.6.2+) +) + +type vectorPicture struct { + fields int + maxDims int +} + +// Utility function to determine which "vector" typed fields are present within +// the index mapping, along with the largest dims seen across its fields +func vectorPictureFromIndexMapping(m mapping.IndexMapping) vectorPicture { + im, ok := m.(*mapping.IndexMappingImpl) + if !ok { + // cannot interpret index mapping + return vectorPicture{} + } + + var vectorPictureFromDocMapping func(*mapping.DocumentMapping) vectorPicture + vectorPictureFromDocMapping = func(d *mapping.DocumentMapping) vectorPicture { + rv := vectorPicture{} + if d != nil && d.Enabled { + for _, v := range d.Properties { + val := vectorPictureFromDocMapping(v) + if val.fields > rv.fields { + rv.fields = val.fields + } + if val.maxDims > rv.maxDims { + rv.maxDims = val.maxDims + } + } + + for _, field := range d.Fields { + if field.Type == "vector" && vectorFields > rv.fields { + rv.fields = vectorFields + } + if field.Type == "vector_base64" && vectorAndBase64Fields > rv.fields { + rv.fields = vectorAndBase64Fields + } + if field.Dims > rv.maxDims { + rv.maxDims = field.Dims + } + } + } + + return rv + } + + rv := vectorPicture{} + + // Check DefaultMapping + val := vectorPictureFromDocMapping(im.DefaultMapping) + if val.fields > rv.fields { + rv.fields = val.fields + } + if val.maxDims > rv.maxDims { + rv.maxDims = val.maxDims + } + + // Iterate over TypeMapping(s) + for _, d := range im.TypeMapping { + val := vectorPictureFromDocMapping(d) + if val.fields > rv.fields { + rv.fields = val.fields + } + if
val.maxDims > rv.maxDims { + rv.maxDims = val.maxDims + } + } + + return rv +} diff --git a/pindex_bleve_doc.go b/pindex_bleve_doc.go index 3f5c4ddf..70279e36 100644 --- a/pindex_bleve_doc.go +++ b/pindex_bleve_doc.go @@ -174,8 +174,8 @@ func (b *BleveDocumentConfig) BuildDocumentEx(key, val []byte, // Add the xattr fields back into the document mapping // under the xattrs field mapping - if _, ok := v[xAttrsMappingName]; !ok && xattrs != nil { - v[xAttrsMappingName] = xattrs + if _, ok := v[xattrsMappingName]; !ok && xattrs != nil { + v[xattrsMappingName] = xattrs } if cmf != nil && len(b.CollPrefixLookup) > 1 { diff --git a/pindex_bleve_test.go b/pindex_bleve_test.go index 782ad788..c2edcb2a 100644 --- a/pindex_bleve_test.go +++ b/pindex_bleve_test.go @@ -943,7 +943,7 @@ func TestHasXAttrs(t *testing.T) { bleveParams: NewBleveParams(), indexMapping: bleve.NewIndexMapping(), fields: map[string]interface{}{ - xAttrsMappingName: map[string]interface{}{ + xattrsMappingName: map[string]interface{}{ "value": struct{}{}, }, }, @@ -955,7 +955,7 @@ func TestHasXAttrs(t *testing.T) { indexMapping: bleve.NewIndexMapping(), fields: map[string]interface{}{ "key": map[string]interface{}{ - xAttrsMappingName: struct{}{}, + xattrsMappingName: struct{}{}, }, }, typeMappingName: "type", @@ -969,7 +969,7 @@ func TestHasXAttrs(t *testing.T) { "value": struct{}{}, }, }, - typeMappingName: xAttrsMappingName, + typeMappingName: xattrsMappingName, xattrs: false, }, } @@ -1005,10 +1005,12 @@ func TestHasXAttrs(t *testing.T) { } } -func TestVectorFieldsExistWithinIndexMapping(t *testing.T) { +func TestVectorPictureFromIndexMapping(t *testing.T) { tests := []struct { - idxMapping *mapping.IndexMappingImpl - vectorField int + idxMapping *mapping.IndexMappingImpl + expectFields int + expectDims int + expectDimsFlag string }{ { idxMapping: &mapping.IndexMappingImpl{ @@ -1023,7 +1025,8 @@ func TestVectorFieldsExistWithinIndexMapping(t *testing.T) { }, }, }, - vectorField: noVectorFields, + 
expectFields: noVectorFields, + expectDimsFlag: "", }, { idxMapping: &mapping.IndexMappingImpl{ @@ -1034,11 +1037,14 @@ func TestVectorFieldsExistWithinIndexMapping(t *testing.T) { Fields: []*mapping.FieldMapping{ { Type: "vector", + Dims: 3072, }, }, }, }, - vectorField: vectorFields, + expectFields: vectorFields, + expectDims: 3072, + expectDimsFlag: featuresVectorBase64Dims4096, }, { idxMapping: &mapping.IndexMappingImpl{ @@ -1053,7 +1059,8 @@ func TestVectorFieldsExistWithinIndexMapping(t *testing.T) { }, }, }, - vectorField: vectorAndBase64Fields, + expectFields: vectorAndBase64Fields, + expectDimsFlag: "", }, { idxMapping: &mapping.IndexMappingImpl{ @@ -1064,6 +1071,7 @@ func TestVectorFieldsExistWithinIndexMapping(t *testing.T) { Fields: []*mapping.FieldMapping{ { Type: "vector_base64", + Dims: 3072, }, }, }, @@ -1078,7 +1086,9 @@ func TestVectorFieldsExistWithinIndexMapping(t *testing.T) { }, }, }, - vectorField: vectorAndBase64Fields, + expectFields: vectorAndBase64Fields, + expectDims: 3072, + expectDimsFlag: featuresVectorBase64Dims4096, }, { idxMapping: &mapping.IndexMappingImpl{ @@ -1089,6 +1099,7 @@ func TestVectorFieldsExistWithinIndexMapping(t *testing.T) { Fields: []*mapping.FieldMapping{ { Type: "vector", + Dims: 3072, }, }, }, @@ -1099,11 +1110,14 @@ func TestVectorFieldsExistWithinIndexMapping(t *testing.T) { Fields: []*mapping.FieldMapping{ { Type: "vector_base64", + Dims: 4096, }, }, }, }, - vectorField: vectorAndBase64Fields, + expectFields: vectorAndBase64Fields, + expectDims: 4096, + expectDimsFlag: featuresVectorBase64Dims4096, }, { idxMapping: &mapping.IndexMappingImpl{ @@ -1128,7 +1142,8 @@ func TestVectorFieldsExistWithinIndexMapping(t *testing.T) { }, }, }, - vectorField: vectorFields, + expectFields: vectorFields, + expectDimsFlag: "", }, { idxMapping: &mapping.IndexMappingImpl{ @@ -1153,16 +1168,25 @@ func TestVectorFieldsExistWithinIndexMapping(t *testing.T) { }, }, }, - vectorField: vectorAndBase64Fields, + expectFields: 
vectorAndBase64Fields, }, } - for _, test := range tests { - res := vectorFieldsExistWithinIndexMapping(test.idxMapping) + for testi, test := range tests { + res := vectorPictureFromIndexMapping(test.idxMapping) + + if res.fields != test.expectFields { + t.Errorf("[%d] Expected %v as output, but got %v. Index Mapping - %+v", + testi+1, test.expectFields, res.fields, test.idxMapping) + } + + if res.maxDims != test.expectDims { + t.Errorf("[%d] Expected %v as output, but got %v. Index Mapping - %+v", + testi+1, test.expectDims, res.maxDims, test.idxMapping) + } - if res != test.vectorField { - t.Errorf("Expected %v as output, but got %v. Index Mapping - %+v", - test.vectorField, res, test.idxMapping) + if featureFlagForDims(res.maxDims) != test.expectDimsFlag { + t.Errorf("[%d] Unexpected flag for dims: %v", testi+1, res.maxDims) } } }