Skip to content

Commit

Permalink
MB-59616: Adding vector_base64 field
Browse files Browse the repository at this point in the history
 - Changed the quick editor, regular editor and import to
incorporate a new field type called vector_base64.
 - It acts the same as vector field at this layer.
 - vectorFieldsExistWithinIndexMapping(...) now also detects
vector_base64
 - Added a cluster feature check for vector_base64 by folding it into the
xattrs feature flag (FeatureXattrs is renamed to FeatureXattrsAndBase64)

Change-Id: I3c7a9839e0c09a8e2a06fc92dff076f6a88e2ed5
Reviewed-on: https://review.couchbase.org/c/cbft/+/204899
Reviewed-by: Abhi Dangeti <[email protected]>
Tested-by: Abhi Dangeti <[email protected]>
Well-Formed: Build Bot <[email protected]>
Well-Formed: Restriction Checker
  • Loading branch information
Likith101 authored and abhinavdangeti committed Apr 22, 2024
1 parent 75795a7 commit faf95c9
Show file tree
Hide file tree
Showing 10 changed files with 264 additions and 38 deletions.
2 changes: 1 addition & 1 deletion cmd/cbft/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ func mainStart(cfg cbgt.Cfg, uuid string, tags []string, container string,
"," + cbft.FeatureFileTransferRebalance +
"," + cbft.FeatureGeoSpatial +
cbft.FeatureVectorSearchSupport() +
"," + cbft.FeatureXattrs
"," + cbft.FeatureXattrsAndBase64

extrasMap["version-cbft.app"] = version
extrasMap["version-cbft.lib"] = cbft.VERSION
Expand Down
12 changes: 11 additions & 1 deletion ns_server_static/fts/fts.js
Original file line number Diff line number Diff line change
Expand Up @@ -1732,7 +1732,7 @@ function IndexNewCtrlFT_NS($scope, $http, $state, $stateParams,
mapping.docvalues = value.fields[i].docvalues
}

if (mapping.type == "vector") {
if (mapping.type == "vector" || mapping.type == "vector_base64") {
if ("dims" in value.fields[i]) {
mapping.dims = value.fields[i].dims
}
Expand Down Expand Up @@ -2522,6 +2522,16 @@ function IndexNewCtrlFTEasy_NS($scope, $http, $state, $stateParams,
$scope.editField.similarity = "dot_product";
}
$scope.editField.vector_index_optimized_for = "recall";
} else if (valType === "vector_base64") {
$scope.editField.type = "vector_base64";
var dims = parsedDoc.getDims(newRow);
$scope.editField.dims = dims;
if (dims && dims <= 4) {
$scope.editField.similarity = "l2_norm";
} else {
$scope.editField.similarity = "dot_product";
}
$scope.editField.vector_index_optimized_for = "recall";
} else {
// default to text if we aren't sure
$scope.editField.type = "text";
Expand Down
2 changes: 1 addition & 1 deletion ns_server_static/fts/fts_easy_field.js
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ function newEditField() {
rv = "boolean ";
} else if (this.type == "IP") {
rv = "IP ";
} else if (this.type == "vector") {
} else if (this.type == "vector" || this.type == "vector_base64") {
rv = "vector (dims: " + this.dims + "; metric: " + this.similarity + "; optimized for: " + this.vector_index_optimized_for + ")";
}

Expand Down
10 changes: 10 additions & 0 deletions ns_server_static/fts/fts_easy_mapping.js
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,12 @@ function newEasyMapping() {
return fieldMapping;
};

// Builds the field mapping for a "vector_base64" field: the mapping
// produced by newVectorField(field) with only its type overridden.
var newVectorBase64Field = function(field) {
    return Object.assign(newVectorField(field), { type: "vector_base64" });
};

var addDocumentMappingFromPathField = function(mapping, path, field) {
// split dotted-path into path elements
var pathElements = path.split('.');
Expand Down Expand Up @@ -332,6 +338,8 @@ function newEasyMapping() {
mapping.fields.push(newIPField(field));
} else if (field.type == "vector") {
mapping.fields.push(newVectorField(field));
} else if (field.type == "vector_base64") {
mapping.fields.push(newVectorBase64Field(field))
}
};

Expand Down Expand Up @@ -406,6 +414,8 @@ function newEasyMapping() {
editField.type = "IP";
} else if (field.type == "vector") {
editField.type = "vector";
} else if (field.type == "vector_base64") {
editField.type = "vector_base64";
}

// finish some common settings
Expand Down
18 changes: 18 additions & 0 deletions ns_server_static/fts/fts_easy_parse.js
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,15 @@ function parseDocument(doc) {
}
}

// check whether the object is a vector_base64
if (rowTypes[col] === "string") {
var vecLen = parseBase64Length(parsedObj[rowPaths[col]])
if (vecLen > 2) {
dims[col] = vecLen
return "vector_base64"
}
}

return rowTypes[col];
},
getDocument: function () {
Expand All @@ -164,4 +173,13 @@ function parseDocument(doc) {
};
}

// parseBase64Length decodes str as base64 and returns the number of vector
// elements it would hold, computed as (decoded byte count / 4).
// NOTE(review): the divisor 4 presumably reflects 4-byte float32 elements;
// confirm against the indexing side.
//
// Returns -1 when str is not valid base64, or when the decoded byte count is
// not a whole multiple of 4 and therefore cannot be a packed vector.
function parseBase64Length(str) {
    var decoded;
    try {
        decoded = atob(str);
    } catch {
        // atob throws when str contains characters outside the base64 alphabet
        return -1;
    }

    // Ordinary text that happens to be valid base64 (e.g. "helloworldab",
    // which decodes to 9 bytes) must not be reported as a vector with a
    // fractional dimension such as 2.25.
    if (decoded.length % 4 !== 0) {
        return -1;
    }

    return decoded.length / 4;
}

export { newParsedDocs };
8 changes: 4 additions & 4 deletions ns_server_static/fts/fts_new_easy.html
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ <h4 class="margin-bottom-half" ng-if="!editField.new">Edit Fields</h4>
<select ng-model="editField.analyzer" ng-options="t.id as t.label for t in easyLanguages">
</select>
</div>
<div ng-if="editField.type != 'disabled' && editField.type != 'vector'">
<div ng-if="editField.type != 'disabled' && editField.type != 'vector' && editField.type != 'vector_base64'">
<input
type="checkbox" id="field-is-stored"
ng-model="editField.store"
Expand Down Expand Up @@ -305,7 +305,7 @@ <h4 class="margin-bottom-half" ng-if="!editField.new">Edit Fields</h4>
</span>
</div>

<div ng-if="editField.type != 'disabled' && editField.type != 'vector'">
<div ng-if="editField.type != 'disabled' && editField.type != 'vector' && editField.type != 'vector_base64'">
<input
type="checkbox"
id="field-include-in-all"
Expand All @@ -322,7 +322,7 @@ <h4 class="margin-bottom-half" ng-if="!editField.new">Edit Fields</h4>
</span>
</div>

<div ng-if="editField.type != 'disabled' && editField.type != 'vector'"
<div ng-if="editField.type != 'disabled' && editField.type != 'vector' && editField.type != 'vector_base64'"
class="formrow">
<input
type="checkbox"
Expand All @@ -339,7 +339,7 @@ <h4 class="margin-bottom-half" ng-if="!editField.new">Edit Fields</h4>
<span class="icon fa-info fa-stack-1x"></span>
</span>
</div>
<div ng-if="editField.type == 'vector'">
<div ng-if="editField.type == 'vector' || editField.type == 'vector_base64'">
<div class="formrow">
<label for="dims">Dimension</label>
<div class="row">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ function initBleveTypeMappingController($scope, typeMappingIn, options) {
var mappings = bleveConvertFromTypeMapping(typeMappingIn);


$scope.fieldTypes = ['text', 'number', 'datetime', 'boolean', 'disabled', 'geopoint', 'geoshape', 'IP', 'vector'];
$scope.fieldTypes = ['text', 'number', 'datetime', 'boolean', 'disabled', 'geopoint', 'geoshape', 'IP', 'vector', 'vector_base64'];

var kindAttrs = {
"field": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -246,15 +246,15 @@
</select>
</label>
</div>
<div ng-if="field.type == 'vector'">
<div ng-if="field.type == 'vector' || field.type == 'vector_base64'">
<label>
<span ng-class="{'invalid': field._invalid}">dimension</span>
<input type="number"
placeholder="enter vectors' dimension"
ng-model="field.dims"/>
</label>
</div>
<div ng-if="field.type == 'vector'">
<div ng-if="field.type == 'vector' || field.type == 'vector_base64'">
<label>
<span ng-class="{'invalid': field._invalid}">similarity metric</span>
<select ng-init="field.similarity = (field.similarity ? field.similarity : 'dot_product')"
Expand All @@ -264,7 +264,7 @@
</select>
</label>
</div>
<div ng-if="field.type == 'vector'">
<div ng-if="field.type == 'vector' || field.type == 'vector_base64'">
<label>
<span ng-class="{'invalid': field._invalid}">optimized for</span>
<select ng-init="field.vector_index_optimized_for = (field.vector_index_optimized_for ? field.vector_index_optimized_for : 'recall')"
Expand All @@ -279,7 +279,7 @@
class="editAreaFieldCheckboxes">
<input type="checkbox" ng-model="field.index" id="field-index">
<label for="field-index">index</label>
<span ng-if="field.type != 'vector'">
<span ng-if="field.type != 'vector' && field.type != 'vector_base64'">
<input type="checkbox" ng-model="field.store" id="field-store">
<label for="field-store">store</label>
<input type="checkbox" ng-model="field.include_in_all" id="field-includeall">
Expand All @@ -289,7 +289,7 @@
<input type="checkbox" ng-model="field.include_term_vectors" id="field-term">
<label for="field-term">include term vectors</label>
</span>
<span ng-if="field.type != 'geopoint' && field.type != 'geoshape' && field.type != 'vector'">
<span ng-if="field.type != 'geopoint' && field.type != 'geoshape' && field.type != 'vector' && field.type != 'vector_base64'">
<input type="checkbox" ng-model="field.docvalues" id="field-docvalues">
<label for="field-docvalues">docvalues</label>
</span>
Expand Down
76 changes: 51 additions & 25 deletions pindex_bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ var FeatureGeoSpatial = "geoSpatial"

var FeatureVectorSearch = "vectors"

var FeatureXattrs = "Xattrs"
var FeatureXattrsAndBase64 = "XattrsBase64"

var FeatureBlevePreferredSegmentVersion = fmt.Sprintf("segmentVersion:%d", BlevePreferredZapVersion)

Expand Down Expand Up @@ -103,6 +103,12 @@ const BleveVectorZapVersion = int(16)
var defaultLimitingMinTime = 500
var defaultLimitingMaxTime = 120000

// Levels reported by vectorFieldsExistWithinIndexMapping describing which
// vector field types an index mapping contains. The values are ordered:
// the detection code compares them with ">" to keep the highest level seen,
// and stops early once vectorAndBase64Fields is reached.
const (
noVectorFields int = iota
vectorFields // only vector fields (7.6.0+)
vectorAndBase64Fields // vector + vector_base64 fields (7.6.2+)
)

// BleveParams represents the bleve index params. See also
// cbgt.IndexDef.Params. A JSON'ified BleveParams looks like...
//
Expand Down Expand Up @@ -543,7 +549,7 @@ func PrepareIndexDef(mgr *cbgt.Manager, indexDef *cbgt.IndexDef) (
indexDef.SourceType = cbgt.SOURCE_GOCBCORE
}

var vectorFieldsSpecifiedInMapping bool
var vectorFieldsSpecifiedInMapping int

bp := NewBleveParams()
if len(indexDef.Params) > 0 {
Expand Down Expand Up @@ -617,18 +623,17 @@ func PrepareIndexDef(mgr *cbgt.Manager, indexDef *cbgt.IndexDef) (
}

vectorFieldsSpecifiedInMapping = vectorFieldsExistWithinIndexMapping(bp.Mapping)
if !isClusterCompatibleFor(FeatureVectorSearchSupportVersion) ||
!cbgt.IsFeatureSupportedByCluster(FeatureVectorSearch, nodeDefs) {
if vectorFieldsSpecifiedInMapping != noVectorFields &&
(!isClusterCompatibleFor(FeatureVectorSearchSupportVersion) ||
!cbgt.IsFeatureSupportedByCluster(FeatureVectorSearch, nodeDefs)) {
// Vector indexing & search is NOT supported on this cluster
// (lower version or mixed lower version)
if vectorFieldsSpecifiedInMapping {
return nil, cbgt.NewBadRequestError("PrepareIndex, err: vector typed fields " +
"not supported in mixed version cluster")
}
return nil, cbgt.NewBadRequestError("PrepareIndex, err: vector typed fields " +
"not supported in mixed version cluster")
}

if mappingContainsXAttrs(bp) {
if !cbgt.IsFeatureSupportedByCluster(FeatureXattrs, nodeDefs) {
if !cbgt.IsFeatureSupportedByCluster(FeatureXattrsAndBase64, nodeDefs) {
// XAttrs is NOT supported on this cluster
// (lower version or mixed lower version)
return nil, cbgt.NewBadRequestError("PrepareIndex, err: xattr fields " +
Expand All @@ -654,6 +659,12 @@ func PrepareIndexDef(mgr *cbgt.Manager, indexDef *cbgt.IndexDef) (

indexDef.SourceParams = string(updatedSourceParams)
}

if vectorFieldsSpecifiedInMapping == vectorAndBase64Fields &&
!cbgt.IsFeatureSupportedByCluster(FeatureXattrsAndBase64, nodeDefs) {
return nil, cbgt.NewBadRequestError("PrepareIndex, err: vector_base64 typed fields " +
"not supported in mixed version cluster")
}
}

segmentVersionSupported := cbgt.IsFeatureSupportedByCluster(
Expand All @@ -672,7 +683,7 @@ func PrepareIndexDef(mgr *cbgt.Manager, indexDef *cbgt.IndexDef) (
"supported", int(zv))
}

if vectorFieldsSpecifiedInMapping && int(zv) < BleveVectorZapVersion {
if vectorFieldsSpecifiedInMapping != noVectorFields && int(zv) < BleveVectorZapVersion {
// override segmentVersion to minimum version needed to support vector mappings
bp.Store["segmentVersion"] = BleveVectorZapVersion
}
Expand All @@ -685,7 +696,7 @@ func PrepareIndexDef(mgr *cbgt.Manager, indexDef *cbgt.IndexDef) (
// zap version for newer indexes in a sufficiently advanced
// cluster, else consider the default zap version.
if segmentVersionSupported {
if vectorFieldsSpecifiedInMapping {
if vectorFieldsSpecifiedInMapping != noVectorFields {
bp.Store["segmentVersion"] = BleveVectorZapVersion
} else {
bp.Store["segmentVersion"] = BlevePreferredZapVersion
Expand Down Expand Up @@ -714,45 +725,60 @@ func PrepareIndexDef(mgr *cbgt.Manager, indexDef *cbgt.IndexDef) (

// Utility function to check whether "vector" or "vector_base64" typed
// fields are present within the index mapping
func vectorFieldsExistWithinIndexMapping(m mapping.IndexMapping) bool {
func vectorFieldsExistWithinIndexMapping(m mapping.IndexMapping) int {
im, ok := m.(*mapping.IndexMappingImpl)
if !ok {
// cannot interpret index mapping
return false
return noVectorFields
}

var vectorFieldExistsWithinDocMapping func(*mapping.DocumentMapping) bool
vectorFieldExistsWithinDocMapping = func(d *mapping.DocumentMapping) bool {
var vectorFieldExistsWithinDocMapping func(*mapping.DocumentMapping) int
vectorFieldExistsWithinDocMapping = func(d *mapping.DocumentMapping) int {
rv := noVectorFields
if d != nil && d.Enabled {
for _, v := range d.Properties {
if vectorFieldExistsWithinDocMapping(v) {
return true
val := vectorFieldExistsWithinDocMapping(v)
if val == vectorAndBase64Fields {
return val
} else if val > rv {
rv = val
}
}

for _, field := range d.Fields {
if field.Type == "vector" {
return true
if field.Type == "vector" && vectorFields > rv {
rv = vectorFields
}
if field.Type == "vector_base64" {
return vectorAndBase64Fields
}
}
}

return false
return rv
}

rv := noVectorFields

// Check DefaultMapping
if vectorFieldExistsWithinDocMapping(im.DefaultMapping) {
return true
val := vectorFieldExistsWithinDocMapping(im.DefaultMapping)
if val == vectorAndBase64Fields {
return val
} else if val > rv {
rv = val
}

// Iterate over TypeMapping(s)
for _, d := range im.TypeMapping {
if vectorFieldExistsWithinDocMapping(d) {
return true
val := vectorFieldExistsWithinDocMapping(d)
if val == vectorAndBase64Fields {
return val
} else if val > rv {
rv = val
}
}

return false
return rv
}

func ValidateBleve(indexType, indexName, indexParams string) error {
Expand Down
Loading

0 comments on commit faf95c9

Please sign in to comment.