From 72ca7c0a7e125fc9449da82b8911eece22476e30 Mon Sep 17 00:00:00 2001 From: Mark Duckworth <1124037+MarkDuckworth@users.noreply.github.com> Date: Fri, 26 Jul 2024 10:53:33 -0600 Subject: [PATCH] API layer changes for vector distance. --- dev/src/reference/query.ts | 93 ++++++++++++++++++++--- dev/src/reference/vector-query-options.ts | 26 +++++-- dev/src/reference/vector-query.ts | 8 +- 3 files changed, 106 insertions(+), 21 deletions(-) diff --git a/dev/src/reference/query.ts b/dev/src/reference/query.ts index f7664b4d5..37c5aeaa0 100644 --- a/dev/src/reference/query.ts +++ b/dev/src/reference/query.ts @@ -628,6 +628,9 @@ export class Query< * @param options - Options control the vector query. `limit` specifies the upper bound of documents to return, must * be a positive integer with a maximum value of 1000. `distanceMeasure` specifies what type of distance is calculated * when performing the query. + * + * @deprecated Use the new {@link findNearest} implementation + * accepting `limit` and `distanceMeasure` as independent arguments. */ findNearest( vectorField: string | firestore.FieldPath, @@ -636,29 +639,97 @@ export class Query< limit: number; distanceMeasure: 'EUCLIDEAN' | 'COSINE' | 'DOT_PRODUCT'; } + ): VectorQuery; + + /** + * Returns a query that can perform vector distance (similarity) search with given parameters. + * + * The returned query, when executed, performs a distance (similarity) search on the specified + * `vectorField` against the given `queryVector` and returns the top documents that are closest + * to the `queryVector`. + * + * Only documents whose `vectorField` field is a {@link VectorValue} of the same dimension as `queryVector` + * participate in the query, all other documents are ignored. + * + * @example + * ``` + * // Returns the 10 documents whose 'embedding' fields are closest, by Euclidean distance, to [41, 42].
+ * const vectorQuery = col.findNearest('embedding', [41, 42], 10, 'EUCLIDEAN'); + * + * const querySnapshot = await vectorQuery.get(); + * querySnapshot.forEach(...); + * ``` + * + * @param vectorField - A string or {@link FieldPath} specifying the vector field to search on. + * @param queryVector - The {@link VectorValue} used to measure the distance from `vectorField` values in the documents. + * @param limit - The upper bound of documents to return; must be a positive integer with a maximum value of 1000. + * @param distanceMeasure - The type of distance that is calculated when performing the query. + * @param options - Optional `distanceResultField` and `distanceThreshold` settings for the vector query. + */ + findNearest( + vectorField: string | firestore.FieldPath, + queryVector: firestore.VectorValue | Array, + limit: number, + distanceMeasure: 'EUCLIDEAN' | 'COSINE' | 'DOT_PRODUCT', + options?: { + distanceResultField?: string | firestore.FieldPath; + distanceThreshold?: number; + }): VectorQuery; + + findNearest( + vectorField: string | firestore.FieldPath, + queryVector: firestore.VectorValue | Array, + limitOrOptions: number |{ + limit?: number; + distanceMeasure?: 'EUCLIDEAN' | 'COSINE' | 'DOT_PRODUCT'; + }, + distanceMeasure?: 'EUCLIDEAN' | 'COSINE' | 'DOT_PRODUCT', + options?: { + distanceResultField?: string | firestore.FieldPath; + distanceThreshold?: number; + } + ): VectorQuery { + if (typeof limitOrOptions == 'number') { + return this._findNearest(vectorField, queryVector, limitOrOptions, distanceMeasure!, options); + } else { + return this._findNearest(vectorField, queryVector, limitOrOptions!.limit!, limitOrOptions!.distanceMeasure!); + } + } + + _findNearest( + vectorField: string | firestore.FieldPath, + queryVector: firestore.VectorValue | Array, + limit: number, + distanceMeasure: 'EUCLIDEAN' | 'COSINE' | 'DOT_PRODUCT', + options?: { + distanceResultField?: string | firestore.FieldPath; + distanceThreshold?: number; + } ): VectorQuery {
validateFieldPath('vectorField', vectorField); - if (options.limit <= 0) { - throw invalidArgumentMessage('options.limit', 'positive limit number'); + if (limit <= 0) { + throw invalidArgumentMessage('limit', 'positive limit number'); } if ( - (Array.isArray(queryVector) - ? queryVector.length - : queryVector.toArray().length) === 0 + (Array.isArray(queryVector) + ? queryVector.length + : queryVector.toArray().length) === 0 ) { throw invalidArgumentMessage( - 'queryVector', - 'vector size must be larger than 0' + 'queryVector', + 'vector size must be larger than 0' ); } return new VectorQuery( - this, - vectorField, - queryVector, - new VectorQueryOptions(options.limit, options.distanceMeasure) + this, + vectorField, + queryVector, + limit, + distanceMeasure, + new VectorQueryOptions(options?.distanceResultField, options?.distanceThreshold) ); } diff --git a/dev/src/reference/vector-query-options.ts b/dev/src/reference/vector-query-options.ts index cc083aa62..8cca9367e 100644 --- a/dev/src/reference/vector-query-options.ts +++ b/dev/src/reference/vector-query-options.ts @@ -13,12 +13,20 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ +import * as firestore from '@google-cloud/firestore'; +import {FieldPath} from "../path"; export class VectorQueryOptions { + readonly distanceResultField?: firestore.FieldPath; + constructor( - readonly limit: number, - readonly distanceMeasure: 'EUCLIDEAN' | 'COSINE' | 'DOT_PRODUCT' - ) {} + distanceResultField?: string | firestore.FieldPath, + readonly distanceThreshold?: number + ) { + if (typeof distanceResultField === 'string') { + this.distanceResultField = new FieldPath(distanceResultField); + } else { /* FieldPath or undefined: store it unchanged instead of silently dropping it */ this.distanceResultField = distanceResultField; } + } isEqual(other: VectorQueryOptions): boolean { if (this === other) { @@ -28,9 +36,13 @@ export class VectorQueryOptions { return false; } - return ( - this.limit === other.limit && - this.distanceMeasure === other.distanceMeasure - ); + let distanceResultFieldEqual = false; + if (typeof other.distanceResultField == 'undefined') { + distanceResultFieldEqual = (typeof this.distanceResultField == 'undefined'); + } else { + distanceResultFieldEqual = (this.distanceResultField?.isEqual(other.distanceResultField) == true); + } + + return this.distanceThreshold === other.distanceThreshold && distanceResultFieldEqual; } } diff --git a/dev/src/reference/vector-query.ts b/dev/src/reference/vector-query.ts index 4df93e60f..aa82a9fa4 100644 --- a/dev/src/reference/vector-query.ts +++ b/dev/src/reference/vector-query.ts @@ -58,6 +58,8 @@ export class VectorQuery< private readonly _query: Query, private readonly vectorField: string | firestore.FieldPath, private readonly queryVector: firestore.VectorValue | Array, + private readonly limit: number, + private readonly distanceMeasure: 'EUCLIDEAN' | 'COSINE' | 'DOT_PRODUCT', private readonly options: VectorQueryOptions ) { this._queryUtil = new QueryUtil< @@ -157,7 +159,7 @@ export class VectorQuery< } /** - * Internal method for serializing a query to its RunAggregationQuery proto + * Internal method for serializing a query to its proto * representation with an optional transaction id.
* * @private @@ -175,8 +177,8 @@ export class VectorQuery< : (this.queryVector as VectorValue); queryProto.structuredQuery!.findNearest = { - limit: {value: this.options.limit}, - distanceMeasure: this.options.distanceMeasure, + limit: {value: this.limit}, + distanceMeasure: this.distanceMeasure, vectorField: { fieldPath: FieldPath.fromArgument(this.vectorField).formattedName, },