diff --git a/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/pinecone.adoc b/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/pinecone.adoc
new file mode 100644
index 0000000000..2f8578a5ce
--- /dev/null
+++ b/docs/asciidoc/modules/ROOT/pages/database-integration/vectordb/pinecone.adoc
@@ -0,0 +1,275 @@

= Pinecone

[NOTE]
====
In Pinecone a collection is a static and non-queryable copy of an index;
therefore, unlike the other vector dbs, the Pinecone procedures work on indexes instead of collections.

However, the vectordb procedures that handle CRUD operations on collections are usually named `apoc.vectordb.<vector db>.createCollection` and `apoc.vectordb.<vector db>.deleteCollection`,
so, to be consistent, the Pinecone index procedures are named `apoc.vectordb.pinecone.createCollection` and `apoc.vectordb.pinecone.deleteCollection`.
====

Here is a list of all available Pinecone procedures:

[opts=header, cols="1, 3"]
|===
| name | description
| apoc.vectordb.pinecone.info(hostOrKey, index, $config) | Gets information about the specified existing index, or throws a 404 error if it does not exist.
| apoc.vectordb.pinecone.createCollection(hostOrKey, index, similarity, size, $config) |
  Creates an index with the name specified in the 2nd parameter and with the specified `similarity` and `size`.
  The default endpoint is `/indexes`.
| apoc.vectordb.pinecone.deleteCollection(hostOrKey, index, $config) |
  Deletes the index with the name specified in the 2nd parameter.
  The default endpoint is `/indexes/<index name>`.
| apoc.vectordb.pinecone.upsert(hostOrKey, index, vectors, $config) |
  Upserts, in the index with the name specified in the 2nd parameter, the vectors [{id: 'id', vector: '', metadata: ''}].
  The default endpoint is `/vectors/upsert`.
| apoc.vectordb.pinecone.delete(hostOrKey, index, ids, $config) |
  Deletes the vectors with the specified `ids`.
  The default endpoint is `/vectors/delete`.
| apoc.vectordb.pinecone.get(hostOrKey, index, ids, $config) |
  Gets the vectors with the specified `ids`.
  The default endpoint is `/vectors/fetch`.
| apoc.vectordb.pinecone.getAndUpdate(hostOrKey, index, ids, $config) |
  Gets the vectors with the specified `ids`, and optionally creates/updates Neo4j entities.
  The default endpoint is `/vectors/fetch`.
| apoc.vectordb.pinecone.query(hostOrKey, index, vector, filter, limit, $config) |
  Retrieves the vectors closest to the defined `vector`, up to `limit` results, in the index with the name specified in the 2nd parameter.
  The default endpoint is `/query`.
| apoc.vectordb.pinecone.queryAndUpdate(hostOrKey, index, vector, filter, limit, $config) |
  Retrieves the vectors closest to the defined `vector`, up to `limit` results, in the index with the name specified in the 2nd parameter, and optionally creates/updates Neo4j entities.
  The default endpoint is `/query`.
|===

where the 1st parameter can be a key defined via the apoc config `apoc.pinecone.<key name>.host=myHost`.

The default `hostOrKey` is `"https://api.pinecone.io"`;
therefore it can generally be null with the `createCollection` and `deleteCollection` procedures,
while with the other procedures it has to be the index host, that is, the one shown in the Pinecone dashboard:

image::pinecone-index.png[width=800]


== Examples

The following examples assume we want to create and manage an index called `test-index`.
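Note that the Pinecone REST API requires an API key.
The examples below pass an empty `$config` map for brevity; in practice the key has to be provided through the configuration map.
A minimal sketch (assuming, as with the other APOC vector db procedures, that the key is passed through the `headers` config entry, and that it is available as an `$apiKey` parameter):

[source,cypher]
----
CALL apoc.vectordb.pinecone.info(null, 'test-index', {headers: {`Api-Key`: $apiKey}})
----

Alternatively, the host and the API key can be stored once via the `apoc.vectordb.configure` procedure, executed against the system database, and then referenced through the key passed as the 1st parameter.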
.Get index info (it leverages https://docs.pinecone.io/guides/indexes/view-index-information[this API])
[source,cypher]
----
CALL apoc.vectordb.pinecone.info(null, 'test-index', {})
----

.Example results
[opts="header"]
|===
| value
| { "dimension": 3,
  "environment": "us-east1-gcp",
  "name": "tiny-index",
  "size": 3126700,
  "status": "Ready",
  "vector_count": 99
}
|===


.Create an index (it leverages https://docs.pinecone.io/reference/api/control-plane/create_index[this API])
[source,cypher]
----
CALL apoc.vectordb.pinecone.createCollection(null, 'test-index', 'cosine', 4, {})
----


.Delete an index (it leverages https://docs.pinecone.io/reference/api/control-plane/delete_index[this API])
[source,cypher]
----
CALL apoc.vectordb.pinecone.deleteCollection(null, 'test-index', {})
----


.Upsert vectors (it leverages https://docs.pinecone.io/reference/api/data-plane/upsert[this API])
[source,cypher]
----
CALL apoc.vectordb.pinecone.upsert('https://test-index-ilx67g5.svc.aped-4627-b74a.pinecone.io',
    'test-index',
    [
        {id: '1', vector: [0.05, 0.61, 0.76, 0.74], metadata: {city: "Berlin", foo: "one"}},
        {id: '2', vector: [0.19, 0.81, 0.75, 0.11], metadata: {city: "London", foo: "two"}}
    ],
    {})
----


.Get vectors (it leverages https://docs.pinecone.io/reference/api/data-plane/fetch[this API])
[source,cypher]
----
CALL apoc.vectordb.pinecone.get($host, 'test-index', ['1','2'], {})
----

.Example results
[opts="header"]
|===
| score | metadata | id | vector | text | entity
| null | {city: "Berlin", foo: "one"} | null | null | null | null
| null | {city: "London", foo: "two"} | null | null | null | null
| ...
|===

.Get vectors with `{allResults: true}`
[source,cypher]
----
CALL apoc.vectordb.pinecone.get($host, 'test-index', ['1','2'], {allResults: true})
----

.Example results
[opts="header"]
|===
| score | metadata | id | vector | text | entity
| null | {city: "Berlin", foo: "one"} | 1 | [...] | null | null
| null | {city: "London", foo: "two"} | 2 | [...] | null | null
| ...
|===

.Query vectors (it leverages https://docs.pinecone.io/reference/api/data-plane/query[this API])
[source,cypher]
----
CALL apoc.vectordb.pinecone.query($host,
    'test-index',
    [0.2, 0.1, 0.9, 0.7],
    { city: { `$eq`: "London" } },
    5,
    {allResults: true})
----

.Example results
[opts="header"]
|===
| score | metadata | id | vector | text | entity
| 0.1 | {city: "London", foo: "two"} | 2 | [...] | null | null
|===

Since the query filters on `city = "London"`, only the London vector is returned.

We can define a mapping, to auto-create one/multiple nodes and relationships, by leveraging the vector metadata.

For example, if we have created the two vectors with the above upsert procedure,
we can populate some existing nodes (e.g. `(:Test {myId: 'one'})` and `(:Test {myId: 'two'})`):

[source,cypher]
----
CALL apoc.vectordb.pinecone.queryAndUpdate($host, 'test-index',
    [0.2, 0.1, 0.9, 0.7],
    {},
    5,
    { mapping: {
        embeddingKey: "vect",
        nodeLabel: "Test",
        entityKey: "myId",
        metadataKey: "foo"
      }
    })
----

which populates the two nodes as `(:Test {myId: 'one', city: 'Berlin', vect: [vector1]})` and `(:Test {myId: 'two', city: 'London', vect: [vector2]})`,
and returns them in the `entity` column of the result.
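To double-check that the mapping actually updated the graph, we can then inspect the `Test` nodes; a minimal sketch (the `myId`, `city` and `vect` property names come from the mapping configuration above):

[source,cypher]
----
MATCH (t:Test)
RETURN t.myId AS myId, t.city AS city, size(t.vect) AS dimension
ORDER BY myId
----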
We can also set the mapping configuration `mode` to `CREATE_IF_MISSING` (which creates nodes if they do not exist), `READ_ONLY` (to search for nodes/rels without making updates) or `UPDATE_EXISTING` (the default behavior):

[source,cypher]
----
CALL apoc.vectordb.pinecone.queryAndUpdate($host, 'test-index',
    [0.2, 0.1, 0.9, 0.7],
    {},
    5,
    { mapping: {
        mode: "CREATE_IF_MISSING",
        embeddingKey: "vect",
        nodeLabel: "Test",
        entityKey: "myId",
        metadataKey: "foo"
      }
    })
----

which creates 2 new nodes as above.

Or, we can populate existing relationships (e.g. `(:Start)-[:TEST {myId: 'one'}]->(:End)` and `(:Start)-[:TEST {myId: 'two'}]->(:End)`):

[source,cypher]
----
CALL apoc.vectordb.pinecone.queryAndUpdate($host, 'test-index',
    [0.2, 0.1, 0.9, 0.7],
    {},
    5,
    { mapping: {
        embeddingKey: "vect",
        relType: "TEST",
        entityKey: "myId",
        metadataKey: "foo"
      }
    })
----

which populates the two relationships as `()-[:TEST {myId: 'one', city: 'Berlin', vect: [vector1]}]-()`
and `()-[:TEST {myId: 'two', city: 'London', vect: [vector2]}]-()`,
and returns them in the `entity` column of the result.


We can also use the mapping with the `apoc.vectordb.pinecone.query` procedure, to search for nodes/rels matching the label/type and the `metadataKey`, without making updates
(i.e. equivalent to the `*.queryAndUpdate` procedure with a mapping config having `mode: "READ_ONLY"`).

For example, with the previous relationships, we can execute the following procedure, which just returns the relationships in the `rel` column:

[source,cypher]
----
CALL apoc.vectordb.pinecone.query($host, 'test-index',
    [0.2, 0.1, 0.9, 0.7],
    {},
    5,
    { mapping: {
        embeddingKey: "vect",
        relType: "TEST",
        entityKey: "myId",
        metadataKey: "foo"
      }
    })
----

[NOTE]
====
We can use the mapping with the `apoc.vectordb.pinecone.get*` procedures as well.
====

[NOTE]
====
To optimize performance, we can choose what to `YIELD` with the `apoc.vectordb.pinecone.query*` and `apoc.vectordb.pinecone.get*` procedures.

For example, by executing `CALL apoc.vectordb.pinecone.query(...) YIELD metadata, score, id`, the REST API request body will have `includeMetadata: true` and `includeValues: false`,
so that the vector values, which we do not need, are not returned.
====

It is possible to execute the vector db procedures together with the xref::ml/rag.adoc[apoc.ml.rag] procedure as follows:

[source,cypher]
----
CALL apoc.vectordb.pinecone.getAndUpdate($host, $index, [<id1>, <id2>], $conf) YIELD node, metadata, id, vector
WITH collect(node) as paths
CALL apoc.ml.rag(paths, $attributes, $question, $confPrompt) YIELD value
RETURN value
----

.Delete vectors (it leverages https://docs.pinecone.io/reference/api/data-plane/delete[this API])
[source,cypher]
----
CALL apoc.vectordb.pinecone.delete($host, 'test-index', ['1','2'], {})
----
diff --git a/full/src/main/java/apoc/vectordb/Pinecone.java b/full/src/main/java/apoc/vectordb/Pinecone.java
new file mode 100644
index 0000000000..3aca834ace
--- /dev/null
+++ b/full/src/main/java/apoc/vectordb/Pinecone.java
@@ -0,0 +1,247 @@
/*
 * Copyright (c) "Neo4j"
 * Neo4j Sweden AB [http://neo4j.com]
 *
 * This file is part of Neo4j.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package apoc.vectordb; + +import static apoc.ml.RestAPIConfig.METHOD_KEY; +import static apoc.vectordb.VectorDb.executeRequest; +import static apoc.vectordb.VectorDb.getEmbeddingResultStream; +import static apoc.vectordb.VectorDbHandler.Type.PINECONE; +import static apoc.vectordb.VectorDbUtil.getCommonVectorDbInfo; + +import apoc.Extended; +import apoc.ml.RestAPIConfig; +import apoc.result.MapResult; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import org.neo4j.graphdb.GraphDatabaseService; +import org.neo4j.graphdb.Transaction; +import org.neo4j.internal.kernel.api.procs.ProcedureCallContext; +import org.neo4j.procedure.Context; +import org.neo4j.procedure.Description; +import org.neo4j.procedure.Mode; +import org.neo4j.procedure.Name; +import org.neo4j.procedure.Procedure; + +@Extended +public class Pinecone { + public static final VectorDbHandler DB_HANDLER = PINECONE.get(); + + @Context + public ProcedureCallContext procedureCallContext; + + @Context + public Transaction tx; + + @Context + public GraphDatabaseService db; + + @Procedure("apoc.vectordb.pinecone.info") + @Description( + "apoc.vectordb.pinecone.info(hostOrKey, index, $configuration) - Get information about the specified existing index or throws an error if it does not exist") + public Stream getInfo( + @Name("hostOrKey") String hostOrKey, + @Name("index") String index, + @Name(value = "configuration", defaultValue = "{}") Map configuration) + throws Exception { + String url = "%s/indexes/%s"; + Map config = getVectorDbInfo(hostOrKey, index, configuration, url); + + RestAPIConfig restAPIConfig = new RestAPIConfig(config, Map.of(), Map.of()); + return executeRequest(restAPIConfig).map(v -> (Map) v).map(MapResult::new); + } + + @Procedure("apoc.vectordb.pinecone.createCollection") + @Description( + "apoc.vectordb.pinecone.createCollection(hostOrKey, index, similarity, size, $configuration) - Creates a index, with the name specified in the 2nd parameter, and with the specified `similarity` and `size`") + public Stream createCollection( + @Name("hostOrKey") String hostOrKey, + @Name("index") String index, + @Name("similarity") String similarity, + @Name("size") Long size, + @Name(value = "configuration", defaultValue = "{}") Map configuration) + throws Exception { + String url = "%s/indexes"; + Map config = getVectorDbInfo(hostOrKey, index, configuration, url); + config.putIfAbsent(METHOD_KEY, "POST"); + + Map additionalBodies = Map.of( + "name", index, + "dimension", size, + "metric", similarity); + RestAPIConfig restAPIConfig = new RestAPIConfig(config, Map.of(), additionalBodies); + return executeRequest(restAPIConfig).map(v -> (Map) v).map(MapResult::new); + } + + @Procedure("apoc.vectordb.pinecone.deleteCollection") + @Description( + "apoc.vectordb.pinecone.deleteCollection(hostOrKey, index, $configuration) - Deletes a index with the name specified in the 2nd parameter") + public Stream deleteCollection( + @Name("hostOrKey") String hostOrKey, + @Name("index") String index, + @Name(value = 
"configuration", defaultValue = "{}") Map configuration) + throws Exception { + + String url = "%s/indexes/%s"; + Map config = getVectorDbInfo(hostOrKey, index, configuration, url); + config.putIfAbsent(METHOD_KEY, "DELETE"); + + RestAPIConfig restAPIConfig = new RestAPIConfig(config); + return executeRequest(restAPIConfig).map(v -> (Map) v).map(MapResult::new); + } + + @Procedure("apoc.vectordb.pinecone.upsert") + @Description( + "apoc.vectordb.pinecone.upsert(hostOrKey, index, vectors, $configuration) - Upserts, in the index with the name specified in the 2nd parameter, the vectors [{id: 'id', vector: '', medatada: ''}]") + public Stream upsert( + @Name("hostOrKey") String hostOrKey, + @Name("index") String index, + @Name("vectors") List> vectors, + @Name(value = "configuration", defaultValue = "{}") Map configuration) + throws Exception { + + String url = "%s/vectors/upsert"; + + Map config = getVectorDbInfo(hostOrKey, index, configuration, url); + config.putIfAbsent(METHOD_KEY, "POST"); + + vectors = vectors.stream() + .map(i -> { + Map map = new HashMap<>(i); + map.putIfAbsent("values", map.remove("vector")); + return map; + }) + .collect(Collectors.toList()); + + Map additionalBodies = Map.of("vectors", vectors); + RestAPIConfig restAPIConfig = new RestAPIConfig(config, Map.of(), additionalBodies); + return executeRequest(restAPIConfig).map(v -> (Map) v).map(MapResult::new); + } + + @Procedure("apoc.vectordb.pinecone.delete") + @Description( + "apoc.vectordb.pinecone.delete(hostOrKey, index, ids, $configuration) - Delete the vectors with the specified `ids`") + public Stream delete( + @Name("hostOrKey") String hostOrKey, + @Name("index") String index, + @Name("vectors") List ids, + @Name(value = "configuration", defaultValue = "{}") Map configuration) + throws Exception { + + String url = "%s/vectors/delete"; + Map config = getVectorDbInfo(hostOrKey, index, configuration, url); + config.putIfAbsent(METHOD_KEY, "POST"); + + Map additionalBodies = Map.of("ids", ids); + RestAPIConfig apiConfig = new RestAPIConfig(config, Map.of(), additionalBodies); + return executeRequest(apiConfig).map(v -> (Map) v).map(MapResult::new); + } + + @Procedure(value = "apoc.vectordb.pinecone.get") + @Description( + "apoc.vectordb.pinecone.get(hostOrKey, index, ids, $configuration) - Get the vectors with the specified `ids`") + public Stream get( + @Name("hostOrKey") String hostOrKey, + @Name("index") String index, + @Name("ids") List ids, + @Name(value = "configuration", defaultValue = "{}") Map configuration) + throws Exception { + return getCommon(hostOrKey, index, ids, configuration); + } + + @Procedure(value = "apoc.vectordb.pinecone.getAndUpdate", mode = Mode.WRITE) + @Description( + "apoc.vectordb.pinecone.getAndUpdate(hostOrKey, index, ids, $configuration) - Get the vectors with the specified `ids`") + public Stream getAndUpdate( + @Name("hostOrKey") String hostOrKey, + @Name("index") String index, + @Name("ids") List ids, + @Name(value = "configuration", defaultValue = "{}") Map configuration) + throws Exception { + return getCommon(hostOrKey, index, ids, configuration); + } + + private Stream getCommon( + String hostOrKey, String index, List ids, Map configuration) throws Exception { + String url = "%s/vectors/fetch"; + Map config = getVectorDbInfo(hostOrKey, index, configuration, url); + + VectorEmbeddingConfig conf = DB_HANDLER.getEmbedding().fromGet(config, procedureCallContext, ids); + + return getEmbeddingResultStream(conf, procedureCallContext, tx, v -> { + Object vectors = ((Map) 
v).get("vectors"); + return ((Map) vectors).values().stream(); + }); + } + + @Procedure(value = "apoc.vectordb.pinecone.query") + @Description( + "apoc.vectordb.pinecone.query(hostOrKey, index, vector, filter, limit, $configuration) - Retrieve closest vectors the the defined `vector`, `limit` of results, in the index with the name specified in the 2nd parameter") + public Stream query( + @Name("hostOrKey") String hostOrKey, + @Name("index") String index, + @Name(value = "vector", defaultValue = "[]") List vector, + @Name(value = "filter", defaultValue = "{}") Map filter, + @Name(value = "limit", defaultValue = "10") long limit, + @Name(value = "configuration", defaultValue = "{}") Map configuration) + throws Exception { + return queryCommon(hostOrKey, index, vector, filter, limit, configuration); + } + + @Procedure(value = "apoc.vectordb.pinecone.queryAndUpdate", mode = Mode.WRITE) + @Description( + "apoc.vectordb.pinecone.queryAndUpdate(hostOrKey, index, vector, filter, limit, $configuration) - Retrieve closest vectors the the defined `vector`, `limit` of results, in the index with the name specified in the 2nd parameter") + public Stream queryAndUpdate( + @Name("hostOrKey") String hostOrKey, + @Name("index") String index, + @Name(value = "vector", defaultValue = "[]") List vector, + @Name(value = "filter", defaultValue = "{}") Map filter, + @Name(value = "limit", defaultValue = "10") long limit, + @Name(value = "configuration", defaultValue = "{}") Map configuration) + throws Exception { + return queryCommon(hostOrKey, index, vector, filter, limit, configuration); + } + + private Stream queryCommon( + String hostOrKey, + String index, + List vector, + Map filter, + long limit, + Map configuration) + throws Exception { + String url = "%s/query"; + Map config = getVectorDbInfo(hostOrKey, index, configuration, url); + + VectorEmbeddingConfig conf = + DB_HANDLER.getEmbedding().fromQuery(config, procedureCallContext, vector, filter, limit, index); + + return getEmbeddingResultStream(conf, procedureCallContext, tx, v -> { + Map map = (Map) v; + return ((List) map.get("matches")).stream(); + }); + } + + private Map getVectorDbInfo( + String hostOrKey, String index, Map configuration, String templateUrl) { + return getCommonVectorDbInfo(hostOrKey, index, configuration, templateUrl, DB_HANDLER); + } +} diff --git a/full/src/main/java/apoc/vectordb/PineconeHandler.java b/full/src/main/java/apoc/vectordb/PineconeHandler.java new file mode 100644 index 0000000000..7f1b2df673 --- /dev/null +++ b/full/src/main/java/apoc/vectordb/PineconeHandler.java @@ -0,0 +1,124 @@ +/* + * Copyright (c) "Neo4j" + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package apoc.vectordb; + +import static apoc.ml.RestAPIConfig.BODY_KEY; +import static apoc.ml.RestAPIConfig.ENDPOINT_KEY; +import static apoc.ml.RestAPIConfig.HEADERS_KEY; +import static apoc.ml.RestAPIConfig.METHOD_KEY; +import static apoc.util.MapUtil.map; +import static apoc.vectordb.VectorEmbeddingConfig.VECTOR_KEY; + +import apoc.ml.RestAPIConfig; +import java.net.URL; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; +import org.neo4j.internal.kernel.api.procs.ProcedureCallContext; + +public class PineconeHandler implements VectorDbHandler { + + @Override + public String getUrl(String hostOrKey) { + return StringUtils.isBlank(hostOrKey) ? "https://api.pinecone.io" : hostOrKey; + } + + @Override + public VectorEmbeddingHandler getEmbedding() { + return new PineconeEmbeddingHandler(); + } + + @Override + public String getLabel() { + return "Pinecone"; + } + + @Override + public Map getCredentials(Object credentialsObj, Map config) { + Map headers = (Map) config.getOrDefault(HEADERS_KEY, new HashMap<>()); + headers.putIfAbsent("Api-Key", credentialsObj); + config.put(HEADERS_KEY, headers); + return config; + } + + // -- embedding handler + static class PineconeEmbeddingHandler implements VectorEmbeddingHandler { + + /** + * "method" should be "GET", but is null as a workaround. + * Since with `method: POST` the {@link apoc.util.Util#openUrlConnection(URL, Map)} has a `setChunkedStreamingMode` + * that makes the request to respond 200 OK, but returns an empty result + */ + @Override + public VectorEmbeddingConfig fromGet( + Map config, ProcedureCallContext procedureCallContext, List ids) { + List fields = procedureCallContext.outputFields().collect(Collectors.toList()); + + config.put(BODY_KEY, null); + + String endpoint = (String) config.get(ENDPOINT_KEY); + if (!endpoint.contains("ids=")) { + String idsQueryUrl = ids.stream().map(i -> "ids=" + i).collect(Collectors.joining("&")); + + if (endpoint.contains("?")) { + endpoint += "&" + idsQueryUrl; + } else { + endpoint += "?" 
+ idsQueryUrl; + } + } + + config.put(ENDPOINT_KEY, endpoint); + return getVectorEmbeddingConfig(config, fields, map()); + } + + @Override + public VectorEmbeddingConfig fromQuery( + Map config, + ProcedureCallContext procedureCallContext, + List vector, + Object filter, + long limit, + String index) { + List fields = procedureCallContext.outputFields().collect(Collectors.toList()); + + Map additionalBodies = map("vector", vector, "filter", filter, "topK", limit); + + return getVectorEmbeddingConfig(config, fields, additionalBodies); + } + + private VectorEmbeddingConfig getVectorEmbeddingConfig( + Map config, List fields, Map additionalBodies) { + config.putIfAbsent(VECTOR_KEY, "values"); + + VectorEmbeddingConfig conf = new VectorEmbeddingConfig(config); + + additionalBodies.put("includeMetadata", fields.contains("metadata")); + additionalBodies.put("includeValues", fields.contains("vector") && conf.isAllResults()); + + RestAPIConfig apiConfig = conf.getApiConfig(); + Map headers = apiConfig.getHeaders(); + headers.remove(METHOD_KEY); + apiConfig.setHeaders(headers); + + return VectorEmbeddingHandler.populateApiBodyRequest(conf, additionalBodies); + } + } +} diff --git a/full/src/main/java/apoc/vectordb/VectorDbHandler.java b/full/src/main/java/apoc/vectordb/VectorDbHandler.java index 894b805646..76c9c73576 100644 --- a/full/src/main/java/apoc/vectordb/VectorDbHandler.java +++ b/full/src/main/java/apoc/vectordb/VectorDbHandler.java @@ -1,3 +1,21 @@ +/* + * Copyright (c) "Neo4j" + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package apoc.vectordb; import static apoc.ml.RestAPIConfig.HEADERS_KEY; @@ -22,6 +40,7 @@ default Map getCredentials(Object credentialsObj, Map getCommonVectorDbInfo( Map config = new HashMap<>(configuration); Map props = withSystemDb(transaction -> { - Label label = Label.label(handler.getLabel()); - Node node = transaction.findNode(label, SystemPropertyKeys.name.name(), hostOrKey); - return node == null ? Map.of() : node.getAllProperties(); + try { + Label label = Label.label(handler.getLabel()); + Node node = transaction.findNode(label, SystemPropertyKeys.name.name(), hostOrKey); + return node == null ? 
Map.of() : node.getAllProperties(); + } catch (Exception e) { + // Fallback in case of null keys/values + return Map.of(); + } }); String url = getUrl(hostOrKey, handler, props); diff --git a/full/src/main/resources/extended.txt b/full/src/main/resources/extended.txt index 12c69f5f1b..a29698f709 100644 --- a/full/src/main/resources/extended.txt +++ b/full/src/main/resources/extended.txt @@ -219,6 +219,15 @@ apoc.vectordb.chroma.get apoc.vectordb.chroma.getAndUpdate apoc.vectordb.chroma.query apoc.vectordb.chroma.queryAndUpdate +apoc.vectordb.pinecone.createCollection +apoc.vectordb.pinecone.delete +apoc.vectordb.pinecone.deleteCollection +apoc.vectordb.pinecone.get +apoc.vectordb.pinecone.getAndUpdate +apoc.vectordb.pinecone.info +apoc.vectordb.pinecone.query +apoc.vectordb.pinecone.queryAndUpdate +apoc.vectordb.pinecone.upsert apoc.vectordb.qdrant.createCollection apoc.vectordb.qdrant.deleteCollection apoc.vectordb.qdrant.upsert diff --git a/full/src/test/java/apoc/vectordb/PineconeTest.java b/full/src/test/java/apoc/vectordb/PineconeTest.java index b12fbf5403..30fd52d88d 100644 --- a/full/src/test/java/apoc/vectordb/PineconeTest.java +++ b/full/src/test/java/apoc/vectordb/PineconeTest.java @@ -1,98 +1,608 @@ package apoc.vectordb; -import static apoc.ml.RestAPIConfig.BODY_KEY; +import static apoc.ml.Prompt.API_KEY_CONF; import static apoc.ml.RestAPIConfig.HEADERS_KEY; -import static apoc.ml.RestAPIConfig.JSON_PATH_KEY; -import static apoc.ml.RestAPIConfig.METHOD_KEY; +import static apoc.util.ExtendedTestUtil.assertFails; +import static apoc.util.ExtendedTestUtil.testRetryCallEventually; +import static apoc.util.MapUtil.map; import static apoc.util.TestUtil.testCall; +import static apoc.util.TestUtil.testCallEmpty; import static apoc.util.TestUtil.testResult; -import static apoc.util.Util.map; import static apoc.util.UtilsExtendedTest.checkEnvVar; -import static apoc.vectordb.VectorEmbeddingConfig.VECTOR_KEY; +import static apoc.vectordb.VectorDbHandler.Type.PINECONE; +import static apoc.vectordb.VectorDbTestUtil.EntityType.FALSE; +import static apoc.vectordb.VectorDbTestUtil.EntityType.NODE; +import static apoc.vectordb.VectorDbTestUtil.EntityType.REL; +import static apoc.vectordb.VectorDbTestUtil.assertBerlinResult; +import static apoc.vectordb.VectorDbTestUtil.assertLondonResult; +import static apoc.vectordb.VectorDbTestUtil.assertNodesCreated; +import static apoc.vectordb.VectorDbTestUtil.assertReadOnlyProcWithMappingResults; +import static apoc.vectordb.VectorDbTestUtil.assertRelsCreated; +import static apoc.vectordb.VectorDbTestUtil.dropAndDeleteAll; +import static apoc.vectordb.VectorDbTestUtil.ragSetup; +import static apoc.vectordb.VectorEmbeddingConfig.ALL_RESULTS_KEY; +import static apoc.vectordb.VectorEmbeddingConfig.MAPPING_KEY; +import static apoc.vectordb.VectorMappingConfig.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertNull; +import static org.neo4j.configuration.GraphDatabaseSettings.DEFAULT_DATABASE_NAME; +import static org.neo4j.configuration.GraphDatabaseSettings.SYSTEM_DATABASE_NAME; +import apoc.ml.Prompt; +import apoc.util.MapUtil; import apoc.util.TestUtil; -import java.net.URL; -import java.util.Collections; +import apoc.util.Util; import java.util.List; import java.util.Map; +import java.util.UUID; +import org.junit.AfterClass; +import org.junit.Before; import org.junit.BeforeClass; import 
org.junit.ClassRule; import org.junit.Test; -import org.neo4j.test.rule.DbmsRule; -import org.neo4j.test.rule.ImpermanentDbmsRule; - -/** - * It leverages `apoc.vectordb.custom*` procedures - * * - * * - * Example of Pinecone RestAPI: - * PINECONE_HOST: `https://INDEX-ID.svc.gcp-starter.pinecone.io` - * PINECONE_KEY: `API Key` - * PINECONE_NAMESPACE: `the one to be specified in body: {.. "ns": NAMESPACE}` - * PINECONE_DIMENSION: vector dimension - */ +import org.junit.rules.TemporaryFolder; +import org.neo4j.dbms.api.DatabaseManagementService; +import org.neo4j.graphdb.GraphDatabaseService; +import org.neo4j.test.TestDatabaseManagementServiceBuilder; + public class PineconeTest { - private static String apiKey; - private static String host; - private static String size; - private static String namespace; + private static String API_KEY; + private static String HOST; + + private static final String collName = UUID.randomUUID().toString(); @ClassRule - public static DbmsRule db = new ImpermanentDbmsRule(); + public static TemporaryFolder storeDir = new TemporaryFolder(); + + private static GraphDatabaseService sysDb; + private static GraphDatabaseService db; + private static DatabaseManagementService databaseManagementService; + + private static Map ADMIN_AUTHORIZATION; + private static Map ADMIN_HEADER_CONF; @BeforeClass - public static void setUp() throws Exception { - apiKey = checkEnvVar("PINECONE_KEY"); - host = checkEnvVar("PINECONE_HOST"); - size = checkEnvVar("PINECONE_DIMENSION"); - namespace = checkEnvVar("PINECONE_NAMESPACE"); + public static void setUp() { + API_KEY = checkEnvVar("PINECONE_KEY"); + HOST = checkEnvVar("PINECONE_HOST"); + + databaseManagementService = + new TestDatabaseManagementServiceBuilder(storeDir.getRoot().toPath()).build(); + db = databaseManagementService.database(DEFAULT_DATABASE_NAME); + sysDb = databaseManagementService.database(SYSTEM_DATABASE_NAME); + + TestUtil.registerProcedure(db, VectorDb.class, Pinecone.class, Prompt.class); + + ADMIN_AUTHORIZATION = map("Api-Key", API_KEY); + ADMIN_HEADER_CONF = map(HEADERS_KEY, ADMIN_AUTHORIZATION); + + testRetryCallEventually( + db, + "CALL apoc.vectordb.pinecone.createCollection($host, $coll, 'cosine', 4, $conf)", + map( + "host", + HOST, + "coll", + collName, + "conf", + map( + HEADERS_KEY, + ADMIN_AUTHORIZATION, + "body", + map("spec", map("serverless", map("cloud", "aws", "region", "us-east-1"))))), + r -> { + Map value = (Map) r.get("value"); + assertEquals(map("ready", false, "state", "Initializing"), value.get("status")); + HOST = "https://" + value.get("host"); + }, + 5L); + + // the upsert takes a while + Util.sleep(5000); + + testResult( + db, + "CALL apoc.vectordb.pinecone.upsert($host, $coll,\n" + "[\n" + + " {id: '1', vector: [0.05, 0.61, 0.76, 0.74], metadata: {city: \"Berlin\", foo: \"one\"}},\n" + + " {id: '2', vector: [0.19, 0.81, 0.75, 0.11], metadata: {city: \"London\", foo: \"two\"}}\n" + + "],\n" + + "$conf)", + map("host", HOST, "coll", collName, "conf", ADMIN_HEADER_CONF), + r -> { + Map row = r.next(); + Map value = (Map) row.get("value"); + assertEquals(2L, value.get("upsertedCount")); + }); + + // the upsert takes a while + Util.sleep(20000); + } + + @AfterClass + public static void tearDown() { + if (API_KEY == null || HOST == null) { + return; + } + + Util.sleep(2000); + + testCallEmpty( + db, + "CALL apoc.vectordb.pinecone.deleteCollection($host, $coll, $conf)", + map("host", "", "coll", collName, "conf", ADMIN_HEADER_CONF)); + + databaseManagementService.shutdown(); + } + + @Before 
+ public void before() { + dropAndDeleteAll(db); + } + + @Test + public void getInfo() { + testResult( + db, + "CALL apoc.vectordb.pinecone.info($host, $coll, $conf) ", + map( + "host", + null, + "coll", + collName, + "conf", + map(ALL_RESULTS_KEY, true, HEADERS_KEY, ADMIN_AUTHORIZATION)), + r -> { + Map row = r.next(); + Map value = (Map) row.get("value"); + assertEquals(collName, value.get("name")); + }); + } + + @Test + public void getInfoNotExistentCollection() { + String wrongCollection = "wrong_collection"; + assertFails( + db, + "CALL apoc.vectordb.pinecone.info($host, $coll, $conf)", + map( + "host", + null, + "coll", + wrongCollection, + "conf", + map(ALL_RESULTS_KEY, true, HEADERS_KEY, ADMIN_AUTHORIZATION)), + "java.io.FileNotFoundException: https://api.pinecone.io/indexes/" + wrongCollection); + } + + @Test + public void getVectors() { + testResult( + db, + "CALL apoc.vectordb.pinecone.get($host, $coll, ['1', '2'], $conf) " + + "YIELD vector, id, metadata, node RETURN * ORDER BY id", + map( + "host", + HOST, + "coll", + collName, + "conf", + map(ALL_RESULTS_KEY, true, HEADERS_KEY, ADMIN_AUTHORIZATION)), + r -> { + Map row = r.next(); + assertBerlinResult(row, FALSE); + assertNotNull(row.get("vector")); + + row = r.next(); + assertLondonResult(row, FALSE); + assertNotNull(row.get("vector")); + + assertFalse(r.hasNext()); + }); + } + + @Test + public void getVectorsWithoutVectorResult() { + testResult( + db, + "CALL apoc.vectordb.pinecone.get($host, $coll, ['1'], $conf) ", + map("host", HOST, "coll", collName, "conf", ADMIN_HEADER_CONF), + r -> { + Map row = r.next(); + assertEquals(Map.of("city", "Berlin", "foo", "one"), row.get("metadata")); + assertNull(row.get("vector")); + assertNull(row.get("id")); + + assertFalse(r.hasNext()); + }); + } + + @Test + public void deleteVector() { + testCall( + db, + "CALL apoc.vectordb.pinecone.upsert($host, $coll,\n" + "[\n" + + " {id: '3', vector: [0.19, 0.81, 0.75, 0.11], metadata: {foo: \"baz\"}},\n" + + " {id: '4', vector: [0.19, 0.81, 0.75, 0.11], metadata: {foo: \"baz\"}}\n" + + "],\n" + + "$conf)", + map("host", HOST, "coll", collName, "conf", ADMIN_HEADER_CONF), + r -> { + Map value = (Map) r.get("value"); + assertEquals(2L, value.get("upsertedCount")); + }); + + // the upsert takes a while + Util.sleep(10000); + + testCall( + db, + "CALL apoc.vectordb.pinecone.delete($host, $coll, ['3', '4'], $conf) ", + map("host", HOST, "coll", collName, "conf", ADMIN_HEADER_CONF), + r -> { + assertEquals(Map.of(), r.get("value")); + }); + } + + @Test + public void queryVectors() { + testResult( + db, + "CALL apoc.vectordb.pinecone.query($host, $coll, [0.2, 0.1, 0.9, 0.7], {}, 5, $conf)", + map( + "host", + HOST, + "coll", + collName, + "conf", + map(ALL_RESULTS_KEY, true, HEADERS_KEY, ADMIN_AUTHORIZATION)), + r -> { + Map row = r.next(); + assertBerlinResult(row, FALSE); + assertNotNull(row.get("score")); + assertNotNull(row.get("vector")); + + row = r.next(); + assertLondonResult(row, FALSE); + assertNotNull(row.get("score")); + assertNotNull(row.get("vector")); + }); + } + + @Test + public void queryVectorsWithoutVectorResult() { + testResult( + db, + "CALL apoc.vectordb.pinecone.queryAndUpdate($host, $coll, [0.2, 0.1, 0.9, 0.7], {}, 5, $conf)", + map("host", HOST, "coll", collName, "conf", map(HEADERS_KEY, ADMIN_AUTHORIZATION)), + r -> { + Map row = r.next(); + assertEquals(Map.of("city", "Berlin", "foo", "one"), row.get("metadata")); + assertNotNull(row.get("score")); + assertNull(row.get("vector")); + assertNull(row.get("id")); + + row = 
r.next(); + assertEquals(Map.of("city", "London", "foo", "two"), row.get("metadata")); + assertNotNull(row.get("score")); + assertNull(row.get("vector")); + assertNull(row.get("id")); + }); + } + + @Test + public void queryVectorsWithYield() { + testResult( + db, + "CALL apoc.vectordb.pinecone.query($host, $coll, [0.2, 0.1, 0.9, 0.7], {}, 5, $conf) YIELD metadata, id", + map( + "host", + HOST, + "coll", + collName, + "conf", + map(ALL_RESULTS_KEY, true, HEADERS_KEY, ADMIN_AUTHORIZATION)), + r -> { + assertBerlinResult(r.next(), FALSE); + assertLondonResult(r.next(), FALSE); + }); + } + + @Test + public void queryVectorsWithFilter() { + testResult( + db, + "CALL apoc.vectordb.pinecone.query($host, $coll, [0.2, 0.1, 0.9, 0.7],\n" + + "{ city: { `$eq`: \"London\" } },\n" + + "5, $conf) YIELD metadata, id", + map( + "host", + HOST, + "coll", + collName, + "conf", + map(ALL_RESULTS_KEY, true, HEADERS_KEY, ADMIN_AUTHORIZATION)), + r -> { + assertLondonResult(r.next(), FALSE); + }); + } + + @Test + public void queryVectorsWithLimit() { + testResult( + db, + "CALL apoc.vectordb.pinecone.query($host, $coll, [0.2, 0.1, 0.9, 0.7], {}, 1, $conf) YIELD metadata, id", + map( + "host", + HOST, + "coll", + collName, + "conf", + map(ALL_RESULTS_KEY, true, HEADERS_KEY, ADMIN_AUTHORIZATION)), + r -> { + assertBerlinResult(r.next(), FALSE); + }); + } + + @Test + public void queryVectorsWithCreateNode() { + Map conf = map( + ALL_RESULTS_KEY, + true, + HEADERS_KEY, + ADMIN_AUTHORIZATION, + MAPPING_KEY, + map(EMBEDDING_KEY, "vect", NODE_LABEL, "Test", ENTITY_KEY, "myId", METADATA_KEY, "foo")); + testResult( + db, + "CALL apoc.vectordb.pinecone.queryAndUpdate($host, $coll, [0.2, 0.1, 0.9, 0.7], {}, 5, $conf)", + map("host", HOST, "coll", collName, "conf", conf), + r -> { + Map row = r.next(); + assertBerlinResult(row, NODE); + assertNotNull(row.get("score")); + assertNotNull(row.get("vector")); + + row = r.next(); + assertLondonResult(row, NODE); + assertNotNull(row.get("score")); + assertNotNull(row.get("vector")); + }); + + assertNodesCreated(db); + + testResult( + db, + "MATCH (n:Test) RETURN properties(n) AS props ORDER BY n.myId", + VectorDbTestUtil::vectorEntityAssertions); + + testResult( + db, + "CALL apoc.vectordb.pinecone.queryAndUpdate($host, $coll, [0.2, 0.1, 0.9, 0.7], {}, 5, $conf)", + map("host", HOST, "coll", collName, "conf", conf), + r -> { + Map row = r.next(); + assertBerlinResult(row, NODE); + assertNotNull(row.get("score")); + assertNotNull(row.get("vector")); + + row = r.next(); + assertLondonResult(row, NODE); + assertNotNull(row.get("score")); + assertNotNull(row.get("vector")); + }); + + assertNodesCreated(db); + } + + @Test + public void queryVectorsWithCreateNodeUsingExistingNode() { + + db.executeTransactionally("CREATE (:Test {myId: 'one'}), (:Test {myId: 'two'})"); + + Map conf = map( + ALL_RESULTS_KEY, + true, + HEADERS_KEY, + ADMIN_AUTHORIZATION, + MAPPING_KEY, + map(EMBEDDING_KEY, "vect", NODE_LABEL, "Test", ENTITY_KEY, "myId", METADATA_KEY, "foo")); + + testResult( + db, + "CALL apoc.vectordb.pinecone.queryAndUpdate($host, $coll, [0.2, 0.1, 0.9, 0.7], {}, 5, $conf)", + map("host", HOST, "coll", collName, "conf", conf), + r -> { + Map row = r.next(); + assertBerlinResult(row, NODE); + assertNotNull(row.get("score")); + assertNotNull(row.get("vector")); + + row = r.next(); + assertLondonResult(row, NODE); + assertNotNull(row.get("score")); + assertNotNull(row.get("vector")); + }); + + assertNodesCreated(db); + } + + @Test + public void getVectorsWithCreateNodeUsingExistingNode() { 
+ + db.executeTransactionally("CREATE (:Test {myId: 'one'}), (:Test {myId: 'two'})"); + + Map conf = MapUtil.map( + ALL_RESULTS_KEY, + true, + HEADERS_KEY, + ADMIN_AUTHORIZATION, + MAPPING_KEY, + MapUtil.map(EMBEDDING_KEY, "vect", NODE_LABEL, "Test", ENTITY_KEY, "myId", METADATA_KEY, "foo")); + + testResult( + db, + "CALL apoc.vectordb.pinecone.getAndUpdate($host, 'TestCollection', [1, 2], $conf) " + + "YIELD vector, id, metadata, node RETURN * ORDER BY id", + Util.map("host", HOST, "coll", collName, "conf", conf), + r -> { + Map row = r.next(); + assertBerlinResult(row, NODE); + assertNotNull(row.get("vector")); + + row = r.next(); + assertLondonResult(row, NODE); + assertNotNull(row.get("vector")); + }); + + assertNodesCreated(db); + } + + @Test + public void getReadOnlyVectorsWithMapping() { + db.executeTransactionally("CREATE (:Test {readID: 'one'}), (:Test {readID: 'two'})"); + + Map conf = map( + ALL_RESULTS_KEY, + true, + HEADERS_KEY, + ADMIN_AUTHORIZATION, + MAPPING_KEY, + map( + NODE_LABEL, "Test", + ENTITY_KEY, "readID", + METADATA_KEY, "foo")); - TestUtil.registerProcedure(db, VectorDb.class); + testResult( + db, + "CALL apoc.vectordb.pinecone.get($host, 'TestCollection', [1, 2], $conf) " + + "YIELD vector, id, metadata, node RETURN * ORDER BY id", + Util.map("host", HOST, "coll", collName, "conf", conf), + r -> assertReadOnlyProcWithMappingResults(r, "node")); } @Test - public void callQueryEndpointViaCustomGetProc() { + public void queryVectorsWithCreateRel() { - Map conf = getConf(); - conf.put(VECTOR_KEY, "values"); + db.executeTransactionally( + "CREATE (:Start)-[:TEST {myId: 'one'}]->(:End), (:Start)-[:TEST {myId: 'two'}]->(:End)"); - testResult(db, "CALL apoc.vectordb.custom.get($host, $conf)", map("host", host + "/query", "conf", conf), r -> { - r.forEachRemaining(i -> { - assertNotNull(i.get("score")); - assertNotNull(i.get("metadata")); - assertNotNull(i.get("id")); - assertNotNull(i.get("vector")); - }); - }); + Map conf = map( + ALL_RESULTS_KEY, + true, + HEADERS_KEY, + ADMIN_AUTHORIZATION, + MAPPING_KEY, + map(EMBEDDING_KEY, "vect", REL_TYPE, "TEST", ENTITY_KEY, "myId", METADATA_KEY, "foo")); + testResult( + db, + "CALL apoc.vectordb.pinecone.queryAndUpdate($host, $coll, [0.2, 0.1, 0.9, 0.7], {}, 5, $conf)", + map("host", HOST, "coll", collName, "conf", conf), + r -> { + Map row = r.next(); + assertBerlinResult(row, REL); + assertNotNull(row.get("score")); + assertNotNull(row.get("vector")); + + row = r.next(); + assertLondonResult(row, REL); + assertNotNull(row.get("score")); + assertNotNull(row.get("vector")); + }); + + assertRelsCreated(db); } @Test - public void callQueryEndpointViaCustomProc() { - testCall(db, "CALL apoc.vectordb.custom($host, $conf)", map("host", host + "/query", "conf", getConf()), r -> { - List value = (List) r.get("value"); - value.forEach(i -> { - assertTrue(i.containsKey("score")); - assertTrue(i.containsKey("metadata")); - assertTrue(i.containsKey("id")); - }); - }); + public void queryReadOnlyVectorsWithMapping() { + db.executeTransactionally( + "CREATE (:Start)-[:TEST {readID: 'one'}]->(:End), (:Start)-[:TEST {readID: 'two'}]->(:End)"); + + Map conf = map( + ALL_RESULTS_KEY, + true, + HEADERS_KEY, + ADMIN_AUTHORIZATION, + MAPPING_KEY, + map( + REL_TYPE, "TEST", + ENTITY_KEY, "readID", + METADATA_KEY, "foo")); + + testResult( + db, + "CALL apoc.vectordb.pinecone.query($host, $coll, [0.2, 0.1, 0.9, 0.7], {}, 5, $conf)", + map("host", HOST, "coll", collName, "conf", conf), + r -> assertReadOnlyProcWithMappingResults(r, "rel")); } - /** - 
* TODO: "method" is null as a workaround. - * Since with `method: POST` the {@link apoc.util.Util#openUrlConnection(URL, Map)} has a `setChunkedStreamingMode` - * that makes the request to respond 200 OK, but returns an empty result - */ - private static Map getConf() { - List vector = Collections.nCopies(Integer.parseInt(size), 0.1); + @Test + public void queryVectorsWithSystemDbStorage() { + String keyConfig = "pinecone-config-foo"; + Map mapping = + map(EMBEDDING_KEY, "vect", NODE_LABEL, "Test", ENTITY_KEY, "myId", METADATA_KEY, "foo"); + + sysDb.executeTransactionally( + "CALL apoc.vectordb.configure($vectorName, $keyConfig, $databaseName, $conf)", + map( + "vectorName", + PINECONE.toString(), + "keyConfig", + keyConfig, + "databaseName", + DEFAULT_DATABASE_NAME, + "conf", + map( + "host", HOST, + "credentials", API_KEY, + "mapping", mapping))); - Map body = map( - "namespace", namespace, "vector", vector, "topK", 3, "includeValues", true, "includeMetadata", true); + db.executeTransactionally("CREATE (:Test {myId: 'one'}), (:Test {myId: 'two'})"); + + testResult( + db, + "CALL apoc.vectordb.pinecone.queryAndUpdate($host, $coll, [0.2, 0.1, 0.9, 0.7], {}, 5, $conf)", + map("host", keyConfig, "coll", collName, "conf", map(ALL_RESULTS_KEY, true)), + r -> { + Map row = r.next(); + assertBerlinResult(row, NODE); + assertNotNull(row.get("score")); + assertNotNull(row.get("vector")); + + row = r.next(); + assertLondonResult(row, NODE); + assertNotNull(row.get("score")); + assertNotNull(row.get("vector")); + }); + + assertNodesCreated(db); + } + + @Test + public void queryVectorsWithRag() { + String openAIKey = ragSetup(db); - Map header = map("Api-Key", apiKey); + Map conf = map( + ALL_RESULTS_KEY, + true, + HEADERS_KEY, + ADMIN_AUTHORIZATION, + MAPPING_KEY, + map(NODE_LABEL, "Rag", ENTITY_KEY, "readID", METADATA_KEY, "foo")); - return map(BODY_KEY, body, HEADERS_KEY, header, METHOD_KEY, null, JSON_PATH_KEY, "matches"); + testResult( + db, + "CALL apoc.vectordb.pinecone.getAndUpdate($host, $collection, ['1', '2'], $conf) YIELD node, metadata, id, vector\n" + + "WITH collect(node) as paths\n" + + "CALL apoc.ml.rag(paths, $attributes, \"Which city has foo equals to one?\", $confPrompt) YIELD value\n" + + "RETURN value", + map( + "host", HOST, + "conf", conf, + "collection", collName, + "confPrompt", map(API_KEY_CONF, openAIKey), + "attributes", List.of("city", "foo")), + VectorDbTestUtil::assertRagWithVectors); } } diff --git a/full/src/test/java/apoc/vectordb/VectorDbTestUtil.java b/full/src/test/java/apoc/vectordb/VectorDbTestUtil.java index c430ecb19c..7147ac749c 100644 --- a/full/src/test/java/apoc/vectordb/VectorDbTestUtil.java +++ b/full/src/test/java/apoc/vectordb/VectorDbTestUtil.java @@ -2,12 +2,12 @@ import static apoc.util.TestUtil.testResult; import static apoc.util.Util.map; +import static apoc.vectordb.VectorEmbeddingConfig.DEFAULT_METADATA; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; -import apoc.util.MapUtil; import java.util.Map; import org.junit.Assume; import org.neo4j.graphdb.Entity; @@ -96,13 +96,13 @@ public static Map getAuthHeader(String key) { public static void assertReadOnlyProcWithMappingResults(Result r, String node) { Map row = r.next(); Map props = ((Entity) row.get(node)).getAllProperties(); - assertEquals(MapUtil.map("readID", "one"), props); + assertEquals(map("readID", "one"), props); assertNotNull(row.get("vector")); 
assertNotNull(row.get("id")); row = r.next(); props = ((Entity) row.get(node)).getAllProperties(); - assertEquals(MapUtil.map("readID", "two"), props); + assertEquals(map("readID", "two"), props); assertNotNull(row.get("vector")); assertNotNull(row.get("id")); @@ -122,4 +122,14 @@ public static String ragSetup(GraphDatabaseService db) { db.executeTransactionally("CREATE (:Rag {readID: 'one'}), (:Rag {readID: 'two'})"); return openAIKey; } + + public static void assertMetadataFooResult(Result r) { + Map row = r.next(); + Map metadata = (Map) row.get(DEFAULT_METADATA); + assertEquals("one", metadata.get("foo")); + row = r.next(); + metadata = (Map) row.get(DEFAULT_METADATA); + assertEquals("two", metadata.get("foo")); + assertFalse(r.hasNext()); + } }