forked from cockroachdb/cockroach
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
124292: sql: implement pgvector datatype and evaluation r=jordanlewis a=jordanlewis This commit adds the pgvector datatype and associated evaluation operators and functions. It doesn't include index acceleration. Functionality included: - `CREATE EXTENSION vector` - `vector` datatype with optional length, storage and retrieval in non-indexed table columns - Equality and inequality operators - `<->` operator - L2 distance - `<#>` operator - (negative) inner product - `<=>` operator - cosine distance - `l1_distance` builtin - `l2_distance` builtin - `cosine_distance` builtin - `inner_product` builtin - `vector_dims` builtin - `vector_norm` builtin Updates cockroachdb#121432 Epic: None Release note (sql change): implement pgvector encoding, decoding, and operators, without index acceleration. Co-authored-by: Jordan Lewis <[email protected]>
- Loading branch information
Showing
102 changed files
with
2,100 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
63 changes: 63 additions & 0 deletions
63
pkg/ccl/logictestccl/testdata/logic_test/mixed_version_pgvector
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
# LogicTest: cockroach-go-testserver-23.2 | ||
|
||
# Verify that all nodes are running the previous version. | ||
# Each of the three nodes (indexes 0, 1 and 2) is queried individually and is | ||
# expected to report 23.2 as its executable version. | ||
|
||
query T nodeidx=0 | ||
SELECT crdb_internal.node_executable_version() | ||
---- | ||
23.2 | ||
|
||
query T nodeidx=1 | ||
SELECT crdb_internal.node_executable_version() | ||
---- | ||
23.2 | ||
|
||
query T nodeidx=2 | ||
SELECT crdb_internal.node_executable_version() | ||
---- | ||
23.2 | ||
|
||
# Before any node has been upgraded, creating a table with a VECTOR column is | ||
# expected to fail with a syntax error. | ||
statement error syntax error | ||
CREATE TABLE t (v VECTOR(1)) | ||
|
||
# Upgrade one node (index 0) to 24.2; nodes 1 and 2 are upgraded later in | ||
# this file. | ||
|
||
upgrade 0 | ||
|
||
# Verify that node index 0 is now running the 24.2 binary. | ||
|
||
query T nodeidx=0 | ||
SELECT crdb_internal.release_series(crdb_internal.node_executable_version()) | ||
---- | ||
24.2 | ||
|
||
# At this point only node 0 has been upgraded. The same CREATE TABLE that | ||
# produced a syntax error earlier in this file is now expected to fail with | ||
# the version message below instead. | ||
statement error pg_vector not supported until version 24.2 | ||
CREATE TABLE t (v VECTOR(1)) | ||
|
||
# Upgrade the two remaining nodes (indexes 1 and 2). | ||
upgrade 1 | ||
|
||
upgrade 2 | ||
|
||
# Set the cluster version setting to the executable version reported by the | ||
# node that runs this statement. | ||
statement ok | ||
SET CLUSTER SETTING version = crdb_internal.node_executable_version(); | ||
|
||
# Verify that nodes 1 and 2 now report the 24.2 release series. | ||
query T nodeidx=1 | ||
SELECT crdb_internal.release_series(crdb_internal.node_executable_version()) | ||
---- | ||
24.2 | ||
|
||
query T nodeidx=2 | ||
SELECT crdb_internal.release_series(crdb_internal.node_executable_version()) | ||
---- | ||
24.2 | ||
|
||
# Check that the cluster reports being at least at version 24.1-02. | ||
# NOTE(review): the "retry" option presumably re-runs the query until the | ||
# expected result appears - confirm against the logic test framework docs. | ||
query B retry | ||
SELECT crdb_internal.is_at_least_version('24.1-02') | ||
---- | ||
true | ||
|
||
# Note: the following statement would succeed if the cluster had an enterprise | ||
# license, but the mixed version logic framework doesn't support adding one. | ||
# This is tested normally in the vector ccl logic test. | ||
statement error pgcode XXC02 use of vector datatype requires an enterprise license | ||
CREATE TABLE t (v VECTOR(1)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
# LogicTest: !local-mixed-23.2 | ||
|
||
# Smoke test of the <-> operator on two 3-dimensional vector literals. Every | ||
# component differs by 3, so the expected value equals sqrt(27). | ||
query F | ||
SELECT '[1,2,3]'::vector <-> '[4,5,6]'::vector | ||
---- | ||
5.196152422706632 | ||
|
||
# A declared dimension of 0 is rejected: the minimum is 1. | ||
statement error pgcode 42601 dimensions for type vector must be at least 1 | ||
CREATE TABLE v (v vector(0)) | ||
|
||
# A declared dimension of 16001 is rejected: the maximum is 16000. | ||
statement error pgcode 42601 dimensions for type vector cannot exceed 16000 | ||
CREATE TABLE v (v vector(16001)) | ||
|
||
# A vector column cannot be the primary key; the error reports the type as | ||
# not indexable. | ||
statement error column v is of type vector and thus is not indexable | ||
CREATE TABLE v (v vector(2) PRIMARY KEY) | ||
|
||
# Table v has a vector column with no declared dimension; table v2 has a | ||
# vector column fixed at 2 dimensions. | ||
statement ok | ||
CREATE TABLE v (v vector); | ||
CREATE TABLE v2 (v vector(2)) | ||
|
||
# Without a declared dimension, vectors of different lengths can be stored in | ||
# the same column. | ||
statement ok | ||
INSERT INTO v VALUES('[1]'), ('[2,3]') | ||
|
||
query T rowsort | ||
SELECT * FROM v | ||
---- | ||
[1] | ||
[2,3] | ||
|
||
# Equality against a value that matches no stored row returns an empty result | ||
# (and no error, even though one stored vector has a different length). | ||
query T | ||
SELECT * FROM v WHERE v = '[1,2]' | ||
---- | ||
|
||
# l2_distance rejects operands with different numbers of dimensions. | ||
query error pgcode 22000 different vector dimensions 2 and 1 | ||
SELECT l2_distance('[1,2]', '[1]') | ||
|
||
# v2 enforces its declared dimension: the 1-dimensional value '[1]' makes the | ||
# whole INSERT fail. | ||
statement error pgcode 22000 expected 2 dimensions, not 1 | ||
INSERT INTO v2 VALUES('[1]'), ('[2,3]') | ||
|
||
# Values with exactly 2 dimensions are accepted. | ||
statement ok | ||
INSERT INTO v2 VALUES('[1,2]'), ('[3,4]') | ||
|
||
query T rowsort | ||
SELECT * FROM v2 | ||
---- | ||
[1,2] | ||
[3,4] | ||
|
||
# Equality filter that matches exactly one stored row. | ||
query T | ||
SELECT * FROM v2 WHERE v = '[1,2]' | ||
---- | ||
[1,2] | ||
|
||
# A vector can be produced by casting from text and by casting from an array. | ||
query TT | ||
SELECT '[1,2]'::text::vector, ARRAY[1,2]::vector | ||
---- | ||
[1,2] [1,2] | ||
|
||
# An array that contains a NULL element cannot be cast to vector. | ||
query error pgcode 22004 array must not contain nulls | ||
SELECT ARRAY[1,2,null]::vector | ||
|
||
# Casting to vector(n) checks the number of dimensions of the value. | ||
query error pgcode 22000 expected 1 dimensions, not 2 | ||
select '[3,1]'::vector(1) | ||
|
||
# NaN and positive/negative infinity are rejected as vector elements. | ||
query error pgcode 22000 NaN not allowed in vector | ||
select '[3,NaN]'::vector | ||
|
||
query error pgcode 22000 infinite value not allowed in vector | ||
select '[3,Inf]'::vector | ||
|
||
query error pgcode 22000 infinite value not allowed in vector | ||
select '[3,-Inf]'::vector | ||
|
||
# Table x has one float[] column and one real[] column, used as targets for | ||
# the vector-to-array cast test that follows. | ||
statement ok | ||
CREATE TABLE x (a float[], b real[]) | ||
|
||
# Test implicit cast from vector to array. | ||
# The inserted values are vectors; the destination columns are arrays. | ||
statement ok | ||
INSERT INTO x VALUES('[1,2]'::vector, '[3,4]'::vector) | ||
|
||
# Fixture for the operator and builtin queries later in this file: pairs of | ||
# 1-dimensional vectors, including one row with a NULL v1 and one row with a | ||
# NULL v2. | ||
statement ok | ||
CREATE TABLE v3 (v1 vector(1), v2 vector(1)); | ||
INSERT INTO v3 VALUES | ||
('[1]', '[2]'), | ||
('[1]', '[-2]'), | ||
(NULL, '[1]'), | ||
('[1]', NULL) | ||
|
||
# Binary operators over the v3 rows: <->, <#>, <=>, +, - and *. | ||
# The first three return floats and the last three return vectors (see the | ||
# FFFTTT column types). Both rows with a NULL operand yield NULL in every | ||
# column. | ||
query FFFTTT rowsort | ||
SELECT v1<->v2, v1<#>v2, v1<=>v2, v1+v2, v1-v2, v1*v2 FROM v3 | ||
---- | ||
1 -2 0 [3] [-1] [2] | ||
3 2 2 [-1] [3] [-2] | ||
NULL NULL NULL NULL NULL NULL | ||
NULL NULL NULL NULL NULL NULL | ||
|
||
# Builtin functions over the same rows. The four two-argument functions | ||
# return NULL when either argument is NULL. vector_norm and vector_dims only | ||
# take v1, so they still return 1 and 1 for the row where only v2 is NULL. | ||
query FFFFFI rowsort | ||
SELECT l1_distance(v1,v2), l2_distance(v1,v2), cosine_distance(v1,v2), inner_product(v1,v2), vector_norm(v1), vector_dims(v1) FROM v3 | ||
---- | ||
1 1 0 2 1 1 | ||
3 3 2 -2 1 1 | ||
NULL NULL NULL NULL NULL NULL | ||
NULL NULL NULL NULL 1 1 |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.