SQLite extension: fix second-operand parsing, add a distance test, expose the extension path

Summary of the changes carried by this patch:
  * .vscode/settings.json: associate "strstream" with C++ and add "sqeuclidean"
    and "swar" to the spell-checker word list.
  * python/lib_sqlite.cpp: in sqlite_dense, the second operand is now parsed
    into parsed2[i]; previously both from_chars calls wrote into parsed1[i].
  * python/scripts/test_sqlite.py: new test that loads the extension into an
    in-memory database and checks that distance_cosine_f32 / distance_cosine_f16
    agree (abs_tol=0.1) for the JSON, f32 and f16 encodings of the same vectors.
  * python/usearch/__init__.py: expose `usearch.sqlite`, the origin of the
    `usearch.compiled` module spec, which the test passes to load_extension().

Review notes:
  * `importlib.util` is imported explicitly. A bare `import importlib` does not
    guarantee that the `util` submodule is loaded, so `importlib.util.find_spec`
    could fail with AttributeError.
  * NOTE(review): the leading whitespace inside hunk lines was reconstructed and
    may differ from the target tree; confirm it, or apply with
    `git apply --ignore-whitespace`.
  * NOTE(review): the `index` lines are unchanged, so the post-image hashes for
    test_sqlite.py and __init__.py no longer match the edited content.

diff --git a/.vscode/settings.json b/.vscode/settings.json
index ff8b9b17..cbcec460 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -104,7 +104,8 @@
         "ranges": "cpp",
         "span": "cpp",
         "format": "cpp",
-        "charconv": "cpp"
+        "charconv": "cpp",
+        "strstream": "cpp"
     },
     "cSpell.words": [
         "allclose",
@@ -153,7 +154,9 @@
         "SLOC",
         "Sonatype",
         "sorensen",
+        "sqeuclidean",
         "Struct",
+        "swar",
         "tanimoto",
         "tqdm",
         "uninitialize",
diff --git a/python/lib_sqlite.cpp b/python/lib_sqlite.cpp
index acbc8189..e340e19b 100644
--- a/python/lib_sqlite.cpp
+++ b/python/lib_sqlite.cpp
@@ -79,7 +79,7 @@ static void sqlite_dense(sqlite3_context* context, int argc, sqlite3_value** arg
 
         // Parse the floating-point numbers
         std::from_chars_result result1 = std::from_chars(vec1, vec1 + bytes1, parsed1[i]);
-        std::from_chars_result result2 = std::from_chars(vec2, vec2 + bytes2, parsed1[i]);
+        std::from_chars_result result2 = std::from_chars(vec2, vec2 + bytes2, parsed2[i]);
         if (result1.ec != std::errc() || result2.ec != std::errc()) {
             sqlite3_result_error(context, "Number can't be parsed", -1);
             return;
diff --git a/python/scripts/test_sqlite.py b/python/scripts/test_sqlite.py
new file mode 100644
index 00000000..40d0d57a
--- /dev/null
+++ b/python/scripts/test_sqlite.py
@@ -0,0 +1,80 @@
+import sqlite3
+import json
+import math
+import pytest
+import numpy as np
+
+import usearch
+from usearch.io import load_matrix, save_matrix
+from usearch.index import search
+from usearch.eval import random_vectors
+
+from usearch.index import Match, Matches, BatchMatches, Index, Indexes
+
+
+dimensions = [3, 97, 256]
+batch_sizes = [1, 77, 100]
+
+
+def test_sqlite_distances():
+    conn = sqlite3.connect(":memory:")
+    conn.enable_load_extension(True)
+    conn.load_extension(usearch.sqlite)
+
+    cursor = conn.cursor()
+
+    # Create a table with additional columns for f32 and f16 BLOBs
+    cursor.execute(
+        """
+        CREATE TABLE IF NOT EXISTS vector_table (
+            id INTEGER PRIMARY KEY,
+            vector_json JSON,
+            vector_f32 BLOB,
+            vector_f16 BLOB
+        )
+    """
+    )
+
+    # Generate and insert random vectors
+    num_vectors = 3  # Number of vectors to generate
+    dim = 4  # Dimension of each vector
+    vectors = []
+
+    for i in range(num_vectors):
+        # Generate a random vector with `dim` components
+        vector = np.random.rand(dim)
+        vectors.append(vector)
+
+        # Convert the vector to f32 and f16
+        vector_f32 = np.float32(vector)
+        vector_f16 = np.float16(vector)
+
+        # Insert the vector into the database as JSON and as BLOBs
+        cursor.execute(
+            """
+        INSERT INTO vector_table (vector_json, vector_f32, vector_f16) VALUES (?, ?, ?)
+        """,
+            (json.dumps(vector.tolist()), vector_f32.tobytes(), vector_f16.tobytes()),
+        )
+
+    # Commit changes
+    conn.commit()
+
+    similarities = """
+    SELECT
+        a.id AS id1,
+        b.id AS id2,
+        distance_cosine_f32(a.vector_json, b.vector_json) AS cosine_similarity_json,
+        distance_cosine_f32(a.vector_f32, b.vector_f32) AS cosine_similarity_f32,
+        distance_cosine_f16(a.vector_f16, b.vector_f16) AS cosine_similarity_f16
+    FROM
+        vector_table AS a,
+        vector_table AS b
+    WHERE
+        a.id < b.id;
+    """
+    cursor.execute(similarities)
+
+    for a, b, similarity_json, similarity_f32, similarity_f16 in cursor.fetchall():
+        assert math.isclose(similarity_json, similarity_f32, abs_tol=0.1)
+        assert math.isclose(similarity_json, similarity_f16, abs_tol=0.1)
diff --git a/python/usearch/__init__.py b/python/usearch/__init__.py
index 609f8a72..7e263b3f 100644
--- a/python/usearch/__init__.py
+++ b/python/usearch/__init__.py
@@ -1,3 +1,5 @@
+import importlib.util
+
 from usearch.compiled import (
     VERSION_MAJOR,
     VERSION_MINOR,
@@ -5,3 +7,7 @@
 )
 
 __version__ = f"{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}"
+
+# The same binary file (.so, .dll, or .dylib) that contains the pre-compiled
+# USearch code also contains the SQLite3 binding
+sqlite = importlib.util.find_spec("usearch.compiled").origin