From 48609a8127692243904b10dd7c86a4e0d2effb3c Mon Sep 17 00:00:00 2001
From: Brian Sam-Bodden
Date: Tue, 28 May 2024 17:54:35 -0700
Subject: [PATCH] Fix vector type fields should not be encoded as strings (#2772)

Search results used to push every returned field value through
to_string(), and Encoder.decode() let a UnicodeDecodeError from
bytes.decode() propagate. Binary payloads such as FLOAT32 vector blobs
are not text in the configured encoding, so they could not be read back
unchanged.

- redis/commands/search/result.py: only field names go through
  to_string(); field values are stored as received.
- redis/_parsers/encoders.py: when decoding raises UnicodeDecodeError,
  return the original bytes instead of raising.
- tests/test_search.py: add a test that stores a FLOAT32 vector in a
  hash, searches for it and compares the returned bytes with the input.
- dev_requirements.txt: add numpy, which the new test imports.
---
Reviewer note: relative to the original commit this patch drops the unused
`as e` binding in Encoder.decode and ends tests/test_search.py with a
newline; the post-image blob ids on the `index` lines predate those tweaks.

 dev_requirements.txt            |  1 +
 redis/_parsers/encoders.py      |  6 ++++-
 redis/commands/search/result.py | 17 +++++------------
 tests/test_search.py            | 39 +++++++++++++++++++++++++++++++++
 4 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/dev_requirements.txt b/dev_requirements.txt
index 48ec278d83..2357115934 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -16,3 +16,4 @@ ujson>=4.2.0
 wheel>=0.30.0
 urllib3<2
 uvloop
+numpy>=1.24.4
diff --git a/redis/_parsers/encoders.py b/redis/_parsers/encoders.py
index 6fdf0ad882..818d244d9d 100644
--- a/redis/_parsers/encoders.py
+++ b/redis/_parsers/encoders.py
@@ -40,5 +40,9 @@ def decode(self, value, force=False):
             if isinstance(value, memoryview):
                 value = value.tobytes()
             if isinstance(value, bytes):
-                value = value.decode(self.encoding, self.encoding_errors)
+                try:
+                    value = value.decode(self.encoding, self.encoding_errors)
+                except UnicodeDecodeError:
+                    # Return the bytes unmodified
+                    return value
         return value
diff --git a/redis/commands/search/result.py b/redis/commands/search/result.py
index 5b19e6faa4..29d63e86f7 100644
--- a/redis/commands/search/result.py
+++ b/redis/commands/search/result.py
@@ -39,18 +39,11 @@ def __init__(
 
             fields = {}
             if hascontent and res[i + fields_offset] is not None:
-                fields = (
-                    dict(
-                        dict(
-                            zip(
-                                map(to_string, res[i + fields_offset][::2]),
-                                map(to_string, res[i + fields_offset][1::2]),
-                            )
-                        )
-                    )
-                    if hascontent
-                    else {}
-                )
+                for j in range(0, len(res[i + fields_offset]), 2):
+                    key = to_string(res[i + fields_offset][j])
+                    value = res[i + fields_offset][j + 1]
+                    fields[key] = value
+
             try:
                 del fields["id"]
             except KeyError:
diff --git a/tests/test_search.py b/tests/test_search.py
index f19c193891..d58d3b6a2d 100644
--- a/tests/test_search.py
+++ b/tests/test_search.py
@@ -4,6 +4,8 @@
 import time
 from io import TextIOWrapper
 
+import numpy as np
+
 import pytest
 import redis
 import redis.commands.search
@@ -2282,3 +2284,40 @@ def test_geoshape(client: redis.Redis):
     assert result.docs[0]["id"] == "small"
     result = client.ft().search(q2, query_params=qp2)
     assert len(result.docs) == 2
+
+@pytest.mark.redismod
+def test_vector_storage_and_retrieval(client):
+    # Constants
+    INDEX_NAME = "vector_index"
+    DOC_PREFIX = "doc:"
+    VECTOR_DIMENSIONS = 4
+    VECTOR_FIELD_NAME = "my_vector"
+
+    # Create index
+    client.ft(INDEX_NAME).create_index((
+        VectorField(VECTOR_FIELD_NAME,
+            "FLAT", {
+                "TYPE": "FLOAT32",
+                "DIM": VECTOR_DIMENSIONS,
+                "DISTANCE_METRIC": "COSINE",
+            }
+        ),
+    ), definition=IndexDefinition(prefix=[DOC_PREFIX], index_type=IndexType.HASH))
+
+    # Add a document with a vector value
+    vector_data = [0.1, 0.2, 0.3, 0.4]
+    client.hset(f"{DOC_PREFIX}1", mapping={
+        VECTOR_FIELD_NAME: np.array(vector_data, dtype=np.float32).tobytes()
+    })
+
+    # Perform a search to retrieve the document
+    query = (
+        Query("*").return_fields(VECTOR_FIELD_NAME).dialect(2)
+    )
+    res = client.ft(INDEX_NAME).search(query)
+
+    # Assert that the document is retrieved and the vector matches the original data
+    assert res.total == 1
+    assert res.docs[0].id == f"{DOC_PREFIX}1"
+    retrieved_vector_data = np.frombuffer(res.docs[0].__dict__[VECTOR_FIELD_NAME], dtype=np.float32)
+    assert np.allclose(retrieved_vector_data, vector_data)