Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use byte vectors instead of repeated floats #629

Merged
merged 5 commits into from
Dec 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/docker-compose-async.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ services:
- '8090'
- --scheme
- http
image: semitechnologies/weaviate:preview-send-floats-as-bytes2-dc88ca3
image: semitechnologies/weaviate:preview--wip-optimize-more-float-lists-0f71f35
ports:
- "8090:8090"
- "50060:50051"
Expand Down
2 changes: 1 addition & 1 deletion ci/docker-compose-azure.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ services:
- --scheme
- http
- --write-timeout=600s
image: semitechnologies/weaviate:preview-send-floats-as-bytes2-dc88ca3
image: semitechnologies/weaviate:preview--wip-optimize-more-float-lists-0f71f35
ports:
- 8081:8081
restart: on-failure:0
Expand Down
4 changes: 2 additions & 2 deletions ci/docker-compose-cluster.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
version: '3.4'
services:
weaviate-node-1:
image: semitechnologies/weaviate:preview-send-floats-as-bytes2-dc88ca3
image: semitechnologies/weaviate:preview--wip-optimize-more-float-lists-0f71f35
restart: on-failure:0
ports:
- "8087:8080"
Expand All @@ -26,7 +26,7 @@ services:
- '8080'
- --scheme
- http
image: semitechnologies/weaviate:preview-send-floats-as-bytes2-dc88ca3
image: semitechnologies/weaviate:preview--wip-optimize-more-float-lists-0f71f35
ports:
- 8088:8080
- "50059:50051"
Expand Down
2 changes: 1 addition & 1 deletion ci/docker-compose-generative.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ services:
- '8086'
- --scheme
- http
image: semitechnologies/weaviate:preview-send-floats-as-bytes2-dc88ca3
image: semitechnologies/weaviate:preview--wip-optimize-more-float-lists-0f71f35
ports:
- 8086:8086
- "50057:50051"
Expand Down
2 changes: 1 addition & 1 deletion ci/docker-compose-okta-cc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ services:
- --scheme
- http
- --write-timeout=600s
image: semitechnologies/weaviate:preview-send-floats-as-bytes2-dc88ca3
image: semitechnologies/weaviate:preview--wip-optimize-more-float-lists-0f71f35
ports:
- 8082:8082
restart: on-failure:0
Expand Down
2 changes: 1 addition & 1 deletion ci/docker-compose-okta-users.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ services:
- --scheme
- http
- --write-timeout=600s
image: semitechnologies/weaviate:preview-send-floats-as-bytes2-dc88ca3
image: semitechnologies/weaviate:preview--wip-optimize-more-float-lists-0f71f35
ports:
- 8083:8083
restart: on-failure:0
Expand Down
2 changes: 1 addition & 1 deletion ci/docker-compose-wcs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ services:
- --scheme
- http
- --write-timeout=600s
image: semitechnologies/weaviate:preview-send-floats-as-bytes2-dc88ca3
image: semitechnologies/weaviate:preview--wip-optimize-more-float-lists-0f71f35
ports:
- 8085:8085
restart: on-failure:0
Expand Down
2 changes: 1 addition & 1 deletion ci/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ services:
- --scheme
- http
- --write-timeout=600s
image: semitechnologies/weaviate:preview-send-floats-as-bytes2-dc88ca3
image: semitechnologies/weaviate:preview--wip-optimize-more-float-lists-0f71f35
ports:
- "8080:8080"
- "50051:50051"
Expand Down
2 changes: 1 addition & 1 deletion integration/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import weaviate

GIT_HASH = "dc88ca3"
GIT_HASH = "0f71f35"
SERVER_VERSION = "1.22.5"
NODE_NAME = "node1"
NUM_OBJECT = 10
Expand Down
36 changes: 26 additions & 10 deletions integration/test_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pathlib
import uuid
from dataclasses import dataclass
from typing import Callable, Dict, List, Optional, Type, TypedDict, Union
from typing import Any, Callable, Dict, List, Optional, Type, TypedDict, Union

import pytest
import weaviate
Expand Down Expand Up @@ -303,7 +303,7 @@ def test_insert_many(
client: weaviate.WeaviateClient,
objects: List[Union[Properties, DataObject[Properties]]],
should_error: bool,
):
) -> None:
name = "TestInsertMany"
client.collections.delete(name)
collection = client.collections.create(
Expand Down Expand Up @@ -601,8 +601,9 @@ def test_update_with_tenant(client: weaviate.WeaviateClient):
(DataType.NUMBER_ARRAY, [1.0, 2.1]),
],
)
def test_types(client: weaviate.WeaviateClient, data_type: DataType, value):
def test_types(client: weaviate.WeaviateClient, data_type: DataType, value: Any) -> None:
name = "name"
client.collections.delete("Something")
collection = client.collections.create(
name="Something",
properties=[Property(name=name, data_type=data_type)],
Expand All @@ -611,22 +612,36 @@ def test_types(client: weaviate.WeaviateClient, data_type: DataType, value):
uuid_object = collection.data.insert(properties={name: value})

object_get = collection.query.fetch_object_by_id(uuid_object)
assert object_get.properties[name] == value
assert object_get is not None and object_get.properties[name] == value

batch_return = collection.data.insert_many([{name: value}])
assert not batch_return.has_errors

object_get_from_batch = collection.query.fetch_object_by_id(batch_return.uuids[0])
assert object_get_from_batch is not None and object_get_from_batch.properties[name] == value

client.collections.delete("Something")


@pytest.mark.parametrize("fusion_type", [HybridFusion.RANKED, HybridFusion.RELATIVE_SCORE])
def test_search_hybrid(client: weaviate.WeaviateClient, fusion_type):
def test_search_hybrid(client: weaviate.WeaviateClient, fusion_type: HybridFusion) -> None:
collection = client.collections.create(
name="Testing",
properties=[Property(name="Name", data_type=DataType.TEXT)],
vectorizer_config=Configure.Vectorizer.text2vec_contextionary(),
)
collection.data.insert({"Name": "some name"}, uuid.uuid4())
collection.data.insert({"Name": "other word"}, uuid.uuid4())
objs = collection.query.hybrid(alpha=0, query="name", fusion_type=fusion_type).objects
objs = collection.query.hybrid(
alpha=0, query="name", fusion_type=fusion_type, include_vector=True
).objects
assert len(objs) == 1

objs = collection.query.hybrid(
alpha=1, query="name", fusion_type=fusion_type, vector=objs[0].vector
).objects
assert len(objs) == 2

client.collections.delete("Testing")


Expand Down Expand Up @@ -1551,7 +1566,7 @@ class _Data(TypedDict):
assert "ints" not in objects[0].properties


def test_batch_with_arrays(client: weaviate.WeaviateClient):
def test_batch_with_arrays(client: weaviate.WeaviateClient) -> None:
client.collections.delete("TestBatchArrays")
collection = client.collections.create(
name="TestBatchArrays",
Expand All @@ -1566,8 +1581,8 @@ def test_batch_with_arrays(client: weaviate.WeaviateClient):
],
)

objects_in: List[DataObject[dict]] = [
DataObject[dict](
objects_in: List[DataObject] = [
DataObject(
{
"texts": ["first", "second"],
"ints": [1, 2],
Expand All @@ -1577,7 +1592,7 @@ def test_batch_with_arrays(client: weaviate.WeaviateClient):
"uuids": [UUID1, UUID3],
}
),
DataObject[dict](
DataObject(
{
"texts": ["third", "fourth"],
"ints": [3, 4, 5],
Expand All @@ -1595,6 +1610,7 @@ def test_batch_with_arrays(client: weaviate.WeaviateClient):

for i, obj_id in enumerate(ret.uuids.values()):
obj_out = collection.query.fetch_object_by_id(obj_id)
assert obj_out is not None

for prop, val in objects_in[i].properties.items():
if prop == "dates":
Expand Down
4 changes: 2 additions & 2 deletions integration/test_collection_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ def test_manual_batching(client_sync_indexing: weaviate.WeaviateClient):

def test_add_1000_objects_with_async_indexing_and_wait(
client_async_indexing: weaviate.WeaviateClient,
):
) -> None:
name = "BatchTestAsyncTenants"
client_async_indexing.collections.delete(name)
test = client_async_indexing.collections.create(
Expand All @@ -431,7 +431,7 @@ def test_add_1000_objects_with_async_indexing_and_wait(
Property(name="text", data_type=DataType.TEXT),
],
)
nr_objects = 1000
nr_objects = 100
objs = [
{
"collection": name,
Expand Down
Empty file added profiling/__init__.py
Empty file.
114 changes: 114 additions & 0 deletions profiling/test_profiling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
# run:
# - profiling: pytest -m profiling profiling/test_profiling.py --profile-svg
# - benchmark: pytest profiling/test_profiling.py --benchmark-only --benchmark-disable-gc

import math
from typing import Any, Generator, List

import pytest
import weaviate
from weaviate.collections.classes.config import Configure, DataType, Property


def are_floats_equal(num1: float, num2: float, decimal_places: int = 4) -> bool:
    """Return whether two floats agree to within ``10 ** -decimal_places``.

    The comparison uses an absolute tolerance rather than comparing rounded
    values. Rounding both sides first reports a mismatch for numbers that are
    practically identical but fall on opposite sides of a rounding boundary
    (e.g. ``0.12345`` and ``0.12344999``).

    Args:
        num1: First value.
        num2: Second value.
        decimal_places: Number of decimal places that must agree; the allowed
            absolute difference is ``10 ** -decimal_places``.

    Returns:
        ``True`` when the values are within tolerance, ``False`` otherwise.
        A mismatch is also printed to stdout to ease debugging of failed tests.
    """
    tolerance = 10.0 ** -decimal_places
    # rel_tol=0 so that only the absolute tolerance decides the outcome.
    equal = math.isclose(num1, num2, rel_tol=0.0, abs_tol=tolerance)
    if not equal:
        print(f"{num1} != {num2}")

    return equal


def compare_float_lists(list1: List[float], list2: List[float], decimal_places: int = 4) -> bool:
    """Return whether two float lists match element by element.

    Lists of different length never match. Otherwise every pair of elements
    at the same position is checked with ``are_floats_equal`` using
    ``decimal_places``; checking stops at the first pair that differs.
    """
    if len(list1) != len(list2):
        return False

    return all(
        are_floats_equal(left, right, decimal_places) for left, right in zip(list1, list2)
    )


@pytest.fixture(scope="module")
def client() -> Generator[weaviate.WeaviateClient, None, None]:
    """Yield a module-scoped Weaviate client pointed at the local test instance.

    The client targets ``http://localhost:8080`` (gRPC port 50051) and skips the
    initial checks. All collections are deleted before the client is handed to
    the tests and again once the module's tests have finished.

    Yields:
        The configured ``weaviate.WeaviateClient``.
    """
    weaviate_client = weaviate.WeaviateClient(
        connection_params=weaviate.ConnectionParams.from_url("http://localhost:8080", 50051),
        skip_init_checks=True,
    )
    weaviate_client.collections.delete_all()
    yield weaviate_client
    weaviate_client.collections.delete_all()


@pytest.mark.profiling
def test_get_vector(client: weaviate.WeaviateClient) -> None:
    """Profile repeated retrieval of object vectors.

    Inserts 1000 objects into a ``text2vec_contextionary`` collection, fetches
    one of them by id together with its vector, then issues 100
    ``fetch_objects`` queries of 1000 objects each with vectors included. Each
    query checks that the first returned vector matches the reference vector
    via ``compare_float_lists`` with its default precision.
    """
    name = "TestProfilingVector"
    # Remove leftovers from an earlier aborted run so that ``create`` starts
    # from a clean state (same pattern as test_get_float_properties).
    client.collections.delete(name)

    col = client.collections.create(
        name=name,
        properties=[Property(name="Name", data_type=DataType.TEXT)],
        vectorizer_config=Configure.Vectorizer.text2vec_contextionary(),
    )

    batch_return = col.data.insert_many([{"Name": "Test" * (i % 10)} for i in range(1000)])
    assert len(batch_return.uuids) == 1000

    obj = col.query.fetch_object_by_id(batch_return.uuids[0], include_vector=True)
    assert obj is not None and obj.vector is not None

    for _ in range(100):
        objs = col.query.fetch_objects(
            limit=1000, include_vector=True, return_properties=None, return_metadata=None
        )
        assert len(objs.objects) == 1000
        assert objs.objects[0].vector is not None
        # NOTE(review): this compares the first fetched object with the object
        # looked up by the first batch UUID - confirm that these are expected
        # to be the same object (or that all vectors are expected to be equal).
        assert compare_float_lists(objs.objects[0].vector, obj.vector)

    client.collections.delete(name)


@pytest.mark.profiling
def test_get_float_properties(client: weaviate.WeaviateClient) -> None:
    """Profile repeated retrieval of float-array properties.

    Creates a collection without a vectorizer, inserts 1000 objects whose
    ``numbers`` array holds ``index`` copies of ``3.3``, then issues 100
    ``fetch_objects`` queries returning both properties and checks that the
    first object's array has the content implied by its ``index``.
    """
    name = "TestProfilingFloatProperties"
    client.collections.delete(name)

    # NOTE(review): the schema declares ``Numbers`` while objects and queries
    # use ``numbers`` - presumably the server normalises the first letter; confirm.
    schema = [
        Property(name="Numbers", data_type=DataType.NUMBER_ARRAY),
        Property(name="index", data_type=DataType.INT),
    ]
    col = client.collections.create(
        name=name,
        properties=schema,
        vectorizer_config=Configure.Vectorizer.none(),
    )

    col = client.collections.get(name)

    rows = []
    for i in range(1000):
        length = i % 10 + 1
        rows.append({"index": length, "numbers": [3.3] * length})

    batch_return = col.data.insert_many(rows)
    assert len(batch_return.uuids) == 1000

    for _ in range(100):
        objs = col.query.fetch_objects(
            limit=1000,
            include_vector=False,
            return_properties=["numbers", "index"],
            return_metadata=None,
        )
        assert len(objs.objects) == 1000
        first_props = objs.objects[0].properties
        expected_numbers = [3.3] * int(first_props["index"])
        assert first_props["numbers"] == expected_numbers

    client.collections.delete(name)


def test_benchmark_get_vector(benchmark: Any, client: weaviate.WeaviateClient) -> None:
    """Run ``test_get_vector`` under the ``benchmark`` fixture.

    ``benchmark`` is invoked with the test function and the shared client as
    its argument.
    """
    benchmark(test_get_vector, client)


def test_benchmark_get_float_properties(benchmark: Any, client: weaviate.WeaviateClient) -> None:
    """Run ``test_get_float_properties`` under the ``benchmark`` fixture.

    ``benchmark`` is invoked with the test function and the shared client as
    its argument.
    """
    benchmark(test_get_float_properties, client)
Loading