Skip to content

Commit

Permalink
V1.9.x (#608)
Browse files Browse the repository at this point in the history
* update models

* up backward compatibility version

* refactoring: address review comment

---------

Co-authored-by: George Panchuk <[email protected]>
  • Loading branch information
generall and joein committed Apr 22, 2024
1 parent e0820e0 commit 6b001b8
Show file tree
Hide file tree
Showing 7 changed files with 302 additions and 168 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
- name: Backward compatibility integration tests
run: |
export RUNNER_OS=${{ runner.os }}
QDRANT_VERSION='v1.7.4' ./tests/integration-tests.sh
QDRANT_VERSION='v1.8.4' ./tests/integration-tests.sh
shell: bash
- name: Run fastembed tests without fastembed
run: |
Expand Down
32 changes: 28 additions & 4 deletions qdrant_client/conversions/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ def convert_collection_info(cls, model: grpc.CollectionInfo) -> rest.CollectionI
payload_schema=cls.convert_payload_schema(model.payload_schema),
segments_count=model.segments_count,
status=cls.convert_collection_status(model.status),
vectors_count=model.vectors_count,
vectors_count=model.vectors_count if model.HasField("vectors_count") else None,
points_count=model.points_count,
indexed_vectors_count=model.indexed_vectors_count or 0,
)
Expand Down Expand Up @@ -380,8 +380,10 @@ def convert_collection_status(cls, model: grpc.CollectionStatus) -> rest.Collect
return rest.CollectionStatus.YELLOW
elif model == grpc.CollectionStatus.Red:
return rest.CollectionStatus.RED
else:
raise ValueError(f"invalid CollectionStatus model: {model}") # pragma: no cover
elif model == grpc.CollectionStatus.Grey:
return rest.CollectionStatus.GREY

raise ValueError(f"invalid CollectionStatus model: {model}") # pragma: no cover

@classmethod
def convert_update_result(cls, model: grpc.UpdateResult) -> rest.UpdateResult:
Expand Down Expand Up @@ -756,6 +758,15 @@ def convert_snapshot_description(
size=model.size,
)

@classmethod
def convert_datatype(cls, model: grpc.Datatype) -> rest.Datatype:
    """Translate a gRPC `Datatype` enum value into the matching REST `Datatype`.

    Only `Float32` and `Uint8` have a REST counterpart here.

    Raises:
        ValueError: if `model` is any other value.
    """
    if model == grpc.Datatype.Float32:
        return rest.Datatype.FLOAT32
    if model == grpc.Datatype.Uint8:
        return rest.Datatype.UINT8

    # Fall-through: nothing above matched, so the value cannot be represented.
    raise ValueError(f"invalid Datatype model: {model}")  # pragma: no cover

@classmethod
def convert_vector_params(cls, model: grpc.VectorParams) -> rest.VectorParams:
return rest.VectorParams(
Expand All @@ -772,6 +783,7 @@ def convert_vector_params(cls, model: grpc.VectorParams) -> rest.VectorParams:
else None
),
on_disk=model.on_disk if model.HasField("on_disk") else None,
datatype=cls.convert_datatype(model.datatype) if model.HasField("datatype") else None,
)

@classmethod
Expand Down Expand Up @@ -1517,7 +1529,7 @@ def convert_collection_info(cls, model: rest.CollectionInfo) -> grpc.CollectionI
),
segments_count=model.segments_count,
status=cls.convert_collection_status(model.status),
vectors_count=model.vectors_count,
vectors_count=model.vectors_count if model.vectors_count is not None else None,
points_count=model.points_count,
)

Expand All @@ -1529,6 +1541,8 @@ def convert_collection_status(cls, model: rest.CollectionStatus) -> grpc.Collect
return grpc.CollectionStatus.Yellow
if model == rest.CollectionStatus.GREEN:
return grpc.CollectionStatus.Green
if model == rest.CollectionStatus.GREY:
return grpc.CollectionStatus.Grey

raise ValueError(f"invalid CollectionStatus model: {model}") # pragma: no cover

Expand Down Expand Up @@ -2118,6 +2132,15 @@ def convert_snapshot_description(
size=model.size,
)

@classmethod
def convert_datatype(cls, model: rest.Datatype) -> grpc.Datatype:
    """Translate a REST `Datatype` into the matching gRPC `Datatype` enum value.

    Raises:
        ValueError: if `model` equals neither `FLOAT32` nor `UINT8`.
    """
    # Ordered (REST value, gRPC value) pairs. Matching is done with `==`
    # rather than a dict lookup so equality semantics stay exactly as before.
    known_pairs = (
        (rest.Datatype.FLOAT32, grpc.Datatype.Float32),
        (rest.Datatype.UINT8, grpc.Datatype.Uint8),
    )
    for rest_value, grpc_value in known_pairs:
        if model == rest_value:
            return grpc_value

    raise ValueError(f"invalid Datatype model: {model}")  # pragma: no cover

@classmethod
def convert_vector_params(cls, model: rest.VectorParams) -> grpc.VectorParams:
return grpc.VectorParams(
Expand All @@ -2134,6 +2157,7 @@ def convert_vector_params(cls, model: rest.VectorParams) -> grpc.VectorParams:
else None
),
on_disk=model.on_disk,
datatype=cls.convert_datatype(model.datatype) if model.datatype is not None else None,
)

@classmethod
Expand Down
330 changes: 174 additions & 156 deletions qdrant_client/grpc/collections_pb2.py

Large diffs are not rendered by default.

64 changes: 61 additions & 3 deletions qdrant_client/http/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,14 @@
VectorsConfigDiff = Dict[str, "VectorParamsDiff"]


# Payload describing which shard transfer to abort; it is the type of
# `AbortTransferOperation.abort_transfer`.
# `extra="forbid"` is passed as model config so unknown keys are rejected.
class AbortShardTransfer(BaseModel, extra="forbid"):
# All three fields are required (`...` means no default is provided).
shard_id: int = Field(..., description="")
# presumably the peer receiving the shard — TODO confirm against the server API
to_peer_id: int = Field(..., description="")
# presumably the peer sending the shard — TODO confirm against the server API
from_peer_id: int = Field(..., description="")


class AbortTransferOperation(BaseModel, extra="forbid"):
abort_transfer: "MoveShard" = Field(..., description="")
abort_transfer: "AbortShardTransfer" = Field(..., description="")


class AliasDescription(BaseModel):
Expand All @@ -24,6 +30,7 @@ class AppBuildTelemetry(BaseModel):
version: str = Field(..., description="")
features: Optional["AppFeaturesTelemetry"] = Field(default=None, description="")
system: Optional["RunningEnvironmentTelemetry"] = Field(default=None, description="")
jwt_rbac: Optional[bool] = Field(default=None, description="")
startup: Union[datetime, date] = Field(..., description="")


Expand Down Expand Up @@ -153,7 +160,7 @@ class CollectionInfo(BaseModel):
)
vectors_count: Optional[int] = Field(
default=None,
description="Approximate number of vectors in collection. All vectors in collection are available for querying. Calculated as `points_count x vectors_per_point`. Where `vectors_per_point` is a number of named vectors in schema.",
description="DEPRECATED: Approximate number of vectors in collection. All vectors in collection are available for querying. Calculated as `points_count x vectors_per_point`. Where `vectors_per_point` is a number of named vectors in schema.",
)
indexed_vectors_count: Optional[int] = Field(
default=None,
Expand Down Expand Up @@ -220,6 +227,7 @@ def __str__(self) -> str:

GREEN = "green"
YELLOW = "yellow"
GREY = "grey"
RED = "red"


Expand Down Expand Up @@ -400,6 +408,18 @@ class CreateShardingKeyOperation(BaseModel, extra="forbid"):
create_sharding_key: "CreateShardingKey" = Field(..., description="")


class Datatype(str, Enum):
    """
    Element type used to represent vectors in storage.

    Picking a datatype trades memory usage and performance against accuracy:

    - `float32`: elements are stored as single-precision floats, 4 bytes each.
    - `uint8`: elements are stored as unsigned 8-bit integers, 1 byte each;
      vector elements are expected to be in range `[0, 255]`.
    """

    FLOAT32 = "float32"
    UINT8 = "uint8"

    def __str__(self) -> str:
        # Render the raw value ("float32") instead of "Datatype.FLOAT32".
        return f"{self.value}"


class DatetimeRange(BaseModel, extra="forbid"):
"""
Range filter request
Expand Down Expand Up @@ -1018,6 +1038,10 @@ class MoveShardOperation(BaseModel, extra="forbid"):
move_shard: "MoveShard" = Field(..., description="")


# Single variant of the `MultiVectorConfig` union; referenced (via that union)
# by `VectorDataConfig.multi_vec_config`.
class MultiVectorConfigOneOf(BaseModel):
# Required field with an unconstrained (`Any`) value.
# NOTE(review): the expected payload shape is not visible here — confirm against the API schema.
max_sim: Any = Field(..., description="")


class NamedSparseVector(BaseModel, extra="forbid"):
"""
Sparse vector data with name
Expand All @@ -1029,7 +1053,7 @@ class NamedSparseVector(BaseModel, extra="forbid"):

class NamedVector(BaseModel, extra="forbid"):
"""
Vector data with name
Dense vector data with name
"""

name: str = Field(..., description="Name of vector data")
Expand Down Expand Up @@ -1822,6 +1846,17 @@ def __str__(self) -> str:
SNAPSHOT = "snapshot"


class ShardTransferMethodOneOf2(str, Enum):
    """
    Shard transfer method that attempts to transfer the shard difference by WAL delta.
    """

    WAL_DELTA = "wal_delta"

    def __str__(self) -> str:
        # Render the raw value ("wal_delta") instead of the qualified member name.
        return f"{self.value}"


class ShardingMethod(str, Enum):
AUTO = "auto"
CUSTOM = "custom"
Expand Down Expand Up @@ -2098,6 +2133,12 @@ class VectorDataConfig(BaseModel):
quantization_config: Optional["QuantizationConfig"] = Field(
default=None, description="Vector specific quantization config that overrides collection config"
)
multi_vec_config: Optional["MultiVectorConfig"] = Field(
default=None, description="Vector specific configuration to enable multiple vectors per point"
)
datatype: Optional["VectorStorageDatatype"] = Field(
default=None, description="Vector specific configuration to set specific storage element type"
)


class VectorDataInfo(BaseModel):
Expand Down Expand Up @@ -2138,6 +2179,7 @@ class VectorParams(BaseModel, extra="forbid"):
default=None,
description="If true, vectors are served from disk, improving RAM usage at the cost of latency Default: false",
)
datatype: Optional["Datatype"] = Field(default=None, description="Params of single vector data storage")


class VectorParamsDiff(BaseModel, extra="forbid"):
Expand All @@ -2152,6 +2194,18 @@ class VectorParamsDiff(BaseModel, extra="forbid"):
)


class VectorStorageDatatype(str, Enum):
    """
    Storage element types available for vectors.
    """

    FLOAT32 = "float32"
    UINT8 = "uint8"

    def __str__(self) -> str:
        # Render the raw value ("uint8") instead of the qualified member name.
        return f"{self.value}"


class VectorStorageTypeOneOf(str, Enum):
"""
Storage in memory (RAM) Will be very fast at the cost of consuming a lot of memory.
Expand Down Expand Up @@ -2287,6 +2341,9 @@ def __str__(self) -> str:
MatchAny,
MatchExcept,
]
# Union of the multi-vector configuration variants; only one variant is listed.
# With a single member, `Union[X]` evaluates to `X` itself.
MultiVectorConfig = Union[
MultiVectorConfigOneOf,
]
NamedVectorStruct = Union[
List[StrictFloat],
NamedVector,
Expand Down Expand Up @@ -2349,6 +2406,7 @@ def __str__(self) -> str:
ShardTransferMethod = Union[
ShardTransferMethodOneOf,
ShardTransferMethodOneOf1,
ShardTransferMethodOneOf2,
]
SparseIndexType = Union[
SparseIndexTypeOneOf,
Expand Down
19 changes: 16 additions & 3 deletions qdrant_client/proto/collections.proto
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,19 @@ syntax = "proto3";
package qdrant;
option csharp_namespace = "Qdrant.Client.Grpc";

// Element type used for stored vectors; referenced by VectorParams.datatype.
enum Datatype {
// Zero value (proto3 requires the first enum value to be 0).
// NOTE(review): presumably means "no explicit choice" — confirm server-side semantics.
Default = 0;
Float32 = 1;
Uint8 = 2;
}

// Parameters of a vector: required size and distance, plus optional
// per-vector settings. Fields marked `optional` track explicit presence,
// so a reader can tell "unset" apart from the type's default value.
message VectorParams {
uint64 size = 1; // Size of the vectors
Distance distance = 2; // Distance function used for comparing vectors
optional HnswConfigDiff hnsw_config = 3; // Configuration of vector HNSW graph. If omitted - the collection configuration will be used
optional QuantizationConfig quantization_config = 4; // Configuration of vector quantization config. If omitted - the collection configuration will be used
optional bool on_disk = 5; // If true - serve vectors from disk. If set to false, the vectors will be loaded in RAM.
optional Datatype datatype = 6; // Data type of the vectors
}

message VectorParamsDiff {
Expand Down Expand Up @@ -93,6 +100,7 @@ enum CollectionStatus {
Green = 1; // All segments are ready
Yellow = 2; // Optimization in process
Red = 3; // Something went wrong
Grey = 4; // Optimization is pending
}

enum PayloadSchemaType {
Expand Down Expand Up @@ -448,9 +456,8 @@ enum ReplicaState {
Partial = 2; // The shard is partially loaded and is currently receiving data from other shards
Initializing = 3; // Collection is being created
Listener = 4; // A shard which receives data, but is not used for search; Useful for backup shards
PartialSnapshot = 5; // Snapshot shard transfer is in progress; Updates should not be sent to (and are ignored by) the shard
PartialSnapshot = 5; // Deprecated: snapshot shard transfer is in progress; Updates should not be sent to (and are ignored by) the shard
Recovery = 6; // Shard is undergoing recovered by an external node; Normally rejects updates, accepts updates if force is true
// TODO(1.9): deprecate PartialSnapshot state
}

message ShardKey {
Expand Down Expand Up @@ -496,6 +503,12 @@ message MoveShard {
optional ShardTransferMethod method = 4;
}

// Identifies a shard transfer to abort; used as the `abort_transfer` operation
// of UpdateCollectionClusterSetupRequest.
message AbortShardTransfer {
uint32 shard_id = 1; // Local shard id
// presumably the peer the shard is transferred from — TODO confirm
uint64 from_peer_id = 2;
// presumably the peer the shard is transferred to — TODO confirm
uint64 to_peer_id = 3;
}

message RestartTransfer {
uint32 shard_id = 1; // Local shard id
uint64 from_peer_id = 2;
Expand Down Expand Up @@ -530,7 +543,7 @@ message UpdateCollectionClusterSetupRequest {
oneof operation {
MoveShard move_shard = 2;
MoveShard replicate_shard = 3;
MoveShard abort_transfer = 4;
AbortShardTransfer abort_transfer = 4;
Replica drop_replica = 5;
CreateShardKey create_shard_key = 7;
DeleteShardKey delete_shard_key = 8;
Expand Down
20 changes: 19 additions & 1 deletion tests/conversions/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@
ef_construct=1000,
),
on_disk=True,
datatype=grpc.Datatype.Float32,
)

product_quantizations = [
Expand Down Expand Up @@ -190,6 +191,7 @@
size=100,
distance=grpc.Distance.Cosine,
quantization_config=grpc.QuantizationConfig(scalar=scalar_quantization),
datatype=grpc.Datatype.Uint8,
)

single_vector_config = grpc.VectorsConfig(params=vector_param)
Expand Down Expand Up @@ -302,6 +304,7 @@
collection_status = grpc.CollectionStatus.Yellow
collection_status_green = grpc.CollectionStatus.Green
collection_status_error = grpc.CollectionStatus.Red
collection_status_grey = grpc.CollectionStatus.Grey

optimizer_status = grpc.OptimizerStatus(ok=True)
optimizer_status_error = grpc.OptimizerStatus(ok=False, error="Error!")
Expand Down Expand Up @@ -359,6 +362,16 @@
points=0,
)

collection_info_grey = grpc.CollectionInfo(
status=collection_status_grey,
optimizer_status=optimizer_status_error,
# vectors_count=100000,
points_count=100000,
segments_count=6,
config=collection_config,
payload_schema={},
)

collection_info_ok = grpc.CollectionInfo(
status=collection_status_green,
optimizer_status=optimizer_status,
Expand Down Expand Up @@ -888,7 +901,12 @@
"ValuesCount": [values_count],
"Filter": [filter_nested, filter_],
"OptimizersConfigDiff": [optimizer_config, optimizer_config_half],
"CollectionInfo": [collection_info, collection_info_ok, collection_info_red],
"CollectionInfo": [
collection_info,
collection_info_ok,
collection_info_red,
collection_info_grey,
],
"FieldCondition": [
field_condition_match,
field_condition_range,
Expand Down
3 changes: 3 additions & 0 deletions tools/generate_grpc_client.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ set -e

PROJECT_ROOT="$(pwd)/$(dirname "$0")/../"

pip install grpcio==1.48.2
pip install grpcio-tools==1.48.2

cd $(mktemp -d)

git clone --sparse --filter=blob:none --depth=1 [email protected]:qdrant/qdrant.git
Expand Down

0 comments on commit 6b001b8

Please sign in to comment.