Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhance: Reorganize the examples #2340

Merged
merged 1 commit into from
Nov 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ The following collection shows Milvus versions and recommended PyMilvus versions
| 2.1.\* | 2.1.3 |
| 2.2.\* | 2.2.15 |
| 2.3.\* | 2.3.7 |
| 2.4.\* | 2.4.4 |
| 2.4.\* | 2.4.9 |


## Installation
Expand All @@ -43,7 +43,7 @@ $ pip3 install pymilvus[bulk_writer] # for bulk_writer
You can install a specific version of PyMilvus by:

```shell
$ pip3 install pymilvus==2.4.4
$ pip3 install pymilvus==2.4.9
```

You can upgrade PyMilvus to the latest version by:
Expand Down
1 change: 1 addition & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Examples
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
62 changes: 22 additions & 40 deletions examples/hybrid_search.py
Original file line number Diff line number Diff line change
@@ -1,35 +1,34 @@
import numpy as np
from pymilvus import (
connections,
utility,
FieldSchema, CollectionSchema, DataType,
Collection,
MilvusClient,
DataType,
AnnSearchRequest, RRFRanker, WeightedRanker,
)

fmt = "\n=== {:30} ===\n"
search_latency_fmt = "search latency = {:.4f}s"
num_entities, dim = 3000, 8

print(fmt.format("start connecting to Milvus"))
connections.connect("default", host="localhost", port="19530")
collection_name = "hello_milvus"
milvus_client = MilvusClient("http://localhost:19530")

has = utility.has_collection("hello_milvus")
print(f"Does collection hello_milvus exist in Milvus: {has}")
if has:
utility.drop_collection("hello_milvus")
has_collection = milvus_client.has_collection(collection_name, timeout=5)
if has_collection:
milvus_client.drop_collection(collection_name)

fields = [
FieldSchema(name="pk", dtype=DataType.VARCHAR, is_primary=True, auto_id=False, max_length=100),
FieldSchema(name="random", dtype=DataType.DOUBLE),
FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim),
FieldSchema(name="embeddings2", dtype=DataType.FLOAT_VECTOR, dim=dim)
]
schema = milvus_client.create_schema(auto_id=False, description="hello_milvus is the simplest demo to introduce the APIs")
schema.add_field("pk", DataType.VARCHAR, is_primary=True, max_length=100)
schema.add_field("random", DataType.DOUBLE)
schema.add_field("embeddings", DataType.FLOAT_VECTOR, dim=dim)
schema.add_field("embeddings2", DataType.FLOAT_VECTOR, dim=dim)

schema = CollectionSchema(fields, "hello_milvus is the simplest demo to introduce the APIs")
index_params = milvus_client.prepare_index_params()
index_params.add_index(field_name = "embeddings", index_type = "IVF_FLAT", metric_type="L2", nlist=128)
index_params.add_index(field_name = "embeddings2",index_type = "IVF_FLAT", metric_type="L2", nlist=128)

print(fmt.format("Create collection `hello_milvus`"))
hello_milvus = Collection("hello_milvus", schema, consistency_level="Strong", num_shards = 4)

milvus_client.create_collection(collection_name, schema=schema, index_params=index_params, consistency_level="Strong")

print(fmt.format("Start inserting entities"))
rng = np.random.default_rng(seed=19530)
Expand All @@ -41,29 +40,19 @@
rng.random((num_entities, dim)), # field embeddings2, supports numpy.ndarray and list
]

insert_result = hello_milvus.insert(entities)
rows = [ {"pk": entities[0][i], "random": entities[1][i], "embeddings": entities[2][i], "embeddings2": entities[3][i]} for i in range (num_entities)]

hello_milvus.flush()
print(f"Number of entities in Milvus: {hello_milvus.num_entities}") # check the num_entities
insert_result = milvus_client.insert(collection_name, rows)

print(fmt.format("Start Creating index IVF_FLAT"))
index = {
"index_type": "IVF_FLAT",
"metric_type": "L2",
"params": {"nlist": 128},
}

hello_milvus.create_index("embeddings", index)
hello_milvus.create_index("embeddings2", index)

print(fmt.format("Start loading"))
hello_milvus.load()
milvus_client.load_collection(collection_name)

field_names = ["embeddings", "embeddings2"]
field_names = ["embeddings"]

req_list = []
nq = 1
weights = [0.2, 0.3]
default_limit = 5
vectors_to_search = []

Expand All @@ -79,15 +68,8 @@
req = AnnSearchRequest(**search_param)
req_list.append(req)

hybrid_res = hello_milvus.hybrid_search(req_list, WeightedRanker(*weights), default_limit, output_fields=["random"])

print("rank by WightedRanker")
for hits in hybrid_res:
for hit in hits:
print(f" hybrid search hit: {hit}")

print("rank by RRFRanker")
hybrid_res = hello_milvus.hybrid_search(req_list, RRFRanker(), default_limit, output_fields=["random"])
hybrid_res = milvus_client.hybrid_search(collection_name, req_list, RRFRanker(), default_limit, output_fields=["random"])
for hits in hybrid_res:
for hit in hits:
print(f" hybrid search hit: {hit}")
93 changes: 93 additions & 0 deletions examples/hybrid_search/hybrid_search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import numpy as np
from pymilvus import (
connections,
utility,
FieldSchema, CollectionSchema, DataType,
Collection,
AnnSearchRequest, RRFRanker, WeightedRanker,
)

# Banner template used for the progress messages printed between steps.
fmt = "\n=== {:30} ===\n"
search_latency_fmt = "search latency = {:.4f}s"
# Number of rows to insert and dimensionality of both vector fields.
num_entities, dim = 3000, 8
# Single source of truth for the demo collection's name.
collection_name = "hello_milvus"

# Connect to the Milvus server at localhost:19530 under the "default" alias.
print(fmt.format("start connecting to Milvus"))
connections.connect("default", host="localhost", port="19530")

# Drop any collection left over from a previous run so the demo starts clean.
has = utility.has_collection(collection_name)
print(f"Does collection {collection_name} exist in Milvus: {has}")
if has:
    utility.drop_collection(collection_name)

# Schema: a caller-supplied VARCHAR primary key, one scalar field, and two
# float-vector fields of the same dimension (one ANN request is issued per
# vector field later in the script).
fields = [
    FieldSchema(name="pk", dtype=DataType.VARCHAR, is_primary=True, auto_id=False, max_length=100),
    FieldSchema(name="random", dtype=DataType.DOUBLE),
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim),
    FieldSchema(name="embeddings2", dtype=DataType.FLOAT_VECTOR, dim=dim),
]

schema = CollectionSchema(fields, "hello_milvus is the simplest demo to introduce the APIs")

print(fmt.format("Create collection `hello_milvus`"))
hello_milvus = Collection(collection_name, schema, consistency_level="Strong", num_shards=4)

print(fmt.format("Start inserting entities"))
# Fixed seed so every run inserts (and later searches with) the same data.
rng = np.random.default_rng(seed=19530)
# Column-oriented payload: one entry per schema field, in schema order.
entities = [
    # provide the pk field because `auto_id` is set to False
    [str(i) for i in range(num_entities)],
    rng.random(num_entities).tolist(),  # field random, only supports list
    rng.random((num_entities, dim)),  # field embeddings, supports numpy.ndarray and list
    rng.random((num_entities, dim)),  # field embeddings2, supports numpy.ndarray and list
]

insert_result = hello_milvus.insert(entities)

# Flush before reading the entity count so the rows just inserted are included.
hello_milvus.flush()
print(f"Number of entities in Milvus: {hello_milvus.num_entities}")  # check the num_entities

print(fmt.format("Start Creating index IVF_FLAT"))
# The same IVF_FLAT / L2 index definition is applied to both vector fields.
index = {
    "index_type": "IVF_FLAT",
    "metric_type": "L2",
    "params": {"nlist": 128},
}

hello_milvus.create_index("embeddings", index)
hello_milvus.create_index("embeddings2", index)

# The collection is loaded here, ahead of the searches later in the script.
print(fmt.format("Start loading"))
hello_milvus.load()

# Vector fields to query; one ANN search request is built per field.
field_names = ["embeddings", "embeddings2"]

req_list = []
nq = 1  # number of query vectors per request
# Weights handed to WeightedRanker, listed in the same order as field_names.
weights = [0.2, 0.3]
default_limit = 5

for field_name in field_names:
    # Generate a fresh batch of random query vectors for this field.
    vectors_to_search = rng.random((nq, dim))
    search_param = {
        "data": vectors_to_search,
        "anns_field": field_name,
        "param": {"metric_type": "L2"},
        "limit": default_limit,
        "expr": "random > 0.5",
    }
    req_list.append(AnnSearchRequest(**search_param))

# First pass: merge the per-field results with fixed per-request weights.
print("rank by WeightedRanker")
hybrid_res = hello_milvus.hybrid_search(req_list, WeightedRanker(*weights), default_limit, output_fields=["random"])
for hits in hybrid_res:
    for hit in hits:
        print(f" hybrid search hit: {hit}")

# Second pass: same requests, merged with RRFRanker instead.
print("rank by RRFRanker")
hybrid_res = hello_milvus.hybrid_search(req_list, RRFRanker(), default_limit, output_fields=["random"])
for hits in hybrid_res:
    for hit in hits:
        print(f" hybrid search hit: {hit}")
File renamed without changes.
File renamed without changes.
75 changes: 0 additions & 75 deletions examples/milvus_client/hybrid_search.py

This file was deleted.

85 changes: 0 additions & 85 deletions examples/milvus_client/partition.py

This file was deleted.

File renamed without changes.
Loading