-
Notifications
You must be signed in to change notification settings - Fork 77
/
faiss_bert_search_server_demo.py
68 lines (59 loc) · 1.7 KB
/
faiss_bert_search_server_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# -*- coding: utf-8 -*-
"""
@author:XuMing([email protected])
@description: Demo of text similarity search with Faiss: build embeddings, build the index, run batch queries, then start a search server.
"""
import sys
sys.path.append('..')
from similarities import bert_embedding, bert_index, bert_filter, bert_server
def main():
    """Run the Faiss + BERT similarity-search demo end to end.

    Calls, in order, ``bert_embedding``, ``bert_index``, ``bert_filter`` and
    ``bert_server`` from the ``similarities`` package, wiring the output
    directories of each stage into the next one.

    All paths are relative literals, so they are presumably resolved against
    the current working directory (confirm against the library). Each
    directory and the index file name is defined once below so that the
    stages cannot drift out of sync.
    """
    model_name = "shibing624/text2vec-base-chinese"

    # Locations shared between stages; each is written by one stage and read
    # by a later one, so keep a single definition per location.
    embeddings_dir = 'bert_engine/text_emb/'
    corpus_dir = 'bert_engine/corpus/'
    index_dir = 'bert_engine/text_index/'
    index_name = "faiss.index"

    # Build embedding
    bert_embedding(
        input_dir='data/toy_bert/',
        embeddings_dir=embeddings_dir,
        corpus_dir=corpus_dir,
        model_name=model_name,
        batch_size=128,
        target_devices=None,
        normalize_embeddings=True,
        text_column_name="sentence",
        # NOTE(review): header/names look like pandas-style reader options
        # for header-less input files -- confirm against bert_embedding.
        header=None,
        names=['sentence'],
    )

    # Build index
    bert_index(
        embeddings_dir=embeddings_dir,
        index_dir=index_dir,
        index_name=index_name,
        max_index_memory_usage="1G",
        current_memory_available="2G",
        use_gpu=False,
        nb_cores=None,
    )

    # Filter (search): supports multiple queries in one batch
    sentences = ['如何更换花呗绑定银行卡', '花呗更改绑定银行卡']
    bert_filter(
        queries=sentences,
        # Plain literal: the original used an f-string with no placeholders.
        output_file="outputs/result.jsonl",
        model_name=model_name,
        index_dir=index_dir,
        index_name=index_name,
        corpus_dir=corpus_dir,
        num_results=5,
        threshold=None,
        device=None,
    )

    # Server: same model/index/corpus as the batch search above, on port 8001.
    # NOTE(review): presumably blocks while serving -- confirm.
    bert_server(
        model_name=model_name,
        index_dir=index_dir,
        index_name=index_name,
        corpus_dir=corpus_dir,
        num_results=5,
        threshold=None,
        device=None,
        port=8001,
    )
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    main()