Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files (browse the repository at this point in the history)
for more information, see https://pre-commit.ci

Signed-off-by: Dong, Bo1 <[email protected]>
  • Loading branch information
pre-commit-ci[bot] authored and a32543254 committed Sep 3, 2024
1 parent 4bf3b59 commit aa28c54
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 15 deletions.
23 changes: 12 additions & 11 deletions comps/reranks/neural-speed/neuralspeed-docker/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class Request(Struct, kw_only=True):
query: str
docs: List[str]


class Response(Struct, kw_only=True):
scores: List[float]

Expand All @@ -45,17 +46,17 @@ def __init__(self):
n_ctx=INFERENCE_CONTEXT + 2,
)

def forward(self, datas: List[Request]) -> List[Response]:
batch = len(datas)
def forward(self, data: List[Request]) -> List[Response]:
batch = len(data)
ndoc = []
inps = []
for data in datas:
for data in data:
inp = [[data.query, doc] for doc in data.docs]
inps.extend(inp)
ndoc.append(len(data.docs))
outs=[]
for i in range(0,len(inps),INFERENCE_BATCH_SIZE):
inp_bs = inps[i:i+INFERENCE_BATCH_SIZE]
outs = []
for i in range(0, len(inps), INFERENCE_BATCH_SIZE):
inp_bs = inps[i : i + INFERENCE_BATCH_SIZE]
inputs = self.tokenizer(
inp_bs, padding=True, truncation=True, max_length=INFERENCE_CONTEXT, return_tensors="pt"
)
Expand All @@ -67,14 +68,14 @@ def forward(self, datas: List[Request]) -> List[Response]:
continuous_batching=False,
ignore_padding=True,
)
logger.info(f'Toal batch {batch} input shape {inputs.input_ids.shape} time {time.time()-st}')
logger.info(f"Toal batch {batch} input shape {inputs.input_ids.shape} time {time.time()-st}")
outs.append(output)
ns_outputs=numpy.concatenate(outs,axis=0)
resps =[]
ns_outputs = numpy.concatenate(outs, axis=0)
resps = []
pos = 0
for i in range(batch):
resp = Response(scores=ns_outputs[pos:pos+ndoc[i]].tolist())
pos+=ndoc[i]
resp = Response(scores=ns_outputs[pos : pos + ndoc[i]].tolist())
pos += ndoc[i]
resps.append(resp)
return resps

Expand Down
6 changes: 2 additions & 4 deletions comps/reranks/neural-speed/reranking_neuralspeed_svc.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
statistics_dict,
)


# class MosecEmbeddings(OpenAIEmbeddings):

# def _get_len_safe_embeddings(
Expand Down Expand Up @@ -67,8 +66,7 @@ def reranking(input: SearchedDoc) -> LLMParamsDoc:
url = mosec_reranking_endpoint + "/inference"
data = {"query": input.initial_query, "texts": docs}
resp = requests.post(url, data=msgspec.msgpack.encode(data))
response = msgspec.msgpack.decode(resp.content)['scores']

response = msgspec.msgpack.decode(resp.content)["scores"]

response_data = response.json()
best_response = max(response_data, key=lambda response: response["score"])
Expand All @@ -90,7 +88,7 @@ def reranking(input: SearchedDoc) -> LLMParamsDoc:
if logflag:
logger.info(input.initial_query)
return LLMParamsDoc(query=input.initial_query)


if __name__ == "__main__":
mosec_reranking_endpoint = os.getenv("MOSEC_RERANKING_ENDPOINT", "http://localhost:6001")
Expand Down

0 comments on commit aa28c54

Please sign in to comment.