
Commit

Basic test case
GuanLuo committed Oct 3, 2023
1 parent 6510570 commit aa81059
Showing 3 changed files with 110 additions and 93 deletions.
11 changes: 9 additions & 2 deletions qa/L0_http/llm_models/vllm_proxy/1/model.py
@@ -27,6 +27,7 @@
import json

import triton_python_backend_utils as pb_utils
import numpy as np


class TritonPythonModel:
@@ -46,6 +47,9 @@ def execute(self, requests):
def exec(self, requests):
responses = []
for request in requests:
params = json.loads(request.parameters())
rep_count = params["REPETITION"] if "REPETITION" in params else 1

input_np = pb_utils.get_input_tensor_by_name(request, "PROMPT").as_numpy()
stream_np = pb_utils.get_input_tensor_by_name(request, "STREAM").as_numpy()
stream = stream_np.flatten()[0]
@@ -58,12 +62,15 @@ def exec(self, requests):
)
)
else:
out_tensor = pb_utils.Tensor("TEXT", input_np)
out_tensor = pb_utils.Tensor("TEXT", np.repeat(input_np, rep_count, axis=1))
responses.append(pb_utils.InferenceResponse([out_tensor]))
return responses

def exec_decoupled(self, requests):
for request in requests:
params = json.loads(request.parameters())
rep_count = params["REPETITION"] if "REPETITION" in params else 1

sender = request.get_response_sender()
input_np = pb_utils.get_input_tensor_by_name(request, "PROMPT").as_numpy()
stream_np = pb_utils.get_input_tensor_by_name(request, "STREAM").as_numpy()
@@ -73,7 +80,7 @@ def exec_decoupled(self, requests):
# FIXME: Could split up response string into tokens, but this is simpler for now.
stream = stream_np.flatten()[0]
if stream:
for _ in range(3):
for _ in range(rep_count):
sender.send(response)
sender.send(None, flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
# If stream disabled, just send one response
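
For reference, a minimal sketch (not part of the commit) of how a client can exercise the new REPETITION request parameter added above. The payload shape mirrors what llm_test.py below sends rather than any documented API, and it assumes a local Triton server on port 8000 serving the vllm_proxy model:

```python
# Hedged sketch: exercise the REPETITION parameter via the /generate route.
# Assumes a local server on port 8000 with the vllm_proxy model loaded.
import json

import requests

url = "http://localhost:8000/v2/models/vllm_proxy/generate"
payload = {"PROMPT": ["hello world"], "STREAM": False, "REPETITION": 3}

r = requests.post(url, data=json.dumps(payload))
r.raise_for_status()

data = r.json()
# With STREAM disabled, the proxy returns a single response whose TEXT
# output repeats the prompt REPETITION times (np.repeat along axis=1).
print(data["TEXT"])
```
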
168 changes: 99 additions & 69 deletions qa/L0_http/llm_test.py
@@ -40,95 +40,125 @@ class HttpTest(tu.TestResultCollector):
def _get_infer_url(self, model_name, route):
return f"http://localhost:8000/v2/models/{model_name}/{route}"

def _simple_infer(self, model_name, inputs, expected_outputs):
headers = {"Content-Type": "application/json"}
url = self._get_infer_url(model_name, "infer")
r = requests.post(url, data=json.dumps(inputs), headers=headers)
# def _simple_infer(self, model_name, inputs, expected_outputs):
# headers = {"Content-Type": "application/json"}
# url = self._get_infer_url(model_name, "infer")
# r = requests.post(url, data=json.dumps(inputs), headers=headers)
# r.raise_for_status()

# content = r.json()
# print(content)

# self.assertEqual(content["model_name"], model_name)
# self.assertIn("outputs", content)
# self.assertEqual(content["outputs"], expected_outputs)

# def _simple_generate_stream(self, model_name, inputs, expected_outputs):
# import sseclient

# headers = {"Accept": "text/event-stream"}
# url = self._get_infer_url(model_name, "generate_stream")
# # stream=True used to indicate response can be iterated over
# r = requests.post(url, data=json.dumps(inputs), headers=headers, stream=True)

# # Validate SSE format
# print(r.headers)
# self.assertIn("Content-Type", r.headers)
# # FIXME: Clarify correct header here.
# # self.assertEqual(r.headers['Content-Type'], 'text/event-stream')
# self.assertEqual(r.headers["Content-Type"], "text/event-stream; charset=utf-8")

# # SSE format (data: []) is hard to parse, use helper library for simplicity
# client = sseclient.SSEClient(r)
# tokens = []
# for i, event in enumerate(client.events()):
# # End of event stream
# if event.data == "[DONE]":
# continue

# # Parse event data, join events into a single response
# data = json.loads(event.data)
# print(f"Event {i}:", data)
# if "TEXT" not in data:
# print("FIXME: EXPECTED OUTPUT FIELD NOT FOUND")
# else:
# tokens.append(data["TEXT"])
# print("TOKENS:", tokens)

# def test_infer(self):
# model_name = "onnx_zero_1_object"
# parameters = {}

# # Setup text-based input
# input0_data = ["hello"]
# input0 = {
# "name": "INPUT0",
# "datatype": "BYTES",
# "shape": [1, 1],
# "data": input0_data,

# }
Check notice (Code scanning / CodeQL): Commented-out code. This comment appears to contain commented-out code.
# inputs = {"inputs": [input0], "parameters": parameters}
# # Identity model, output should match input
# expected_outputs = [
# {
# "name": "OUTPUT0",
# "datatype": "BYTES",
# "shape": [1, 1],
# "data": input0_data,
# }
# ]
# self._simple_infer(model_name, inputs, expected_outputs)

def test_generate(self):
model_name = "vllm_proxy"
# Setup text-based input
text = "hello world"
inputs = {"PROMPT": [text], "STREAM": False}

url = self._get_infer_url(model_name, "generate")
# stream=True used to indicate response can be iterated over
r = requests.post(url, data=json.dumps(inputs))

r.raise_for_status()

content = r.json()
print(content)
self.assertIn("Content-Type", r.headers)
self.assertIn("application/json", r.headers["Content-Type"])

self.assertEqual(content["model_name"], model_name)
self.assertIn("outputs", content)
self.assertEqual(content["outputs"], expected_outputs)
data = r.json()
self.assertTrue("TEXT" in data)

Check notice (Code scanning / CodeQL): Imprecise assert. assertTrue(a in b) cannot provide an informative message. Using assertIn(a, b) instead will give more informative messages.
self.assertEqual(text, data["TEXT"])

def test_generate_stream(self):
model_name = "vllm_proxy"
# Setup text-based input
text = "hello world"
rep_count = 3
inputs = {"PROMPT": [text], "STREAM": True, "REPETITION": rep_count}

def _simple_generate_stream(self, model_name, inputs, expected_outputs):
import sseclient

headers = {"Accept": "text/event-stream"}
url = self._get_infer_url(model_name, "generate_stream")
# stream=True used to indicate response can be iterated over
r = requests.post(url, data=json.dumps(inputs), headers=headers, stream=True)

r.raise_for_status()

# Validate SSE format
print(r.headers)
self.assertIn("Content-Type", r.headers)
# FIXME: Clarify correct header here.
# self.assertEqual(r.headers['Content-Type'], 'text/event-stream')
self.assertEqual(r.headers["Content-Type"], "text/event-stream; charset=utf-8")
self.assertIn("text/event-stream", r.headers["Content-Type"])

# SSE format (data: []) is hard to parse, use helper library for simplicity
client = sseclient.SSEClient(r)
tokens = []
res_count = 0
for i, event in enumerate(client.events()):
# End of event stream
if event.data == "[DONE]":
continue

# Parse event data, join events into a single response
data = json.loads(event.data)
print(f"Event {i}:", data)
if "TEXT" not in data:
print("FIXME: EXPECTED OUTPUT FIELD NOT FOUND")
else:
tokens.append(data["TEXT"])
print("TOKENS:", tokens)

def test_infer(self):
model_name = "onnx_zero_1_object"
parameters = {}

# Setup text-based input
input0_data = ["hello"]
input0 = {
"name": "INPUT0",
"datatype": "BYTES",
"shape": [1, 1],
"data": input0_data,
}
inputs = {"inputs": [input0], "parameters": parameters}
# Identity model, output should match input
expected_outputs = [
{
"name": "OUTPUT0",
"datatype": "BYTES",
"shape": [1, 1],
"data": input0_data,
}
]
self._simple_infer(model_name, inputs, expected_outputs)

# def test_generate(self):
# pass

def test_generate_stream(self):
# TODO: vllm
model_name = "onnx_zero_1_object"
parameters = {}
# Setup text-based input
input0_data = ["hello"]
inputs = {"prompt": input0_data, "stream": True, "parameters": parameters}
# Identity model, output should match input
expected_outputs = [
{
"name": "OUTPUT0",
"datatype": "BYTES",
"shape": [1, 1],
"data": input0_data,
}
]
self._simple_generate_stream(model_name, inputs, expected_outputs)
self.assertTrue("TEXT" in data)

Check notice (Code scanning / CodeQL): Imprecise assert. assertTrue(a in b) cannot provide an informative message. Using assertIn(a, b) instead will give more informative messages.
self.assertEqual(text, data["TEXT"])
res_count += 1
self.assertTrue(rep_count, res_count)


if __name__ == "__main__":
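
Similarly, a short sketch (not part of the commit) of how the streaming path can be consumed outside the test harness. It follows the same sseclient-py pattern as test_generate_stream above, with the same assumptions about a local server and the vllm_proxy model:

```python
# Hedged sketch: consume /generate_stream server-sent events the way
# test_generate_stream does. Assumes sseclient-py (installed by test.sh)
# and a local server on port 8000.
import json

import requests
import sseclient

url = "http://localhost:8000/v2/models/vllm_proxy/generate_stream"
payload = {"PROMPT": ["hello world"], "STREAM": True, "REPETITION": 3}

r = requests.post(
    url,
    data=json.dumps(payload),
    headers={"Accept": "text/event-stream"},
    stream=True,  # iterate over events as they arrive
)
r.raise_for_status()

texts = []
for event in sseclient.SSEClient(r).events():
    if event.data == "[DONE]":  # end-of-stream marker, if one is sent
        break
    texts.append(json.loads(event.data)["TEXT"])

# The decoupled proxy sends REPETITION responses before the final flag,
# so three "hello world" entries are expected here.
print(texts)
```
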
24 changes: 2 additions & 22 deletions qa/L0_http/test.sh
@@ -635,13 +635,7 @@ wait $SERVER_PID
# https://github.com/mpetazzoni/sseclient
pip install sseclient-py

# Setup model repository
rm -r ${MODELDIR}/*
# TODO: Replace identity model with vllm model
MODEL_NAME="onnx_zero_1_object"
cp -r $DATADIR/qa_identity_model_repository/${MODEL_NAME} ${MODELDIR}/vllm

SERVER_ARGS="-model-repository=${MODELDIR}"
SERVER_ARGS="--model-repository=`pwd`/llm_models"
SERVER_LOG="./inference_server_llm_test.log"
CLIENT_LOG="./llm_test.log"
run_server
@@ -651,22 +645,8 @@ if [ "$SERVER_PID" == "0" ]; then
exit 1
fi

## Curl Tests
# Test that direct curl infer request returns success, more detailed checking
# will be done via python requests in unit tests.
# TODO: Use /generate and /generate_stream routes instead.
curl -s -w %{http_code} -o ./curl.out -X POST localhost:8000/v2/models/${MODEL_NAME}/infer -d '{"inputs": [{"name":"INPUT0","datatype":"BYTES","shape":[1,1],"data":["hello"]}]}'
assert_curl_success "Unexpected infer failure"

# TODO: /generate
#curl -s -w %{http_code} -o ./curl.out -X POST localhost:8000/v2/models/${MODEL_NAME}/generate -d '{"prompt": "hello", "stream": false}'
#assert_curl_success "Unexpected generate failure"

curl -s -w %{http_code} -o ./curl.out -X POST localhost:8000/v2/models/${MODEL_NAME}/generate_stream -d '{"prompt": "hello", "stream": true}'
assert_curl_success "Unexpected generate_stream failure"

## Python Unit Tests
TEST_RESULT_FILE='test_results_llm.txt'
TEST_RESULT_FILE='test_results.txt'
PYTHON_TEST=llm_test.py
EXPECTED_NUM_TESTS=2
set +e
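
For completeness, a hedged sketch of what the shortened test.sh block above drives: start the server against the llm_models repository, then run llm_test.py and expect two tests (EXPECTED_NUM_TESTS=2). Module and file names come from the diff; the standalone unittest invocation is an assumption, not how test.sh itself calls the script:

```python
# Hedged sketch: run the new test module directly and check the count
# against EXPECTED_NUM_TESTS=2. Assumes tritonserver is already running
# with --model-repository=$(pwd)/llm_models and that llm_test.py's own
# dependencies (test_util, requests, sseclient) are importable.
import unittest

import llm_test  # qa/L0_http/llm_test.py from this commit

suite = unittest.defaultTestLoader.loadTestsFromModule(llm_test)
result = unittest.TextTestRunner(verbosity=2).run(suite)
assert result.testsRun == 2, "expected test_generate and test_generate_stream"
```
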
