-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathapp.py
executable file
·176 lines (151 loc) · 6.47 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
#!/usr/bin/env python
# Copyright The Caikit Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Standard
import argparse
import os
import sys
# Third Party
from client.app import get_frontend
import grpc
# Local
from caikit.config import configure, get_config
from caikit.core import ModuleConfig
from caikit.runtime.grpc_server import RuntimeGRPCServer
from caikit.runtime.model_management.model_manager import ModelManager
from caikit.runtime.service_factory import ServicePackageFactory
# Runtime library config: config.yml is looked up under runtime/config relative
# to this file's directory and passed to caikit's configure() at import time,
# i.e. before any of the get_config() calls further down can run.
CONFIG_PATH = os.path.realpath(
    os.path.join(os.path.dirname(__file__), "runtime", "config", "config.yml")
)
configure(CONFIG_PATH)
def _get_module_models(model_manager=None) -> dict:
"""
Determine the modules and models that are loaded (so the UI only shows what is available).
Tries to use model manager to keep backend/frontend in sync, but can run independently by
using the same configs that the backend would use (library runtime config.yml and model config.yml files.
Parameters
----------
model_manager - if set this allows us to ask the ModelManager for loaded models (and details) instead of crawling configs and assuming the backend used the same configs.
Returns
-------
dict - mapping loaded module module_ids to loaded model model_ids
"""
if model_manager:
model_modules = {
k: v.module().metadata["module_id"]
for (k, v) in model_manager.loaded_models.items()
}
else:
model_modules = {}
# Without loading models build a map from local_models_dir configs
local_models_path = get_config().runtime.local_models_dir
if os.path.exists(local_models_path):
for model_id in os.listdir(local_models_path):
try:
# Use the file name as the model id
model_path = os.path.join(local_models_path, model_id)
config = ModuleConfig.load(model_path)
model_modules[model_id] = config.module_id
except Exception: # pylint: disable=broad-exception-caught
# Broad exception, but want to ignore any unusable dirs/files
pass
flipped = {} # map module_id to list of model_ids
for k, v in model_modules.items():
flipped[v] = flipped.get(v, []) + [k]
return flipped
def _parse_args():
parser = argparse.ArgumentParser(
description="Starts app backend (gRPC) and/or frontend (gradio UI). Arguments "
"allow running backend or frontend independently. The default "
"is to run both in one process."
)
# Pattern emulates action=argparse.BooleanOptionalAction but that requires Python >= 3.9
parser.add_argument(
"--backend", default=None, action="store_true", help="Flag for backend-only"
)
parser.add_argument(
"--no-backend",
dest="backend",
action="store_false",
help="Flag to run without backend",
)
parser.add_argument(
"--frontend", default=None, action="store_true", help="Flag for frontend-only"
)
parser.add_argument(
"--no-frontend",
dest="frontend",
action="store_false",
help="Flag to run without frontend",
)
args = parser.parse_args()
# By default backend and frontend both start unless explicitly disabled with --no-backend or --no-frontend
# or if one is explicitly enabled with --backend or --frontend and the other is not.
backend = args.backend or args.backend is None and not args.frontend
frontend = args.frontend or args.frontend is None and not args.backend
if backend and frontend:
print("Command-line enabled Caikit gRPC backend server and frontend gradio UI")
else:
print("Command-line disabled backend and/or frontend:")
if not backend:
print(" * --no-backend")
if not frontend:
print(" * --no-frontend")
return backend, frontend
def start_frontend(backend, inference_service):
    """
    Build the gradio frontend and launch it against the gRPC backend target.

    Parameters
    ----------
    backend - the backend server object when one is running in this process
        (its .port is used and the ModelManager singleton is queried for loaded
        models), or a falsy value when there is none, in which case the port
        comes from the runtime config and models are discovered from configs.
    inference_service - the inference service package, handed to get_frontend
        together with the client channel.
    """
    model_manager = ModelManager.get_instance() if backend else None
    module_models = _get_module_models(model_manager)

    # Using the actual port when we have a backend
    port = backend.port if backend else get_config().runtime.port
    target = f"localhost:{port}"

    # Channel and stub is for client. The context manager closes the channel
    # once the UI returns instead of leaving it open for the rest of the process.
    # NOTE(review): assumes frontend.launch() blocks until the UI stops, which
    # is what the "Stopped" message right after it implies - confirm.
    with grpc.insecure_channel(target) as channel:
        frontend = get_frontend(channel, inference_service, module_models)
        print(f"▶️ Starting the frontend gradio UI with using backend target={target}")
        frontend.launch(share=False)
    print("⏹️ Stopped")
def main() -> int:
    """
    Entry point: start the Caikit gRPC backend and/or the gradio frontend.

    Which servers are started is decided by the command-line flags handled in
    _parse_args().

    Returns
    -------
    int - always 0, used as the process exit status
    """
    # Command-line args allow running backend/frontend separately
    run_backend, run_frontend = _parse_args()

    # inference_service is needed for both Caikit backend server and frontend UI
    service_type = ServicePackageFactory.ServiceType.INFERENCE
    inference_service = ServicePackageFactory().get_service_package(service_type)

    if not run_backend:
        if run_frontend:
            # Frontend only: no in-process server, so pass the falsy flag through
            start_frontend(run_backend, inference_service)
        else:
            print(
                "⚠️ Inference service generation completed, but no backend or frontend servers started due to command-line options."
            )
        return 0

    print("▶️ Starting the backend Caikit inference server...")
    with RuntimeGRPCServer(
        inference_service=inference_service, training_service=None
    ) as server:
        if run_frontend:
            start_frontend(server, inference_service)  # and wait for termination
        else:
            # Block on backend when there is no waiting on frontend (in same process)
            try:
                server.server.wait_for_termination()
            except KeyboardInterrupt as interrupt:
                # This is the expected CTRL-C when blocking on the server only (hiding the stack trace)
                print(
                    f"\n⏹️ Stopping the backend Caikit inference server due to {repr(interrupt)}"
                )
    return 0
# Run only when executed as a script; main()'s return value becomes the exit status.
if __name__ == "__main__":
    sys.exit(main())