-
Notifications
You must be signed in to change notification settings - Fork 31
/
ai-serving.proto
385 lines (331 loc) · 14 KB
/
ai-serving.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
syntax = "proto3";
import "google/protobuf/wrappers.proto";
import "google/protobuf/timestamp.proto";
package com.autodeployai.serving.protobuf;
// Gives clients access to the AI models served by AI-Serving.
service DeploymentService {
  // Validates the model carried by the request and returns its ModelInfo.
  rpc Validate(ValidateRequest) returns (ModelInfo);

  // Deploys a model under a servable name.
  rpc Deploy(DeployRequest) returns (DeployResponse);

  // Un-deploys the model identified by the request's model spec.
  rpc Undeploy(UndeployRequest) returns (UndeployResponse);

  // Runs a prediction against the model identified by the request.
  rpc Predict(PredictRequest) returns (PredictResponse);

  // Returns the metadata of the model identified by the request.
  rpc GetModelMetadata(GetModelMetadataRequest)
      returns (GetModelMetadataResponse);
}
// Carries the model to validate together with its type.
// The types currently supported are "PMML" and "ONNX".
message ValidateRequest {
  // Model content to validate.
  bytes model = 1;

  // Model type: "PMML" or "ONNX".
  string type = 2;
}
// Carries a servable name plus the model to deploy and its type.
// The types currently supported are "PMML" and "ONNX".
message DeployRequest {
  // Servable name to deploy the model under.
  string name = 1;

  // Model content to deploy.
  bytes model = 2;

  // Model type: "PMML" or "ONNX".
  string type = 3;
}
// Result of a Deploy call.
message DeployResponse {
  // Specification of the deployed model: the servable name that was
  // specified and the deployed version, which starts from 1.
  ModelSpec model_spec = 1;
}
// Request to un-deploy a model.
message UndeployRequest {
  // Identifies the model to un-deploy.
  ModelSpec model_spec = 1;
}
// Result of an Undeploy call.
message UndeployResponse {
  // Identifies the model that has been un-deployed.
  ModelSpec model_spec = 1;
}
// Request for the metadata of a model.
message GetModelMetadataRequest {
  // Identifies the model whose metadata is requested.
  ModelSpec model_spec = 1;
}
// Result of a GetModelMetadata call.
message GetModelMetadataResponse {
  // Identifies the model whose metadata is returned.
  ModelSpec model_spec = 1;

  // The model metadata entries.
  repeated ModelMetadata metadata = 2;
}
// Describes a single model: its type, fields, provenance and file details.
message ModelInfo {
  // Type of the model.
  string type = 1;

  // Serialization type of the model.
  string serialization = 2;

  // Runtime library that handles this kind of model.
  string runtime = 3;

  // Predictor fields involved in predicting with this model.
  repeated Field predictors = 4;

  // Target fields.
  repeated Field targets = 5;

  // Output fields this model could produce.
  repeated Field outputs = 6;

  // Redundancy fields that this model does not pick up.
  repeated Field redundancies = 7;

  // Algorithm of the model.
  string algorithm = 8;

  // Mining function: regression, classification, clustering, or
  // associationRules.
  string function_name = 9;

  // Description of the model.
  string description = 10;

  // Version of the model.
  google.protobuf.Int32Value version = 11;

  // Version of the model serialization standard.
  string format_version = 12;

  // MD5 hash string of the model file.
  string hash = 13;

  // Size of the model file, in bytes.
  int64 size = 14;

  // Timestamp at which the model was created.
  google.protobuf.Timestamp created_at = 15;

  // Application that generated the model.
  string app = 16;

  // Version of that application.
  string app_version = 17;

  // Copyright of the model.
  string copyright = 18;

  // Original source of the model.
  string source = 19;
}
// Information about one model field.
message Field {
  // Unique name of the field.
  string name = 1;

  // Type of the field. There are two main kinds:
  //  - scalar types for PMML models: float, double, integer, string and so on.
  //  - tensor, map, and list for ONNX models.
  string type = 2;

  // Determines which operations are defined on the values:
  //  - categorical
  //  - ordinal
  //  - continuous
  string optype = 3;

  // Shape dimensions of the field; mainly used for tensor fields and
  // none for the others.
  repeated int64 shape = 4;

  // String describing the valid values of this field.
  string values = 5;
}
// Metadata of a model, including its versions.
message ModelMetadata {
  // ID of the model.
  string id = 1;

  // Unique name of the model.
  string name = 2;

  // Timestamp at which the model was created.
  google.protobuf.Timestamp created_at = 3;

  // Timestamp at which the model was last updated.
  // NOTE(review): spelled `update_at` (not `updated_at`, unlike `created_at`);
  // the name is kept as-is because renaming would change generated code.
  google.protobuf.Timestamp update_at = 4;

  // Number of the latest version.
  int32 latest_version = 5;

  // The version(s) of the model.
  repeated ModelInfo versions = 6;
}
// Identifies a model by name and, optionally, version.
message ModelSpec {
  // Servable name. Required.
  string name = 1;

  // Version of the model to use. Optional: when left unspecified, the
  // latest version is used.
  google.protobuf.Int32Value version = 2;
}
// Request for a prediction.
message PredictRequest {
  // Identifies the model to predict with.
  ModelSpec model_spec = 1;

  // The input payload.
  RecordSpec X = 2;

  // Output filters naming the output fields that need to be returned.
  // An empty list means all outputs are included.
  repeated string filter = 3;
}
// Response to a predict request that ran successfully.
message PredictResponse {
  // Model specification associated with this response.
  ModelSpec model_spec = 1;

  // The output result.
  RecordSpec result = 2;
}
// Holds multiple records. Two formats are supported:
//  - `records` : list like [{column -> value}, … , {column -> value}]
//  - `split` : dict like {columns -> [columns], data -> [values]}
message RecordSpec {
  // Records of the `records` format.
  repeated Record records = 1;

  // Column names of the `split` format.
  repeated string columns = 2;

  // Value lists of the `split` format.
  repeated ListValue data = 3;
}
// A single record of named values.
message Record {
  // Dynamically typed values keyed by name; the map is unordered.
  map<string, Value> fields = 1;
}
// A dynamically typed value: the `Value` of `Struct`, extended with
// support for tensor values.
message Value {
  // Which kind of value this is.
  oneof kind {
    // A null value.
    NullValue null_value = 1;

    // A double value.
    double number_value = 2;

    // A string value.
    string string_value = 3;

    // A boolean value.
    bool bool_value = 4;

    // A structured value.
    Record record_value = 5;

    // A repeated `Value`.
    ListValue list_value = 6;

    // A tensor `Value`.
    TensorProto tensor_value = 7;
  }
}
// Singleton enum used by `Value.null_value` to represent null.
enum NullValue {
  // The null value.
  NULL_VALUE = 0;
}
// A list of dynamically typed values.
message ListValue {
  // The dynamically typed elements, as a repeated field.
  repeated Value values = 1;
}
// Tensors
//
// A serialized tensor value.
// Compatible with the onnx.TensorProto:
// https://github.com/onnx/onnx/blob/main/onnx/onnx.proto3
message TensorProto {
  // Element data types referenced by the `data_type` field.
  enum DataType {
    UNDEFINED = 0;

    // Basic types.
    FLOAT = 1;   // float
    UINT8 = 2;   // uint8_t
    INT8 = 3;    // int8_t
    UINT16 = 4;  // uint16_t
    INT16 = 5;   // int16_t
    INT32 = 6;   // int32_t
    INT64 = 7;   // int64_t
    STRING = 8;  // string
    BOOL = 9;    // bool

    // IEEE754 half-precision floating-point format (16 bits wide).
    // This format has 1 sign bit, 5 exponent bits, and 10 mantissa bits.
    FLOAT16 = 10;

    DOUBLE = 11;
    UINT32 = 12;
    UINT64 = 13;
    COMPLEX64 = 14;   // complex with float32 real and imaginary components
    COMPLEX128 = 15;  // complex with float64 real and imaginary components

    // Non-IEEE floating-point format based on IEEE754 single-precision
    // floating-point number truncated to 16 bits.
    // This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
    BFLOAT16 = 16;

    // Non-IEEE floating-point format based on papers
    // FP8 Formats for Deep Learning, https://arxiv.org/abs/2209.05433,
    // 8-bit Numerical Formats For Deep Neural Networks,
    // https://arxiv.org/pdf/2206.02915.pdf.
    // Operators supported FP8 are Cast, CastLike, QuantizeLinear,
    // DequantizeLinear.
    // The computation usually happens inside a block quantize / dequantize
    // fused by the runtime.

    // float 8, mostly used for coefficients, supports nan, not inf
    FLOAT8E4M3FN = 17;
    // float 8, mostly used for coefficients, supports nan, not inf,
    // no negative zero
    FLOAT8E4M3FNUZ = 18;
    // follows IEEE 754, supports nan, inf, mostly used for gradients
    FLOAT8E5M2 = 19;
    // follows IEEE 754, supports nan, not inf, mostly used for gradients,
    // no negative zero
    FLOAT8E5M2FNUZ = 20;

    // 4-bit data-types
    UINT4 = 21;  // Unsigned integer in range [0, 15]
    // Signed integer in range [-8, 7], using two's-complement representation
    INT4 = 22;

    // Future extensions go here.
  }

  // The shape of the tensor.
  repeated int64 dims = 1;

  // The data type of the tensor.
  // This field MUST have a valid TensorProto.DataType value.
  int32 data_type = 2;

  // For very large tensors, we may want to store them in chunks, in which
  // case the following fields will specify the segment that is stored in
  // the current TensorProto.
  message Segment {
    int64 begin = 1;
    int64 end = 2;
  }
  Segment segment = 3;

  // Tensor content must be organized in row-major order.
  //
  // Depending on the data_type field, exactly one of the fields below with
  // name ending in _data is used to store the elements of the tensor.

  // For float and complex64 values.
  // Complex64 tensors are encoded as a single array of floats,
  // with the real components appearing in odd numbered positions,
  // and the corresponding imaginary component appearing in the
  // subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
  // is encoded as [1.0, 2.0, 3.0, 4.0])
  // When this field is present, the data_type field MUST be FLOAT or
  // COMPLEX64.
  repeated float float_data = 4 [packed = true];

  // For int32, uint8, int8, uint16, int16, uint4, int4, bool, float8 and
  // float16 values.
  // float16 and float8 values must be bit-wise converted to an uint16_t prior
  // to writing to the buffer.
  // uint4 and int4 values must be packed to 4bitx2 prior to writing to the
  // buffer; the first element is stored in the 4 LSB and the second element
  // is stored in the 4 MSB.
  // When this field is present, the data_type field MUST be
  // INT32, INT16, INT8, INT4, UINT16, UINT8, UINT4, BOOL, FLOAT16, BFLOAT16,
  // FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ.
  repeated int32 int32_data = 5 [packed = true];

  // For strings.
  // Each element of string_data is a UTF-8 encoded Unicode
  // string. No trailing null, no leading BOM. The protobuf "string"
  // scalar type is not used to match ML community conventions.
  // When this field is present, the data_type field MUST be STRING.
  repeated bytes string_data = 6;

  // For int64.
  // When this field is present, the data_type field MUST be INT64.
  repeated int64 int64_data = 7 [packed = true];

  // Optionally, a name for the tensor.
  string name = 8;  // namespace Value

  // A human-readable documentation for this tensor. Markdown is allowed.
  string doc_string = 12;

  // Serializations can either use one of the fields above, or use this
  // raw bytes field. The only exception is the string case, where one is
  // required to store the content in the repeated bytes string_data field.
  //
  // When this raw_data field is used to store tensor value, elements MUST
  // be stored in as fixed-width, little-endian order.
  // Floating-point data types MUST be stored in IEEE 754 format.
  // Complex64 elements must be written as two consecutive FLOAT values, real
  // component first.
  // Complex128 elements must be written as two consecutive DOUBLE values,
  // real component first.
  // Boolean type MUST be written one byte per tensor element (00000001 for
  // true, 00000000 for false).
  // uint4 and int4 values must be packed to 4bitx2; the first element is
  // stored in the 4 LSB and the second element is stored in the 4 MSB.
  //
  // Note: the advantage of specific field rather than the raw_data field is
  // that in some cases (e.g. int data), protobuf does a better packing via
  // variable length storage, and may lead to smaller binary footprint.
  // When this field is present, the data_type field MUST NOT be STRING or
  // UNDEFINED.
  bytes raw_data = 9;

  // Data can be stored inside the protobuf file using type-specific fields or
  // raw_data. Alternatively, raw bytes data can be stored in an external
  // file, using the external_data field.
  // external_data stores key-value pairs describing data location.
  // Recognized keys are:
  // - "location" (required) - POSIX filesystem path relative to the directory
  //                           where the ONNX protobuf model was stored
  // - "offset" (optional) - position of byte at which stored data begins.
  //                         Integer stored as string. Offset values SHOULD be
  //                         multiples of 4096 (page size) to enable mmap
  //                         support.
  // - "length" (optional) - number of bytes containing data. Integer stored
  //                         as string.
  // - "checksum" (optional) - SHA1 digest of the file specified under the
  //                           'location' key.
  repeated StringStringEntryProto external_data = 13;

  // Location of the data for this tensor. MUST be one of:
  // - DEFAULT - data stored inside the protobuf message. Data is stored in
  //             raw_data (if set) otherwise in type-specified field.
  // - EXTERNAL - data stored in an external location as described by
  //              external_data field.
  enum DataLocation {
    DEFAULT = 0;
    EXTERNAL = 1;
  }

  // If value not set, data is stored in raw_data (if set) otherwise in
  // type-specified field.
  DataLocation data_location = 14;

  // For double.
  // Complex128 tensors are encoded as a single array of doubles,
  // with the real components appearing in odd numbered positions,
  // and the corresponding imaginary component appearing in the
  // subsequent even numbered position. (e.g., [1.0 + 2.0i, 3.0 + 4.0i]
  // is encoded as [1.0, 2.0, 3.0, 4.0])
  // When this field is present, the data_type field MUST be DOUBLE or
  // COMPLEX128.
  repeated double double_data = 10 [packed = true];

  // For uint64 and uint32 values.
  // When this field is present, the data_type field MUST be
  // UINT32 or UINT64.
  repeated uint64 uint64_data = 11 [packed = true];

  // Named metadata values; keys should be distinct.
  repeated StringStringEntryProto metadata_props = 16;
}
// A string-to-string key/value entry.
// StringStringEntryProto follows the pattern for cross-proto-version maps.
// See https://developers.google.com/protocol-buffers/docs/proto3#maps
message StringStringEntryProto {
  // Entry key.
  string key = 1;

  // Entry value.
  string value = 2;
}