<pre class='metadata'>
Title: Model Loader API
Shortname: model-loader
Level: 1
Status: w3c/CG-DRAFT
Status Text: <strong>This incubation is on pause, see <a href="https://github.com/webmachinelearning/model-loader/issues/36">discussion</a> for the latest updates.</strong>
Group: webml
URL: https://webmachinelearning.github.io/model-loader/
!Explainer: <a href="https://github.com/webmachinelearning/model-loader/blob/master/explainer.md">explainer.md</a>
Editor: Jonathan Bingham 114606, Google Inc. https://google.com
Abstract: This document describes an API to load a custom pre-trained machine learning model.
Logo: https://webmachinelearning.github.io/webmachinelearning-logo.png
</pre>
<pre class="anchors">
urlPrefix: https://webmachinelearning.github.io/webnn/; url: dom-navigator-ml; type: interface; text: ML
</pre>
<pre class="anchors">
urlPrefix: https://webmachinelearning.github.io/webnn/; spec: webnn
type: interface
text: ML; url: ml
text: MLContextOptions; url: dictdef-mlcontextoptions
text: MLContext; url: mlcontext
text: MLNamedInputs; url: typedefdef-mlnamedinputs
text: MLNamedOutputs; url: typedefdef-mlnamedoutputs
</pre>
<pre class="link-defaults">
spec: webnn; type: interface; text: ML
</pre>
Introduction {#intro}
=====================
For the introduction and use cases, please see the <a href="https://github.com/webmachinelearning/model-loader/blob/master/explainer.md">explainer.md</a>.
For illustration purposes, the API and examples use the <a href="https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/schema/schema.fbs">TF Lite flatbuffer</a> format.
API {#api}
==========
<pre class="idl">
enum MLModelFormat {
// TensorFlow Lite flatbuffer.
"tflite"
};
enum MLDevicePreference {
// Let the backend select the most suitable device.
"auto",
// The backend will use the GPU for model inference. If an operator is not
// supported by the GPU, it will fall back to the CPU.
"gpu",
// The backend will use the CPU for model inference.
"cpu"
};
enum MLPowerPreference {
// Let the backend select the most suitable behavior.
"auto",
// Prioritizes execution speed over power consumption.
"high-performance",
// Prioritizes power consumption over other considerations such as execution
// speed.
"low-power",
};
dictionary MLContextOptions {
// Preferred kind of device to use.
MLDevicePreference devicePreference = "auto";
// Preference as related to power consumption.
MLPowerPreference powerPreference = "auto";
// Model format for the model loader API.
MLModelFormat modelFormat = "tflite";
// Number of threads to use.
// A value of 0 means the backend determines it automatically.
unsigned long numThreads = 0;
};
[Exposed=Window]
interface ML {
Promise<MLContext> createContext(optional MLContextOptions options = {});
};
enum MLDataType {
// "Unknown" doesn't mean "unsupported". The background can support more types
// than which are explicitly listed here (e.g., TfLite has complex numbers).
// We treat them as "unknown" to avoid exposing too many details of the
// backends from the beginning.
"unknown",
"int64",
"uint64",
"float64",
"int32",
"uint32",
"float32",
"int16",
"uint16",
"float16",
"int8",
"uint8",
"bool",
};
dictionary MLTensor {
required ArrayBufferView data;
required sequence<unsigned long> dimensions;
};
dictionary MLTensorInfo {
required DOMString name;
required MLDataType type;
required sequence<unsigned long> dimensions;
};
[SecureContext, Exposed=Window]
interface MLModel {
Promise<record<DOMString, MLTensor>> compute(record<DOMString, MLTensor> inputs);
sequence<MLTensorInfo> inputs();
sequence<MLTensorInfo> outputs();
};
[Exposed=Window]
interface MLModelLoader {
constructor(MLContext context);
Promise<MLModel> load(ArrayBuffer modelBuffer);
};
</pre>
Examples {#examples}
==================
<pre highlight="js">
// First, create an MLContext. This is consistent with the WebNN API, and we
// add two new fields, "numThreads" and "modelFormat".
const context = await navigator.ml.createContext(
{ devicePreference: "cpu",
powerPreference: "low-power",
numThreads: 0, // the default 0 means
// "decide automatically".
modelFormat: "tflite" });
// Then create the model loader using the ML context.
const loader = new MLModelLoader(context);
// In the first version, we only support loading models from ArrayBuffers. We
// believe this covers most use cases. Web developers can download the model,
// e.g., with the fetch API. We can add new "load" functions in the future if
// they are needed.
const modelUrl = 'https://path/to/model/file';
const modelBuffer = await fetch(modelUrl)
.then(response => response.arrayBuffer());
// Load the model.
const model = await loader.load(modelBuffer);
// Use the `model.compute` function to get the output of the model from some
// inputs. Example ways of using this function include:
// 1. When the model has only one input tensor, one can simply pass in the
// tensor without specifying its name (the user can still designate this
// input tensor by name if they like).
z = await model.compute({ data: new Float32Array([10]),
dimensions: [1] });
// 2. When there are multiple input tensors, the user has to designate the
// input tensors by their names.
z = await model.compute({ x: { data: new Float32Array([10]),
dimensions: [1] },
y: { data: new Float32Array([20]),
dimensions: [1] } });
// 3. The client can also specify the output tensor. This is consistent with the
// WebNN API and can be useful, e.g., when the output tensor is a GPU buffer. In
// this case, the function will return an empty promise. The dimensions of the
// specified output tensor must match the dimensions of the model's output
// tensor.
z_buffer = ml.tensor({data: new Float64Array(1),
dimensions: [1] });
await model.compute({ data: new Float32Array([10]),
dimensions: [1] },
z_buffer);
// For the output tensor(s):
// Similar to the input arguments, if there is only one output tensor, the
// `compute` function returns a single tensor in cases 1 and 2, and there is no
// need to specify the name of the output tensor in case 3. But if there are
// multiple output tensors, the output in cases 1 and 2 will be a map from
// tensor names to tensors, and in case 3, the output argument must be a map
// from tensor names to tensors too.
// For cases 1 and 2, where the actual output data is located depends on the
// context: with a CPU context, the output tensors' buffers will be RAM
// buffer(s), and with a GPU context, they will be GPU buffer(s).
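// As a non-normative sketch of the multiple-output case: assuming the model
// has two output tensors named "out1" and "out2" (hypothetical names), the
// resolved value of `compute` is a map keyed by those names.
results = await model.compute({ x: { data: new Float32Array([10]),
dimensions: [1] } });
out1 = results["out1"]; // an MLTensor: { data, dimensions }
out2 = results["out2"];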
</pre>
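The `MLModel` interface also exposes `inputs()` and `outputs()`, which describe
each of the model's input and output tensors as an `MLTensorInfo` (name, data
type, and dimensions). The following is a minimal, non-normative sketch of
inspecting the model loaded above; the printed names, types, and dimensions
depend entirely on the model.
<pre highlight="js">
// Enumerate the model's input and output tensor metadata, e.g., to check that
// the tensors passed to `compute` have the expected types and shapes.
for (const info of model.inputs()) {
  console.log("input", info.name, info.type, info.dimensions);
}
for (const info of model.outputs()) {
  console.log("output", info.name, info.type, info.dimensions);
}
</pre>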