-
Notifications
You must be signed in to change notification settings - Fork 6
/
compile.js
358 lines (339 loc) · 14.5 KB
/
compile.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
"use strict"
var uniq = require("uniq")
// This function generates very simple loops analogous to how you typically traverse arrays (the outermost loop corresponds to the slowest changing index, the innermost loop to the fastest changing index)
// TODO: If two arrays have the same strides (and offsets) there is potential for decreasing the number of "pointers" and related variables. The drawback is that the type signature would become more specific and that there would thus be less potential for caching, but it might still be worth it, especially when dealing with large numbers of arguments.
function innerFill(order, proc, body) {
var dimension = order.length
, nargs = proc.arrayArgs.length
, has_index = proc.indexArgs.length>0
, code = []
, vars = []
, idx=0, pidx=0, i, j
for(i=0; i<dimension; ++i) { // Iteration variables
vars.push(["i",i,"=0"].join(""))
}
//Compute scan deltas
for(j=0; j<nargs; ++j) {
for(i=0; i<dimension; ++i) {
pidx = idx
idx = order[i]
if(i === 0) { // The innermost/fastest dimension's delta is simply its stride
vars.push(["d",j,"s",i,"=t",j,"p",idx].join(""))
} else { // For other dimensions the delta is basically the stride minus something which essentially "rewinds" the previous (more inner) dimension
vars.push(["d",j,"s",i,"=(t",j,"p",idx,"-s",pidx,"*t",j,"p",pidx,")"].join(""))
}
}
}
if (vars.length > 0) {
code.push("var " + vars.join(","))
}
//Scan loop
for(i=dimension-1; i>=0; --i) { // Start at largest stride and work your way inwards
idx = order[i]
code.push(["for(i",i,"=0;i",i,"<s",idx,";++i",i,"){"].join(""))
}
//Push body of inner loop
code.push(body)
//Advance scan pointers
for(i=0; i<dimension; ++i) {
pidx = idx
idx = order[i]
for(j=0; j<nargs; ++j) {
code.push(["p",j,"+=d",j,"s",i].join(""))
}
if(has_index) {
if(i > 0) {
code.push(["index[",pidx,"]-=s",pidx].join(""))
}
code.push(["++index[",idx,"]"].join(""))
}
code.push("}")
}
return code.join("\n")
}
// Generate "outer" loops that loop over blocks of data, applying "inner" loops to the blocks by manipulating the local variables in such a way that the inner loop only "sees" the current block.
// TODO: If this is used, then the previous declaration (done by generateCwiseOp) of s* is essentially unnecessary.
// I believe the s* are not used elsewhere (in particular, I don't think they're used in the pre/post parts and "shape" is defined independently), so it would be possible to make defining the s* dependent on what loop method is being used.
function outerFill(matched, order, proc, body) {
var dimension = order.length
, nargs = proc.arrayArgs.length
, blockSize = proc.blockSize
, has_index = proc.indexArgs.length > 0
, code = []
for(var i=0; i<nargs; ++i) {
code.push(["var offset",i,"=p",i].join(""))
}
//Generate loops for unmatched dimensions
// The order in which these dimensions are traversed is fairly arbitrary (from small stride to large stride, for the first argument)
// TODO: It would be nice if the order in which these loops are placed would also be somehow "optimal" (at the very least we should check that it really doesn't hurt us if they're not).
for(var i=matched; i<dimension; ++i) {
code.push(["for(var j"+i+"=SS[", order[i], "]|0;j", i, ">0;){"].join("")) // Iterate back to front
code.push(["if(j",i,"<",blockSize,"){"].join("")) // Either decrease j by blockSize (s = blockSize), or set it to zero (after setting s = j).
code.push(["s",order[i],"=j",i].join(""))
code.push(["j",i,"=0"].join(""))
code.push(["}else{s",order[i],"=",blockSize].join(""))
code.push(["j",i,"-=",blockSize,"}"].join(""))
if(has_index) {
code.push(["index[",order[i],"]=j",i].join(""))
}
}
for(var i=0; i<nargs; ++i) {
var indexStr = ["offset"+i]
for(var j=matched; j<dimension; ++j) {
indexStr.push(["j",j,"*t",i,"p",order[j]].join(""))
}
code.push(["p",i,"=(",indexStr.join("+"),")"].join(""))
}
code.push(innerFill(order, proc, body))
for(var i=matched; i<dimension; ++i) {
code.push("}")
}
return code.join("\n")
}
//Count the number of compatible inner orders
// This is the length of the longest common prefix of the arrays in orders.
// Each array in orders lists the dimensions of the correspond ndarray in order of increasing stride.
// This is thus the maximum number of dimensions that can be efficiently traversed by simple nested loops for all arrays.
function countMatches(orders) {
var matched = 0, dimension = orders[0].length
while(matched < dimension) {
for(var j=1; j<orders.length; ++j) {
if(orders[j][matched] !== orders[0][matched]) {
return matched
}
}
++matched
}
return matched
}
//Processes a block according to the given data types
// Replaces variable names by different ones, either "local" ones (that are then ferried in and out of the given array) or ones matching the arguments that the function performing the ultimate loop will accept.
function processBlock(block, proc, dtypes) {
var code = block.body
var pre = []
var post = []
for(var i=0; i<block.args.length; ++i) {
var carg = block.args[i]
if(carg.count <= 0) {
continue
}
var re = new RegExp(carg.name, "g")
var ptrStr = ""
var arrNum = proc.arrayArgs.indexOf(i)
switch(proc.argTypes[i]) {
case "offset":
var offArgIndex = proc.offsetArgIndex.indexOf(i)
var offArg = proc.offsetArgs[offArgIndex]
arrNum = offArg.array
ptrStr = "+q" + offArgIndex // Adds offset to the "pointer" in the array
case "array":
ptrStr = "p" + arrNum + ptrStr
var localStr = "l" + i
var arrStr = "a" + arrNum
if (proc.arrayBlockIndices[arrNum] === 0) { // Argument to body is just a single value from this array
if(carg.count === 1) { // Argument/array used only once(?)
if(dtypes[arrNum] === "generic") {
if(carg.lvalue) {
pre.push(["var ", localStr, "=", arrStr, ".get(", ptrStr, ")"].join("")) // Is this necessary if the argument is ONLY used as an lvalue? (keep in mind that we can have a += something, so we would actually need to check carg.rvalue)
code = code.replace(re, localStr)
post.push([arrStr, ".set(", ptrStr, ",", localStr,")"].join(""))
} else {
code = code.replace(re, [arrStr, ".get(", ptrStr, ")"].join(""))
}
} else {
code = code.replace(re, [arrStr, "[", ptrStr, "]"].join(""))
}
} else if(dtypes[arrNum] === "generic") {
pre.push(["var ", localStr, "=", arrStr, ".get(", ptrStr, ")"].join("")) // TODO: Could we optimize by checking for carg.rvalue?
code = code.replace(re, localStr)
if(carg.lvalue) {
post.push([arrStr, ".set(", ptrStr, ",", localStr,")"].join(""))
}
} else {
pre.push(["var ", localStr, "=", arrStr, "[", ptrStr, "]"].join("")) // TODO: Could we optimize by checking for carg.rvalue?
code = code.replace(re, localStr)
if(carg.lvalue) {
post.push([arrStr, "[", ptrStr, "]=", localStr].join(""))
}
}
} else { // Argument to body is a "block"
var reStrArr = [carg.name], ptrStrArr = [ptrStr]
for(var j=0; j<Math.abs(proc.arrayBlockIndices[arrNum]); j++) {
reStrArr.push("\\s*\\[([^\\]]+)\\]")
ptrStrArr.push("$" + (j+1) + "*t" + arrNum + "b" + j) // Matched index times stride
}
re = new RegExp(reStrArr.join(""), "g")
ptrStr = ptrStrArr.join("+")
if(dtypes[arrNum] === "generic") {
/*if(carg.lvalue) {
pre.push(["var ", localStr, "=", arrStr, ".get(", ptrStr, ")"].join("")) // Is this necessary if the argument is ONLY used as an lvalue? (keep in mind that we can have a += something, so we would actually need to check carg.rvalue)
code = code.replace(re, localStr)
post.push([arrStr, ".set(", ptrStr, ",", localStr,")"].join(""))
} else {
code = code.replace(re, [arrStr, ".get(", ptrStr, ")"].join(""))
}*/
throw new Error("cwise: Generic arrays not supported in combination with blocks!")
} else {
// This does not produce any local variables, even if variables are used multiple times. It would be possible to do so, but it would complicate things quite a bit.
code = code.replace(re, [arrStr, "[", ptrStr, "]"].join(""))
}
}
break
case "scalar":
code = code.replace(re, "Y" + proc.scalarArgs.indexOf(i))
break
case "index":
code = code.replace(re, "index")
break
case "shape":
code = code.replace(re, "shape")
break
}
}
return [pre.join("\n"), code, post.join("\n")].join("\n").trim()
}
function typeSummary(dtypes) {
var summary = new Array(dtypes.length)
var allEqual = true
for(var i=0; i<dtypes.length; ++i) {
var t = dtypes[i]
var digits = t.match(/\d+/)
if(!digits) {
digits = ""
} else {
digits = digits[0]
}
if(t.charAt(0) === 0) {
summary[i] = "u" + t.charAt(1) + digits
} else {
summary[i] = t.charAt(0) + digits
}
if(i > 0) {
allEqual = allEqual && summary[i] === summary[i-1]
}
}
if(allEqual) {
return summary[0]
}
return summary.join("")
}
//Generates a cwise operator
function generateCWiseOp(proc, typesig) {
//Compute dimension
// Arrays get put first in typesig, and there are two entries per array (dtype and order), so this gets the number of dimensions in the first array arg.
var dimension = (typesig[1].length - Math.abs(proc.arrayBlockIndices[0]))|0
var orders = new Array(proc.arrayArgs.length)
var dtypes = new Array(proc.arrayArgs.length)
for(var i=0; i<proc.arrayArgs.length; ++i) {
dtypes[i] = typesig[2*i]
orders[i] = typesig[2*i+1]
}
//Determine where block and loop indices start and end
var blockBegin = [], blockEnd = [] // These indices are exposed as blocks
var loopBegin = [], loopEnd = [] // These indices are iterated over
var loopOrders = [] // orders restricted to the loop indices
for(var i=0; i<proc.arrayArgs.length; ++i) {
if (proc.arrayBlockIndices[i]<0) {
loopBegin.push(0)
loopEnd.push(dimension)
blockBegin.push(dimension)
blockEnd.push(dimension+proc.arrayBlockIndices[i])
} else {
loopBegin.push(proc.arrayBlockIndices[i]) // Non-negative
loopEnd.push(proc.arrayBlockIndices[i]+dimension)
blockBegin.push(0)
blockEnd.push(proc.arrayBlockIndices[i])
}
var newOrder = []
for(var j=0; j<orders[i].length; j++) {
if (loopBegin[i]<=orders[i][j] && orders[i][j]<loopEnd[i]) {
newOrder.push(orders[i][j]-loopBegin[i]) // If this is a loop index, put it in newOrder, subtracting loopBegin, to make sure that all loopOrders are using a common set of indices.
}
}
loopOrders.push(newOrder)
}
//First create arguments for procedure
var arglist = ["SS"] // SS is the overall shape over which we iterate
var code = ["'use strict'"]
var vars = []
for(var j=0; j<dimension; ++j) {
vars.push(["s", j, "=SS[", j, "]"].join("")) // The limits for each dimension.
}
for(var i=0; i<proc.arrayArgs.length; ++i) {
arglist.push("a"+i) // Actual data array
arglist.push("t"+i) // Strides
arglist.push("p"+i) // Offset in the array at which the data starts (also used for iterating over the data)
for(var j=0; j<dimension; ++j) { // Unpack the strides into vars for looping
vars.push(["t",i,"p",j,"=t",i,"[",loopBegin[i]+j,"]"].join(""))
}
for(var j=0; j<Math.abs(proc.arrayBlockIndices[i]); ++j) { // Unpack the strides into vars for block iteration
vars.push(["t",i,"b",j,"=t",i,"[",blockBegin[i]+j,"]"].join(""))
}
}
for(var i=0; i<proc.scalarArgs.length; ++i) {
arglist.push("Y" + i)
}
if(proc.shapeArgs.length > 0) {
vars.push("shape=SS.slice(0)") // Makes the shape over which we iterate available to the user defined functions (so you can use width/height for example)
}
if(proc.indexArgs.length > 0) {
// Prepare an array to keep track of the (logical) indices, initialized to dimension zeroes.
var zeros = new Array(dimension)
for(var i=0; i<dimension; ++i) {
zeros[i] = "0"
}
vars.push(["index=[", zeros.join(","), "]"].join(""))
}
for(var i=0; i<proc.offsetArgs.length; ++i) { // Offset arguments used for stencil operations
var off_arg = proc.offsetArgs[i]
var init_string = []
for(var j=0; j<off_arg.offset.length; ++j) {
if(off_arg.offset[j] === 0) {
continue
} else if(off_arg.offset[j] === 1) {
init_string.push(["t", off_arg.array, "p", j].join(""))
} else {
init_string.push([off_arg.offset[j], "*t", off_arg.array, "p", j].join(""))
}
}
if(init_string.length === 0) {
vars.push("q" + i + "=0")
} else {
vars.push(["q", i, "=", init_string.join("+")].join(""))
}
}
//Prepare this variables
var thisVars = uniq([].concat(proc.pre.thisVars)
.concat(proc.body.thisVars)
.concat(proc.post.thisVars))
vars = vars.concat(thisVars)
if (vars.length > 0) {
code.push("var " + vars.join(","))
}
for(var i=0; i<proc.arrayArgs.length; ++i) {
code.push("p"+i+"|=0")
}
//Inline prelude
if(proc.pre.body.length > 3) {
code.push(processBlock(proc.pre, proc, dtypes))
}
//Process body
var body = processBlock(proc.body, proc, dtypes)
var matched = countMatches(loopOrders)
if(matched < dimension) {
code.push(outerFill(matched, loopOrders[0], proc, body)) // TODO: Rather than passing loopOrders[0], it might be interesting to look at passing an order that represents the majority of the arguments for example.
} else {
code.push(innerFill(loopOrders[0], proc, body))
}
//Inline epilog
if(proc.post.body.length > 3) {
code.push(processBlock(proc.post, proc, dtypes))
}
if(proc.debug) {
console.log("-----Generated cwise routine for ", typesig, ":\n" + code.join("\n") + "\n----------")
}
var loopName = [(proc.funcName||"unnamed"), "_cwise_loop_", orders[0].join("s"),"m",matched,typeSummary(dtypes)].join("")
var f = new Function(["function ",loopName,"(", arglist.join(","),"){", code.join("\n"),"} return ", loopName].join(""))
return f()
}
module.exports = generateCWiseOp