-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
- Loading branch information
There are no files selected for viewing
This file was deleted.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
{
    "name": "BurstBLAS",
    "references": [
        "Barracuda",
        "Unity.Burst"
    ],
    "optionalUnityReferences": [],
    "includePlatforms": [],
    "excludePlatforms": [],
    "allowUnsafeCode": true
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
using System; | ||
using System.Collections; | ||
using System.Collections.Generic; | ||
using System.Runtime.InteropServices; | ||
using Barracuda; | ||
using Unity.Burst; | ||
using Unity.Collections; | ||
using Unity.Collections.LowLevel.Unsafe; | ||
using Unity.Jobs; | ||
using Unity.Jobs.LowLevel.Unsafe; | ||
using UnityEngine; | ||
using UnityEngine.Scripting; | ||
|
||
// [Preserve] keeps this type from being removed by managed code stripping;
// Barracuda discovers BLASPlugin implementations at runtime, so there is no
// static reference to this class.
[Preserve]
public class BurstBLAS : BLASPlugin
{
    /// <summary>
    /// Probes whether this plugin can run here: constructing the job type
    /// throws at runtime if its dependencies (Jobs/Collections/Burst) are
    /// missing, in which case the plugin reports itself unsupported.
    /// </summary>
    public bool IsCurrentPlatformSupported()
    {
        try
        {
            // Sanity test if all the dependencies of the job are met at runtime
            // Also prevent compiler from optimising this out
            var test = new UnsafeMatrixBlockMultiplyUnrolled8xhJob();
            D.Log($"Loaded: {test}");
        }
        catch (Exception e)
        {
            D.Log($"C# Job system not found. Disabling {this.GetType()}. Error: {e}");
            return false;
        }
        return true;
    }

    /// <summary>
    /// Single-precision blocked matrix multiply of A and B into C, executed on
    /// the C# Job System. Work is parallelised over bs-wide column blocks of B
    /// (ceil(BM / bs) job iterations, batch size 4) and the call blocks until
    /// the job completes.
    /// NOTE(review): the job copies existing C tiles in before multiplying, so
    /// this appears to accumulate into C rather than overwrite it — confirm
    /// against MatrixUtils.MultiplyBlockUnroll8xhPadded.
    /// </summary>
    /// <param name="Ap">Pointer to matrix A.</param>
    /// <param name="AN">Rows of A (as stored, pre-transpose).</param>
    /// <param name="AM">Columns of A (as stored, pre-transpose).</param>
    /// <param name="Bp">Pointer to matrix B.</param>
    /// <param name="BN">Rows of B (as stored, pre-transpose).</param>
    /// <param name="BM">Columns of B (as stored, pre-transpose).</param>
    /// <param name="Cp">Pointer to output matrix C.</param>
    /// <param name="CN">Rows of C.</param>
    /// <param name="CM">Columns of C.</param>
    /// <param name="bs">Block (tile) size in elements.</param>
    /// <param name="transposeA">Treat A as transposed.</param>
    /// <param name="transposeB">Treat B as transposed.</param>
    public unsafe void SGEMM(float* Ap, int AN, int AM, float* Bp, int BN, int BM, float* Cp, int CN, int CM, int bs,
        bool transposeA = false, bool transposeB = false)
    {
        // Swap the stored dimensions so AN/AM (and BN/BM) always describe the
        // logical, post-transpose shape used by the blocking loops below.
        if (transposeA)
        {
            var tmp = AN; AN = AM; AM = tmp;
        }
        if (transposeB)
        {
            var tmp = BN; BN = BM; BM = tmp;
        }

        UnsafeMatrixBlockMultiplyUnrolled8xhJob job = new UnsafeMatrixBlockMultiplyUnrolled8xhJob();
        job.A = Ap;
        job.AN = AN;
        job.AM = AM;
        job.B = Bp;
        job.BN = BN;
        job.BM = BM;
        job.C = Cp;
        job.CN = CN;
        job.CM = CM;
        job.bs = bs;
        job.transposeA = transposeA;
        job.transposeB = transposeB;

        // One parallel iteration per bs-wide column block of B, rounding up
        // for a partial trailing block; batch 4 iterations per worker steal.
        var fence = job.Schedule((BM / bs) + (BM % bs > 0 ? 1 : 0), 4);
        fence.Complete();
    }
}
|
||
// Blocked GEMM kernel: each Execute(colB) processes one bs-wide column block
// of B/C across all row blocks of A, staging bs x bs tiles in temporary
// scratch buffers. Parallel invocations write disjoint column blocks of C,
// which is why the parallel-for and unsafe-pointer restrictions are disabled.
// NOTE(review): [BurstCompile] was left commented out in the original; do not
// re-enable here without verifying Burst compatibility of MatrixUtils.
//[BurstCompile]
struct UnsafeMatrixBlockMultiplyUnrolled8xhJob : IJobParallelFor
{
    // Raw source matrix A and its (post-transpose) dimensions.
    [NativeDisableParallelForRestriction] [NativeDisableUnsafePtrRestriction] public unsafe float* A;
    public int AN, AM;
    // Raw source matrix B and its (post-transpose) dimensions.
    [NativeDisableParallelForRestriction] [NativeDisableUnsafePtrRestriction] public unsafe float* B;
    public int BN, BM;
    // Raw destination matrix C and its dimensions.
    [NativeDisableParallelForRestriction] [NativeDisableUnsafePtrRestriction] public unsafe float* C;
    public int CN, CM;
    // Tile edge length in elements.
    public int bs;
    public bool transposeA;
    public bool transposeB;

    /// <summary>
    /// Multiplies all row blocks of A against column block <paramref name="colB"/>
    /// of B, accumulating through C via padded bs x bs scratch tiles.
    /// </summary>
    public void Execute(int colB)
    {
        unsafe
        {
            // Scratch tile size in bytes: bs * bs floats at 4 bytes each.
            int sz = bs * bs * 4;

            // Fix: removed a stray empty statement (";") that preceded this
            // first allocation in the original.
            float* blockA = (float*)UnsafeUtility.Malloc(sz, 4, Allocator.TempJob);
            float* blockB = (float*)UnsafeUtility.Malloc(sz, 4, Allocator.TempJob);
            float* blockC = (float*)UnsafeUtility.Malloc(sz, 4, Allocator.TempJob);

            for (int rowA = 0; rowA < AN; rowA += bs)
            {
                // The colB loop became the parallel-for index; the braces are
                // kept so the original loop body is untouched.
                //for (int colB = 0; colB < BM; colB += bs)
                {
                    for (int l = 0; l < AM; l += bs)
                    {
                        // Stage padded tiles of A, B and the current C tile;
                        // C is re-read each iteration so partial products
                        // accumulate through main memory.
                        MatrixUtils.CopyBlockWithPadding(A, rowA, AN, l, AM, blockA, bs, transposeA);
                        MatrixUtils.CopyBlockWithPadding(B, l, BN, colB * bs, BM, blockB, bs, transposeB);
                        MatrixUtils.CopyBlockWithPadding(C, rowA, CN, colB * bs, CM, blockC, bs);

                        MatrixUtils.MultiplyBlockUnroll8xhPadded(blockA, blockB, blockC, bs);

                        // Write the updated tile back into C.
                        MatrixUtils.CopyBlockWithPadding(blockC, C, rowA, CN, colB * bs, CM, bs);
                    }
                }
            }

            UnsafeUtility.Free(blockA, Allocator.TempJob);
            UnsafeUtility.Free(blockB, Allocator.TempJob);
            UnsafeUtility.Free(blockC, Allocator.TempJob);
        }
    }
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
using System.Reflection;

// Assembly version stamps for the Barracuda/BurstBLAS build.
// DON'T EDIT
// Will be replaced by Tools/Build/build.py
[assembly: AssemblyVersion("0.4.0.0")]
[assembly: AssemblyFileVersion("0.4.0.0")]
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
using System; | ||
using System.Collections; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using UnityEngine; | ||
|
||
namespace Barracuda { | ||
|
||
/// <summary>
/// Interfaces for backend implementers
/// see ModelBuilder.cs for detail on layers.
/// </summary>
public interface IOps
{
    // --- Matrix / convolution ops -------------------------------------------

    Tensor MatMul(Tensor x, bool xTranspose, Tensor y, bool yTranspose);// @TODO: consider MatMulAdd instead
    Tensor Dense(Tensor x, Tensor w, Tensor b);
    Tensor Conv2D(Tensor x, Tensor k, Tensor b, int[] stride, int[] pad);
    Tensor DepthwiseConv2D(Tensor x, Tensor k, Tensor b, int[] stride, int[] pad);
    Tensor Conv2DTrans(Tensor x, Tensor k, Tensor b, int[] stride, int[] pad, int[] outputAdjustment);
    Tensor Upsample2D(Tensor x, int[] size);

    // --- Pooling and padding -------------------------------------------------

    Tensor MaxPool2D(Tensor x, int[] pool, int[] stride, int[] pad);
    Tensor AvgPool2D(Tensor x, int[] pool, int[] stride, int[] pad);
    Tensor GlobalMaxPool2D(Tensor x); // @TODO: consider, if it should be just a special case of MaxPool2D with {pool=X.width/height, stride=1}
    Tensor GlobalAvgPool2D(Tensor x);
    Tensor GlobalAvgVariancePool2D(Tensor x);
    Tensor Border2D(Tensor x, int[] pad, float borderValue);
    Tensor Pad2DReflect(Tensor x, int[] pad);
    Tensor Pad2DSymmetric(Tensor x, int[] pad);
    Tensor Pad2DEdge(Tensor x, int[] pad);

    // --- Normalization and randomness ---------------------------------------

    Tensor ScaleBias(Tensor x, Tensor s, Tensor b);
    Tensor Normalization(Tensor x, Tensor s, Tensor b, int pool, int axis, float epsilon);
    Tensor LRN(Tensor x, float alpha, float beta, float bias, int size);
    Tensor Dropout(Tensor x, float alpha);
    Tensor RandomNormal(TensorShape s, float mean, float scale, int seed);
    Tensor RandomUniform(TensorShape s, float mean, float scale, int seed);
    Tensor Multinomial(Tensor x, int count, int seed);
    Tensor OneHot(Tensor x, int depth, float onValue, float offValue);

    // --- Activations ---------------------------------------------------------

    Tensor Relu(Tensor x);
    Tensor Softmax(Tensor x);
    Tensor LogSoftmax(Tensor x);
    Tensor Tanh(Tensor x);
    Tensor Sigmoid(Tensor x);
    Tensor Elu(Tensor x, float alpha);
    Tensor Relu6(Tensor x);
    Tensor LeakyRelu(Tensor x, float alpha);
    Tensor Selu(Tensor x, float alpha, float gamma);
    Tensor PRelu(Tensor x, Tensor alpha);
    Tensor Swish(Tensor x);
    Tensor Abs(Tensor x);
    Tensor Neg(Tensor x);
    Tensor Ceil(Tensor x);
    Tensor Clip(Tensor x, float min, float max);
    Tensor Floor(Tensor x);

    // --- Elementwise unary math ----------------------------------------------

    Tensor Reciprocal(Tensor x);
    Tensor Pow(Tensor x, float alpha);
    Tensor Exp(Tensor x);
    Tensor Log(Tensor x);
    Tensor Sqrt(Tensor x);

    // --- Elementwise n-ary math (applied across an array of tensors) --------

    Tensor Add(Tensor[] tensors);
    Tensor Sub(Tensor[] tensors);
    Tensor Mul(Tensor[] tensors);
    Tensor Div(Tensor[] tensors);
    Tensor Pow(Tensor[] tensors);
    Tensor Min(Tensor[] tensors);
    Tensor Max(Tensor[] tensors);
    Tensor Mean(Tensor[] tensors);

    // --- Reductions along a single axis --------------------------------------

    Tensor ReduceMax(Tensor x, int axis);
    Tensor ReduceMean(Tensor x, int axis);
    Tensor ReduceMin(Tensor x, int axis);
    Tensor ReduceProd(Tensor x, int axis);
    Tensor ReduceSum(Tensor x, int axis);

    // --- Comparison and logical ops ------------------------------------------

    Tensor Greater(Tensor a, Tensor b);
    Tensor GreaterEqual(Tensor a, Tensor b);
    Tensor Less(Tensor a, Tensor b);
    Tensor LessEqual(Tensor a, Tensor b);
    Tensor Equal(Tensor a, Tensor b);
    Tensor LogicalOr(Tensor a, Tensor b);
    Tensor LogicalAnd(Tensor a, Tensor b);
    Tensor LogicalXor(Tensor a, Tensor b);
    Tensor LogicalNot(Tensor x);

    // --- Shape manipulation ---------------------------------------------------

    Tensor Flatten(Tensor x);
    Tensor Reshape(Tensor x, TensorShape shape);
    Tensor Transpose(Tensor x);

    Tensor Concat(Tensor[] tensors, int axis);
    Tensor StridedSlice(Tensor x, int[] starts, int[] ends, int[] stride);
    Tensor Tile(Tensor x, int[] repeats);

    /// <summary>
    /// Prepares tensor for use
    /// </summary>
    Tensor Prepare(Tensor x);

    /// <summary>
    /// Waits for previously scheduled OP to complete
    /// Tensor x is the destination of that OP
    /// </summary>
    void WaitForCompletion(Tensor x);

    /// <summary>
    /// Reset internal allocator
    /// </summary>
    void ResetAllocator(bool keepCachedMemory = true);
}
|
||
/// <summary>
/// Interfaces for model compiler
/// </summary>
public interface IModelCompiler
{
    // Called ahead of execution with the model and its input shapes, so the
    // backend can pre-compile or pre-allocate per-model resources.
    void PrepareModel(Model model, IDictionary<string, TensorShape> inputShapes);
    // Called before each layer executes, with the layer's resolved inputs.
    void PreExecuteLayer(Layer layer, Tensor[] inputs);
}
|
||
/// <summary>
/// Interfaces for variables
/// </summary>
public interface IVars : IDisposable
{
    // Binds a named model input to a tensor.
    void SetInput(string name, Tensor x);
    // Prepares storage for the whole model; the optional IOps/shapes allow
    // tensors to be prepared ahead of execution.
    void PrepareStorage(Model model, IOps optionalOpsToPrepareTensors = null, IDictionary<string, TensorShape> optionalInputShapes = null);
    // Resolves the input tensors for a given layer.
    Tensor[] GatherInputs(Layer forLayer);
    // Prepares storage for a single layer before it runs.
    void PrepareStorage(Layer forLayer);
    // Stores a layer's result tensor.
    void Store(Layer fromLayer, Tensor result);
    // Returns a stored tensor by name without removing it.
    Tensor PeekOutput(string name);

    ITensorAllocator GetAllocator();
}
|
||
/// <summary>
/// Interfaces for tensor allocator
/// </summary>
public interface ITensorAllocator : IDisposable
{
    // Allocates a tensor of the given shape; the second overload wraps an
    // existing device buffer instead of allocating new storage.
    Tensor Alloc(TensorShape shape);
    Tensor Alloc(TensorShape shape, ITensorData buffer);

    // Repin() callback is called from the following Tensor methods:
    // PinToDeviceAndUploadToIt(), PinToDeviceAndDownloadFromIt(),
    // Unpin() and UnpinAndDisposeTensor()
    void Repin(Tensor x, ITensorData newBuffer, ITensorData oldBuffer, bool disposeUnpinnedHint);

    // Cast() callback is called from the following Tensor methods:
    // CastOnDevice()
    void Cast(Tensor x, ITensorData newBuffer, ITensorData oldBuffer);

    // NOTE: Release() should be ready to handle edge-case situation when
    // externally created new Tensor instance is passed with
    // ITensorData (tensorOnDevice) that is already owned by the allocator
    void Release(Tensor x, bool calledFromTensorDispose);

    // Relinquishes the allocator's ownership of the tensor's storage.
    void WaiveOwnership(Tensor x);
    void Reset(bool keepCachedMemory); // end-of-frame
}
|
||
} // namespace Barracuda |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.