Skip to content

Commit

Permalink
Updated to lkuich/Barricuda 0.4.1
Browse files Browse the repository at this point in the history
  • Loading branch information
lkuich committed Jan 23, 2020
1 parent 424177f commit a05d770
Show file tree
Hide file tree
Showing 108 changed files with 16,820 additions and 5,377 deletions.
Binary file removed Assets/Coach-ML/Barracuda/Barracuda.dll
Binary file not shown.
30 changes: 0 additions & 30 deletions Assets/Coach-ML/Barracuda/Barracuda.dll.meta

This file was deleted.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions Assets/Coach-ML/Barracuda/Burst/BurstBLAS.asmdef
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"name": "BurstBLAS",
"references": [
"Barracuda",
"Unity.Burst"
],
"optionalUnityReferences": [],
"includePlatforms": [],
"excludePlatforms": [],
"allowUnsafeCode": true
}
7 changes: 7 additions & 0 deletions Assets/Coach-ML/Barracuda/Burst/BurstBLAS.asmdef.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

111 changes: 111 additions & 0 deletions Assets/Coach-ML/Barracuda/Burst/BurstBLAS.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using Barracuda;
using Unity.Burst;
using Unity.Collections;
using Unity.Collections.LowLevel.Unsafe;
using Unity.Jobs;
using Unity.Jobs.LowLevel.Unsafe;
using UnityEngine;
using UnityEngine.Scripting;

/// <summary>
/// <see cref="BLASPlugin"/> implementation that performs SGEMM by scheduling
/// <c>UnsafeMatrixBlockMultiplyUnrolled8xhJob</c> on the Unity C# Job system.
/// </summary>
[Preserve]
public class BurstBLAS : BLASPlugin
{
    /// <summary>
    /// Probes whether the job type used by this plugin can be instantiated at runtime.
    /// </summary>
    /// <returns>
    /// <c>true</c> when the job struct was created and logged without an exception;
    /// <c>false</c> when any exception was thrown (the exception is logged).
    /// </returns>
    public bool IsCurrentPlatformSupported()
    {
        try
        {
            // Creating the job is the sanity check that its runtime dependencies are present.
            // Logging the instance keeps the compiler from optimising the probe away.
            var test = new UnsafeMatrixBlockMultiplyUnrolled8xhJob();
            D.Log($"Loaded: {test}");
            return true;
        }
        catch (Exception e)
        {
            D.Log($"C# Job system not found. Disabling {this.GetType()}. Error: {e}");
            return false;
        }
    }

    /// <summary>
    /// Schedules a blocked matrix-multiply job over the given raw buffers and blocks
    /// the calling thread until the job has completed.
    /// </summary>
    /// <param name="Ap">Pointer to the data of matrix A.</param>
    /// <param name="AN">First dimension of A; swapped with <paramref name="AM"/> when <paramref name="transposeA"/> is set.</param>
    /// <param name="AM">Second dimension of A.</param>
    /// <param name="Bp">Pointer to the data of matrix B.</param>
    /// <param name="BN">First dimension of B; swapped with <paramref name="BM"/> when <paramref name="transposeB"/> is set.</param>
    /// <param name="BM">Second dimension of B.</param>
    /// <param name="Cp">Pointer to the data of matrix C, which the job reads and writes.</param>
    /// <param name="CN">First dimension of C.</param>
    /// <param name="CM">Second dimension of C.</param>
    /// <param name="bs">Block size; also the divisor used to compute the number of parallel iterations.</param>
    /// <param name="transposeA">Forwarded to the job; also triggers the AN/AM swap.</param>
    /// <param name="transposeB">Forwarded to the job; also triggers the BN/BM swap.</param>
    public unsafe void SGEMM(float* Ap, int AN, int AM, float* Bp, int BN, int BM, float* Cp, int CN, int CM, int bs,
        bool transposeA = false, bool transposeB = false)
    {
        // NOTE(review): presumably the caller passes the stored (untransposed) dimensions and
        // the job expects the logical ones - confirm against MatrixUtils.CopyBlockWithPadding.
        if (transposeA)
        {
            int swap = AN;
            AN = AM;
            AM = swap;
        }
        if (transposeB)
        {
            int swap = BN;
            BN = BM;
            BM = swap;
        }

        var job = new UnsafeMatrixBlockMultiplyUnrolled8xhJob
        {
            A = Ap,
            AN = AN,
            AM = AM,
            B = Bp,
            BN = BN,
            BM = BM,
            C = Cp,
            CN = CN,
            CM = CM,
            bs = bs,
            transposeA = transposeA,
            transposeB = transposeB
        };

        // One parallel iteration per bs-wide slice of BM, plus one more for a partial slice.
        int wholeBlocks = BM / bs;
        int partialBlock = BM % bs > 0 ? 1 : 0;

        job.Schedule(wholeBlocks + partialBlock, 4).Complete();
    }
}

// Burst compilation is currently switched off for this job (attribute left commented out).
//[BurstCompile]
/// <summary>
/// Parallel-for job in which each iteration handles one block column (index <c>colB</c>)
/// of the output, walking over all row blocks of A and all blocks along A's second dimension.
/// </summary>
struct UnsafeMatrixBlockMultiplyUnrolled8xhJob : IJobParallelFor
{
    [NativeDisableParallelForRestriction]
    [NativeDisableUnsafePtrRestriction]
    public unsafe float* A;
    public int AN, AM;

    [NativeDisableParallelForRestriction]
    [NativeDisableUnsafePtrRestriction]
    public unsafe float* B;
    public int BN, BM;

    [NativeDisableParallelForRestriction]
    [NativeDisableUnsafePtrRestriction]
    public unsafe float* C;
    public int CN, CM;

    public int bs;
    public bool transposeA;
    public bool transposeB;

    /// <summary>
    /// Processes the block column <paramref name="colB"/>: for every (row block, inner block)
    /// pair it copies bs x bs tiles of A, B and C into scratch buffers, runs
    /// <c>MatrixUtils.MultiplyBlockUnroll8xhPadded</c> on them and copies the C tile back.
    /// </summary>
    /// <param name="colB">Block-column index; the element column offset is <c>colB * bs</c>.</param>
    public unsafe void Execute(int colB)
    {
        // Each scratch tile holds bs * bs floats.
        int tileBytes = bs * bs * sizeof(float);

        float* tileA = (float*)UnsafeUtility.Malloc(tileBytes, sizeof(float), Allocator.TempJob);
        float* tileB = (float*)UnsafeUtility.Malloc(tileBytes, sizeof(float), Allocator.TempJob);
        float* tileC = (float*)UnsafeUtility.Malloc(tileBytes, sizeof(float), Allocator.TempJob);

        // Column offset of this iteration's tile; constant for the whole call.
        int colOffset = colB * bs;

        for (int rowOffset = 0; rowOffset < AN; rowOffset += bs)
        {
            for (int inner = 0; inner < AM; inner += bs)
            {
                MatrixUtils.CopyBlockWithPadding(A, rowOffset, AN, inner, AM, tileA, bs, transposeA);
                MatrixUtils.CopyBlockWithPadding(B, inner, BN, colOffset, BM, tileB, bs, transposeB);
                MatrixUtils.CopyBlockWithPadding(C, rowOffset, CN, colOffset, CM, tileC, bs);

                // NOTE(review): presumably accumulates tileA * tileB into tileC - confirm in MatrixUtils.
                MatrixUtils.MultiplyBlockUnroll8xhPadded(tileA, tileB, tileC, bs);

                MatrixUtils.CopyBlockWithPadding(tileC, C, rowOffset, CN, colOffset, CM, bs);
            }
        }

        UnsafeUtility.Free(tileA, Allocator.TempJob);
        UnsafeUtility.Free(tileB, Allocator.TempJob);
        UnsafeUtility.Free(tileC, Allocator.TempJob);
    }
}
11 changes: 11 additions & 0 deletions Assets/Coach-ML/Barracuda/Burst/BurstBLAS.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions Assets/Coach-ML/Barracuda/Core.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions Assets/Coach-ML/Barracuda/Core/AssemblyInfo.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
using System.Reflection;

// DON'T EDIT
// Will be replaced by Tools/Build/build.py
[assembly: AssemblyVersion("0.4.0.0")]
[assembly: AssemblyFileVersion("0.4.0.0")]
3 changes: 3 additions & 0 deletions Assets/Coach-ML/Barracuda/Core/AssemblyInfo.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions Assets/Coach-ML/Barracuda/Core/Backends.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

163 changes: 163 additions & 0 deletions Assets/Coach-ML/Barracuda/Core/Backends/BarracudaBackends.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using UnityEngine;

namespace Barracuda {

/// <summary>
/// Interface for backend implementers: one method per tensor operation.
/// See ModelBuilder.cs for detail on layers.
/// </summary>
/// <remarks>
/// Every operation method returns a <see cref="Tensor"/>. The only members that are not
/// operations are <see cref="Prepare"/>, <see cref="WaitForCompletion"/> and
/// <see cref="ResetAllocator"/> at the end of the interface.
/// The group headings below are inferred from the method names only - confirm against
/// ModelBuilder.cs before relying on them.
/// </remarks>
public interface IOps
{
// Matrix multiply, dense, convolution, pooling and padding (grouping inferred from names).
Tensor MatMul(Tensor x, bool xTranspose, Tensor y, bool yTranspose);// @TODO: consider MatMulAdd instead
Tensor Dense(Tensor x, Tensor w, Tensor b);
Tensor Conv2D(Tensor x, Tensor k, Tensor b, int[] stride, int[] pad);
Tensor DepthwiseConv2D(Tensor x, Tensor k, Tensor b, int[] stride, int[] pad);
Tensor Conv2DTrans(Tensor x, Tensor k, Tensor b, int[] stride, int[] pad, int[] outputAdjustment);
Tensor Upsample2D(Tensor x, int[] size);
Tensor MaxPool2D(Tensor x, int[] pool, int[] stride, int[] pad);
Tensor AvgPool2D(Tensor x, int[] pool, int[] stride, int[] pad);
Tensor GlobalMaxPool2D(Tensor x); // @TODO: consider, if it should be just a special case of MaxPool2D with {pool=X.width/height, stride=1}
Tensor GlobalAvgPool2D(Tensor x);
Tensor GlobalAvgVariancePool2D(Tensor x);
Tensor Border2D(Tensor x, int[] pad, float borderValue);
Tensor Pad2DReflect(Tensor x, int[] pad);
Tensor Pad2DSymmetric(Tensor x, int[] pad);
Tensor Pad2DEdge(Tensor x, int[] pad);

// Scaling/normalization, dropout, random generation and one-hot (grouping inferred from names).
Tensor ScaleBias(Tensor x, Tensor s, Tensor b);
Tensor Normalization(Tensor x, Tensor s, Tensor b, int pool, int axis, float epsilon);
Tensor LRN(Tensor x, float alpha, float beta, float bias, int size);
Tensor Dropout(Tensor x, float alpha);
Tensor RandomNormal(TensorShape s, float mean, float scale, int seed);
Tensor RandomUniform(TensorShape s, float mean, float scale, int seed);
Tensor Multinomial(Tensor x, int count, int seed);
Tensor OneHot(Tensor x, int depth, float onValue, float offValue);

// Single-input operations, some with scalar or tensor parameters (grouping inferred from names).
Tensor Relu(Tensor x);
Tensor Softmax(Tensor x);
Tensor LogSoftmax(Tensor x);
Tensor Tanh(Tensor x);
Tensor Sigmoid(Tensor x);
Tensor Elu(Tensor x, float alpha);
Tensor Relu6(Tensor x);
Tensor LeakyRelu(Tensor x, float alpha);
Tensor Selu(Tensor x, float alpha, float gamma);
Tensor PRelu(Tensor x, Tensor alpha);
Tensor Swish(Tensor x);
Tensor Abs(Tensor x);
Tensor Neg(Tensor x);
Tensor Ceil(Tensor x);
Tensor Clip(Tensor x, float min, float max);
Tensor Floor(Tensor x);

// Note: Pow is overloaded - this (Tensor, float) form and the Tensor[] form further below.
Tensor Reciprocal(Tensor x);
Tensor Pow(Tensor x, float alpha);
Tensor Exp(Tensor x);
Tensor Log(Tensor x);
Tensor Sqrt(Tensor x);

// Operations taking an array of input tensors.
Tensor Add(Tensor[] tensors);
Tensor Sub(Tensor[] tensors);
Tensor Mul(Tensor[] tensors);
Tensor Div(Tensor[] tensors);
Tensor Pow(Tensor[] tensors);
Tensor Min(Tensor[] tensors);
Tensor Max(Tensor[] tensors);
Tensor Mean(Tensor[] tensors);

// Reductions parameterized by an axis index.
Tensor ReduceMax(Tensor x, int axis);
Tensor ReduceMean(Tensor x, int axis);
Tensor ReduceMin(Tensor x, int axis);
Tensor ReduceProd(Tensor x, int axis);
Tensor ReduceSum(Tensor x, int axis);

// Comparison and logical operations; results are returned as Tensor, not bool.
Tensor Greater(Tensor a, Tensor b);
Tensor GreaterEqual(Tensor a, Tensor b);
Tensor Less(Tensor a, Tensor b);
Tensor LessEqual(Tensor a, Tensor b);
Tensor Equal(Tensor a, Tensor b);
Tensor LogicalOr(Tensor a, Tensor b);
Tensor LogicalAnd(Tensor a, Tensor b);
Tensor LogicalXor(Tensor a, Tensor b);
Tensor LogicalNot(Tensor x);

// Shape manipulation (grouping inferred from names).
Tensor Flatten(Tensor x);
Tensor Reshape(Tensor x, TensorShape shape);
Tensor Transpose(Tensor x);

// Concatenation, slicing and tiling (grouping inferred from names).
Tensor Concat(Tensor[] tensors, int axis);
Tensor StridedSlice(Tensor x, int[] starts, int[] ends, int[] stride);
Tensor Tile(Tensor x, int[] repeats);

/// <summary>
/// Prepares tensor for use
/// </summary>
Tensor Prepare(Tensor x);

/// <summary>
/// Waits for previously scheduled OP to complete
/// Tensor x is the destination of that OP
/// </summary>
void WaitForCompletion(Tensor x);

/// <summary>
/// Reset internal allocator
/// </summary>
/// <param name="keepCachedMemory">Defaults to <c>true</c>; presumably controls whether cached memory survives the reset - confirm in implementations.</param>
void ResetAllocator(bool keepCachedMemory = true);
}

/// <summary>
/// Interface for a model compiler.
/// </summary>
/// <remarks>
/// Declares two hooks: one taking the whole model together with its input shapes, and one
/// taking a single layer together with its input tensors. When and by whom they are invoked
/// is not visible here - presumably by the worker before execution; confirm at the call sites.
/// </remarks>
public interface IModelCompiler
{
// Receives the model and a map from input name to TensorShape.
void PrepareModel(Model model, IDictionary<string, TensorShape> inputShapes);
// Receives one layer and the tensors that are its inputs.
void PreExecuteLayer(Layer layer, Tensor[] inputs);
}

/// <summary>
/// Interface for variable storage. Extends <see cref="IDisposable"/>, so implementations
/// are expected to be disposed by their owner.
/// </summary>
public interface IVars : IDisposable
{
// Associates tensor x with the given input name.
void SetInput(string name, Tensor x);
// Model-wide overload; both the ops and the input shapes are optional (default null).
void PrepareStorage(Model model, IOps optionalOpsToPrepareTensors = null, IDictionary<string, TensorShape> optionalInputShapes = null);
// Returns the input tensors for the given layer.
Tensor[] GatherInputs(Layer forLayer);
// Per-layer overload of PrepareStorage.
void PrepareStorage(Layer forLayer);
// Records result as the output produced by fromLayer.
void Store(Layer fromLayer, Tensor result);
// Looks up a tensor by name; presumably without transferring ownership ("peek") - confirm in implementations.
Tensor PeekOutput(string name);

// Exposes the tensor allocator used by this storage.
ITensorAllocator GetAllocator();
}

/// <summary>
/// Interface for a tensor allocator. Extends <see cref="IDisposable"/>.
/// </summary>
/// <remarks>
/// Besides the two <c>Alloc</c> overloads, the interface consists of callbacks that
/// <see cref="Tensor"/> invokes (see the per-member comments) and ownership/lifetime controls.
/// </remarks>
public interface ITensorAllocator : IDisposable
{
// Allocates a tensor of the given shape.
Tensor Alloc(TensorShape shape);
// Allocates a tensor of the given shape on top of a caller-supplied ITensorData buffer.
Tensor Alloc(TensorShape shape, ITensorData buffer);

// Repin() callback is called from the following Tensor methods:
// PinToDeviceAndUploadToIt(), PinToDeviceAndDownloadFromIt(),
// Unpin() and UnpinAndDisposeTensor()
void Repin(Tensor x, ITensorData newBuffer, ITensorData oldBuffer, bool disposeUnpinnedHint);

// Cast() callback is called from the following Tensor methods:
// CastOnDevice()
void Cast(Tensor x, ITensorData newBuffer, ITensorData oldBuffer);

// NOTE: Release() should be ready to handle edge-case situation when
// externally created new Tensor instance is passed with
// ITensorData (tensorOnDevice) that is already owned by the allocator
void Release(Tensor x, bool calledFromTensorDispose);

// Presumably makes the allocator stop tracking x as its own - confirm in implementations.
void WaiveOwnership(Tensor x);
void Reset(bool keepCachedMemory); // end-of-frame
}

} // namespace Barracuda
11 changes: 11 additions & 0 deletions Assets/Coach-ML/Barracuda/Core/Backends/BarracudaBackends.cs.meta

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit a05d770

Please sign in to comment.