Skip to content

Commit

Permalink
Merge pull request #48 from Sergio0694/feature_inception-layer
Browse files Browse the repository at this point in the history
Feature inception layer
  • Loading branch information
Sergio0694 authored Dec 29, 2017
2 parents 1711109 + b84d991 commit 2d56d11
Show file tree
Hide file tree
Showing 25 changed files with 1,710 additions and 202 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,18 @@ public static INetworkLayer Convolutional(
[PublicAPI]
[Pure, NotNull]
public static INetworkLayer Pooling(in TensorInfo input, in PoolingInfo info, ActivationFunctionType activation) => new CuDnnPoolingLayer(input, info, activation);

/// <summary>
/// Builds an inception layer for the given input volume, using the specified set of internal operations
/// </summary>
/// <param name="input">The input volume to process</param>
/// <param name="info">The info on the operations to execute inside the layer</param>
/// <param name="biasMode">Indicates the desired initialization mode to use for the layer bias values</param>
/// <returns>A new <see cref="CuDnnInceptionLayer"/> instance, exposed through the <see cref="INetworkLayer"/> interface</returns>
[PublicAPI, Pure, NotNull]
public static INetworkLayer Inception(in TensorInfo input, in InceptionInfo info, BiasInitializationMode biasMode = BiasInitializationMode.Zero)
{
    // The arguments are forwarded as they are to the layer constructor
    INetworkLayer layer = new CuDnnInceptionLayer(input, info, biasMode);
    return layer;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ private static INetworkLayer Deserialize([NotNull] Stream stream, LayerType type
case LayerType.Convolutional: return CuDnnConvolutionalLayer.Deserialize(stream);
case LayerType.Pooling: return CuDnnPoolingLayer.Deserialize(stream);
case LayerType.Softmax: return CuDnnSoftmaxLayer.Deserialize(stream);
case LayerType.Inception: return CuDnnInceptionLayer.Deserialize(stream);
default: return null;
}
}
Expand Down
64 changes: 64 additions & 0 deletions NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,39 @@ public static DeviceMemory<float> AllocateDevice([NotNull] this Gpu gpu, in Tens
: throw new InvalidOperationException($"Failed to copy the source data on the target GPU device, [CUDA ERROR] {result}");
}

/// <summary>
/// Allocates a memory area on device memory, reading the target values at a given offset from the input <see cref="Tensor"/>
/// </summary>
/// <param name="gpu">The <see cref="Gpu"/> device to use</param>
/// <param name="source">The source <see cref="Tensor"/> with the data to copy</param>
/// <param name="offset">The column offset for the data to read from each row</param>
/// <param name="length">The number of values to read from each row, starting at <paramref name="offset"/></param>
/// <returns>A device memory area with <c>source.Entities * length</c> values, where the selected slice of each row is stored contiguously</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="offset"/> or <paramref name="length"/> is negative, or the requested slice exceeds the length of a row in <paramref name="source"/>
/// </exception>
/// <exception cref="InvalidOperationException">The CUDA memory copy didn't complete successfully</exception>
[MustUseReturnValue, NotNull]
public static unsafe DeviceMemory<float> AllocateDevice([NotNull] this Gpu gpu, in Tensor source, int offset, int length)
{
    // Checks: a negative offset would pass the range check below and move the source pointer before the tensor data
    if (offset < 0) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");
    if (length < 0) throw new ArgumentOutOfRangeException(nameof(length), "The input length isn't valid");
    if (source.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");

    // Memory copy: one 2D transfer with a row for each entity, reading length values from each source row
    DeviceMemory<float> result_gpu = gpu.AllocateDevice<float>(source.Entities * length);
    CUDAInterop.CUDA_MEMCPY2D_st* ptSt = stackalloc CUDAInterop.CUDA_MEMCPY2D_st[1];
    ptSt[0] = new CUDAInterop.CUDA_MEMCPY2D_st
    {
        srcMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_HOST,
        srcHost = source.Ptr + sizeof(float) * offset, // Start of the slice in the first row
        srcPitch = new IntPtr(sizeof(float) * source.Length), // The source rows keep their full width
        dstMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE,
        dstDevice = result_gpu.Handle,
        dstPitch = new IntPtr(sizeof(float) * length), // The destination rows are tightly packed
        WidthInBytes = new IntPtr(sizeof(float) * length),
        Height = new IntPtr(source.Entities)
    };
    CUDAInterop.cudaError_enum result = CUDAInterop.cuMemcpy2D(ptSt);
    if (result == CUDAInterop.cudaError_enum.CUDA_SUCCESS) return result_gpu;

    // The caller never receives the buffer if the copy fails, so it must be released here to avoid leaking device memory
    result_gpu.Dispose();
    throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}");
}

/// <summary>
/// Copies the contents of the input <see cref="DeviceMemory{T}"/> instance to the target host memory area
/// </summary>
Expand All @@ -40,6 +73,37 @@ public static void CopyTo([NotNull] this DeviceMemory<float> source, in Tensor d
throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}");
}

/// <summary>
/// Copies the source data into the target <see cref="Tensor"/>, splitting each individual entry into its own row
/// </summary>
/// <param name="source">The source memory area with the concatenated data for each entry</param>
/// <param name="destination">The destination <see cref="Tensor"/> that will store the data</param>
/// <param name="offset">The column offset for the data for each entry</param>
/// <param name="length">The number of values to copy for each entry</param>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="length"/> isn't positive or doesn't match the size of <paramref name="source"/> and the number of rows in
/// <paramref name="destination"/>, or <paramref name="offset"/> is negative or leaves less than <paramref name="length"/> values in each destination row
/// </exception>
/// <exception cref="InvalidOperationException">The CUDA memory copy didn't complete successfully</exception>
public static unsafe void CopyTo([NotNull] this DeviceMemory<float> source, in Tensor destination, int offset, int length)
{
    // Checks: the length is validated first, as it's used as a divisor right below
    if (length <= 0) throw new ArgumentOutOfRangeException(nameof(length), "The input length must be a positive number");
    if (source.Length / length != destination.Entities) throw new ArgumentOutOfRangeException(nameof(length), "The input length doesn't match the given arguments");

    // A negative offset would pass the range check and make the copy write before the destination buffer
    if (offset < 0 || destination.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");

    // Memory copy: one 2D transfer with a row for each entity, writing length values into each destination row
    CUDAInterop.CUDA_MEMCPY2D_st* ptSt = stackalloc CUDAInterop.CUDA_MEMCPY2D_st[1];
    ptSt[0] = new CUDAInterop.CUDA_MEMCPY2D_st
    {
        srcMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE,
        srcDevice = source.Handle,
        srcPitch = new IntPtr(sizeof(float) * length), // The source rows are tightly packed
        dstMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_HOST,
        dstHost = destination.Ptr + sizeof(float) * offset, // Start of the slice in the first row
        dstPitch = new IntPtr(sizeof(float) * destination.Length), // The destination rows keep their full width
        WidthInBytes = new IntPtr(sizeof(float) * length),
        Height = new IntPtr(destination.Entities)
    };
    CUDAInterop.cudaError_enum result = CUDAInterop.cuMemcpy2D(ptSt);
    if (result != CUDAInterop.cudaError_enum.CUDA_SUCCESS)
        throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}");
}

/// <summary>
/// Copies the contents of the input <see cref="DeviceMemory{T}"/> to a new memory area on the unmanaged heap
/// </summary>
Expand Down
96 changes: 43 additions & 53 deletions NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ internal sealed class CuDnnConvolutionalLayer : ConvolutionalLayer
[NotNull]
private readonly Dnn DnnInstance = DnnService.Instance;

/// <summary>
/// Sets the cuDNN fields that will be used during future forward/backwards operations
/// </summary>
// cuDNN fields setup
private void SetupCuDnnInfo()
{
ConvolutionDescription.Set2D(OperationInfo.VerticalPadding, OperationInfo.HorizontalPadding, OperationInfo.VerticalStride, OperationInfo.HorizontalStride, 1, 1, (Alea.cuDNN.ConvolutionMode)OperationInfo.Mode);
Expand All @@ -74,71 +72,63 @@ public CuDnnConvolutionalLayer(
#region Implementation

/// <inheritdoc/>
public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a)
public override void Forward(in Tensor x, out Tensor z, out Tensor a)
{
fixed (float* pw = Weights)
using (DeviceMemory<float> z_gpu = DnnInstance.Gpu.AllocateDevice<float>(x.Entities * OutputInfo.Size))
{
Tensor.Reshape(pw, OutputInfo.Channels, KernelInfo.Size, out Tensor wTensor);
using (DeviceMemory<float> z_gpu = DnnInstance.Gpu.AllocateDevice<float>(x.Entities * OutputInfo.Size))
// Tensors info setup
InputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width);
OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OutputInfo.Channels, OutputInfo.Height, OutputInfo.Width);

// Forward convolution
DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm);
DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, algorithm, out IntPtr size);
using (DeviceMemory<float>
x_gpu = DnnInstance.Gpu.AllocateDevice(x),
w_gpu = DnnInstance.Gpu.AllocateDevice(Weights))
using (DeviceMemory<byte> workspace_gpu = DnnInstance.Gpu.AllocateDevice<byte>(size))
{
// Tensors info setup
InputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width);
OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OutputInfo.Channels, OutputInfo.Height, OutputInfo.Width);

// Forward convolution
DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm);
DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, algorithm, out IntPtr size);
using (DeviceMemory<float>
x_gpu = DnnInstance.Gpu.AllocateDevice(x),
w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor))
using (DeviceMemory<byte> workspace_gpu = DnnInstance.Gpu.AllocateDevice<byte>(size))
{
DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, FilterDescription, w_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, OutputDescription, z_gpu.Ptr);
}
DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, FilterDescription, w_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, OutputDescription, z_gpu.Ptr);
}

// Biases
using (DeviceMemory<float> b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
{
DnnInstance.AddTensor(1, BiasDescription, b_gpu.Ptr, 1, OutputDescription, z_gpu.Ptr);
}
z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
// Biases
using (DeviceMemory<float> b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
{
DnnInstance.AddTensor(1, BiasDescription, b_gpu.Ptr, 1, OutputDescription, z_gpu.Ptr);
}
z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);

// Activation
if (ActivationFunctionType == ActivationFunctionType.Identity) z.Duplicate(out a);
else
{
DnnInstance.ActivationForward(z.Entities, z.Length, z_gpu.Ptr, z_gpu.Ptr, ActivationFunctions.Activation);
z_gpu.CopyToHost(z.Entities, z.Length, out a);
}
// Activation
if (ActivationFunctionType == ActivationFunctionType.Identity) z.Duplicate(out a);
else
{
DnnInstance.ActivationForward(z.Entities, z.Length, z_gpu.Ptr, z_gpu.Ptr, ActivationFunctions.Activation);
z_gpu.CopyToHost(z.Entities, z.Length, out a);
}
}
}

/// <inheritdoc/>
public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
{
fixed (float* pw = Weights)
using (DeviceMemory<float> delta_gpu = DnnInstance.Gpu.AllocateDevice<float>(z.Size))
{
Tensor.Reshape(pw, OutputInfo.Channels, KernelInfo.Size, out Tensor wTensor);
// Convolution
DnnInstance.GetConvolutionBackwardDataAlgorithm(FilterDescription, OutputDescription, ConvolutionDescription, InputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdDataAlgo algorithm);
DnnInstance.GetConvolutionBackwardDataWorkspaceSize(FilterDescription, OutputDescription, ConvolutionDescription, InputDescription, algorithm, out IntPtr size);
using (DeviceMemory<float> delta_gpu = DnnInstance.Gpu.AllocateDevice<float>(z.Size))
using (DeviceMemory<float>
delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1),
w_gpu = DnnInstance.Gpu.AllocateDevice(Weights))
using (DeviceMemory<byte> workspace_gpu = DnnInstance.Gpu.AllocateDevice<byte>(size))
{
// Backwards convolution
using (DeviceMemory<float>
delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1),
w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor))
using (DeviceMemory<byte> workspace_gpu = DnnInstance.Gpu.AllocateDevice<byte>(size))
{
DnnInstance.ConvolutionBackwardData(1, FilterDescription, w_gpu.Ptr, OutputDescription, delta_1_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, InputDescription, delta_gpu.Ptr);
}
DnnInstance.ConvolutionBackwardData(1, FilterDescription, w_gpu.Ptr, OutputDescription, delta_1_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, InputDescription, delta_gpu.Ptr);
}

// Activation
using (DeviceMemory<float> z_gpu = DnnInstance.Gpu.AllocateDevice(z))
{
DnnInstance.ActivationBackward(z.Entities, z.Length, z_gpu.Ptr, delta_gpu.Ptr, activationPrime);
z_gpu.CopyTo(z);
}
// Activation
using (DeviceMemory<float> z_gpu = DnnInstance.Gpu.AllocateDevice(z))
{
DnnInstance.ActivationBackward(z.Entities, z.Length, z_gpu.Ptr, delta_gpu.Ptr, activationPrime);
z_gpu.CopyTo(z);
}
}
}
Expand All @@ -159,7 +149,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ
{
DnnInstance.ConvolutionBackwardFilter(1, InputDescription, a_gpu.Ptr, OutputDescription, delta_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, FilterDescription, w_gpu.Ptr);
}
w_gpu.CopyToHost(Kernels, KernelInfo.Size, out dJdw);
w_gpu.CopyToHost(1, Weights.Length, out dJdw);
}

// Bias
Expand Down
44 changes: 18 additions & 26 deletions NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,39 +30,31 @@ public CuDnnFullyConnectedLayer(in TensorInfo input, int neurons, [NotNull] floa
#region Implementation

/// <inheritdoc/>
public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a)
public override void Forward(in Tensor x, out Tensor z, out Tensor a)
{
fixed (float* pw = Weights)
using (DeviceMemory<float>
x_gpu = DnnInstance.Gpu.AllocateDevice(x),
w_gpu = DnnInstance.Gpu.AllocateDevice(Weights),
y_gpu = DnnInstance.Gpu.AllocateDevice<float>(x.Entities * OutputInfo.Size),
b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
{
Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
using (DeviceMemory<float>
x_gpu = DnnInstance.Gpu.AllocateDevice(x),
w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor),
y_gpu = DnnInstance.Gpu.AllocateDevice<float>(x.Entities * OutputInfo.Size),
b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
{
DnnInstance.FullyConnectedForward(x.Entities, x.Length, OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, y_gpu.Ptr);
y_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
DnnInstance.ActivationForward(z.Entities, z.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation);
y_gpu.CopyToHost(z.Entities, z.Length, out a);
}
DnnInstance.FullyConnectedForward(x.Entities, x.Length, OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, y_gpu.Ptr);
y_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
DnnInstance.ActivationForward(z.Entities, z.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation);
y_gpu.CopyToHost(z.Entities, z.Length, out a);
}
}

/// <inheritdoc/>
public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
{
fixed (float* pw = Weights)
using (DeviceMemory<float>
delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1),
w_gpu = DnnInstance.Gpu.AllocateDevice(Weights),
z_gpu = DnnInstance.Gpu.AllocateDevice(z))
{
Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
using (DeviceMemory<float>
delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1),
w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor),
z_gpu = DnnInstance.Gpu.AllocateDevice(z))
{
DnnInstance.FullyConnectedBackwardData(z.Entities, InputInfo.Size, OutputInfo.Size, z_gpu.Ptr, delta_1_gpu.Ptr, w_gpu.Ptr, activationPrime);
z_gpu.CopyTo(z);
}
DnnInstance.FullyConnectedBackwardData(z.Entities, InputInfo.Size, OutputInfo.Size, z_gpu.Ptr, delta_1_gpu.Ptr, w_gpu.Ptr, activationPrime);
z_gpu.CopyTo(z);
}
}

Expand All @@ -75,7 +67,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ
w_gpu = DnnInstance.Gpu.AllocateDevice<float>(a.Length * delta.Length))
{
DnnInstance.FullyConnectedBackwardFilter(a.Entities, a.Length, delta.Length, a_gpu.Ptr, delta_gpu.Ptr, w_gpu.Ptr);
w_gpu.CopyToHost(a.Length, delta.Length, out dJdw);
w_gpu.CopyToHost(1, Weights.Length, out dJdw);
}
delta.CompressVertically(out dJdb); // Doing this on CPU is generally faster than launching the kernels
}
Expand Down
Loading

0 comments on commit 2d56d11

Please sign in to comment.