diff --git a/NeuralNetwork.NET.Cuda/APIS/CuDnnNetworkLayers.cs b/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers.cs
similarity index 85%
rename from NeuralNetwork.NET.Cuda/APIS/CuDnnNetworkLayers.cs
rename to NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers.cs
index 13c6ab2..1b6f1d6 100644
--- a/NeuralNetwork.NET.Cuda/APIS/CuDnnNetworkLayers.cs
+++ b/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers.cs
@@ -67,5 +67,18 @@ public static INetworkLayer Convolutional(
[PublicAPI]
[Pure, NotNull]
public static INetworkLayer Pooling(in TensorInfo input, in PoolingInfo info, ActivationFunctionType activation) => new CuDnnPoolingLayer(input, info, activation);
+
+ ///
+ /// Creates a new inception layer with the given input and features
+ ///
+ /// The input volume to process
+ /// The info on the operations to execute inside the layer
+ /// Indicates the desired initialization mode to use for the layer bias values
+ [PublicAPI]
+ [Pure, NotNull]
+ public static INetworkLayer Inception(
+ in TensorInfo input, in InceptionInfo info,
+ BiasInitializationMode biasMode = BiasInitializationMode.Zero)
+ => new CuDnnInceptionLayer(input, info, biasMode);
}
}
\ No newline at end of file
diff --git a/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayersDeserializer.cs b/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayersDeserializer.cs
index dfafc6a..fda4c29 100644
--- a/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayersDeserializer.cs
+++ b/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayersDeserializer.cs
@@ -31,6 +31,7 @@ private static INetworkLayer Deserialize([NotNull] Stream stream, LayerType type
case LayerType.Convolutional: return CuDnnConvolutionalLayer.Deserialize(stream);
case LayerType.Pooling: return CuDnnPoolingLayer.Deserialize(stream);
case LayerType.Softmax: return CuDnnSoftmaxLayer.Deserialize(stream);
+ case LayerType.Inception: return CuDnnInceptionLayer.Deserialize(stream);
default: return null;
}
}
diff --git a/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs b/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs
index 3426b0a..03d7ead 100644
--- a/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs
+++ b/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs
@@ -27,6 +27,39 @@ public static DeviceMemory AllocateDevice([NotNull] this Gpu gpu, in Tens
: throw new InvalidOperationException($"Failed to copy the source data on the target GPU device, [CUDA ERROR] {result}");
}
+ ///
+ /// Allocates a memory area on device memory, reading the target values at a given offset from the input
+ ///
+ /// The device to use
+ /// The source with the data to copy
+ /// The column offset for the data to read from each row
+ ///
+ [MustUseReturnValue, NotNull]
+ public static unsafe DeviceMemory AllocateDevice([NotNull] this Gpu gpu, in Tensor source, int offset, int length)
+ {
+ // Checks
+ if (source.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");
+
+ // Memory copy
+ DeviceMemory result_gpu = gpu.AllocateDevice(source.Entities * length);
+ CUDAInterop.CUDA_MEMCPY2D_st* ptSt = stackalloc CUDAInterop.CUDA_MEMCPY2D_st[1];
+ ptSt[0] = new CUDAInterop.CUDA_MEMCPY2D_st
+ {
+ srcMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_HOST,
+ srcHost = source.Ptr + sizeof(float) * offset,
+ srcPitch = new IntPtr(sizeof(float) * source.Length),
+ dstMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE,
+ dstDevice = result_gpu.Handle,
+ dstPitch = new IntPtr(sizeof(float) * length),
+ WidthInBytes = new IntPtr(sizeof(float) * length),
+ Height = new IntPtr(source.Entities)
+ };
+ CUDAInterop.cudaError_enum result = CUDAInterop.cuMemcpy2D(ptSt);
+ return result == CUDAInterop.cudaError_enum.CUDA_SUCCESS
+ ? result_gpu
+ : throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}");
+ }
+
///
/// Copies the contents of the input instance to the target host memory area
///
@@ -40,6 +73,37 @@ public static void CopyTo([NotNull] this DeviceMemory source, in Tensor d
throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}");
}
+ ///
+ /// Copies the source data into the target , splitting each individual entry into its own row
+ ///
+ /// The source memory area with the concatenated data for each entry
+ /// The destination that will store the data
+ /// The column offset for the data for each entry
+ /// The number of values to copy for each entry
+ public static unsafe void CopyTo([NotNull] this DeviceMemory source, in Tensor destination, int offset, int length)
+ {
+ // Checks
+ if (source.Length / length != destination.Entities) throw new ArgumentOutOfRangeException(nameof(length), "The input length doesn't match the given arguments");
+ if (destination.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");
+
+ // Memory copy
+ CUDAInterop.CUDA_MEMCPY2D_st* ptSt = stackalloc CUDAInterop.CUDA_MEMCPY2D_st[1];
+ ptSt[0] = new CUDAInterop.CUDA_MEMCPY2D_st
+ {
+ srcMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE,
+ srcDevice = source.Handle,
+ srcPitch = new IntPtr(sizeof(float) * length),
+ dstMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_HOST,
+ dstHost = destination.Ptr + sizeof(float) * offset,
+ dstPitch = new IntPtr(sizeof(float) * destination.Length),
+ WidthInBytes = new IntPtr(sizeof(float) * length),
+ Height = new IntPtr(destination.Entities)
+ };
+ CUDAInterop.cudaError_enum result = CUDAInterop.cuMemcpy2D(ptSt);
+ if (result != CUDAInterop.cudaError_enum.CUDA_SUCCESS)
+ throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}");
+ }
+
///
/// Copies the contents of the input to a new memory area on the unmanaged heap
///
diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs
index 4aa0982..db5feb5 100644
--- a/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs
+++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs
@@ -47,9 +47,7 @@ internal sealed class CuDnnConvolutionalLayer : ConvolutionalLayer
[NotNull]
private readonly Dnn DnnInstance = DnnService.Instance;
- ///
- /// Sets the cuDNN fields that will be used during future forward/backwards operations
- ///
+ // cuDNN fields setup
private void SetupCuDnnInfo()
{
ConvolutionDescription.Set2D(OperationInfo.VerticalPadding, OperationInfo.HorizontalPadding, OperationInfo.VerticalStride, OperationInfo.HorizontalStride, 1, 1, (Alea.cuDNN.ConvolutionMode)OperationInfo.Mode);
@@ -74,71 +72,63 @@ public CuDnnConvolutionalLayer(
#region Implementation
///
- public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a)
+ public override void Forward(in Tensor x, out Tensor z, out Tensor a)
{
- fixed (float* pw = Weights)
+ using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * OutputInfo.Size))
{
- Tensor.Reshape(pw, OutputInfo.Channels, KernelInfo.Size, out Tensor wTensor);
- using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * OutputInfo.Size))
+ // Tensors info setup
+ InputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width);
+ OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OutputInfo.Channels, OutputInfo.Height, OutputInfo.Width);
+
+ // Forward convolution
+ DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm);
+ DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, algorithm, out IntPtr size);
+ using (DeviceMemory
+ x_gpu = DnnInstance.Gpu.AllocateDevice(x),
+ w_gpu = DnnInstance.Gpu.AllocateDevice(Weights))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
{
- // Tensors info setup
- InputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width);
- OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OutputInfo.Channels, OutputInfo.Height, OutputInfo.Width);
-
- // Forward convolution
- DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm);
- DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, algorithm, out IntPtr size);
- using (DeviceMemory
- x_gpu = DnnInstance.Gpu.AllocateDevice(x),
- w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor))
- using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
- {
- DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, FilterDescription, w_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, OutputDescription, z_gpu.Ptr);
- }
+ DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, FilterDescription, w_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, OutputDescription, z_gpu.Ptr);
+ }
- // Biases
- using (DeviceMemory b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
- {
- DnnInstance.AddTensor(1, BiasDescription, b_gpu.Ptr, 1, OutputDescription, z_gpu.Ptr);
- }
- z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
+ // Biases
+ using (DeviceMemory b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
+ {
+ DnnInstance.AddTensor(1, BiasDescription, b_gpu.Ptr, 1, OutputDescription, z_gpu.Ptr);
+ }
+ z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
- // Activation
- if (ActivationFunctionType == ActivationFunctionType.Identity) z.Duplicate(out a);
- else
- {
- DnnInstance.ActivationForward(z.Entities, z.Length, z_gpu.Ptr, z_gpu.Ptr, ActivationFunctions.Activation);
- z_gpu.CopyToHost(z.Entities, z.Length, out a);
- }
+ // Activation
+ if (ActivationFunctionType == ActivationFunctionType.Identity) z.Duplicate(out a);
+ else
+ {
+ DnnInstance.ActivationForward(z.Entities, z.Length, z_gpu.Ptr, z_gpu.Ptr, ActivationFunctions.Activation);
+ z_gpu.CopyToHost(z.Entities, z.Length, out a);
}
}
}
///
- public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
+ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
{
- fixed (float* pw = Weights)
+ using (DeviceMemory delta_gpu = DnnInstance.Gpu.AllocateDevice(z.Size))
{
- Tensor.Reshape(pw, OutputInfo.Channels, KernelInfo.Size, out Tensor wTensor);
+ // Convolution
DnnInstance.GetConvolutionBackwardDataAlgorithm(FilterDescription, OutputDescription, ConvolutionDescription, InputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdDataAlgo algorithm);
DnnInstance.GetConvolutionBackwardDataWorkspaceSize(FilterDescription, OutputDescription, ConvolutionDescription, InputDescription, algorithm, out IntPtr size);
- using (DeviceMemory delta_gpu = DnnInstance.Gpu.AllocateDevice(z.Size))
+ using (DeviceMemory
+ delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1),
+ w_gpu = DnnInstance.Gpu.AllocateDevice(Weights))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
{
- // Backwards convolution
- using (DeviceMemory
- delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1),
- w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor))
- using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
- {
- DnnInstance.ConvolutionBackwardData(1, FilterDescription, w_gpu.Ptr, OutputDescription, delta_1_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, InputDescription, delta_gpu.Ptr);
- }
+ DnnInstance.ConvolutionBackwardData(1, FilterDescription, w_gpu.Ptr, OutputDescription, delta_1_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, InputDescription, delta_gpu.Ptr);
+ }
- // Activation
- using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(z))
- {
- DnnInstance.ActivationBackward(z.Entities, z.Length, z_gpu.Ptr, delta_gpu.Ptr, activationPrime);
- z_gpu.CopyTo(z);
- }
+ // Activation
+ using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(z))
+ {
+ DnnInstance.ActivationBackward(z.Entities, z.Length, z_gpu.Ptr, delta_gpu.Ptr, activationPrime);
+ z_gpu.CopyTo(z);
}
}
}
@@ -159,7 +149,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ
{
DnnInstance.ConvolutionBackwardFilter(1, InputDescription, a_gpu.Ptr, OutputDescription, delta_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, FilterDescription, w_gpu.Ptr);
}
- w_gpu.CopyToHost(Kernels, KernelInfo.Size, out dJdw);
+ w_gpu.CopyToHost(1, Weights.Length, out dJdw);
}
// Bias
diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs
index 87685d3..c21c554 100644
--- a/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs
+++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs
@@ -30,39 +30,31 @@ public CuDnnFullyConnectedLayer(in TensorInfo input, int neurons, [NotNull] floa
#region Implementation
///
- public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a)
+ public override void Forward(in Tensor x, out Tensor z, out Tensor a)
{
- fixed (float* pw = Weights)
+ using (DeviceMemory
+ x_gpu = DnnInstance.Gpu.AllocateDevice(x),
+ w_gpu = DnnInstance.Gpu.AllocateDevice(Weights),
+ y_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * OutputInfo.Size),
+ b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
{
- Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
- using (DeviceMemory
- x_gpu = DnnInstance.Gpu.AllocateDevice(x),
- w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor),
- y_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * OutputInfo.Size),
- b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
- {
- DnnInstance.FullyConnectedForward(x.Entities, x.Length, OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, y_gpu.Ptr);
- y_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
- DnnInstance.ActivationForward(z.Entities, z.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation);
- y_gpu.CopyToHost(z.Entities, z.Length, out a);
- }
+ DnnInstance.FullyConnectedForward(x.Entities, x.Length, OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, y_gpu.Ptr);
+ y_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
+ DnnInstance.ActivationForward(z.Entities, z.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation);
+ y_gpu.CopyToHost(z.Entities, z.Length, out a);
}
}
///
- public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
+ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
{
- fixed (float* pw = Weights)
+ using (DeviceMemory
+ delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1),
+ w_gpu = DnnInstance.Gpu.AllocateDevice(Weights),
+ z_gpu = DnnInstance.Gpu.AllocateDevice(z))
{
- Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
- using (DeviceMemory
- delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1),
- w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor),
- z_gpu = DnnInstance.Gpu.AllocateDevice(z))
- {
- DnnInstance.FullyConnectedBackwardData(z.Entities, InputInfo.Size, OutputInfo.Size, z_gpu.Ptr, delta_1_gpu.Ptr, w_gpu.Ptr, activationPrime);
- z_gpu.CopyTo(z);
- }
+ DnnInstance.FullyConnectedBackwardData(z.Entities, InputInfo.Size, OutputInfo.Size, z_gpu.Ptr, delta_1_gpu.Ptr, w_gpu.Ptr, activationPrime);
+ z_gpu.CopyTo(z);
}
}
@@ -75,7 +67,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ
w_gpu = DnnInstance.Gpu.AllocateDevice(a.Length * delta.Length))
{
DnnInstance.FullyConnectedBackwardFilter(a.Entities, a.Length, delta.Length, a_gpu.Ptr, delta_gpu.Ptr, w_gpu.Ptr);
- w_gpu.CopyToHost(a.Length, delta.Length, out dJdw);
+ w_gpu.CopyToHost(1, Weights.Length, out dJdw);
}
delta.CompressVertically(out dJdb); // Doing this on CPU is generally faster than launching the kernels
}
diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs
new file mode 100644
index 0000000..27d7200
--- /dev/null
+++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs
@@ -0,0 +1,751 @@
+using Alea;
+using Alea.cuDNN;
+using JetBrains.Annotations;
+using NeuralNetworkNET.APIs.Enums;
+using NeuralNetworkNET.APIs.Interfaces;
+using NeuralNetworkNET.APIs.Structs;
+using NeuralNetworkNET.Cuda.Extensions;
+using NeuralNetworkNET.Cuda.Services;
+using NeuralNetworkNET.Extensions;
+using NeuralNetworkNET.Networks.Activations;
+using NeuralNetworkNET.Networks.Activations.Delegates;
+using NeuralNetworkNET.Networks.Implementations.Layers.Abstract;
+using NeuralNetworkNET.Networks.Implementations.Layers.Helpers;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace NeuralNetworkNET.Cuda.Layers
+{
+ ///
+ /// A simplified inception module, with 4 pipelines combining 1x1 convolution, 1x1 + 3x3, 1x1 + 5x5 and pooling + 1x1
+ ///
+ internal sealed class CuDnnInceptionLayer : WeightedLayerBase, IDisposable
+ {
+ #region Parameters
+
+ ///
+ public override LayerType LayerType { get; } = LayerType.Inception;
+
+ private readonly InceptionInfo _OperationInfo;
+
+ ///
+ /// Gets the info on the inception parameters used by the layer
+ ///
+ public ref readonly InceptionInfo OperationInfo
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get => ref _OperationInfo;
+ }
+
+ #endregion
+
+ #region Private fields and parameters
+
+ // 1x1 convolution weights on first pipeline
+ private int _1x1Weights
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get => InputInfo.Channels * OperationInfo.Primary1x1ConvolutionKernels;
+ }
+
+ // 1x1 convolution weights on 3x3 pipeline
+ private int _3x3Reduce1x1Weights
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get => InputInfo.Channels * OperationInfo.Primary3x3Reduce1x1ConvolutionKernels;
+ }
+
+ // 3x3 convolution weights
+ private int _3x3Weights
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get => 3 * 3 * OperationInfo.Primary3x3Reduce1x1ConvolutionKernels * OperationInfo.Secondary3x3ConvolutionKernels;
+ }
+
+ // 1x1 convolution weights on 5x5 pipeline
+ private int _5x5Reduce1x1Weights
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get => InputInfo.Channels * OperationInfo.Primary5x5Reduce1x1ConvolutionKernels;
+ }
+
+ // 5x5 convolution weights
+ private int _5x5Weights
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get => 5 * 5 * OperationInfo.Primary5x5Reduce1x1ConvolutionKernels * OperationInfo.Secondary5x5ConvolutionKernels;
+ }
+
+ // 1x1 convolution weights on pooling pipeline
+ private int Secondary1x1Weights
+ {
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get => InputInfo.Channels * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels;
+ }
+
+ // A copy of the forward layer inputs
+ private Tensor _Inputs;
+
+ // 3x3 reduction 1x1 convolution activity
+ private Tensor _3x3Reduce1x1Z;
+
+ // 3x3 reduction 1x1 convolution activation
+ private Tensor _3x3Reduce1x1A;
+
+ // 3x3 reduction 1x1 convolution output delta
+ private Tensor _3x3Reduce1x1Delta;
+
+ // 5x5 reduction 1x1 convolution activity
+ private Tensor _5x5Reduce1x1Z;
+
+ // 5x5 reduction 1x1 convolution activation
+ private Tensor _5x5Reduce1x1A;
+
+ // 5x5 reduction 1x1 convolution output delta
+ private Tensor _5x5Reduce1x1Delta;
+
+ // Pooling output activity
+ private Tensor _PoolingZ;
+
+ // Pooling output activation
+ private Tensor _PoolingA;
+
+ // Pooling output delta
+ private Tensor _PoolingDelta;
+
+ #endregion
+
+ #region cuDNN fields
+
+ // The NCHW tensor info for the layer inputs
+ [NotNull]
+ private readonly TensorDescriptor InputDescription = new TensorDescriptor();
+
+ #region 1x1 convolution
+
+ // The NCHW info for the 1x1 convolution weights
+ [NotNull]
+ private readonly FilterDescriptor _1x1FilterDescription = new FilterDescriptor();
+
+ // The info on the 1x1 convolution bias (one value per output channel)
+ [NotNull]
+ private readonly TensorDescriptor _1x1BiasDescription = new TensorDescriptor();
+
+ // The first 1x1 convolution info
+ [NotNull]
+ private readonly ConvolutionDescriptor _1x1ConvolutionDescription = new ConvolutionDescriptor();
+
+ // The NCHW tensor info for the outputs of the first 1x1 convolution
+ [NotNull]
+ private readonly TensorDescriptor _1x1OutputDescription = new TensorDescriptor();
+
+ #endregion
+
+ #region 3x3 reduce 1x1 convolution
+
+ // The NCHW info for the 3x3 reduce 1x1 convolution weights
+ [NotNull]
+ private readonly FilterDescriptor _3x3Reduce1x1FilterDescription = new FilterDescriptor();
+
+ // The info on the 3x3 reduce 1x1 convolution bias (one value per output channel)
+ [NotNull]
+ private readonly TensorDescriptor _3x3Reduce1x1BiasDescription = new TensorDescriptor();
+
+ // The NCHW tensor info for the outputs of the 3x3 reduce 1x1 convolution
+ [NotNull]
+ private readonly TensorDescriptor _3x3Reduce1x1OutputDescription = new TensorDescriptor();
+
+ #endregion
+
+ #region 3x3 secondary convolution
+
+ // The NCHW info for the 3x3 convolution weights
+ [NotNull]
+ private readonly FilterDescriptor _3x3FilterDescription = new FilterDescriptor();
+
+ // The info on the 3x3 convolution bias (one value per output channel)
+ [NotNull]
+ private readonly TensorDescriptor _3x3BiasDescription = new TensorDescriptor();
+
+ // The first 3x3 convolution info
+ [NotNull]
+ private readonly ConvolutionDescriptor _3x3ConvolutionDescription = new ConvolutionDescriptor();
+
+ // The NCHW tensor info for the outputs of the 3x3 convolution
+ [NotNull]
+ private readonly TensorDescriptor _3x3OutputDescription = new TensorDescriptor();
+
+ #endregion
+
+ #region 3x3 reduce 1x1 convolution
+
+ // The NCHW info for the 5x5 reduce 1x1 convolution weights
+ [NotNull]
+ private readonly FilterDescriptor _5x5Reduce1x1FilterDescription = new FilterDescriptor();
+
+ // The info on the 5x5 reduce 1x1 convolution bias (one value per output channel)
+ [NotNull]
+ private readonly TensorDescriptor _5x5Reduce1x1BiasDescription = new TensorDescriptor();
+
+ // The NCHW tensor info for the outputs of the 5x5 reduce 1x1 convolution
+ [NotNull]
+ private readonly TensorDescriptor _5x5Reduce1x1OutputDescription = new TensorDescriptor();
+
+ #endregion
+
+ #region 5x5 secondary convolution
+
+ // The NCHW info for the 5x5 convolution weights
+ [NotNull]
+ private readonly FilterDescriptor _5x5FilterDescription = new FilterDescriptor();
+
+ // The info on the 5x5 convolution bias (one value per output channel)
+ [NotNull]
+ private readonly TensorDescriptor _5x5BiasDescription = new TensorDescriptor();
+
+ // The first 5x5 convolution info
+ [NotNull]
+ private readonly ConvolutionDescriptor _5x5ConvolutionDescription = new ConvolutionDescriptor();
+
+ // The NCHW tensor info for the outputs of the 5x5 convolution
+ [NotNull]
+ private readonly TensorDescriptor _5x5OutputDescription = new TensorDescriptor();
+
+ #endregion
+
+ #region Pooling pipeline
+
+ // The descriptor for the pooling operation performed by the layer
+ [NotNull]
+ private readonly PoolingDescriptor PoolingDescription = new PoolingDescriptor();
+
+ // The NCHW tensor info for the pooling outputs
+ [NotNull]
+ private readonly TensorDescriptor PoolingOutputDescription = new TensorDescriptor();
+
+ // The NCHW info for the secondary 1x1 convolution weights
+ [NotNull]
+ private readonly FilterDescriptor Secondary1x1FilterDescription = new FilterDescriptor();
+
+ // The info on the secondary 1x1 convolution bias (one value per output channel)
+ [NotNull]
+ private readonly TensorDescriptor Secondary1x1BiasDescription = new TensorDescriptor();
+
+ // The info on the secondary 1x1 convolution outputs
+ [NotNull]
+ private readonly TensorDescriptor Secondary1x1OutputDescription = new TensorDescriptor();
+
+ #endregion
+
+ ///
+ /// Gets the instance for the current layer
+ ///
+ [NotNull]
+ private readonly Dnn DnnInstance = DnnService.Instance;
+
+ // cuDNN fields setup
+ private void SetupCuDnnInfo()
+ {
+ // First 1x1 convolution
+ _1x1ConvolutionDescription.Set2D(0, 0, 1, 1, 1, 1, Alea.cuDNN.ConvolutionMode.CROSS_CORRELATION);
+ _1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Primary1x1ConvolutionKernels, InputInfo.Channels, 1, 1);
+ _1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Primary1x1ConvolutionKernels, 1, 1);
+
+ // 3x3 reduce 1x1 convolution
+ _3x3Reduce1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, InputInfo.Channels, 1, 1);
+ _3x3Reduce1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, 1, 1);
+
+ // 3x3 convolution
+ _3x3ConvolutionDescription.Set2D(1, 1, 1, 1, 1, 1, Alea.cuDNN.ConvolutionMode.CROSS_CORRELATION); // 1-padding to keep size
+ _3x3FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Secondary3x3ConvolutionKernels, _OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, 3, 3);
+ _3x3BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Secondary3x3ConvolutionKernels, 1, 1);
+
+ // 5x5 reduce 1x1 convolution
+ _5x5Reduce1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, InputInfo.Channels, 1, 1);
+ _5x5Reduce1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, 1, 1);
+
+ // 5x5 convolution
+ _5x5ConvolutionDescription.Set2D(2, 2, 1, 1, 1, 1, Alea.cuDNN.ConvolutionMode.CROSS_CORRELATION);
+ _5x5FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Secondary5x5ConvolutionKernels, _OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, 5, 5);
+ _5x5BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Secondary5x5ConvolutionKernels, 1, 1);
+
+ // Pooling
+ PoolingDescription.Set2D((Alea.cuDNN.PoolingMode)OperationInfo.Pooling, NanPropagation.PROPAGATE_NAN, 3, 3, 1, 1, 1, 1);
+
+ // Secondary 1x1 convolution
+ Secondary1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, InputInfo.Channels, 1, 1);
+ Secondary1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, 1, 1);
+ }
+
+ #endregion
+
+ internal CuDnnInceptionLayer(in TensorInfo input, in InceptionInfo info, BiasInitializationMode biasMode = BiasInitializationMode.Zero)
+ : base(input, new TensorInfo(input.Height, input.Width, info.OutputChannels),
+ WeightsProvider.NewInceptionWeights(input, info),
+ WeightsProvider.NewBiases(info.ConvolutionKernels, biasMode),
+ ActivationFunctionType.ReLU)
+ {
+ _OperationInfo = info;
+ SetupCuDnnInfo();
+ }
+
+ internal CuDnnInceptionLayer(in TensorInfo input, in InceptionInfo info, [NotNull] float[] w, [NotNull] float[] b)
+ : base(input, new TensorInfo(input.Height, input.Width, info.OutputChannels), w, b, ActivationFunctionType.ReLU)
+ {
+ _OperationInfo = info;
+ SetupCuDnnInfo();
+ }
+
+ #region Implementation
+
+ ///
+ public override void Forward(in Tensor x, out Tensor z, out Tensor a)
+ {
+ _Inputs.TryFree();
+ x.Duplicate(out _Inputs);
+ Tensor.New(x.Entities, OutputInfo.Size, out z);
+ Tensor.New(x.Entities, OutputInfo.Size, out a);
+ using (DeviceMemory
+ w_gpu = DnnInstance.Gpu.AllocateDevice(Weights),
+ b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
+ {
+ // Pointers
+ deviceptr pw_gpu = w_gpu.Ptr, pb_gpu = b_gpu.Ptr;
+
+ // First 1x1 convolution
+ using (DeviceMemory y_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels))
+ {
+ // Descriptors setup and first 1x1 convolution
+ InputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width);
+ _1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Primary1x1ConvolutionKernels, InputInfo.Height, InputInfo.Width);
+ DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, _1x1FilterDescription, _1x1ConvolutionDescription, _1x1OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm);
+ DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, _1x1FilterDescription, _1x1ConvolutionDescription, _1x1OutputDescription, algorithm, out IntPtr size);
+ using (DeviceMemory x_gpu = DnnInstance.Gpu.AllocateDevice(x))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, _1x1FilterDescription, pw_gpu, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _1x1OutputDescription, y_gpu.Ptr);
+ }
+ DnnInstance.AddTensor(1, _1x1BiasDescription, pb_gpu, 1, _1x1OutputDescription, y_gpu.Ptr);
+ y_gpu.CopyTo(z, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels);
+
+ // 1x1 convolution activation
+ DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation);
+ y_gpu.CopyTo(a, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels);
+ }
+
+ // 1x1 + 3x3 convolution
+ using (DeviceMemory
+ y1x1_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Primary3x3Reduce1x1ConvolutionKernels),
+ y_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels))
+ {
+ // 1x1 convolution
+ _3x3Reduce1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, InputInfo.Height, InputInfo.Width);
+ DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, _3x3Reduce1x1FilterDescription, _1x1ConvolutionDescription, _3x3Reduce1x1OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm);
+ DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, _3x3Reduce1x1FilterDescription, _1x1ConvolutionDescription, _3x3Reduce1x1OutputDescription, algorithm, out IntPtr size);
+ using (DeviceMemory x_gpu = DnnInstance.Gpu.AllocateDevice(x))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, _3x3Reduce1x1FilterDescription, pw_gpu += _1x1Weights, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3Reduce1x1OutputDescription, y1x1_gpu.Ptr);
+ }
+ DnnInstance.AddTensor(1, _3x3Reduce1x1BiasDescription, pb_gpu += OperationInfo.Primary1x1ConvolutionKernels, 1, _3x3Reduce1x1OutputDescription, y1x1_gpu.Ptr);
+ _3x3Reduce1x1Z.TryFree();
+ y1x1_gpu.CopyToHost(x.Entities, InputInfo.SliceSize * OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, out _3x3Reduce1x1Z);
+ DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, y1x1_gpu.Ptr, y1x1_gpu.Ptr, ActivationFunctions.Activation);
+ _3x3Reduce1x1A.TryFree();
+ y1x1_gpu.CopyToHost(x.Entities, InputInfo.SliceSize * OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, out _3x3Reduce1x1A);
+
+ // 3x3 convolution
+ _3x3OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Secondary3x3ConvolutionKernels, InputInfo.Height, InputInfo.Width);
+ DnnInstance.GetConvolutionForwardAlgorithm(_3x3Reduce1x1OutputDescription, _3x3FilterDescription, _3x3ConvolutionDescription, _3x3OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm);
+ DnnInstance.GetConvolutionForwardWorkspaceSize(_3x3Reduce1x1OutputDescription, _3x3FilterDescription, _3x3ConvolutionDescription, _3x3OutputDescription, algorithm, out size);
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ DnnInstance.ConvolutionForward(1, _3x3Reduce1x1OutputDescription, y1x1_gpu.Ptr, _3x3FilterDescription, pw_gpu += _3x3Reduce1x1Weights, _3x3ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3OutputDescription, y_gpu.Ptr);
+ }
+ DnnInstance.AddTensor(1, _3x3BiasDescription, pb_gpu += OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, 1, _3x3OutputDescription, y_gpu.Ptr);
+ y_gpu.CopyTo(z, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels);
+
+ // Activation
+ DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation);
+ y_gpu.CopyTo(a, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels);
+ }
+
+ // 1x1 + 5x5 convolution
+ using (DeviceMemory
+ y1x1_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Primary5x5Reduce1x1ConvolutionKernels),
+ y_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels))
+ {
+ // 1x1 convolution
+ _5x5Reduce1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, InputInfo.Height, InputInfo.Width);
+ DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, _5x5Reduce1x1FilterDescription, _1x1ConvolutionDescription, _5x5Reduce1x1OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm);
+ DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, _5x5Reduce1x1FilterDescription, _1x1ConvolutionDescription, _5x5Reduce1x1OutputDescription, algorithm, out IntPtr size);
+ using (DeviceMemory x_gpu = DnnInstance.Gpu.AllocateDevice(x))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, _5x5Reduce1x1FilterDescription, pw_gpu += _3x3Weights, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr);
+ }
+ DnnInstance.AddTensor(1, _5x5Reduce1x1BiasDescription, pb_gpu += OperationInfo.Secondary3x3ConvolutionKernels, 1, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr);
+ _5x5Reduce1x1Z.TryFree();
+ y1x1_gpu.CopyToHost(x.Entities, InputInfo.SliceSize * OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, out _5x5Reduce1x1Z);
+ DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, y1x1_gpu.Ptr, y1x1_gpu.Ptr, ActivationFunctions.Activation);
+ _5x5Reduce1x1A.TryFree();
+ y1x1_gpu.CopyToHost(x.Entities, InputInfo.SliceSize * OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, out _5x5Reduce1x1A);
+
+ // 5x5 convolution
+ _5x5OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Secondary5x5ConvolutionKernels, InputInfo.Height, InputInfo.Width);
+ DnnInstance.GetConvolutionForwardAlgorithm(_5x5Reduce1x1OutputDescription, _5x5FilterDescription, _5x5ConvolutionDescription, _5x5OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm);
+ DnnInstance.GetConvolutionForwardWorkspaceSize(_5x5Reduce1x1OutputDescription, _5x5FilterDescription, _5x5ConvolutionDescription, _5x5OutputDescription, algorithm, out size);
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ DnnInstance.ConvolutionForward(1, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr, _5x5FilterDescription, pw_gpu += _5x5Reduce1x1Weights, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5OutputDescription, y_gpu.Ptr);
+ }
+ DnnInstance.AddTensor(1, _5x5BiasDescription, pb_gpu += OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, 1, _5x5OutputDescription, y_gpu.Ptr);
+ y_gpu.CopyTo(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels);
+
+ // Activation
+ DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation);
+ y_gpu.CopyTo(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels);
+ }
+
+ // Pooling pipeline
+ PoolingOutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width);
+ using (DeviceMemory y_gpu = DnnInstance.Gpu.AllocateDevice(x.Size))
+ {
+ // Pooling
+ using (DeviceMemory x_gpu = DnnInstance.Gpu.AllocateDevice(x))
+ {
+ DnnInstance.PoolingForward(PoolingDescription, 1, InputDescription, x_gpu.Ptr, 0, InputDescription, y_gpu.Ptr);
+ }
+ _PoolingZ.TryFree();
+ y_gpu.CopyToHost(x.Entities, InputInfo.Size, out _PoolingZ);
+ DnnInstance.ActivationForward(x.Entities, x.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation);
+ _PoolingA.TryFree();
+ y_gpu.CopyToHost(x.Entities, InputInfo.Size, out _PoolingA);
+
+ // 1x1 convolution
+ using (DeviceMemory _1x1Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels))
+ {
+ Secondary1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, InputInfo.Height, InputInfo.Width);
+ DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, Secondary1x1FilterDescription, _1x1ConvolutionDescription, Secondary1x1OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm);
+ DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, Secondary1x1FilterDescription, _1x1ConvolutionDescription, Secondary1x1OutputDescription, algorithm, out IntPtr size);
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ DnnInstance.ConvolutionForward(1, InputDescription, y_gpu.Ptr, Secondary1x1FilterDescription, pw_gpu += _5x5Weights, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr);
+ }
+ DnnInstance.AddTensor(1, Secondary1x1BiasDescription, pb_gpu += OperationInfo.Secondary5x5ConvolutionKernels, 1, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr);
+ _1x1Output_gpu.CopyTo(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels);
+
+ // 1x1 convolution activation
+ DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, _1x1Output_gpu.Ptr, _1x1Output_gpu.Ptr, ActivationFunctions.Activation);
+ _1x1Output_gpu.CopyTo(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels);
+ }
+ }
+ }
+ }
+
+ ///
+ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
+ {
+ using (DeviceMemory
+ dx_gpu = DnnInstance.Gpu.AllocateDevice(z.Size),
+ w_gpu = DnnInstance.Gpu.AllocateDevice(Weights))
+ {
+ // First 1x1 convolution
+ DnnInstance.GetConvolutionBackwardDataAlgorithm(_1x1FilterDescription, _1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdDataAlgo algorithm);
+ DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_1x1FilterDescription, _1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, algorithm, out IntPtr size);
+ using (DeviceMemory dy_gpu = DnnInstance.Gpu.AllocateDevice(delta_1, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ DnnInstance.ConvolutionBackwardData(1, _1x1FilterDescription, w_gpu.Ptr, _1x1OutputDescription, dy_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, InputDescription, dx_gpu.Ptr);
+ }
+
+ // 1x1 + 3x3 convolution
+ using (DeviceMemory _3x3Reduce1x1z_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1Z))
+ {
+ // 3x3 backward
+ DnnInstance.GetConvolutionBackwardDataAlgorithm(_3x3FilterDescription, _3x3OutputDescription, _3x3ConvolutionDescription, _3x3Reduce1x1OutputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm);
+ DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_3x3FilterDescription, _3x3OutputDescription, _3x3ConvolutionDescription, _3x3Reduce1x1OutputDescription, algorithm, out size);
+ using (DeviceMemory
+ dy_gpu = DnnInstance.Gpu.AllocateDevice(delta_1, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels),
+ _3x3Reduce1x1dx_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1Z.Size))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ deviceptr p3x3Weights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights;
+ DnnInstance.ConvolutionBackwardData(1, _3x3FilterDescription, p3x3Weights_gpu, _3x3OutputDescription, dy_gpu.Ptr, _3x3ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3Reduce1x1OutputDescription, _3x3Reduce1x1dx_gpu.Ptr);
+ DnnInstance.ActivationBackward(_3x3Reduce1x1Z.Entities, _3x3Reduce1x1Z.Length, _3x3Reduce1x1z_gpu.Ptr, _3x3Reduce1x1dx_gpu.Ptr, ActivationFunctions.ActivationPrime);
+ _3x3Reduce1x1Delta.TryFree();
+ _3x3Reduce1x1z_gpu.CopyToHost(_3x3Reduce1x1Z.Entities, _3x3Reduce1x1Z.Length, out _3x3Reduce1x1Delta);
+ }
+
+ // 3x3 reduce 1x1 backward
+ DnnInstance.GetConvolutionBackwardDataAlgorithm(_3x3Reduce1x1FilterDescription, _3x3Reduce1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm);
+ DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_3x3Reduce1x1FilterDescription, _3x3Reduce1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, algorithm, out size);
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ deviceptr p3x3Reduce1x1Weights_gpu = w_gpu.Ptr + _1x1Weights;
+ DnnInstance.ConvolutionBackwardData(1, _3x3Reduce1x1FilterDescription, p3x3Reduce1x1Weights_gpu, _3x3Reduce1x1OutputDescription, _3x3Reduce1x1z_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 1, InputDescription, dx_gpu.Ptr);
+ }
+ }
+
+ // 1x1 + 5x5 convolution
+ using (DeviceMemory _5x5Reduce1x1z_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1Z))
+ {
+ // 5x5 backward
+ DnnInstance.GetConvolutionBackwardDataAlgorithm(_5x5FilterDescription, _5x5OutputDescription, _5x5ConvolutionDescription, _5x5Reduce1x1OutputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm);
+ DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_5x5FilterDescription, _5x5OutputDescription, _5x5ConvolutionDescription, _5x5Reduce1x1OutputDescription, algorithm, out size);
+ using (DeviceMemory
+ dy_gpu = DnnInstance.Gpu.AllocateDevice(delta_1, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels),
+ _5x5Reduce1x1dx_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1Z.Size))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ deviceptr p5x5Weights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights;
+ DnnInstance.ConvolutionBackwardData(1, _5x5FilterDescription, p5x5Weights_gpu, _5x5OutputDescription, dy_gpu.Ptr, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5Reduce1x1OutputDescription, _5x5Reduce1x1dx_gpu.Ptr);
+ DnnInstance.ActivationBackward(_5x5Reduce1x1Z.Entities, _5x5Reduce1x1Z.Length, _5x5Reduce1x1z_gpu.Ptr, _5x5Reduce1x1dx_gpu.Ptr, ActivationFunctions.ActivationPrime);
+ _5x5Reduce1x1Delta.TryFree();
+ _5x5Reduce1x1z_gpu.CopyToHost(_5x5Reduce1x1Z.Entities, _5x5Reduce1x1Z.Length, out _5x5Reduce1x1Delta);
+ }
+
+ // 5x5 reduce 1x1 backward
+ DnnInstance.GetConvolutionBackwardDataAlgorithm(_5x5Reduce1x1FilterDescription, _5x5Reduce1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm);
+ DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_5x5Reduce1x1FilterDescription, _5x5Reduce1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, algorithm, out size);
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ deviceptr p5x5Reduce1x1Weights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights;
+ DnnInstance.ConvolutionBackwardData(1, _5x5Reduce1x1FilterDescription, p5x5Reduce1x1Weights_gpu, _5x5Reduce1x1OutputDescription, _5x5Reduce1x1z_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 1, InputDescription, dx_gpu.Ptr);
+ }
+ }
+
+ // Pooling
+ using (DeviceMemory pooldy_gpu = DnnInstance.Gpu.AllocateDevice(_PoolingZ))
+ {
+ // 1x1 backward
+ DnnInstance.GetConvolutionBackwardDataAlgorithm(Secondary1x1FilterDescription, Secondary1x1OutputDescription, _1x1ConvolutionDescription, PoolingOutputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm);
+ DnnInstance.GetConvolutionBackwardDataWorkspaceSize(Secondary1x1FilterDescription, Secondary1x1OutputDescription, _1x1ConvolutionDescription, PoolingOutputDescription, algorithm, out size);
+ using (DeviceMemory
+ dy_gpu = DnnInstance.Gpu.AllocateDevice(delta_1, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels),
+ poolDx_gpu = DnnInstance.Gpu.AllocateDevice(_PoolingZ.Size))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ deviceptr p1x1PoolingWeights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights + _5x5Weights;
+ DnnInstance.ConvolutionBackwardData(1, Secondary1x1FilterDescription, p1x1PoolingWeights_gpu, Secondary1x1OutputDescription, dy_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, PoolingOutputDescription, poolDx_gpu.Ptr);
+ DnnInstance.ActivationBackward(_PoolingZ.Entities, _PoolingZ.Length, pooldy_gpu.Ptr, poolDx_gpu.Ptr, ActivationFunctions.ActivationPrime);
+ _PoolingDelta.TryFree();
+ pooldy_gpu.CopyToHost(_PoolingZ.Entities, _PoolingZ.Length, out _PoolingDelta);
+ }
+
+ // Pooling backward
+ using (DeviceMemory
+ x_gpu = DnnInstance.Gpu.AllocateDevice(_Inputs),
+ poolZ_gpu = DnnInstance.Gpu.AllocateDevice(_PoolingZ))
+ {
+ DnnInstance.PoolingBackward(PoolingDescription, 1, PoolingOutputDescription, poolZ_gpu.Ptr, PoolingOutputDescription, pooldy_gpu.Ptr, InputDescription, x_gpu.Ptr, 1, InputDescription, dx_gpu.Ptr); // TODO: finish pooling backward
+ }
+ }
+
+ // Activation backward
+ using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(z))
+ {
+ DnnInstance.ActivationBackward(z.Entities, z.Length, z_gpu.Ptr, dx_gpu.Ptr, activationPrime);
+ z_gpu.CopyTo(z);
+ }
+ }
+ }
+
+ ///
+ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJdw, out Tensor dJdb)
+ {
+ Tensor.New(1, Weights.Length, out dJdw);
+ Tensor.New(1, Biases.Length, out dJdb);
+ using (DeviceMemory a_gpu = DnnInstance.Gpu.AllocateDevice(a))
+ {
+ // 1x1 weights
+ using (DeviceMemory dy1x1_gpu = DnnInstance.Gpu.AllocateDevice(delta, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels))
+ {
+ DnnInstance.GetConvolutionBackwardFilterAlgorithm(InputDescription, _1x1OutputDescription, _1x1ConvolutionDescription, _1x1FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm);
+ DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(InputDescription, _1x1OutputDescription, _1x1ConvolutionDescription, _1x1FilterDescription, algorithm, out IntPtr size);
+ using (DeviceMemory dw_gpu = DnnInstance.Gpu.AllocateDevice(_1x1Weights))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ DnnInstance.ConvolutionBackwardFilter(1, InputDescription, a_gpu.Ptr, _1x1OutputDescription, dy1x1_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _1x1FilterDescription, dw_gpu.Ptr);
+ dw_gpu.CopyTo(dJdw, 0, _1x1Weights);
+ }
+
+ // 1x1 bias
+ using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Primary1x1ConvolutionKernels))
+ {
+ DnnInstance.ConvolutionBackwardBias(1, _1x1OutputDescription, dy1x1_gpu.Ptr, 0, _1x1BiasDescription, db_gpu.Ptr);
+ db_gpu.CopyTo(dJdb, 0, OperationInfo.Primary1x1ConvolutionKernels);
+ }
+ }
+
+ // 3x3 reduce 1x1 weights
+ using (DeviceMemory dy3x3Reduce1x1_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1Delta))
+ {
+ DnnInstance.GetConvolutionBackwardFilterAlgorithm(InputDescription, _3x3Reduce1x1OutputDescription, _1x1ConvolutionDescription, _3x3Reduce1x1FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm);
+ DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(InputDescription, _3x3Reduce1x1OutputDescription, _1x1ConvolutionDescription, _3x3Reduce1x1FilterDescription, algorithm, out IntPtr size);
+ using (DeviceMemory dw_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1Weights))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ DnnInstance.ConvolutionBackwardFilter(1, InputDescription, a_gpu.Ptr, _3x3Reduce1x1OutputDescription, dy3x3Reduce1x1_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3Reduce1x1FilterDescription, dw_gpu.Ptr);
+ dw_gpu.CopyTo(dJdw, _1x1Weights, _3x3Reduce1x1Weights);
+ }
+
+ // 3x3 reduce 1x1 bias
+ using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Primary3x3Reduce1x1ConvolutionKernels))
+ {
+ DnnInstance.ConvolutionBackwardBias(1, _3x3Reduce1x1OutputDescription, dy3x3Reduce1x1_gpu.Ptr, 0, _3x3Reduce1x1BiasDescription, db_gpu.Ptr);
+ db_gpu.CopyTo(dJdb, OperationInfo.Primary1x1ConvolutionKernels, OperationInfo.Primary3x3Reduce1x1ConvolutionKernels);
+ }
+ }
+
+ // 5x5 reduce 1x1 weights
+ using (DeviceMemory dy5x5Reduce1x1_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1Delta))
+ {
+ DnnInstance.GetConvolutionBackwardFilterAlgorithm(InputDescription, _5x5Reduce1x1OutputDescription, _1x1ConvolutionDescription, _5x5Reduce1x1FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm);
+ DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(InputDescription, _5x5Reduce1x1OutputDescription, _1x1ConvolutionDescription, _5x5Reduce1x1FilterDescription, algorithm, out IntPtr size);
+ using (DeviceMemory dw_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1Weights))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ DnnInstance.ConvolutionBackwardFilter(1, InputDescription, a_gpu.Ptr, _5x5Reduce1x1OutputDescription, dy5x5Reduce1x1_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5Reduce1x1FilterDescription, dw_gpu.Ptr);
+ dw_gpu.CopyTo(dJdw, _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights, _5x5Reduce1x1Weights);
+ }
+
+ // 5x5 reduce 1x1 bias
+ using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Primary5x5Reduce1x1ConvolutionKernels))
+ {
+ DnnInstance.ConvolutionBackwardBias(1, _5x5Reduce1x1OutputDescription, dy5x5Reduce1x1_gpu.Ptr, 0, _5x5Reduce1x1BiasDescription, db_gpu.Ptr);
+ db_gpu.CopyTo(dJdb, OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Primary3x3Reduce1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels, OperationInfo.Primary5x5Reduce1x1ConvolutionKernels);
+ }
+ }
+ }
+
+ // 3x3 weights
+ using (DeviceMemory dy3x3_gpu = DnnInstance.Gpu.AllocateDevice(delta, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels))
+ {
+ DnnInstance.GetConvolutionBackwardFilterAlgorithm(_3x3Reduce1x1OutputDescription, _3x3OutputDescription, _3x3ConvolutionDescription, _3x3FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm);
+ DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(_3x3Reduce1x1OutputDescription, _3x3OutputDescription, _3x3ConvolutionDescription, _3x3FilterDescription, algorithm, out IntPtr size);
+ using (DeviceMemory
+ a3x3Reduce1x1_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1A),
+ dw_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Weights))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ DnnInstance.ConvolutionBackwardFilter(1, _3x3Reduce1x1OutputDescription, a3x3Reduce1x1_gpu.Ptr, _3x3OutputDescription, dy3x3_gpu.Ptr, _3x3ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3FilterDescription, dw_gpu.Ptr);
+ dw_gpu.CopyTo(dJdw, _1x1Weights + _3x3Reduce1x1Weights, _3x3Weights);
+ }
+
+ // 3x3 bias
+ using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Secondary3x3ConvolutionKernels))
+ {
+ DnnInstance.ConvolutionBackwardBias(1, _3x3OutputDescription, dy3x3_gpu.Ptr, 0, _3x3BiasDescription, db_gpu.Ptr);
+ db_gpu.CopyTo(dJdb, OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, OperationInfo.Secondary3x3ConvolutionKernels);
+ }
+ }
+
+ // 5x5 weights
+ using (DeviceMemory dy5x5_gpu = DnnInstance.Gpu.AllocateDevice(delta, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels))
+ {
+ DnnInstance.GetConvolutionBackwardFilterAlgorithm(_5x5Reduce1x1OutputDescription, _5x5OutputDescription, _5x5ConvolutionDescription, _5x5FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm);
+ DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(_5x5Reduce1x1OutputDescription, _5x5OutputDescription, _5x5ConvolutionDescription, _5x5FilterDescription, algorithm, out IntPtr size);
+ using (DeviceMemory
+ a5x5Reduce1x1_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1A),
+ dw_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Weights))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ DnnInstance.ConvolutionBackwardFilter(1, _5x5Reduce1x1OutputDescription, a5x5Reduce1x1_gpu.Ptr, _5x5OutputDescription, dy5x5_gpu.Ptr, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5FilterDescription, dw_gpu.Ptr);
+ dw_gpu.CopyTo(dJdw, _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights, _5x5Weights);
+ }
+
+ // 5x5 bias
+ using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Secondary5x5ConvolutionKernels))
+ {
+ DnnInstance.ConvolutionBackwardBias(1, _5x5OutputDescription, dy5x5_gpu.Ptr, 0, _5x5BiasDescription, db_gpu.Ptr);
+ db_gpu.CopyTo(dJdb, OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Primary3x3Reduce1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, OperationInfo.Secondary5x5ConvolutionKernels);
+ }
+ }
+
+ // Pooling 1x1 convolution
+ using (DeviceMemory dy1x1Pool_gpu = DnnInstance.Gpu.AllocateDevice(delta, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels))
+ {
+ DnnInstance.GetConvolutionBackwardFilterAlgorithm(PoolingOutputDescription, Secondary1x1OutputDescription, _1x1ConvolutionDescription, Secondary1x1FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm);
+ DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(PoolingOutputDescription, Secondary1x1OutputDescription, _1x1ConvolutionDescription, Secondary1x1FilterDescription, algorithm, out IntPtr size);
+ using (DeviceMemory
+ aPool_gpu = DnnInstance.Gpu.AllocateDevice(_PoolingA),
+ dw_gpu = DnnInstance.Gpu.AllocateDevice(Secondary1x1Weights))
+ using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size))
+ {
+ DnnInstance.ConvolutionBackwardFilter(1, PoolingOutputDescription, aPool_gpu.Ptr, Secondary1x1OutputDescription, dy1x1Pool_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, Secondary1x1FilterDescription, dw_gpu.Ptr);
+ dw_gpu.CopyTo(dJdw, _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights + _5x5Weights, Secondary1x1Weights);
+ }
+
+ // Pooling 1x1 bias
+ using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Secondary1x1AfterPoolingConvolutionKernels))
+ {
+ DnnInstance.ConvolutionBackwardBias(1, Secondary1x1OutputDescription, dy1x1Pool_gpu.Ptr, 0, Secondary1x1BiasDescription, db_gpu.Ptr);
+ db_gpu.CopyTo(dJdb, OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Primary3x3Reduce1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Primary5x5Reduce1x1ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels, OperationInfo.Secondary1x1AfterPoolingConvolutionKernels);
+ }
+ }
+ }
+
+ #endregion
+
+ #region Misc
+
+ ///
+ public override INetworkLayer Clone() => new CuDnnInceptionLayer(InputInfo, OperationInfo, Weights, Biases);
+
+ ///
+ public override void Serialize(System.IO.Stream stream)
+ {
+ base.Serialize(stream);
+ stream.Write(OperationInfo);
+ }
+
+ ///
+ /// Tries to deserialize a new from the input
+ ///
+ /// The input to use to read the layer data
+ [MustUseReturnValue, CanBeNull]
+ public static INetworkLayer Deserialize([NotNull] System.IO.Stream stream)
+ {
+ if (!stream.TryRead(out TensorInfo input)) return null;
+ if (!stream.TryRead(out _)) return null;
+ if (!stream.TryRead(out _)) return null;
+ if (!stream.TryRead(out int wLength)) return null;
+ float[] weights = stream.ReadUnshuffled(wLength);
+ if (!stream.TryRead(out int bLength)) return null;
+ float[] biases = stream.ReadUnshuffled(bLength);
+ if (!stream.TryRead(out InceptionInfo info)) return null;
+ return new CuDnnInceptionLayer(input, info, weights, biases);
+ }
+
+ #endregion
+
+ #region IDisposable
+
+ ~CuDnnInceptionLayer() => Dispose();
+
+ ///
+ void IDisposable.Dispose()
+ {
+ GC.SuppressFinalize(this);
+ Dispose();
+ }
+
+ // Private Dispose method
+ private void Dispose()
+ {
+ _Inputs.TryFree();
+ _3x3Reduce1x1Z.TryFree();
+ _3x3Reduce1x1A.TryFree();
+ _3x3Reduce1x1Delta.TryFree();
+ _5x5Reduce1x1Z.TryFree();
+ _5x5Reduce1x1A.TryFree();
+ _5x5Reduce1x1Delta.TryFree();
+ _PoolingZ.TryFree();
+ _PoolingA.TryFree();
+ _PoolingDelta.TryFree();
+ }
+
+ #endregion
+ }
+}
diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs
index 0d5aced..3cca675 100644
--- a/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs
+++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs
@@ -1,4 +1,5 @@
-using Alea;
+using System;
+using Alea;
using Alea.cuDNN;
using NeuralNetworkNET.Extensions;
using NeuralNetworkNET.Cuda.Extensions;
@@ -14,10 +15,10 @@
namespace NeuralNetworkNET.Cuda.Layers
{
///
- /// A pooling layer running on cuDNN, with a 2x2 window and a stride of 2
+ /// A pooling layer running on cuDNN, with a custom pooling mode
///
[JsonObject(MemberSerialization.OptIn)]
- internal sealed class CuDnnPoolingLayer : PoolingLayer
+ internal sealed class CuDnnPoolingLayer : PoolingLayer, IDisposable
{
#region cuDNN fields
@@ -41,6 +42,16 @@ internal sealed class CuDnnPoolingLayer : PoolingLayer
#endregion
+ #region Fields
+
+ // A copy of the layer inputs
+ private Tensor _X;
+
+ // A copy of the layer output activity
+ private Tensor _Z;
+
+ #endregion
+
public CuDnnPoolingLayer(in TensorInfo input, in PoolingInfo operation, ActivationFunctionType activation) : base(input, operation, activation)
{
PoolingDescription.Set2D((PoolingMode)operation.Mode, NanPropagation.PROPAGATE_NAN, operation.WindowHeight, operation.WindowWidth, operation.VerticalPadding, operation.HorizontalPadding, operation.VerticalStride, operation.HorizontalStride);
@@ -49,6 +60,8 @@ public CuDnnPoolingLayer(in TensorInfo input, in PoolingInfo operation, Activati
///
public override void Forward(in Tensor x, out Tensor z, out Tensor a)
{
+ _X.TryFree();
+ x.Duplicate(out _X);
using (DeviceMemory
x_gpu = DnnInstance.Gpu.AllocateDevice(x),
z_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * OutputInfo.Size))
@@ -58,6 +71,8 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a)
OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OutputInfo.Channels, OutputInfo.Height, OutputInfo.Width);
DnnInstance.PoolingForward(PoolingDescription, 1, InputDescription, x_gpu.Ptr, 0, OutputDescription, z_gpu.Ptr);
z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
+ _Z.TryFree();
+ z.Duplicate(out _Z);
// Activation
DnnInstance.ActivationForward(z.Entities, z.Length, z_gpu.Ptr, z_gpu.Ptr, ActivationFunctions.Activation);
@@ -66,7 +81,24 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a)
}
///
- public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime) => z.UpscalePool2x2(delta_1, InputInfo.Channels);
+ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
+ {
+ using (DeviceMemory dx_gpu = DnnInstance.Gpu.AllocateDevice(z.Size))
+ {
+ using (DeviceMemory
+ x_gpu = DnnInstance.Gpu.AllocateDevice(_X),
+ y_gpu = DnnInstance.Gpu.AllocateDevice(_Z),
+ dy_gpu = DnnInstance.Gpu.AllocateDevice(delta_1))
+ {
+ DnnInstance.PoolingBackward(PoolingDescription, 1, OutputDescription, y_gpu.Ptr, OutputDescription, dy_gpu.Ptr, InputDescription, x_gpu.Ptr, 0, InputDescription, dx_gpu.Ptr);
+ }
+ using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(z))
+ {
+ DnnInstance.ActivationBackward(z.Entities, z.Length, z_gpu.Ptr, dx_gpu.Ptr, activationPrime);
+ z_gpu.CopyTo(z);
+ }
+ }
+ }
///
public override INetworkLayer Clone() => new CuDnnPoolingLayer(InputInfo, OperationInfo, ActivationFunctionType);
@@ -84,5 +116,25 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a)
if (!stream.TryRead(out PoolingInfo operation)) return null;
return new CuDnnPoolingLayer(input, operation, activation);
}
+
+ #region IDisposable
+
+ ~CuDnnPoolingLayer() => Dispose();
+
+ ///
+ void IDisposable.Dispose()
+ {
+ GC.SuppressFinalize(this);
+ Dispose();
+ }
+
+ // Private Dispose method
+ private void Dispose()
+ {
+ _X.TryFree();
+ _Z.TryFree();
+ }
+
+ #endregion
}
}
\ No newline at end of file
diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnSoftmaxLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnSoftmaxLayer.cs
index abc7f07..be8a0cd 100644
--- a/NeuralNetwork.NET.Cuda/Layers/CuDnnSoftmaxLayer.cs
+++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnSoftmaxLayer.cs
@@ -42,17 +42,13 @@ public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a)
using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * OutputInfo.Size))
{
// Linear pass
- fixed (float* pw = Weights)
+ using (DeviceMemory
+ x_gpu = DnnInstance.Gpu.AllocateDevice(x),
+ w_gpu = DnnInstance.Gpu.AllocateDevice(Weights),
+ b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
{
- Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
- using (DeviceMemory
- x_gpu = DnnInstance.Gpu.AllocateDevice(x),
- w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor),
- b_gpu = DnnInstance.Gpu.AllocateDevice(Biases))
- {
- DnnInstance.FullyConnectedForward(x.Entities, x.Length, OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, z_gpu.Ptr);
- z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
- }
+ DnnInstance.FullyConnectedForward(x.Entities, x.Length, OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, z_gpu.Ptr);
+ z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z);
}
// Activation
diff --git a/NeuralNetwork.NET/APIs/Enums/LayerType.cs b/NeuralNetwork.NET/APIs/Enums/LayerType.cs
index 498e059..4406841 100644
--- a/NeuralNetwork.NET/APIs/Enums/LayerType.cs
+++ b/NeuralNetwork.NET/APIs/Enums/LayerType.cs
@@ -5,10 +5,34 @@
///
public enum LayerType : byte
{
+ ///
+ /// A fully connected layer, mapping n inputs to m outputs
+ ///
FullyConnected,
+
+ ///
+ /// A convolutional layer, which keeps spatial information on the input volume
+ ///
Convolutional,
+
+ ///
+ /// A pooling layer, useful to reduce the size of the input data volume
+ ///
Pooling,
+
+ ///
+ /// A fully connected output layer, with an arbitrary activation and cost function
+ ///
Output,
- Softmax
+
+ ///
+ /// A softmax layer, with the softmax activation and log-likelyhood cost function
+ ///
+ Softmax,
+
+ ///
+ /// An inception module, combining different kinds of convolution with a pooling operation
+ ///
+ Inception
}
}
\ No newline at end of file
diff --git a/NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs b/NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs
index 00ac1b7..cb8c7ad 100644
--- a/NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs
+++ b/NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs
@@ -13,27 +13,27 @@ namespace NeuralNetworkNET.APIs.Structs
public readonly struct ConvolutionInfo : IEquatable
{
///
- /// Gets the current convolution mode for the layer
+ /// The current convolution mode for the layer
///
public readonly ConvolutionMode Mode;
///
- /// Gets the optional vertical padding for the convolution operation
+ /// The optional vertical padding for the convolution operation
///
public readonly int VerticalPadding;
///
- /// Gets the optional horizontal padding for the convolution operation
+ /// The optional horizontal padding for the convolution operation
///
public readonly int HorizontalPadding;
///
- /// Gets the vertical stride length while sliding the receptive window over the input
+ /// The vertical stride length while sliding the receptive window over the input
///
public readonly int VerticalStride;
///
- /// Gets the horizontal stride length while sliding the receptive window over the input
+ /// The horizontal stride length while sliding the receptive window over the input
///
public readonly int HorizontalStride;
@@ -45,16 +45,11 @@ private ConvolutionInfo(
int verticalPadding, int horizontalPadding,
int verticalStride, int horizontalStride)
{
- if (verticalPadding < 0) throw new ArgumentOutOfRangeException(nameof(verticalPadding), "The vertical padding must be greater than or equal to 0");
- if (horizontalPadding < 0) throw new ArgumentOutOfRangeException(nameof(horizontalPadding), "The horizontal padding must be greater than or equal to 0");
- if (verticalStride < 1) throw new ArgumentOutOfRangeException(nameof(verticalStride), "The vertical stride must be at least equal to 1");
- if (horizontalStride < 1) throw new ArgumentOutOfRangeException(nameof(horizontalStride), "The horizontal stride must be at least equal to 1");
-
+ VerticalPadding = verticalPadding >= 0 ? verticalPadding : throw new ArgumentOutOfRangeException(nameof(verticalPadding), "The vertical padding must be greater than or equal to 0");
+ HorizontalPadding = horizontalPadding >= 0 ? horizontalPadding : throw new ArgumentOutOfRangeException(nameof(horizontalPadding), "The horizontal padding must be greater than or equal to 0");
+ VerticalStride = verticalStride >= 1 ? verticalStride : throw new ArgumentOutOfRangeException(nameof(verticalStride), "The vertical stride must be at least equal to 1");
+ HorizontalStride = horizontalStride >= 1 ? horizontalStride : throw new ArgumentOutOfRangeException(nameof(horizontalStride), "The horizontal stride must be at least equal to 1");
Mode = mode;
- VerticalPadding = verticalPadding;
- HorizontalPadding = horizontalPadding;
- VerticalStride = verticalStride;
- HorizontalStride = horizontalStride;
}
///
@@ -80,6 +75,21 @@ public static ConvolutionInfo New(
#endregion
+ ///
+ /// Calculates the output size after applying a convolution operation to the input tensor
+ ///
+ /// The info on the input tensor
+ /// The size of the convolution kernels
+ /// The number of convolution kernels to be used
+ [Pure]
+ internal TensorInfo GetForwardOutputTensorInfo(in TensorInfo input, (int X, int Y) field, int kernels)
+ {
+ int
+ h = (input.Height - field.X + 2 * VerticalPadding) / VerticalStride + 1,
+ w = (input.Width - field.Y + 2 * HorizontalPadding) / HorizontalStride + 1;
+ return new TensorInfo(h, w, kernels);
+ }
+
#region Equality
///
diff --git a/NeuralNetwork.NET/APIs/Structs/InceptionInfo.cs b/NeuralNetwork.NET/APIs/Structs/InceptionInfo.cs
new file mode 100644
index 0000000..2a69aab
--- /dev/null
+++ b/NeuralNetwork.NET/APIs/Structs/InceptionInfo.cs
@@ -0,0 +1,146 @@
+using JetBrains.Annotations;
+using NeuralNetworkNET.APIs.Enums;
+using Newtonsoft.Json;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace NeuralNetworkNET.APIs.Structs
+{
+ ///
+ /// A containing all the info on an inception module
+ ///
+ [JsonObject(MemberSerialization.Fields)]
+ public readonly struct InceptionInfo : IEquatable
+ {
+ #region Fields and properties
+
+ ///
+ /// The number of 1x1 convolution kernels used in the first step of the forward pass
+ ///
+ public readonly int Primary1x1ConvolutionKernels;
+
+ ///
+ /// The number of 1x1 convolution kernels before the 3x3 convolution
+ ///
+ public readonly int Primary3x3Reduce1x1ConvolutionKernels;
+
+ ///
+ /// The number of 3x3 convolution kernels
+ ///
+ public readonly int Secondary3x3ConvolutionKernels;
+
+ ///
+ /// The number of 1x1 convolution kernels before the 5x5 convolution
+ ///
+ public readonly int Primary5x5Reduce1x1ConvolutionKernels;
+
+ ///
+ /// The number of 5x5 convolution kernels
+ ///
+ public readonly int Secondary5x5ConvolutionKernels;
+
+ ///
+ /// The kind of pooling operation performed on the layer
+ ///
+ public readonly PoolingMode Pooling;
+
+ ///
+ /// The number of 1x1 convolution kernels after the pooling operation
+ ///
+ public readonly int Secondary1x1AfterPoolingConvolutionKernels;
+
+ ///
+ /// Gets the number of output channels after the depth concatenation
+ ///
+ public int OutputChannels
+ {
+ [Pure]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get => Primary1x1ConvolutionKernels + Secondary3x3ConvolutionKernels + Secondary5x5ConvolutionKernels + Secondary1x1AfterPoolingConvolutionKernels;
+ }
+
+ ///
+ /// Gets the total number of convolution kernels for the current instance
+ ///
+ public int ConvolutionKernels
+ {
+ [Pure]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get => Primary1x1ConvolutionKernels + Primary3x3Reduce1x1ConvolutionKernels + Secondary3x3ConvolutionKernels + Primary5x5Reduce1x1ConvolutionKernels + Secondary5x5ConvolutionKernels + Secondary1x1AfterPoolingConvolutionKernels;
+ }
+
+ #endregion
+
+ #region Constructors
+
+ // Internal constructor
+ private InceptionInfo(int _1x1Kernels, int _3x3Reduce1x1Kernels, int _3x3Kernels, int _5x5Reduce1x1Kernels, int _5x5Kernels, PoolingMode poolingMode, int _1x1SecondaryKernels)
+ {
+ Primary1x1ConvolutionKernels = _1x1Kernels >= 1 ? _1x1Kernels : throw new ArgumentOutOfRangeException(nameof(_1x1Kernels), "The number of 1x1 kernels must be at least 1");
+ Primary3x3Reduce1x1ConvolutionKernels = _3x3Reduce1x1Kernels >= 1 ? _3x3Reduce1x1Kernels : throw new ArgumentOutOfRangeException(nameof(_3x3Reduce1x1Kernels), "The number of 3x3 reduction 1x1 kernels must be at least 1");
+ Secondary3x3ConvolutionKernels = _3x3Kernels >= 1 ? _3x3Kernels : throw new ArgumentOutOfRangeException(nameof(_3x3Kernels), "The number of 3x3 kernels must be at least 1");
+ Primary5x5Reduce1x1ConvolutionKernels = _5x5Reduce1x1Kernels >= 1 ? _5x5Reduce1x1Kernels : throw new ArgumentOutOfRangeException(nameof(_3x3Kernels), "The number of 5x5 reduction 1x1 kernels must be at least 1");
+ Secondary5x5ConvolutionKernels = _5x5Kernels >= 1 ? _5x5Kernels : throw new ArgumentOutOfRangeException(nameof(_5x5Kernels), "The number of 5x5 kernels must be at least 1");
+ Secondary1x1AfterPoolingConvolutionKernels = _1x1SecondaryKernels >= 1 ? _1x1SecondaryKernels : throw new ArgumentOutOfRangeException(nameof(_1x1SecondaryKernels), "The number of secondary 1x1 kernels must be at least 1");
+ Pooling = poolingMode;
+ }
+
+ ///
+ /// Creates a new inception layer description with the input parameters
+ ///
+ /// The number of 1x1 primary convolution kernels
+ /// The number of 3x3 reduction 1x1 kernels
+ /// The number of 3x3 convolution kernels
+ /// The number of 5x5 reduction 1x1 kernels
+ /// The number of 5x5 convolution kernels
+ /// The pooling mode for the pooling pipeline
+ /// The number of secondary 1x1 convolution kernels
+ [PublicAPI]
+ [Pure]
+ public static InceptionInfo New(
+ int _1x1Kernels, int _3x3Reduce1x1Kernels, int _3x3Kernels, int _5x5Reduce1x1Kernels, int _5x5Kernels,
+ PoolingMode poolingMode, int _1x1SecondaryKernels)
+ => new InceptionInfo(_1x1Kernels, _3x3Reduce1x1Kernels, _3x3Kernels, _5x5Reduce1x1Kernels, _5x5Kernels, poolingMode, _1x1SecondaryKernels);
+
+ #endregion
+
+ #region Equality
+
+ ///
+ public bool Equals(InceptionInfo other) => this == other;
+
+ ///
+ public override bool Equals(object obj) => obj is InceptionInfo info ? this == info : false;
+
+ ///
+ public override int GetHashCode()
+ {
+ int hash = 17;
+ unchecked
+ {
+ hash = hash * 31 + Primary1x1ConvolutionKernels;
+ hash = hash * 31 + Primary3x3Reduce1x1ConvolutionKernels;
+ hash = hash * 31 + Secondary3x3ConvolutionKernels;
+ hash = hash * 31 + Primary5x5Reduce1x1ConvolutionKernels;
+ hash = hash * 31 + Secondary5x5ConvolutionKernels;
+ hash = hash * 31 + Secondary1x1AfterPoolingConvolutionKernels;
+ hash = hash * 31 + (int)Pooling;
+ }
+ return hash;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static bool operator ==(in InceptionInfo a, in InceptionInfo b) => a.Primary1x1ConvolutionKernels == b.Primary1x1ConvolutionKernels &&
+ a.Primary3x3Reduce1x1ConvolutionKernels == b.Primary3x3Reduce1x1ConvolutionKernels &&
+ a.Secondary3x3ConvolutionKernels == b.Secondary3x3ConvolutionKernels &&
+ a.Primary5x5Reduce1x1ConvolutionKernels == b.Primary5x5Reduce1x1ConvolutionKernels &&
+ a.Secondary5x5ConvolutionKernels == b.Secondary5x5ConvolutionKernels &&
+ a.Secondary1x1AfterPoolingConvolutionKernels == b.Secondary1x1AfterPoolingConvolutionKernels &&
+ a.Pooling == b.Pooling;
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static bool operator !=(in InceptionInfo a, in InceptionInfo b) => !(a == b);
+
+ #endregion
+ }
+}
diff --git a/NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs b/NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs
index 7185d35..e497dd1 100644
--- a/NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs
+++ b/NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs
@@ -13,37 +13,37 @@ namespace NeuralNetworkNET.APIs.Structs
public readonly struct PoolingInfo : IEquatable
{
///
- /// Gets the current pooling mode for the layer
+ /// The current pooling mode for the layer
///
public readonly PoolingMode Mode;
///
- /// Gets the height of each input local receptive field
+ /// The height of each input local receptive field
///
public readonly int WindowHeight;
///
- /// Gets the width of each input local receptive field
+ /// The width of each input local receptive field
///
public readonly int WindowWidth;
///
- /// Gets the optional vertical padding for the pooling operation
+ /// The optional vertical padding for the pooling operation
///
public readonly int VerticalPadding;
///
- /// Gets the optional horizontal padding for the pooling operation
+ /// The optional horizontal padding for the pooling operation
///
public readonly int HorizontalPadding;
///
- /// Gets the vertical stride length while sliding the receptive window over the input
+ /// The vertical stride length while sliding the receptive window over the input
///
public readonly int VerticalStride;
///
- /// Gets the horizontal stride length while sliding the receptive window over the input
+ /// The horizontal stride length while sliding the receptive window over the input
///
public readonly int HorizontalStride;
@@ -55,20 +55,13 @@ private PoolingInfo(
int verticalPadding, int horizontalPadding,
int verticalStride, int horizontalStride)
{
- if (windowHeight <= 0) throw new ArgumentOutOfRangeException(nameof(windowHeight), "The window height must be at least equal to 1");
- if (windowWidth <= 0) throw new ArgumentOutOfRangeException(nameof(windowWidth), "The window width must be at least equal to 1");
- if (verticalPadding < 0) throw new ArgumentOutOfRangeException(nameof(verticalPadding), "The vertical padding must be greater than or equal to 0");
- if (horizontalPadding < 0) throw new ArgumentOutOfRangeException(nameof(horizontalPadding), "The horizontal padding must be greater than or equal to 0");
- if (verticalStride < 1) throw new ArgumentOutOfRangeException(nameof(verticalStride), "The vertical stride must be at least equal to 1");
- if (horizontalStride < 1) throw new ArgumentOutOfRangeException(nameof(horizontalStride), "The horizontal stride must be at least equal to 1");
-
+ WindowHeight = windowHeight > 0 ? windowHeight : throw new ArgumentOutOfRangeException(nameof(windowHeight), "The window height must be at least equal to 1");
+ WindowWidth = windowWidth > 0 ? windowWidth : throw new ArgumentOutOfRangeException(nameof(windowWidth), "The window width must be at least equal to 1");
+ VerticalPadding = verticalPadding >= 0 ? verticalPadding : throw new ArgumentOutOfRangeException(nameof(verticalPadding), "The vertical padding must be greater than or equal to 0");
+ HorizontalPadding = horizontalPadding >= 0 ? horizontalPadding : throw new ArgumentOutOfRangeException(nameof(horizontalPadding), "The horizontal padding must be greater than or equal to 0");
+ VerticalStride = verticalStride >= 1 ? verticalStride : throw new ArgumentOutOfRangeException(nameof(verticalStride), "The vertical stride must be at least equal to 1");
+ HorizontalStride = horizontalStride >= 1 ? horizontalStride : throw new ArgumentOutOfRangeException(nameof(horizontalStride), "The horizontal stride must be at least equal to 1");
Mode = mode;
- WindowHeight = windowHeight;
- WindowWidth = windowWidth;
- VerticalPadding = verticalPadding;
- HorizontalPadding = horizontalPadding;
- VerticalStride = verticalStride;
- HorizontalStride = horizontalStride;
}
///
@@ -96,6 +89,19 @@ public static PoolingInfo New(
#endregion
+ ///
+ /// Calculates the output size after applying a pooling operation to the input tensor
+ ///
+ /// The info on the input tensor
+ [Pure]
+ internal TensorInfo GetForwardOutputTensorInfo(in TensorInfo input)
+ {
+ int
+ h = (input.Height - WindowHeight + 2 * VerticalPadding) / VerticalStride + 1,
+ w = (input.Width - WindowWidth + 2 * HorizontalPadding) / HorizontalStride + 1;
+ return new TensorInfo(h, w, input.Channels);
+ }
+
#region Equality
///
diff --git a/NeuralNetwork.NET/APIs/Structs/Tensor.cs b/NeuralNetwork.NET/APIs/Structs/Tensor.cs
index cf57a6d..cbc8732 100644
--- a/NeuralNetwork.NET/APIs/Structs/Tensor.cs
+++ b/NeuralNetwork.NET/APIs/Structs/Tensor.cs
@@ -17,24 +17,39 @@ namespace NeuralNetworkNET.APIs.Structs
public readonly struct Tensor
{
///
- /// Gets the value to the allocated memory
+ /// The value to the allocated memory
///
public readonly IntPtr Ptr;
///
- /// Gets the number of entities (rows) in the current
+ /// The number of entities (rows) in the current
///
public readonly int Entities;
///
- /// Gets the size of each entity in the current
+ /// The size of each entity in the current
///
public readonly int Length;
///
- /// Gets the total size (the number of values) in the current
+ /// The total size (the number of values) in the current
///
- public int Size => Entities * Length;
+ public int Size
+ {
+ [Pure]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get => Entities * Length;
+ }
+
+ ///
+ /// Gets whether or not the current instance is linked to an allocated memory area
+ ///
+ public bool Null
+ {
+ [Pure]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ get => Ptr == IntPtr.Zero;
+ }
#region Initialization
@@ -186,12 +201,35 @@ public float[] ToArray()
#endregion
+ ///
+ /// Creates a new instance by wrapping the current memory area
+ ///
+ /// The height of the final matrix
+ /// The width of the final matrix
+ /// The resulting instance
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void Reshape(int n, int chw, out Tensor tensor)
+ {
+ if (n * chw != Size) throw new ArgumentException("Invalid input resized shape");
+ tensor = new Tensor(Ptr, n, chw);
+ }
+
///
/// Frees the memory associated with the current instance
///
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public void Free() => Marshal.FreeHGlobal(Ptr);
+ ///
+ /// Frees the memory associated with the current instance, if needed
+ ///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public void TryFree()
+ {
+ if (Ptr != IntPtr.Zero)
+ Marshal.FreeHGlobal(Ptr);
+ }
+
// Implicit pointer conversion
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe implicit operator float*(in Tensor tensor) => (float*)tensor.Ptr.ToPointer();
@@ -233,7 +271,7 @@ unsafe float[] ExtractRow(int i)
// Spawn the sequence
int
max = MaximumItemsCount / obj.Length,
- up = max.Min(MaximumRowsCount).Max(1);
+ up = max.Min(MaximumRowsCount).Max(1).Min(obj.Entities);
for (int i = 0; i < up; i++)
yield return ExtractRow(i);
}
diff --git a/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs b/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs
index d774cf3..0853a92 100644
--- a/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs
+++ b/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs
@@ -13,20 +13,22 @@ namespace NeuralNetworkNET.APIs.Structs
[DebuggerDisplay("Height: {Height}, Width: {Width}, Channels: {Channels}, Size: {Size}")]
public readonly struct TensorInfo : IEquatable
{
+ #region Fields and parameters
+
///
- /// Gets the height of each 2D slice
+ /// The height of each 2D slice
///
[JsonProperty(nameof(Height), Order = 1)]
public readonly int Height;
///
- /// Gets the width of each 2D slice
+ /// The width of each 2D slice
///
[JsonProperty(nameof(Width), Order = 2)]
public readonly int Width;
///
- /// Gets the number of channels for the tensor description
+ /// The number of channels for the tensor description
///
[JsonProperty(nameof(Channels), Order = 3)]
public readonly int Channels;
@@ -52,13 +54,16 @@ public int SliceSize
get => Height * Width;
}
+ #endregion
+
+ #region Constructors
+
internal TensorInfo(int height, int width, int channels)
{
if (height * width <= 0) throw new ArgumentException("The height and width of the kernels must be positive values");
- if (channels < 1) throw new ArgumentOutOfRangeException(nameof(channels), "The number of channels must be at least equal to 1");
Height = height;
Width = width;
- Channels = channels;
+ Channels = channels >= 1 ? channels : throw new ArgumentOutOfRangeException(nameof(channels), "The number of channels must be at least equal to 1");
}
///
@@ -87,6 +92,8 @@ internal TensorInfo(int height, int width, int channels)
[Pure]
public static TensorInfo CreateLinear(int size) => new TensorInfo(1, 1, size);
+ #endregion
+
#region Equality
///
diff --git a/NeuralNetwork.NET/Extensions/MatrixExtensions.cs b/NeuralNetwork.NET/Extensions/MatrixExtensions.cs
index 069ee72..148d897 100644
--- a/NeuralNetwork.NET/Extensions/MatrixExtensions.cs
+++ b/NeuralNetwork.NET/Extensions/MatrixExtensions.cs
@@ -706,8 +706,9 @@ public static unsafe float[] BlockCopy([NotNull] this float[] v)
///
/// The first to test
/// The second to test
- /// The comparison threshold
- public static unsafe bool ContentEquals(in this Tensor m, in Tensor o, float delta = 1e-6f)
+ /// The relative comparison threshold
+ /// The relative comparison threshold
+ public static unsafe bool ContentEquals(in this Tensor m, in Tensor o,float absolute = 1e-6f, float relative = 1e-6f)
{
if (m.Ptr == IntPtr.Zero && o.Ptr == IntPtr.Zero) return true;
if (m.Ptr == IntPtr.Zero || o.Ptr == IntPtr.Zero) return false;
@@ -715,7 +716,7 @@ public static unsafe bool ContentEquals(in this Tensor m, in Tensor o, float del
float* pm = m, po = o;
int items = m.Size;
for (int i = 0; i < items; i++)
- if (!pm[i].EqualsWithDelta(po[i], delta)) return false;
+ if (!pm[i].EqualsWithDelta(po[i], absolute, relative)) return false;
return true;
}
@@ -724,8 +725,9 @@ public static unsafe bool ContentEquals(in this Tensor m, in Tensor o, float del
///
/// The first matrix to test
/// The second matrix to test
- /// The comparison threshold
- public static bool ContentEquals([CanBeNull] this float[,] m, [CanBeNull] float[,] o, float delta = 1e-6f)
+ /// The relative comparison threshold
+ /// The relative comparison threshold
+ public static bool ContentEquals([CanBeNull] this float[,] m, [CanBeNull] float[,] o, float absolute = 1e-6f, float relative = 1e-6f)
{
if (m == null && o == null) return true;
if (m == null || o == null) return false;
@@ -733,7 +735,7 @@ public static bool ContentEquals([CanBeNull] this float[,] m, [CanBeNull] float[
m.GetLength(1) != o.GetLength(1)) return false;
for (int i = 0; i < m.GetLength(0); i++)
for (int j = 0; j < m.GetLength(1); j++)
- if (!m[i, j].EqualsWithDelta(o[i, j], delta)) return false;
+ if (!m[i, j].EqualsWithDelta(o[i, j], absolute, relative)) return false;
return true;
}
@@ -742,14 +744,15 @@ public static bool ContentEquals([CanBeNull] this float[,] m, [CanBeNull] float[
///
/// The first vector to test
/// The second vector to test
- /// The comparison threshold
- public static bool ContentEquals([CanBeNull] this float[] v, [CanBeNull] float[] o, float delta = 1e-6f)
+ /// The relative comparison threshold
+ /// The relative comparison threshold
+ public static bool ContentEquals([CanBeNull] this float[] v, [CanBeNull] float[] o, float absolute = 1e-6f, float relative = 1e-6f)
{
if (v == null && o == null) return true;
if (v == null || o == null) return false;
if (v.Length != o.Length) return false;
for (int i = 0; i < v.Length; i++)
- if (!v[i].EqualsWithDelta(o[i], delta)) return false;
+ if (!v[i].EqualsWithDelta(o[i], absolute, relative)) return false;
return true;
}
diff --git a/NeuralNetwork.NET/Extensions/MiscExtensions.cs b/NeuralNetwork.NET/Extensions/MiscExtensions.cs
index 914dfde..d3b2c76 100644
--- a/NeuralNetwork.NET/Extensions/MiscExtensions.cs
+++ b/NeuralNetwork.NET/Extensions/MiscExtensions.cs
@@ -32,6 +32,15 @@ public static TOut To([NotNull] this TIn item) where TOut : class, TI
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int Max(this int a, int b) => a >= b ? a : b;
+ ///
+ /// Returns the maximum value between two numbers
+ ///
+ /// The first number
+ /// The second number
+ [Pure]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static float Max(this float a, float b) => a >= b ? a : b;
+
///
/// Returns the minimum value between two numbers
///
@@ -54,16 +63,19 @@ public static TOut To([NotNull] this TIn item) where TOut : class, TI
///
/// The first value
/// The second value
- /// The comparison threshold
+ /// The relative comparison threshold
+ /// The relative comparison threshold
[Pure]
- public static bool EqualsWithDelta(this float value, float other, float delta = 1e-6f)
+ public static bool EqualsWithDelta(this float value, float other, float absolute = 1e-6f, float relative = 1e-6f)
{
if (float.IsNaN(value) ^ float.IsNaN(other)) return false;
if (float.IsNaN(value) && float.IsNaN(other)) return true;
if (float.IsInfinity(value) ^ float.IsInfinity(other)) return false;
if (float.IsPositiveInfinity(value) && float.IsPositiveInfinity(other)) return true;
if (float.IsNegativeInfinity(value) && float.IsNegativeInfinity(other)) return true;
- return (value - other).Abs() < delta;
+ float abs = (value - other).Abs();
+ if (abs < absolute) return true;
+ return abs <= absolute.Max(relative * value.Abs().Max(other.Abs()));
}
///
diff --git a/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs b/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs
index 0be71cf..476ac33 100644
--- a/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs
+++ b/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs
@@ -58,8 +58,8 @@ public ref readonly TensorInfo KernelInfo
#endregion
public ConvolutionalLayer(in TensorInfo input, in ConvolutionInfo operation, (int X, int Y) kernelSize, int kernels, ActivationFunctionType activation, BiasInitializationMode biasMode)
- : base(input, new TensorInfo(input.Height - kernelSize.X + 1, input.Width - kernelSize.Y + 1, kernels),
- WeightsProvider.NewConvolutionalKernels(input.Channels, kernelSize.X, kernelSize.Y, kernels),
+ : base(input, operation.GetForwardOutputTensorInfo(input, kernelSize, kernels),
+ WeightsProvider.NewConvolutionalKernels(input, kernelSize.X, kernelSize.Y, kernels),
WeightsProvider.NewBiases(kernels, biasMode), activation)
{
_OperationInfo = operation;
@@ -107,7 +107,8 @@ public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, Activa
public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJdw, out Tensor dJdb)
{
a.Rotate180(InputInfo.Channels, out Tensor a180);
- a180.ConvoluteGradient(InputInfo, delta, OutputInfo, out dJdw);
+ a180.ConvoluteGradient(InputInfo, delta, OutputInfo, out Tensor dJdwM);
+ dJdwM.Reshape(1, Weights.Length, out dJdw);
a180.Free();
delta.CompressVertically(OutputInfo.Channels, out dJdb);
}
diff --git a/NeuralNetwork.NET/Networks/Implementations/Layers/FullyConnectedLayer.cs b/NeuralNetwork.NET/Networks/Implementations/Layers/FullyConnectedLayer.cs
index 9086c89..6fb8cc6 100644
--- a/NeuralNetwork.NET/Networks/Implementations/Layers/FullyConnectedLayer.cs
+++ b/NeuralNetwork.NET/Networks/Implementations/Layers/FullyConnectedLayer.cs
@@ -22,7 +22,7 @@ internal class FullyConnectedLayer : WeightedLayerBase
public FullyConnectedLayer(in TensorInfo input, int neurons, ActivationFunctionType activation, WeightsInitializationMode weightsMode, BiasInitializationMode biasMode)
: base(input, TensorInfo.CreateLinear(neurons),
- WeightsProvider.NewFullyConnectedWeights(input.Size, neurons, weightsMode),
+ WeightsProvider.NewFullyConnectedWeights(input, neurons, weightsMode),
WeightsProvider.NewBiases(neurons, biasMode), activation) { }
public FullyConnectedLayer(in TensorInfo input, int neurons, [NotNull] float[] weights, [NotNull] float[] biases, ActivationFunctionType activation)
@@ -59,7 +59,8 @@ public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, Activa
public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJdw, out Tensor dJdb)
{
a.Transpose(out Tensor at);
- at.Multiply(delta, out dJdw);
+ at.Multiply(delta, out Tensor dJdwM);
+ dJdwM.Reshape(1, Weights.Length, out dJdw);
at.Free();
delta.CompressVertically(out dJdb);
}
diff --git a/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs b/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs
index 87ddb9c..23cd6be 100644
--- a/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs
+++ b/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs
@@ -13,35 +13,34 @@ namespace NeuralNetworkNET.Networks.Implementations.Layers.Helpers
internal static class WeightsProvider
{
///
- /// Creates a weight matrix for a fully connected layer
+ /// Creates a weights vector for a fully connected layer
///
- /// The input neurons
+ /// The layer inputs
/// The output neurons
/// The initialization mode for the weights
[Pure, NotNull]
- public static unsafe float[] NewFullyConnectedWeights(int inputs, int outputs, WeightsInitializationMode mode)
+ public static unsafe float[] NewFullyConnectedWeights(in TensorInfo input, int outputs, WeightsInitializationMode mode)
{
- if (inputs <= 0 || outputs <= 0) throw new ArgumentOutOfRangeException("The inputs and outputs must be positive numbers");
- float[] weights = new float[inputs * outputs];
+ float[] weights = new float[input.Size * outputs];
fixed (float* pw = weights)
{
- Tensor.Reshape(pw, inputs, outputs, out Tensor wTensor);
+ Tensor.Reshape(pw, input.Size, outputs, out Tensor wTensor);
switch (mode)
{
case WeightsInitializationMode.LeCunUniform:
- KerasWeightsProvider.FillWithLeCunUniform(wTensor, inputs);
+ KerasWeightsProvider.FillWithLeCunUniform(wTensor, input.Size);
break;
case WeightsInitializationMode.GlorotNormal:
- KerasWeightsProvider.FillWithGlorotNormal(wTensor, inputs, outputs);
+ KerasWeightsProvider.FillWithGlorotNormal(wTensor, input.Size, outputs);
break;
case WeightsInitializationMode.GlorotUniform:
- KerasWeightsProvider.FillWithGlorotUniform(wTensor, inputs, outputs);
+ KerasWeightsProvider.FillWithGlorotUniform(wTensor, input.Size, outputs);
break;
case WeightsInitializationMode.HeEtAlNormal:
- KerasWeightsProvider.FillWithHeEtAlNormal(wTensor, inputs);
+ KerasWeightsProvider.FillWithHeEtAlNormal(wTensor, input.Size);
break;
case WeightsInitializationMode.HeEtAlUniform:
- KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, inputs);
+ KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Size);
break;
default: throw new ArgumentOutOfRangeException(nameof(mode), "Unsupported weights initialization mode");
}
@@ -50,21 +49,67 @@ public static unsafe float[] NewFullyConnectedWeights(int inputs, int outputs, W
}
///
- /// Creates a weight matrix for a convolutional layer
+ /// Creates a weights vector for a convolutional layer
///
- /// The depth of the input volume
+ /// The layer inputs
/// The height of each kernel
/// The width of each kernel
/// The number of kernels in the layer
[Pure, NotNull]
- public static unsafe float[] NewConvolutionalKernels(int inputDepth, int kernelsHeight, int kernelsWidth, int kernels)
+ public static unsafe float[] NewConvolutionalKernels(in TensorInfo input, int kernelsHeight, int kernelsWidth, int kernels)
{
if (kernels <= 0) throw new ArgumentOutOfRangeException(nameof(kernels), "The number of kernels must be positive");
- float[] weights = new float[kernels * kernelsHeight * kernelsWidth * inputDepth];
+ float[] weights = new float[kernels * kernelsHeight * kernelsWidth * input.Channels];
fixed (float* pw = weights)
{
Tensor.Reshape(pw, 1, weights.Length, out Tensor wTensor);
- KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, inputDepth * kernelsHeight * kernelsWidth);
+ KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels * kernelsHeight * kernelsWidth);
+ }
+ return weights;
+ }
+
+ ///
+ /// Creates a new mixed weights vector for an inception layer
+ ///
+ /// The layer inputs
+ /// The info on the target inception layer
+ [Pure, NotNull]
+ public static unsafe float[] NewInceptionWeights(in TensorInfo input, in InceptionInfo info)
+ {
+ // Setup
+ int
+ _1x1Length = input.Channels * info.Primary1x1ConvolutionKernels,
+ _3x3Reduce1x1Length = input.Channels * info.Primary3x3Reduce1x1ConvolutionKernels,
+ _3x3Length = 3 * 3 * info.Primary3x3Reduce1x1ConvolutionKernels * info.Secondary3x3ConvolutionKernels,
+ _5x5Reduce1x1Length = input.Channels * info.Primary5x5Reduce1x1ConvolutionKernels,
+ _5x5Length = 5 * 5 * info.Primary5x5Reduce1x1ConvolutionKernels * info.Secondary5x5ConvolutionKernels,
+ secondary1x1Length = input.Channels * info.Secondary1x1AfterPoolingConvolutionKernels;
+ float[] weights = new float[_1x1Length + _3x3Reduce1x1Length + _3x3Length + _5x5Reduce1x1Length + _5x5Length + secondary1x1Length];
+ fixed (float* pw = weights)
+ {
+ // 1x1
+ Tensor.Reshape(pw, 1, _1x1Length, out Tensor wTensor);
+ KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels);
+
+ // 3x3 reduce 1x1
+ Tensor.Reshape(pw + _1x1Length, 1, _3x3Reduce1x1Length, out wTensor);
+ KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels);
+
+ // 3x3
+ Tensor.Reshape(pw + _1x1Length + _3x3Reduce1x1Length, 1, _3x3Length, out wTensor);
+ KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, 3 * 3 * info.Primary3x3Reduce1x1ConvolutionKernels);
+
+ // 5x5 reduce 1x1
+ Tensor.Reshape(pw + _1x1Length + _3x3Reduce1x1Length + _3x3Length, 1, _5x5Reduce1x1Length, out wTensor);
+ KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels);
+
+ // 5x5
+ Tensor.Reshape(pw + _1x1Length + _3x3Reduce1x1Length + _3x3Length + _5x5Reduce1x1Length, 1, _5x5Length, out wTensor);
+ KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, 5 * 5 * info.Primary5x5Reduce1x1ConvolutionKernels);
+
+ // Pool 1x1
+ Tensor.Reshape(pw + _1x1Length + _3x3Reduce1x1Length + _3x3Length + _5x5Reduce1x1Length + _5x5Length, 1, secondary1x1Length, out wTensor);
+ KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels);
}
return weights;
}
diff --git a/NeuralNetwork.NET/Networks/Implementations/Layers/PoolingLayer.cs b/NeuralNetwork.NET/Networks/Implementations/Layers/PoolingLayer.cs
index 6cba6bc..2b6c4ab 100644
--- a/NeuralNetwork.NET/Networks/Implementations/Layers/PoolingLayer.cs
+++ b/NeuralNetwork.NET/Networks/Implementations/Layers/PoolingLayer.cs
@@ -34,10 +34,7 @@ public ref readonly PoolingInfo OperationInfo
}
public PoolingLayer(in TensorInfo input, in PoolingInfo operation, ActivationFunctionType activation)
- : base(input, new TensorInfo(
- input.Height / 2 + (input.Height % 2 == 0 ? 0 : 1),
- input.Width / 2 + (input.Width % 2 == 0 ? 0 : 1),
- input.Channels), activation)
+ : base(input, operation.GetForwardOutputTensorInfo(input), activation)
=> _OperationInfo = operation;
///
diff --git a/NeuralNetwork.NET/Networks/Implementations/NeuralNetwork.cs b/NeuralNetwork.NET/Networks/Implementations/NeuralNetwork.cs
index 493282f..6707591 100644
--- a/NeuralNetwork.NET/Networks/Implementations/NeuralNetwork.cs
+++ b/NeuralNetwork.NET/Networks/Implementations/NeuralNetwork.cs
@@ -248,7 +248,7 @@ internal unsafe void Backpropagate(in TrainingBatch batch, float dropout, [NotNu
* Multiply the previous delta with the transposed weights of the following layer
* Compute d(l), the Hadamard product of z'(l) and delta(l + 1) * W(l + 1)T */
_Layers[l + 1].Backpropagate(*deltas[l + 1], zList[l], _Layers[l].ActivationFunctions.ActivationPrime);
- if (dropoutMasks[l].Ptr != IntPtr.Zero) zList[l].InPlaceHadamardProduct(dropoutMasks[l]);
+ if (!dropoutMasks[l].Null) zList[l].InPlaceHadamardProduct(dropoutMasks[l]);
deltas[l] = zList + l;
}
@@ -285,7 +285,7 @@ internal unsafe void Backpropagate(in TrainingBatch batch, float dropout, [NotNu
{
zList[i].Free();
aList[i].Free();
- if (dropoutMasks[i].Ptr != IntPtr.Zero) dropoutMasks[i].Free();
+ dropoutMasks[i].TryFree();
}
zList[_Layers.Length - 1].Free();
aList[_Layers.Length - 1].Free();
diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnInceptionLayerTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnInceptionLayerTest.cs
new file mode 100644
index 0000000..66ebcd5
--- /dev/null
+++ b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnInceptionLayerTest.cs
@@ -0,0 +1,280 @@
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+using NeuralNetworkNET.APIs.Enums;
+using NeuralNetworkNET.APIs.Structs;
+using NeuralNetworkNET.Cuda.Layers;
+using NeuralNetworkNET.Extensions;
+using NeuralNetworkNET.Networks.Activations;
+using NeuralNetworkNET.Networks.Implementations.Layers.Helpers;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace NeuralNetworkNET.Cuda.Unit
+{
+ ///
+ /// Test class for the cuDNN inception layer
+ ///
+ [TestClass]
+ [TestCategory(nameof(CuDnnInceptionLayerTest))]
+ public class CuDnnInceptionLayerTest
+ {
+ [TestMethod]
+ public unsafe void Inception1x1()
+ {
+ float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 32 * 32 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 32 * 32 * 3);
+ CuDnnConvolutionalLayer conv = new CuDnnConvolutionalLayer(TensorInfo.CreateForRgbImage(32, 32), ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian);
+ CuDnnInceptionLayer inception = new CuDnnInceptionLayer(conv.InputInfo, InceptionInfo.New(10, 10, 10, 10, 10, PoolingMode.Max, 10));
+ fixed (float* pw = inception.Weights)
+ Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length));
+ Buffer.BlockCopy(conv.Weights, 0, inception.Weights, 0, sizeof(float) * conv.Weights.Length);
+ Buffer.BlockCopy(conv.Biases, 0, inception.Biases, 0, sizeof(float) * conv.Biases.Length);
+ fixed (float* px = x)
+ {
+ // Forward + Z
+ Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor);
+ conv.Forward(xTensor, out Tensor zConv, out Tensor aConv);
+ inception.Forward(xTensor, out Tensor zInc, out Tensor aInc);
+ Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped);
+ float* pzInc = (float*)zInc.Ptr.ToPointer(), preshaped = (float*)reshaped.Ptr.ToPointer();
+ for (int i = 0; i < zConv.Entities; i++)
+ Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length);
+ Assert.IsTrue(reshaped.ContentEquals(zConv));
+
+ // A
+ float* paInc = (float*)aInc.Ptr.ToPointer();
+ for (int i = 0; i < aConv.Entities; i++)
+ Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length);
+ Assert.IsTrue(reshaped.ContentEquals(aConv));
+
+ // Backpropagate
+ Tensor.New(xTensor.Entities, xTensor.Length, out Tensor z1);
+ KerasWeightsProvider.FillWithHeEtAlUniform(z1, 10);
+ z1.Duplicate(out Tensor z2);
+ conv.Backpropagate(aConv, z1, ActivationFunctions.ReLUPrime);
+ inception.Backpropagate(aInc, z2, ActivationFunctions.ReLUPrime);
+ Assert.IsTrue(z1.ContentEquals(z2));
+
+ // Gradient
+ Tensor.New(xTensor.Entities, xTensor.Length, out Tensor a);
+ KerasWeightsProvider.FillWithHeEtAlUniform(a, 10);
+ conv.ComputeGradient(a, aConv, out Tensor dJdwConv, out Tensor dJdbConv);
+ inception.ComputeGradient(a, aInc, out Tensor dJdwInc, out Tensor dJdbInc);
+ Tensor.New(1, dJdwConv.Length, out Tensor dJdwInc0);
+ Buffer.MemoryCopy((float*)dJdwInc.Ptr.ToPointer(), (float*)dJdwInc0.Ptr.ToPointer(), sizeof(float) * dJdwInc0.Size, sizeof(float) * dJdwInc0.Size);
+ Tensor.New(1, dJdbConv.Length, out Tensor dJdbInc0);
+ Buffer.MemoryCopy((float*)dJdbInc.Ptr.ToPointer(), (float*)dJdbInc0.Ptr.ToPointer(), sizeof(float) * dJdbInc0.Size, sizeof(float) * dJdbInc0.Size);
+ Assert.IsTrue(dJdwConv.ContentEquals(dJdwInc0, 1e-5f));
+ Assert.IsTrue(dJdbConv.ContentEquals(dJdbInc0, 1e-5f));
+
+ // Cleanup
+ dJdwConv.Free();
+ dJdbConv.Free();
+ dJdwInc.Free();
+ dJdbInc.Free();
+ dJdwInc0.Free();
+ dJdbInc0.Free();
+ z1.Free();
+ z2.Free();
+ zConv.Free();
+ aConv.Free();
+ zInc.Free();
+ aInc.Free();
+ reshaped.Free();
+ }
+ }
+
+ [TestMethod]
+ public unsafe void Inception3x3Pipeline()
+ {
+ float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 32 * 32 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 32 * 32 * 3);
+ CuDnnConvolutionalLayer
+ conv1 = new CuDnnConvolutionalLayer(TensorInfo.CreateForRgbImage(32, 32), ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian),
+ conv2 = new CuDnnConvolutionalLayer(conv1.OutputInfo, ConvolutionInfo.New(ConvolutionMode.CrossCorrelation, 1, 1), (3, 3), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian);
+ CuDnnInceptionLayer inception = new CuDnnInceptionLayer(TensorInfo.CreateForRgbImage(32, 32), InceptionInfo.New(10, 10, 10, 10, 10, PoolingMode.Max, 10));
+ fixed (float* pw = inception.Weights)
+ Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length));
+ Buffer.BlockCopy(conv1.Weights, 0, inception.Weights, sizeof(float) * 3 * 10, sizeof(float) * conv1.Weights.Length);
+ Buffer.BlockCopy(conv2.Weights, 0, inception.Weights, sizeof(float) * 3 * 10 + sizeof(float) * conv1.Weights.Length, sizeof(float) * conv2.Weights.Length);
+ Buffer.BlockCopy(conv1.Biases, 0, inception.Biases, sizeof(float) * 10, sizeof(float) * conv1.Biases.Length);
+ Buffer.BlockCopy(conv2.Biases, 0, inception.Biases, sizeof(float) * 20, sizeof(float) * conv2.Biases.Length);
+ fixed (float* px = x)
+ {
+ // Forward + Z
+ Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor);
+ conv1.Forward(xTensor, out Tensor zTemp, out Tensor aTemp);
+ conv2.Forward(aTemp, out Tensor zConv, out Tensor aConv);
+ inception.Forward(xTensor, out Tensor zInc, out Tensor aInc);
+ Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped);
+ float* pzInc = (float*)zInc.Ptr.ToPointer() + 32 * 32 * 10, preshaped = (float*)reshaped.Ptr.ToPointer();
+ for (int i = 0; i < zConv.Entities; i++)
+ Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length);
+ Assert.IsTrue(reshaped.ContentEquals(zConv));
+
+ // A
+ float* paInc = (float*)aInc.Ptr.ToPointer() + 32 * 32 * 10;
+ for (int i = 0; i < aConv.Entities; i++)
+ Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length);
+ Assert.IsTrue(reshaped.ContentEquals(aConv));
+
+ // Backpropagation
+ Tensor.New(xTensor.Entities, xTensor.Length, out Tensor z1);
+ KerasWeightsProvider.FillWithHeEtAlUniform(z1, 10);
+ z1.Duplicate(out Tensor z2);
+ conv2.Backpropagate(aConv, zTemp, conv1.ActivationFunctions.ActivationPrime);
+ conv1.Backpropagate(zTemp, z1, ActivationFunctions.ReLUPrime);
+ inception.Backpropagate(aInc, z2, ActivationFunctions.ReLUPrime);
+ Assert.IsTrue(z1.ContentEquals(z2));
+
+ // Gradient
+ Tensor.New(xTensor.Entities, xTensor.Length, out Tensor a);
+ KerasWeightsProvider.FillWithHeEtAlUniform(a, 10);
+ conv1.ComputeGradient(a, zTemp, out Tensor dJdwConv1, out Tensor dJdbConv1);
+ conv2.ComputeGradient(aTemp, aConv, out Tensor dJdwConv2, out Tensor dJdbConv2);
+ inception.ComputeGradient(a, aInc, out Tensor dJdwInc, out Tensor dJdbInc);
+ Tensor.Reshape((float*)dJdwInc.Ptr.ToPointer() + 30, 1, dJdwConv1.Size, out Tensor dJdwInc0);
+ Tensor.Reshape((float*)dJdbInc.Ptr.ToPointer() + 10, 1, dJdbConv1.Size, out Tensor dJdbInc0);
+ Assert.IsTrue(dJdwConv1.ContentEquals(dJdwInc0, 1e-5f));
+ Assert.IsTrue(dJdbConv1.ContentEquals(dJdbInc0, 1e-5f));
+ Tensor.Reshape((float*)dJdwInc.Ptr.ToPointer() + 30 + dJdwConv1.Size, 1, dJdwConv2.Size, out Tensor dJdwInc1);
+ Tensor.Reshape((float*)dJdbInc.Ptr.ToPointer() + 20, 1, dJdbConv2.Size, out Tensor dJdbInc1);
+ Assert.IsTrue(dJdwConv2.ContentEquals(dJdwInc1, 1e-5f));
+ Assert.IsTrue(dJdbConv2.ContentEquals(dJdbInc1, 1e-5f));
+
+ // Cleanup
+ z1.Free();
+ z2.Free();
+ zTemp.Free();
+ zConv.Free();
+ zInc.Free();
+ aConv.Free();
+ aInc.Free();
+ reshaped.Free();
+ }
+ }
+
+ [TestMethod]
+ public unsafe void Inception5x5Pipeline()
+ {
+ float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 12 * 12 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 12 * 12 * 3);
+ CuDnnConvolutionalLayer
+ conv1 = new CuDnnConvolutionalLayer(TensorInfo.CreateForRgbImage(12, 12), ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian),
+ conv2 = new CuDnnConvolutionalLayer(conv1.OutputInfo, ConvolutionInfo.New(ConvolutionMode.CrossCorrelation, 2, 2), (5, 5), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian);
+ CuDnnInceptionLayer inception = new CuDnnInceptionLayer(TensorInfo.CreateForRgbImage(12, 12), InceptionInfo.New(3, 2, 2, 10, 10, PoolingMode.Max, 2));
+ fixed (float* pw = inception.Weights)
+ Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length));
+ Buffer.BlockCopy(conv1.Weights, 0, inception.Weights, sizeof(float) * (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2), sizeof(float) * conv1.Weights.Length);
+ Buffer.BlockCopy(conv2.Weights, 0, inception.Weights, sizeof(float) * (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2 + conv1.Weights.Length), sizeof(float) * conv2.Weights.Length);
+ Buffer.BlockCopy(conv1.Biases, 0, inception.Biases, sizeof(float) * (3 + 2 + 2), sizeof(float) * conv1.Biases.Length);
+ Buffer.BlockCopy(conv2.Biases, 0, inception.Biases, sizeof(float) * (3 + 2 + 2 + 10), sizeof(float) * conv2.Biases.Length);
+ fixed (float* px = x)
+ {
+ // Forwaard + Z
+ Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor);
+ conv1.Forward(xTensor, out Tensor zTemp, out Tensor aTemp);
+ conv2.Forward(aTemp, out Tensor zConv, out Tensor aConv);
+ inception.Forward(xTensor, out Tensor zInc, out Tensor aInc);
+ Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped);
+ float* pzInc = (float*)zInc.Ptr.ToPointer() + 12 * 12 * (3 + 2), preshaped = (float*)reshaped.Ptr.ToPointer();
+ for (int i = 0; i < zConv.Entities; i++)
+ Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length);
+ Assert.IsTrue(reshaped.ContentEquals(zConv));
+
+ // A
+ float* paInc = (float*)aInc.Ptr.ToPointer() + 12 * 12 * (3 + 2);
+ for (int i = 0; i < aConv.Entities; i++)
+ Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length);
+ Assert.IsTrue(reshaped.ContentEquals(aConv));
+
+ // Backpropagation
+ Tensor.New(xTensor.Entities, xTensor.Length, out Tensor z1);
+ KerasWeightsProvider.FillWithHeEtAlUniform(z1, 10);
+ z1.Duplicate(out Tensor z2);
+ conv2.Backpropagate(aConv, zTemp, conv1.ActivationFunctions.ActivationPrime);
+ conv1.Backpropagate(zTemp, z1, ActivationFunctions.ReLUPrime);
+ inception.Backpropagate(aInc, z2, ActivationFunctions.ReLUPrime);
+ Assert.IsTrue(z1.ContentEquals(z2));
+
+ // Gradient
+ Tensor.New(xTensor.Entities, xTensor.Length, out Tensor a);
+ KerasWeightsProvider.FillWithHeEtAlUniform(a, 10);
+ conv1.ComputeGradient(a, zTemp, out Tensor dJdwConv1, out Tensor dJdbConv1);
+ conv2.ComputeGradient(aTemp, aConv, out Tensor dJdwConv2, out Tensor dJdbConv2);
+ inception.ComputeGradient(a, aInc, out Tensor dJdwInc, out Tensor dJdbInc);
+ Tensor.Reshape((float*)dJdwInc.Ptr.ToPointer() + (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2), 1, dJdwConv1.Size, out Tensor dJdwInc0);
+ Tensor.Reshape((float*)dJdbInc.Ptr.ToPointer() + 7, 1, dJdbConv1.Size, out Tensor dJdbInc0);
+ Assert.IsTrue(dJdwConv1.ContentEquals(dJdwInc0, 1e-5f));
+ Assert.IsTrue(dJdbConv1.ContentEquals(dJdbInc0, 1e-5f));
+ Tensor.Reshape((float*)dJdwInc.Ptr.ToPointer() + (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2) + dJdwConv1.Size, 1, dJdwConv2.Size, out Tensor dJdwInc1);
+ Tensor.Reshape((float*)dJdbInc.Ptr.ToPointer() + 17, 1, dJdbConv2.Size, out Tensor dJdbInc1);
+ Assert.IsTrue(dJdwConv2.ContentEquals(dJdwInc1, 1e-5f));
+ Assert.IsTrue(dJdbConv2.ContentEquals(dJdbInc1, 1e-5f));
+
+ // Cleanup
+ zTemp.Free();
+ aTemp.Free();
+ zConv.Free();
+ zInc.Free();
+ aConv.Free();
+ aInc.Free();
+ reshaped.Free();
+ }
+ }
+
+ [TestMethod]
+ public unsafe void InceptionPoolPipeline()
+ {
+ float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 12 * 12 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 12 * 12 * 3);
+ CuDnnPoolingLayer pool = new CuDnnPoolingLayer(TensorInfo.CreateForRgbImage(12, 12), PoolingInfo.New(PoolingMode.Max, 3, 3, 1, 1, 1, 1), ActivationFunctionType.ReLU);
+ CuDnnConvolutionalLayer conv = new CuDnnConvolutionalLayer(pool.OutputInfo, ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian);
+ CuDnnInceptionLayer inception = new CuDnnInceptionLayer(TensorInfo.CreateForRgbImage(12, 12), InceptionInfo.New(3, 2, 2, 2, 2, PoolingMode.Max, 10));
+ fixed (float* pw = inception.Weights)
+ Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length));
+ Buffer.BlockCopy(conv.Weights, 0, inception.Weights, sizeof(float) * (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2 + 3 * 2 + 5 * 5 * 2 * 2), sizeof(float) * conv.Weights.Length);
+ Buffer.BlockCopy(conv.Biases, 0, inception.Biases, sizeof(float) * (3 + 2 + 2 + 2 + 2), sizeof(float) * conv.Biases.Length);
+ fixed (float* px = x)
+ {
+ // Forward + Z
+ Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor);
+ pool.Forward(xTensor, out Tensor zTemp, out Tensor aTemp);
+ conv.Forward(aTemp, out Tensor zConv, out Tensor aConv);
+ inception.Forward(xTensor, out Tensor zInc, out Tensor aInc);
+ Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped);
+ float* pzInc = (float*)zInc.Ptr.ToPointer() + 12 * 12 * (3 + 2 + 2), preshaped = (float*)reshaped.Ptr.ToPointer();
+ for (int i = 0; i < zConv.Entities; i++)
+ Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length);
+ Assert.IsTrue(reshaped.ContentEquals(zConv));
+
+ // A
+ float* paInc = (float*)aInc.Ptr.ToPointer() + 12 * 12 * (3 + 2 + 2);
+ for (int i = 0; i < aConv.Entities; i++)
+ Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length);
+ Assert.IsTrue(reshaped.ContentEquals(aConv));
+
+ // Backpropagation
+ Tensor.New(xTensor.Entities, xTensor.Length, out Tensor z1);
+ KerasWeightsProvider.FillWithHeEtAlUniform(z1, 10);
+ z1.Duplicate(out Tensor z2);
+ conv.Backpropagate(aConv, zTemp, pool.ActivationFunctions.ActivationPrime);
+ pool.Backpropagate(zTemp, z1, ActivationFunctions.ReLUPrime);
+ inception.Backpropagate(aInc, z2, ActivationFunctions.ReLUPrime);
+ Assert.IsTrue(z1.ContentEquals(z2));
+
+ // Gradient
+ conv.ComputeGradient(aTemp, aConv, out Tensor dJdwConv, out Tensor dJdbConv);
+ inception.ComputeGradient(xTensor, aInc, out Tensor dJdwInc, out Tensor dJdbInc);
+ Tensor.Reshape((float*)dJdwInc.Ptr.ToPointer() + (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2 + 3 * 2 + 5 * 5 * 2 * 2), 1, dJdwConv.Size, out Tensor dJdwInc0);
+ Tensor.Reshape((float*)dJdbInc.Ptr.ToPointer() + 11, 1, dJdbConv.Size, out Tensor dJdbInc0);
+ Assert.IsTrue(dJdwConv.ContentEquals(dJdwInc0, 1e-5f));
+ Assert.IsTrue(dJdbConv.ContentEquals(dJdbInc0, 1e-5f));
+
+ // Cleanup
+ zTemp.Free();
+ aTemp.Free();
+ zConv.Free();
+ zInc.Free();
+ aConv.Free();
+ aInc.Free();
+ reshaped.Free();
+ }
+ }
+ }
+}
diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs
index 0b7aed2..87e53d4 100644
--- a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs
+++ b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs
@@ -73,7 +73,7 @@ private static unsafe void TestGradient(WeightedLayerBase cpu, WeightedLayerBase
[TestMethod]
public void FullyConnectedForward()
{
- float[,] x = WeightsProvider.NewFullyConnectedWeights(400, 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250);
+ float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250);
FullyConnectedLayer
cpu = new FullyConnectedLayer(TensorInfo.CreateLinear(250), 127, ActivationFunctionType.LeCunTanh, WeightsInitializationMode.GlorotNormal, BiasInitializationMode.Gaussian),
gpu = new CuDnnFullyConnectedLayer(cpu.InputInfo, cpu.OutputInfo.Size, cpu.Weights, cpu.Biases, cpu.ActivationFunctionType);
@@ -84,8 +84,8 @@ public void FullyConnectedForward()
public void FullyConnectedBackward()
{
float[,]
- delta_1 = WeightsProvider.NewFullyConnectedWeights(400, 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127),
- z = WeightsProvider.NewFullyConnectedWeights(400, 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250);
+ delta_1 = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127),
+ z = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250);
FullyConnectedLayer
cpu = new FullyConnectedLayer(TensorInfo.CreateLinear(250), 127, ActivationFunctionType.LeCunTanh, WeightsInitializationMode.GlorotNormal, BiasInitializationMode.Gaussian),
gpu = new CuDnnFullyConnectedLayer(cpu.InputInfo, cpu.OutputInfo.Size, cpu.Weights, cpu.Biases, cpu.ActivationFunctionType);
@@ -96,8 +96,8 @@ public void FullyConnectedBackward()
public void FullyConnectedGradient()
{
float[,]
- x = WeightsProvider.NewFullyConnectedWeights(400, 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250),
- delta = WeightsProvider.NewFullyConnectedWeights(400, 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127);
+ x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250),
+ delta = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127);
FullyConnectedLayer
cpu = new FullyConnectedLayer(TensorInfo.CreateLinear(250), 127, ActivationFunctionType.LeCunTanh, WeightsInitializationMode.GlorotNormal, BiasInitializationMode.Gaussian),
gpu = new CuDnnFullyConnectedLayer(cpu.InputInfo, cpu.OutputInfo.Size, cpu.Weights, cpu.Biases, cpu.ActivationFunctionType);
@@ -111,7 +111,7 @@ public void FullyConnectedGradient()
[TestMethod]
public void SoftmaxForward()
{
- float[,] x = WeightsProvider.NewFullyConnectedWeights(400, 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250);
+ float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250);
SoftmaxLayer
cpu = new SoftmaxLayer(TensorInfo.CreateLinear(250), 127, WeightsInitializationMode.GlorotNormal, BiasInitializationMode.Gaussian),
gpu = new CuDnnSoftmaxLayer(cpu.InputInfo, cpu.OutputInfo.Size, cpu.Weights, cpu.Biases);
@@ -122,8 +122,8 @@ public void SoftmaxForward()
public void SoftmaxBackward()
{
float[,]
- delta_1 = WeightsProvider.NewFullyConnectedWeights(400, 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127),
- z = WeightsProvider.NewFullyConnectedWeights(400, 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250);
+ delta_1 = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127),
+ z = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250);
SoftmaxLayer
cpu = new SoftmaxLayer(TensorInfo.CreateLinear(250), 127, WeightsInitializationMode.GlorotNormal, BiasInitializationMode.Gaussian),
gpu = new CuDnnSoftmaxLayer(cpu.InputInfo, cpu.OutputInfo.Size, cpu.Weights, cpu.Biases);
@@ -134,8 +134,8 @@ public void SoftmaxBackward()
public void SoftmaxGradient()
{
float[,]
- a = WeightsProvider.NewFullyConnectedWeights(400, 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250),
- delta = WeightsProvider.NewFullyConnectedWeights(400, 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127);
+ a = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250),
+ delta = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127);
SoftmaxLayer
cpu = new SoftmaxLayer(TensorInfo.CreateLinear(250), 127, WeightsInitializationMode.GlorotNormal, BiasInitializationMode.Gaussian),
gpu = new CuDnnSoftmaxLayer(cpu.InputInfo, cpu.OutputInfo.Size, cpu.Weights, cpu.Biases);
@@ -146,7 +146,7 @@ public void SoftmaxGradient()
public unsafe void SoftmaxBackwardOutput()
{
float[,]
- x = WeightsProvider.NewFullyConnectedWeights(400, 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250),
+ x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250),
y = new float[400, 127];
for (int i = 0; i < 400; i++)
y[i, ThreadSafeRandom.NextInt(max: 127)] = 1;
@@ -175,7 +175,7 @@ public unsafe void SoftmaxBackwardOutput()
[TestMethod]
public void ConvolutionForward()
{
- float[,] x = WeightsProvider.NewFullyConnectedWeights(127, 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 58 * 58 * 3);
+ float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(127), 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 58 * 58 * 3);
ConvolutionalLayer
cpu = new ConvolutionalLayer(new TensorInfo(58, 58, 3), ConvolutionInfo.Default, (5, 5), 20, ActivationFunctionType.LeakyReLU, BiasInitializationMode.Gaussian),
gpu = new CuDnnConvolutionalLayer(cpu.InputInfo, ConvolutionInfo.Default, cpu.KernelInfo, cpu.OutputInfo, cpu.Weights, cpu.Biases, cpu.ActivationFunctionType);
@@ -186,8 +186,8 @@ public void ConvolutionForward()
public unsafe void ConvolutionBackward()
{
float[,]
- delta_1 = WeightsProvider.NewFullyConnectedWeights(127, 54 * 54 * 20, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 54 * 54 * 20),
- z = WeightsProvider.NewFullyConnectedWeights(127, 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 58 * 58 * 3);
+ delta_1 = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(127), 54 * 54 * 20, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 54 * 54 * 20),
+ z = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(127), 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 58 * 58 * 3);
ConvolutionalLayer
cpu = new ConvolutionalLayer(new TensorInfo(58, 58, 3), ConvolutionInfo.Default, (5, 5), 20, ActivationFunctionType.LeCunTanh, BiasInitializationMode.Gaussian),
gpu = new CuDnnConvolutionalLayer(cpu.InputInfo, ConvolutionInfo.Default, cpu.KernelInfo, cpu.OutputInfo, cpu.Weights, cpu.Biases, ActivationFunctionType.LeCunTanh);
@@ -228,7 +228,7 @@ public void ConvolutionGradient()
[TestMethod]
public void PoolingForward()
{
- float[,] x = WeightsProvider.NewFullyConnectedWeights(400, 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 58 * 58 * 3);
+ float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 58 * 58 * 3);
PoolingLayer
cpu = new PoolingLayer(new TensorInfo(58, 58, 3), PoolingInfo.Default, ActivationFunctionType.LeakyReLU),
gpu = new CuDnnPoolingLayer(cpu.InputInfo, PoolingInfo.Default, ActivationFunctionType.LeakyReLU);
@@ -236,15 +236,41 @@ public void PoolingForward()
}
[TestMethod]
- public void PoolingBackward()
+ public unsafe void PoolingBackward()
{
- float[,]
- delta_1 = WeightsProvider.NewFullyConnectedWeights(400, 29 * 29 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 29 * 29 * 3),
- z = WeightsProvider.NewFullyConnectedWeights(400, 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 58 * 58 * 3);
+ // Setup
+ Tensor.New(400, 58 * 58 * 3, out Tensor x);
+ KerasWeightsProvider.FillWithHeEtAlUniform(x, 10);
PoolingLayer
cpu = new PoolingLayer(new TensorInfo(58, 58, 3), PoolingInfo.Default, ActivationFunctionType.LeakyReLU),
gpu = new CuDnnPoolingLayer(cpu.InputInfo, PoolingInfo.Default, ActivationFunctionType.LeakyReLU);
- TestBackward(cpu, gpu, delta_1, z);
+ gpu.Forward(x, out Tensor z, out Tensor a);
+ a.Free();
+ x.Duplicate(out Tensor x2);
+ Tensor.New(z.Entities, z.Length, out Tensor delta);
+ KerasWeightsProvider.FillWithHeEtAlUniform(delta, 10);
+
+ // Backward
+ cpu.Backpropagate(delta, x, ActivationFunctions.LeakyReLUPrime);
+ gpu.Backpropagate(delta, x2, ActivationFunctions.LeakyReLUPrime);
+ bool valid = true;
+ float* px = (float*)x.Ptr.ToPointer(), px2 = (float*)x2.Ptr.ToPointer();
+ int count = 0;
+ for (int i = 0; i < x.Size; i++)
+ {
+ if (px[i].EqualsWithDelta(px2[i], 1e-5f)) continue;
+ if (px[i].EqualsWithDelta(px2[i] * 100f, 1e-5f)) count++; // The cuDNN pooling backwards method returns a value scaled by 0.01 from time to time for some reason (less than 2% anyways)
+ else
+ {
+ valid = false;
+ break;
+ }
+ }
+ Assert.IsTrue(valid && count * 100f / x.Size < 2);
+ x.Free();
+ x2.Free();
+ z.Free();
+ delta.Free();
}
#endregion
diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/GpuExtensionsTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/GpuExtensionsTest.cs
new file mode 100644
index 0000000..9531146
--- /dev/null
+++ b/Unit/NeuralNetwork.NET.Cuda.Unit/GpuExtensionsTest.cs
@@ -0,0 +1,55 @@
+using Alea;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+using NeuralNetworkNET.APIs.Structs;
+using NeuralNetworkNET.Cuda.Extensions;
+using NeuralNetworkNET.Extensions;
+
+namespace NeuralNetworkNET.Cuda.Unit
+{
+ ///
+ /// Test class for the cuDNN GPU extension methods
+ ///
+ [TestClass]
+ [TestCategory(nameof(GpuExtensionsTest))]
+ public class GpuExtensionsTest
+ {
+ [TestMethod]
+ public void CopyToRows()
+ {
+ float[] test = {1,2,3,4,5,6,7,8,9};
+ Tensor.NewZeroed(3, 10, out Tensor tensor);
+ Gpu gpu = Gpu.Default;
+ using (DeviceMemory m_gpu = gpu.AllocateDevice(test))
+ {
+ m_gpu.CopyTo(tensor, 5, 3);
+ }
+ float[,] expected =
+ {
+ { 0, 0, 0, 0, 0, 1, 2, 3, 0, 0 },
+ { 0, 0, 0, 0, 0, 4, 5, 6, 0, 0 },
+ { 0, 0, 0, 0, 0, 7, 8, 9, 0, 0 }
+ };
+ Assert.IsTrue(tensor.ToArray2D().ContentEquals(expected));
+ }
+
+ [TestMethod]
+ public void AllocateDeviceRows()
+ {
+ float[,] source =
+ {
+ { 0, 0, 0, 0, 0, 1, 2, 3, 0, 0 },
+ { 0, 0, 0, 0, 0, 4, 5, 6, 0, 0 },
+ { 0, 0, 0, 0, 0, 7, 8, 9, 0, 0 }
+ };
+ Tensor.From(source, out Tensor tensor);
+ Gpu gpu = Gpu.Default;
+ using (DeviceMemory m_gpu = gpu.AllocateDevice(tensor, 5, 3))
+ {
+ float[]
+ copy = Gpu.CopyToHost(m_gpu),
+ expected = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+ Assert.IsTrue(copy.ContentEquals(expected));
+ }
+ }
+ }
+}
diff --git a/Unit/NeuralNetwork.NET.Unit/SerializationTest.cs b/Unit/NeuralNetwork.NET.Unit/SerializationTest.cs
index 4ae03d4..f646e86 100644
--- a/Unit/NeuralNetwork.NET.Unit/SerializationTest.cs
+++ b/Unit/NeuralNetwork.NET.Unit/SerializationTest.cs
@@ -1,6 +1,4 @@
-using System;
-using System.IO;
-using System.Linq;
+using System.IO;
using Microsoft.VisualStudio.TestTools.UnitTesting;
using NeuralNetworkNET.APIs;
using NeuralNetworkNET.APIs.Enums;
@@ -50,7 +48,7 @@ public void StreamSerialize()
{
using (MemoryStream stream = new MemoryStream())
{
- float[] w = WeightsProvider.NewFullyConnectedWeights(784, 30, WeightsInitializationMode.GlorotNormal);
+ float[] w = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(784), 30, WeightsInitializationMode.GlorotNormal);
stream.WriteShuffled(w);
Assert.IsTrue(stream.Position == sizeof(float) * w.Length);
stream.Seek(0, SeekOrigin.Begin);