From dbd18936cbbf9bc7a9b394a595c54b973864b356 Mon Sep 17 00:00:00 2001
From: Sergio0694
Date: Sat, 23 Dec 2017 22:41:56 +0100
Subject: [PATCH 01/30] Added InceptionInfo struct

---
 .../APIs/Structs/InceptionInfo.cs             | 108 ++++++++++++++++++
 .../APIs/Structs/ConvolutionInfo.cs           |  23 ++--
 NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs |  33 +++---
 NeuralNetwork.NET/APIs/Structs/Tensor.cs      |   8 +-
 NeuralNetwork.NET/APIs/Structs/TensorInfo.cs  |   6 +-
 5 files changed, 137 insertions(+), 41 deletions(-)
 create mode 100644 NeuralNetwork.NET.Cuda/APIs/Structs/InceptionInfo.cs

diff --git a/NeuralNetwork.NET.Cuda/APIs/Structs/InceptionInfo.cs b/NeuralNetwork.NET.Cuda/APIs/Structs/InceptionInfo.cs
new file mode 100644
index 0000000..5ddcd9f
--- /dev/null
+++ b/NeuralNetwork.NET.Cuda/APIs/Structs/InceptionInfo.cs
@@ -0,0 +1,108 @@
+using JetBrains.Annotations;
+using NeuralNetworkNET.APIs.Enums;
+using Newtonsoft.Json;
+using System;
+using System.Runtime.CompilerServices;
+
+namespace NeuralNetworkNET.APIs.Structs
+{
+    ///
+    /// A struct containing all the info on an inception module
+    ///
+    [JsonObject(MemberSerialization.Fields)]
+    public readonly struct InceptionInfo : IEquatable
+    {
+        #region Fields
+
+        ///
+        /// The number of 1x1 convolution kernels used in the first step of the forward pass
+        ///
+        public readonly int Primary1x1ConvolutionKernels;
+
+        ///
+        /// The number of 3x3 convolution kernels
+        ///
+        public readonly int Secondary3x3ConvolutionKernels;
+
+        ///
+        /// The number of 5x5 convolution kernels
+        ///
+        public readonly int Secondary5x5ConvolutionKernels;
+
+        ///
+        /// The kind of pooling operation performed on the layer
+        ///
+        public readonly PoolingMode Pooling;
+
+        ///
+        /// The number of 1x1 convolution kernels after the pooling operation
+        ///
+        public readonly int Chained1x1AfterPoolingConvolutionKernels;
+
+        #endregion
+
+        #region Constructors
+
+        // Internal constructor
+        private InceptionInfo(int _1x1Kernels, int _3x3Kernels, int _5x5Kernels, PoolingMode poolingMode, int _1x1SecondaryKernels)
+        {
+            Primary1x1ConvolutionKernels = _1x1Kernels >= 1 ? _1x1Kernels : throw new ArgumentOutOfRangeException(nameof(_1x1Kernels), "The number of 1x1 kernels must be at least 1");
+            Secondary3x3ConvolutionKernels = _3x3Kernels >= 1 ? _3x3Kernels : throw new ArgumentOutOfRangeException(nameof(_3x3Kernels), "The number of 3x3 kernels must be at least 1");
+            Secondary5x5ConvolutionKernels = _5x5Kernels >= 1 ? _5x5Kernels : throw new ArgumentOutOfRangeException(nameof(_5x5Kernels), "The number of 5x5 kernels must be at least 1");
+            Chained1x1AfterPoolingConvolutionKernels = _1x1SecondaryKernels >= 1 ? 
_1x1SecondaryKernels : throw new ArgumentOutOfRangeException(nameof(_1x1SecondaryKernels), "The number of secondary 1x1 kernels must be at least 1"); + Pooling = poolingMode; + } + + /// + /// Creates a new inception layer description with the input parameters + /// + /// The number of 1x1 primary convolution kernels + /// The number of 3x3 convolution kernels + /// The number of 5x5 convolution kernels + /// The pooling mode for the pooling channel + /// The number of secondary 1x1 convolution kernels + [PublicAPI] + [Pure] + public static InceptionInfo New( + int _1x1Kernels, int _3x3Kernels, int _5x5Kernels, + PoolingMode poolingMode, int _1x1SecondaryKernels) + => new InceptionInfo(_1x1Kernels, _3x3Kernels, _5x5Kernels, poolingMode, _1x1SecondaryKernels); + + #endregion + + #region Equality + + /// + public bool Equals(InceptionInfo other) => this == other; + + /// + public override bool Equals(object obj) => obj is InceptionInfo info ? this == info : false; + + /// + public override int GetHashCode() + { + int hash = 17; + unchecked + { + hash = hash * 31 + Primary1x1ConvolutionKernels; + hash = hash * 31 + Chained1x1AfterPoolingConvolutionKernels; + hash = hash * 31 + Secondary3x3ConvolutionKernels; + hash = hash * 31 + Secondary5x5ConvolutionKernels; + hash = hash * 31 + (int)Pooling; + } + return hash; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool operator ==(in InceptionInfo a, in InceptionInfo b) => a.Primary1x1ConvolutionKernels == b.Primary1x1ConvolutionKernels && + a.Chained1x1AfterPoolingConvolutionKernels == b.Chained1x1AfterPoolingConvolutionKernels && + a.Secondary3x3ConvolutionKernels == b.Secondary3x3ConvolutionKernels && + a.Secondary5x5ConvolutionKernels == b.Secondary5x5ConvolutionKernels && + a.Pooling == b.Pooling; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool operator !=(in InceptionInfo a, in InceptionInfo b) => !(a == b); + + #endregion + } +} diff --git a/NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs b/NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs index 00ac1b7..49a34f8 100644 --- a/NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs +++ b/NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs @@ -13,27 +13,27 @@ namespace NeuralNetworkNET.APIs.Structs public readonly struct ConvolutionInfo : IEquatable { /// - /// Gets the current convolution mode for the layer + /// The current convolution mode for the layer /// public readonly ConvolutionMode Mode; /// - /// Gets the optional vertical padding for the convolution operation + /// The optional vertical padding for the convolution operation /// public readonly int VerticalPadding; /// - /// Gets the optional horizontal padding for the convolution operation + /// The optional horizontal padding for the convolution operation /// public readonly int HorizontalPadding; /// - /// Gets the vertical stride length while sliding the receptive window over the input + /// The vertical stride length while sliding the receptive window over the input /// public readonly int VerticalStride; /// - /// Gets the horizontal stride length while sliding the receptive window over the input + /// The horizontal stride length while sliding the receptive window over the input /// public readonly int HorizontalStride; @@ -45,16 +45,11 @@ private ConvolutionInfo( int verticalPadding, int horizontalPadding, int verticalStride, int horizontalStride) { - if (verticalPadding < 0) throw new ArgumentOutOfRangeException(nameof(verticalPadding), "The vertical padding must be greater 
than or equal to 0"); - if (horizontalPadding < 0) throw new ArgumentOutOfRangeException(nameof(horizontalPadding), "The horizontal padding must be greater than or equal to 0"); - if (verticalStride < 1) throw new ArgumentOutOfRangeException(nameof(verticalStride), "The vertical stride must be at least equal to 1"); - if (horizontalStride < 1) throw new ArgumentOutOfRangeException(nameof(horizontalStride), "The horizontal stride must be at least equal to 1"); - + VerticalPadding = verticalPadding >= 0 ? verticalPadding : throw new ArgumentOutOfRangeException(nameof(verticalPadding), "The vertical padding must be greater than or equal to 0"); + HorizontalPadding = horizontalPadding >= 0 ? horizontalPadding : throw new ArgumentOutOfRangeException(nameof(horizontalPadding), "The horizontal padding must be greater than or equal to 0"); + VerticalStride = verticalStride >= 1 ? verticalStride : throw new ArgumentOutOfRangeException(nameof(verticalStride), "The vertical stride must be at least equal to 1"); + HorizontalStride = horizontalStride >= 1 ? horizontalStride : throw new ArgumentOutOfRangeException(nameof(horizontalStride), "The horizontal stride must be at least equal to 1"); Mode = mode; - VerticalPadding = verticalPadding; - HorizontalPadding = horizontalPadding; - VerticalStride = verticalStride; - HorizontalStride = horizontalStride; } /// diff --git a/NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs b/NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs index 7185d35..ea2166a 100644 --- a/NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs +++ b/NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs @@ -13,37 +13,37 @@ namespace NeuralNetworkNET.APIs.Structs public readonly struct PoolingInfo : IEquatable { /// - /// Gets the current pooling mode for the layer + /// The current pooling mode for the layer /// public readonly PoolingMode Mode; /// - /// Gets the height of each input local receptive field + /// The height of each input local receptive field /// public readonly int WindowHeight; /// - /// Gets the width of each input local receptive field + /// The width of each input local receptive field /// public readonly int WindowWidth; /// - /// Gets the optional vertical padding for the pooling operation + /// The optional vertical padding for the pooling operation /// public readonly int VerticalPadding; /// - /// Gets the optional horizontal padding for the pooling operation + /// The optional horizontal padding for the pooling operation /// public readonly int HorizontalPadding; /// - /// Gets the vertical stride length while sliding the receptive window over the input + /// The vertical stride length while sliding the receptive window over the input /// public readonly int VerticalStride; /// - /// Gets the horizontal stride length while sliding the receptive window over the input + /// The horizontal stride length while sliding the receptive window over the input /// public readonly int HorizontalStride; @@ -55,20 +55,13 @@ private PoolingInfo( int verticalPadding, int horizontalPadding, int verticalStride, int horizontalStride) { - if (windowHeight <= 0) throw new ArgumentOutOfRangeException(nameof(windowHeight), "The window height must be at least equal to 1"); - if (windowWidth <= 0) throw new ArgumentOutOfRangeException(nameof(windowWidth), "The window width must be at least equal to 1"); - if (verticalPadding < 0) throw new ArgumentOutOfRangeException(nameof(verticalPadding), "The vertical padding must be greater than or equal to 0"); - if (horizontalPadding < 0) throw new 
ArgumentOutOfRangeException(nameof(horizontalPadding), "The horizontal padding must be greater than or equal to 0");
-            if (verticalStride < 1) throw new ArgumentOutOfRangeException(nameof(verticalStride), "The vertical stride must be at least equal to 1");
-            if (horizontalStride < 1) throw new ArgumentOutOfRangeException(nameof(horizontalStride), "The horizontal stride must be at least equal to 1");
-
+            WindowHeight = windowHeight > 0 ? windowHeight : throw new ArgumentOutOfRangeException(nameof(windowHeight), "The window height must be at least equal to 1");
+            WindowWidth = windowWidth > 0 ? windowWidth : throw new ArgumentOutOfRangeException(nameof(windowWidth), "The window width must be at least equal to 1");
+            VerticalPadding = verticalPadding >= 0 ? verticalPadding : throw new ArgumentOutOfRangeException(nameof(verticalPadding), "The vertical padding must be greater than or equal to 0");
+            HorizontalPadding = horizontalPadding >= 0 ? horizontalPadding : throw new ArgumentOutOfRangeException(nameof(horizontalPadding), "The horizontal padding must be greater than or equal to 0");
+            VerticalStride = verticalStride >= 1 ? verticalStride : throw new ArgumentOutOfRangeException(nameof(verticalStride), "The vertical stride must be at least equal to 1");
+            HorizontalStride = horizontalStride >= 1 ? horizontalStride : throw new ArgumentOutOfRangeException(nameof(horizontalStride), "The horizontal stride must be at least equal to 1");
             Mode = mode;
-            WindowHeight = windowHeight;
-            WindowWidth = windowWidth;
-            VerticalPadding = verticalPadding;
-            HorizontalPadding = horizontalPadding;
-            VerticalStride = verticalStride;
-            HorizontalStride = horizontalStride;
         }

         ///
diff --git a/NeuralNetwork.NET/APIs/Structs/Tensor.cs b/NeuralNetwork.NET/APIs/Structs/Tensor.cs
index cf57a6d..9252f95 100644
--- a/NeuralNetwork.NET/APIs/Structs/Tensor.cs
+++ b/NeuralNetwork.NET/APIs/Structs/Tensor.cs
@@ -17,22 +17,22 @@ namespace NeuralNetworkNET.APIs.Structs
     public readonly struct Tensor
     {
         ///
-        /// Gets the IntPtr value to the allocated memory
+        /// The IntPtr value to the allocated memory
         ///
         public readonly IntPtr Ptr;

         ///
-        /// Gets the number of entities (rows) in the current Tensor
+        /// The number of entities (rows) in the current Tensor
         ///
         public readonly int Entities;

         ///
-        /// Gets the size of each entity in the current Tensor
+        /// The size of each entity in the current Tensor
         ///
         public readonly int Length;

         ///
-        /// Gets the total size (the number of values) in the current Tensor
+        /// The total size (the number of values) in the current Tensor
         ///
         public int Size => Entities * Length;

diff --git a/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs b/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs
index d774cf3..b938fed 100644
--- a/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs
+++ b/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs
@@ -14,19 +14,19 @@ namespace NeuralNetworkNET.APIs.Structs
     public readonly struct TensorInfo : IEquatable
     {
         ///
-        /// Gets the height of each 2D slice
+        /// The height of each 2D slice
         ///
         [JsonProperty(nameof(Height), Order = 1)]
         public readonly int Height;

         ///
-        /// Gets the width of each 2D slice
+        /// The width of each 2D slice
         ///
         [JsonProperty(nameof(Width), Order = 2)]
         public readonly int Width;

         ///
-        /// Gets the number of channels for the tensor description
+        /// The number of channels for the tensor description
         ///
         [JsonProperty(nameof(Channels), Order = 3)]
         public readonly int Channels;
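(An aside, not part of the patch: a minimal usage sketch for the new struct. The kernel counts and the PoolingMode.Max member name are illustrative assumptions.)

    // The four pipelines are concatenated depth-wise, so this module would
    // produce 16 + 32 + 8 + 4 = 60 output channels per spatial position
    InceptionInfo info = InceptionInfo.New(16, 32, 8, PoolingMode.Max, 4);

From fd7f9ec9e3cafc26c5e9d082848f3e92e09d3952 Mon Sep 17 00:00:00 2001
From: Sergio0694
Date: Sun, 24 Dec 2017 01:00:36 +0100
Subject: [PATCH 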
02/30] Temp move to APIs folder --- .../CuDnnNetworkLayers.cs => APIs/CuDnnNetworkLayers_Move.cs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename NeuralNetwork.NET.Cuda/{APIS/CuDnnNetworkLayers.cs => APIs/CuDnnNetworkLayers_Move.cs} (100%) diff --git a/NeuralNetwork.NET.Cuda/APIS/CuDnnNetworkLayers.cs b/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers_Move.cs similarity index 100% rename from NeuralNetwork.NET.Cuda/APIS/CuDnnNetworkLayers.cs rename to NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers_Move.cs From 2cc487929c4c8d808c84f5da0063c85dbef76060 Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Sun, 24 Dec 2017 01:01:02 +0100 Subject: [PATCH 03/30] Name switched back to original (path fix) --- .../APIs/{CuDnnNetworkLayers_Move.cs => CuDnnNetworkLayers.cs} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename NeuralNetwork.NET.Cuda/APIs/{CuDnnNetworkLayers_Move.cs => CuDnnNetworkLayers.cs} (100%) diff --git a/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers_Move.cs b/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers.cs similarity index 100% rename from NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers_Move.cs rename to NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers.cs From ed56e1ff803230dd9c2c160734e0be35e21aa750 Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Sun, 24 Dec 2017 01:28:58 +0100 Subject: [PATCH 04/30] Initial inception layer structure added --- .../APIs/Structs/InceptionInfo.cs | 2 +- .../Layers/CuDnnConvolutionalLayer.cs | 4 +- .../Layers/CuDnnInceptionLayer.cs | 174 ++++++++++++++++++ NeuralNetwork.NET/APIs/Enums/LayerType.cs | 26 ++- 4 files changed, 201 insertions(+), 5 deletions(-) create mode 100644 NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs diff --git a/NeuralNetwork.NET.Cuda/APIs/Structs/InceptionInfo.cs b/NeuralNetwork.NET.Cuda/APIs/Structs/InceptionInfo.cs index 5ddcd9f..27da1ab 100644 --- a/NeuralNetwork.NET.Cuda/APIs/Structs/InceptionInfo.cs +++ b/NeuralNetwork.NET.Cuda/APIs/Structs/InceptionInfo.cs @@ -59,7 +59,7 @@ private InceptionInfo(int _1x1Kernels, int _3x3Kernels, int _5x5Kernels, Pooling /// The number of 1x1 primary convolution kernels /// The number of 3x3 convolution kernels /// The number of 5x5 convolution kernels - /// The pooling mode for the pooling channel + /// The pooling mode for the pooling pipeline /// The number of secondary 1x1 convolution kernels [PublicAPI] [Pure] diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs index 4aa0982..ff09ac5 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs @@ -47,9 +47,7 @@ internal sealed class CuDnnConvolutionalLayer : ConvolutionalLayer [NotNull] private readonly Dnn DnnInstance = DnnService.Instance; - /// - /// Sets the cuDNN fields that will be used during future forward/backwards operations - /// + // cuDNN fields setup private void SetupCuDnnInfo() { ConvolutionDescription.Set2D(OperationInfo.VerticalPadding, OperationInfo.HorizontalPadding, OperationInfo.VerticalStride, OperationInfo.HorizontalStride, 1, 1, (Alea.cuDNN.ConvolutionMode)OperationInfo.Mode); diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs new file mode 100644 index 0000000..b271567 --- /dev/null +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -0,0 +1,174 @@ +using Alea.cuDNN; +using JetBrains.Annotations; +using NeuralNetworkNET.APIs.Enums; +using 
NeuralNetworkNET.APIs.Interfaces; +using NeuralNetworkNET.APIs.Structs; +using NeuralNetworkNET.Cuda.Services; +using NeuralNetworkNET.Networks.Activations; +using NeuralNetworkNET.Networks.Activations.Delegates; +using NeuralNetworkNET.Networks.Implementations.Layers.Abstract; +using System; +using System.Runtime.CompilerServices; + +namespace NeuralNetworkNET.Cuda.Layers +{ + /// + /// A simplified inception module, with 4 pipelines combining 1x1 convolution, 1x1 + 3x3, 1x1 + 5x5 and pooling + 1x1 + /// + internal sealed class CuDnnInceptionLayer : WeightedLayerBase + { + #region Parameters + + /// + public override LayerType LayerType { get; } = LayerType.Inception; + + private readonly InceptionInfo _OperationInfo; + + /// + /// Gets the info on the inception parameters used by the layer + /// + public ref readonly InceptionInfo OperationInfo + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => ref _OperationInfo; + } + + #endregion + + #region cuDNN fields + + // The NCHW tensor info for the layer inputs + [NotNull] + private readonly TensorDescriptor InputDescription = new TensorDescriptor(); + + #region 1x1 convolution + + // The NCHW info for the 1x1 convolution weights + [NotNull] + private readonly FilterDescriptor _1x1FilterDescription = new FilterDescriptor(); + + // The info on the 1x1 convolution bias (one value per output channel) + [NotNull] + private readonly TensorDescriptor _1x1BiasDescription = new TensorDescriptor(); + + // The first 1x1 convolution info + [NotNull] + private readonly ConvolutionDescriptor _1x1ConvolutionDescription = new ConvolutionDescriptor(); + + // The NCHW tensor info for the outputs of the first 1x1 convolution + [NotNull] + private readonly TensorDescriptor _1x1OutputDescription = new TensorDescriptor(); + + #endregion + + #region 3x3 secondary convolution + + // The NCHW info for the 3x3 convolution weights + [NotNull] + private readonly FilterDescriptor _3x3FilterDescription = new FilterDescriptor(); + + // The info on the 3x3 convolution bias (one value per output channel) + [NotNull] + private readonly TensorDescriptor _3x3BiasDescription = new TensorDescriptor(); + + // The first 3x3 convolution info + [NotNull] + private readonly ConvolutionDescriptor _3x3ConvolutionDescription = new ConvolutionDescriptor(); + + // The NCHW tensor info for the outputs of the 3x3 convolution + [NotNull] + private readonly TensorDescriptor _3x3OutputDescription = new TensorDescriptor(); + + #endregion + + #region 5x5 secondary convolution + + // The NCHW info for the 5x5 convolution weights + [NotNull] + private readonly FilterDescriptor _5x5FilterDescription = new FilterDescriptor(); + + // The info on the 5x5 convolution bias (one value per output channel) + [NotNull] + private readonly TensorDescriptor _5x5BiasDescription = new TensorDescriptor(); + + // The first 5x5 convolution info + [NotNull] + private readonly ConvolutionDescriptor _5x5ConvolutionDescription = new ConvolutionDescriptor(); + + // The NCHW tensor info for the outputs of the 5x5 convolution + [NotNull] + private readonly TensorDescriptor _5x5OutputDescription = new TensorDescriptor(); + + #endregion + + #region Pooling pipeline + + // The descriptor for the pooling operation performed by the layer + [NotNull] + private readonly PoolingDescriptor PoolingDescription = new PoolingDescriptor(); + + // The NCHW tensor info for the pooling outputs + [NotNull] + private readonly TensorDescriptor PoolingOutputDescription = new TensorDescriptor(); + + // The NCHW info for the 
secondary 1x1 convolution weights
+        [NotNull]
+        private readonly FilterDescriptor Secondary1x1FilterDescription = new FilterDescriptor();
+
+        // The info on the secondary 1x1 convolution bias (one value per output channel)
+        [NotNull]
+        private readonly TensorDescriptor Secondary1x1BiasDescription = new TensorDescriptor();
+
+        // The first secondary 1x1 convolution info
+        [NotNull]
+        private readonly ConvolutionDescriptor Secondary1x1ConvolutionDescription = new ConvolutionDescriptor();
+
+        // The info on the secondary 1x1 convolution outputs
+        [NotNull]
+        private readonly TensorDescriptor Secondary1x1OutputDescription = new TensorDescriptor();
+
+        #endregion
+
+        ///
+        /// Gets the instance for the current layer
+        ///
+        [NotNull]
+        private readonly Dnn DnnInstance = DnnService.Instance;
+
+        // cuDNN fields setup
+        private void SetupCuDnnInfo()
+        {
+
+        }
+
+        #endregion
+
+        protected CuDnnInceptionLayer(in TensorInfo input, in TensorInfo output, [NotNull] float[] w, [NotNull] float[] b, ActivationFunctionType activation) : base(input, output, w, b, activation)
+        {
+        }
+
+        #region Implementation
+
+        public override void Forward(in Tensor x, out Tensor z, out Tensor a)
+        {
+            throw new NotImplementedException();
+        }
+
+        public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime)
+        {
+            throw new NotImplementedException();
+        }
+
+        public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJdw, out Tensor dJdb)
+        {
+            throw new NotImplementedException();
+        }
+
+        #endregion
+
+        public override INetworkLayer Clone()
+        {
+            throw new NotImplementedException();
+        }
+    }
+}
diff --git a/NeuralNetwork.NET/APIs/Enums/LayerType.cs b/NeuralNetwork.NET/APIs/Enums/LayerType.cs
index 498e059..4406841 100644
--- a/NeuralNetwork.NET/APIs/Enums/LayerType.cs
+++ b/NeuralNetwork.NET/APIs/Enums/LayerType.cs
@@ -5,10 +5,34 @@
     ///
     public enum LayerType : byte
     {
+        ///
+        /// A fully connected layer, mapping n inputs to m outputs
+        ///
         FullyConnected,
+
+        ///
+        /// A convolutional layer, which keeps spatial information on the input volume
+        ///
         Convolutional,
+
+        ///
+        /// A pooling layer, useful to reduce the size of the input data volume
+        ///
         Pooling,
+
+        ///
+        /// A fully connected output layer, with an arbitrary activation and cost function
+        ///
         Output,
-        Softmax
+
+        ///
+        /// A softmax layer, with the softmax activation and log-likelihood cost function
+        ///
+        Softmax,
+
+        ///
+        /// An inception module, combining different kinds of convolution with a pooling operation
+        ///
+        Inception
     }
 }
\ No newline at end of file

From 9c9a79c2baada46c8f39b5056528fc559c964d5c Mon Sep 17 00:00:00 2001
From: Sergio0694
Date: Sun, 24 Dec 2017 01:50:19 +0100
Subject: [PATCH 05/30] Added inception cuDNN base initialization

---
 .../Layers/CuDnnInceptionLayer.cs | 23 ++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs
index b271567..2b65f99 100644
--- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs
+++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs
@@ -138,7 +138,28 @@ public ref readonly InceptionInfo OperationInfo
         // cuDNN fields setup
         private void SetupCuDnnInfo()
         {
-
+            // First 1x1 convolution
+            _1x1ConvolutionDescription.Set2D(0, 0, 1, 1, 1, 1, Alea.cuDNN.ConvolutionMode.CROSS_CORRELATION);
+            _1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Primary1x1ConvolutionKernels, 
InputInfo.Channels, 1, 1);
+            _1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Primary1x1ConvolutionKernels, 1, 1);
+
+            // 3x3 convolution
+            _3x3ConvolutionDescription.Set2D(1, 1, 1, 1, 1, 1, Alea.cuDNN.ConvolutionMode.CROSS_CORRELATION); // 1-padding to keep size
+            _3x3FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Secondary3x3ConvolutionKernels, _OperationInfo.Primary1x1ConvolutionKernels, 3, 3);
+            _3x3BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Secondary3x3ConvolutionKernels, 1, 1);
+
+            // 5x5 convolution
+            _5x5ConvolutionDescription.Set2D(2, 2, 1, 1, 1, 1, Alea.cuDNN.ConvolutionMode.CROSS_CORRELATION);
+            _5x5FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Secondary5x5ConvolutionKernels, _OperationInfo.Primary1x1ConvolutionKernels, 5, 5);
+            _5x5BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Secondary5x5ConvolutionKernels, 1, 1);
+
+            // Pooling
+            PoolingDescription.Set2D(Alea.cuDNN.PoolingMode.AVERAGE_COUNT_EXCLUDE_PADDING, NanPropagation.PROPAGATE_NAN, 3, 3, 1, 1, 1, 1);
+
+            // Secondary 1x1 convolution
+            Secondary1x1ConvolutionDescription.Set2D(0, 0, 1, 1, 1, 1, Alea.cuDNN.ConvolutionMode.CROSS_CORRELATION);
+            Secondary1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, InputInfo.Channels, _OperationInfo.Chained1x1AfterPoolingConvolutionKernels, 1, 1);
+            Secondary1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Chained1x1AfterPoolingConvolutionKernels, 1, 1);
         }

         #endregion
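(An aside, not part of the patch: the paddings picked in SetupCuDnnInfo above follow from the usual convolution output size relation, out = (in + 2 * padding - kernel) / stride + 1, so with stride 1 a 3x3 kernel needs padding 1 and a 5x5 kernel needs padding 2 to preserve the input resolution. A minimal check, with hypothetical names:)

    // Spatial output size of a convolution along one dimension
    static int ConvOutputSize(int input, int kernel, int padding, int stride)
        => (input + 2 * padding - kernel) / stride + 1;

    // ConvOutputSize(28, 3, 1, 1) == 28 and ConvOutputSize(28, 5, 2, 1) == 28

From 9bc9e809968476d04c30e5f29fce918796107bee Mon Sep 17 00:00:00 2001
From: Sergio0694
Date: Sun, 24 Dec 2017 02:10:51 +0100
Subject: [PATCH 06/30] Added inception layer weights initialization, minor changes

---
 .../APIs/Structs/InceptionInfo.cs     |  0
 .../Layers/ConvolutionalLayer.cs      |  2 +-
 .../Layers/FullyConnectedLayer.cs     |  2 +-
 .../Layers/Helpers/WeightsProvider.cs | 59 ++++++++++++++-----
 .../CuDnnLayersTest.cs                | 34 +++++------
 .../SerializationTest.cs              |  6 +-
 6 files changed, 64 insertions(+), 39 deletions(-)
 rename {NeuralNetwork.NET.Cuda => NeuralNetwork.NET}/APIs/Structs/InceptionInfo.cs (100%)

diff --git a/NeuralNetwork.NET.Cuda/APIs/Structs/InceptionInfo.cs b/NeuralNetwork.NET/APIs/Structs/InceptionInfo.cs
similarity index 100%
rename from NeuralNetwork.NET.Cuda/APIs/Structs/InceptionInfo.cs
rename to NeuralNetwork.NET/APIs/Structs/InceptionInfo.cs
diff --git a/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs b/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs
index 0be71cf..922cf50 100644
--- a/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs
+++ b/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs
@@ -59,7 +59,7 @@ public ref readonly TensorInfo KernelInfo
     public ConvolutionalLayer(in TensorInfo input, in ConvolutionInfo operation, (int X, int Y) kernelSize, int kernels, ActivationFunctionType activation, BiasInitializationMode biasMode)
         : base(input, new TensorInfo(input.Height - kernelSize.X + 1, input.Width - kernelSize.Y + 1, kernels),
-              WeightsProvider.NewConvolutionalKernels(input.Channels, kernelSize.X, kernelSize.Y, kernels),
+              WeightsProvider.NewConvolutionalKernels(input, kernelSize.X, kernelSize.Y, kernels),
               WeightsProvider.NewBiases(kernels, biasMode), activation)
     {
         _OperationInfo = operation;
diff --git 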
a/NeuralNetwork.NET/Networks/Implementations/Layers/FullyConnectedLayer.cs b/NeuralNetwork.NET/Networks/Implementations/Layers/FullyConnectedLayer.cs index 9086c89..59bff25 100644 --- a/NeuralNetwork.NET/Networks/Implementations/Layers/FullyConnectedLayer.cs +++ b/NeuralNetwork.NET/Networks/Implementations/Layers/FullyConnectedLayer.cs @@ -22,7 +22,7 @@ internal class FullyConnectedLayer : WeightedLayerBase public FullyConnectedLayer(in TensorInfo input, int neurons, ActivationFunctionType activation, WeightsInitializationMode weightsMode, BiasInitializationMode biasMode) : base(input, TensorInfo.CreateLinear(neurons), - WeightsProvider.NewFullyConnectedWeights(input.Size, neurons, weightsMode), + WeightsProvider.NewFullyConnectedWeights(input, neurons, weightsMode), WeightsProvider.NewBiases(neurons, biasMode), activation) { } public FullyConnectedLayer(in TensorInfo input, int neurons, [NotNull] float[] weights, [NotNull] float[] biases, ActivationFunctionType activation) diff --git a/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs b/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs index 87ddb9c..72a6722 100644 --- a/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs +++ b/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs @@ -13,35 +13,34 @@ namespace NeuralNetworkNET.Networks.Implementations.Layers.Helpers internal static class WeightsProvider { /// - /// Creates a weight matrix for a fully connected layer + /// Creates a weights vector for a fully connected layer /// - /// The input neurons + /// The layer inputs /// The output neurons /// The initialization mode for the weights [Pure, NotNull] - public static unsafe float[] NewFullyConnectedWeights(int inputs, int outputs, WeightsInitializationMode mode) + public static unsafe float[] NewFullyConnectedWeights(in TensorInfo input, int outputs, WeightsInitializationMode mode) { - if (inputs <= 0 || outputs <= 0) throw new ArgumentOutOfRangeException("The inputs and outputs must be positive numbers"); - float[] weights = new float[inputs * outputs]; + float[] weights = new float[input.Size * outputs]; fixed (float* pw = weights) { - Tensor.Reshape(pw, inputs, outputs, out Tensor wTensor); + Tensor.Reshape(pw, input.Size, outputs, out Tensor wTensor); switch (mode) { case WeightsInitializationMode.LeCunUniform: - KerasWeightsProvider.FillWithLeCunUniform(wTensor, inputs); + KerasWeightsProvider.FillWithLeCunUniform(wTensor, input.Size); break; case WeightsInitializationMode.GlorotNormal: - KerasWeightsProvider.FillWithGlorotNormal(wTensor, inputs, outputs); + KerasWeightsProvider.FillWithGlorotNormal(wTensor, input.Size, outputs); break; case WeightsInitializationMode.GlorotUniform: - KerasWeightsProvider.FillWithGlorotUniform(wTensor, inputs, outputs); + KerasWeightsProvider.FillWithGlorotUniform(wTensor, input.Size, outputs); break; case WeightsInitializationMode.HeEtAlNormal: - KerasWeightsProvider.FillWithHeEtAlNormal(wTensor, inputs); + KerasWeightsProvider.FillWithHeEtAlNormal(wTensor, input.Size); break; case WeightsInitializationMode.HeEtAlUniform: - KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, inputs); + KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Size); break; default: throw new ArgumentOutOfRangeException(nameof(mode), "Unsupported weights initialization mode"); } @@ -50,21 +49,49 @@ public static unsafe float[] NewFullyConnectedWeights(int inputs, int outputs, W } /// - /// Creates a weight matrix 
for a convolutional layer
+        /// Creates a weights vector for a convolutional layer
         ///
-        /// The depth of the input volume
+        /// The layer inputs
         /// The height of each kernel
         /// The width of each kernel
         /// The number of kernels in the layer
         [Pure, NotNull]
-        public static unsafe float[] NewConvolutionalKernels(int inputDepth, int kernelsHeight, int kernelsWidth, int kernels)
+        public static unsafe float[] NewConvolutionalKernels(in TensorInfo input, int kernelsHeight, int kernelsWidth, int kernels)
         {
             if (kernels <= 0) throw new ArgumentOutOfRangeException(nameof(kernels), "The number of kernels must be positive");
-            float[] weights = new float[kernels * kernelsHeight * kernelsWidth * inputDepth];
+            float[] weights = new float[kernels * kernelsHeight * kernelsWidth * input.Channels];
             fixed (float* pw = weights)
             {
                 Tensor.Reshape(pw, 1, weights.Length, out Tensor wTensor);
-                KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, inputDepth * kernelsHeight * kernelsWidth);
+                KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels * kernelsHeight * kernelsWidth);
             }
             return weights;
         }
+
+        ///
+        /// Creates a new mixed weights vector for an inception layer
+        ///
+        /// The layer inputs
+        /// The info on the target inception layer
+        [Pure, NotNull]
+        public static unsafe float[] NewInceptionWeights(in TensorInfo input, in InceptionInfo info)
+        {
+            int
+                _1x1Length = input.Channels * info.Primary1x1ConvolutionKernels,
+                _3x3Length = 3 * 3 * info.Primary1x1ConvolutionKernels * info.Secondary3x3ConvolutionKernels,
+                _5x5Length = 5 * 5 * info.Primary1x1ConvolutionKernels * info.Secondary5x5ConvolutionKernels,
+                secondary1x1Length = input.Channels * info.Chained1x1AfterPoolingConvolutionKernels;
+            float[] weights = new float[_1x1Length + _3x3Length + _5x5Length + secondary1x1Length];
+            fixed (float* pw = weights)
+            {
+                Tensor.Reshape(pw, 1, _1x1Length, out Tensor wTensor);
+                KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels);
+                Tensor.Reshape(pw + _1x1Length, 1, _3x3Length, out wTensor);
+                KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, 3 * 3 * info.Primary1x1ConvolutionKernels);
+                Tensor.Reshape(pw + _1x1Length + _3x3Length, 1, _5x5Length, out wTensor);
+                KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, 5 * 5 * info.Primary1x1ConvolutionKernels);
+                Tensor.Reshape(pw + _1x1Length + _3x3Length + _5x5Length, 1, secondary1x1Length, out wTensor);
+                KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels);
+            }
+            return weights;
+        }
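(An aside, not part of the patch: a worked size check for the mixed weights layout built above, with made-up kernel counts.)

    // input = 32 channels, 1x1 = 16, 3x3 = 32, 5x5 = 8, secondary 1x1 = 4:
    //   _1x1Length         = 32 * 16         =   512
    //   _3x3Length         = 3 * 3 * 16 * 32 =  4608
    //   _5x5Length         = 5 * 5 * 16 * 8  =  3200
    //   secondary1x1Length = 32 * 4          =   128
    // weights.Length = 512 + 4608 + 3200 + 128 = 12448 floats, stored in that order

diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs
index 0b7aed2..790a697 100644
--- a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs
+++ b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs
@@ -73,7 +73,7 @@ private static unsafe void TestGradient(WeightedLayerBase cpu, WeightedLayerBase
     [TestMethod]
     public void 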
FullyConnectedForward() public void FullyConnectedBackward() { float[,] - delta_1 = WeightsProvider.NewFullyConnectedWeights(400, 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127), - z = WeightsProvider.NewFullyConnectedWeights(400, 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250); + delta_1 = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127), + z = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250); FullyConnectedLayer cpu = new FullyConnectedLayer(TensorInfo.CreateLinear(250), 127, ActivationFunctionType.LeCunTanh, WeightsInitializationMode.GlorotNormal, BiasInitializationMode.Gaussian), gpu = new CuDnnFullyConnectedLayer(cpu.InputInfo, cpu.OutputInfo.Size, cpu.Weights, cpu.Biases, cpu.ActivationFunctionType); @@ -96,8 +96,8 @@ public void FullyConnectedBackward() public void FullyConnectedGradient() { float[,] - x = WeightsProvider.NewFullyConnectedWeights(400, 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250), - delta = WeightsProvider.NewFullyConnectedWeights(400, 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127); + x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250), + delta = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127); FullyConnectedLayer cpu = new FullyConnectedLayer(TensorInfo.CreateLinear(250), 127, ActivationFunctionType.LeCunTanh, WeightsInitializationMode.GlorotNormal, BiasInitializationMode.Gaussian), gpu = new CuDnnFullyConnectedLayer(cpu.InputInfo, cpu.OutputInfo.Size, cpu.Weights, cpu.Biases, cpu.ActivationFunctionType); @@ -111,7 +111,7 @@ public void FullyConnectedGradient() [TestMethod] public void SoftmaxForward() { - float[,] x = WeightsProvider.NewFullyConnectedWeights(400, 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250); + float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250); SoftmaxLayer cpu = new SoftmaxLayer(TensorInfo.CreateLinear(250), 127, WeightsInitializationMode.GlorotNormal, BiasInitializationMode.Gaussian), gpu = new CuDnnSoftmaxLayer(cpu.InputInfo, cpu.OutputInfo.Size, cpu.Weights, cpu.Biases); @@ -122,8 +122,8 @@ public void SoftmaxForward() public void SoftmaxBackward() { float[,] - delta_1 = WeightsProvider.NewFullyConnectedWeights(400, 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127), - z = WeightsProvider.NewFullyConnectedWeights(400, 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250); + delta_1 = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127), + z = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250); SoftmaxLayer cpu = new SoftmaxLayer(TensorInfo.CreateLinear(250), 127, WeightsInitializationMode.GlorotNormal, BiasInitializationMode.Gaussian), gpu = new CuDnnSoftmaxLayer(cpu.InputInfo, cpu.OutputInfo.Size, cpu.Weights, cpu.Biases); @@ -134,8 +134,8 @@ public void SoftmaxBackward() public void SoftmaxGradient() { float[,] - a = WeightsProvider.NewFullyConnectedWeights(400, 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250), - delta = 
WeightsProvider.NewFullyConnectedWeights(400, 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127); + a = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250), + delta = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 127, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 127); SoftmaxLayer cpu = new SoftmaxLayer(TensorInfo.CreateLinear(250), 127, WeightsInitializationMode.GlorotNormal, BiasInitializationMode.Gaussian), gpu = new CuDnnSoftmaxLayer(cpu.InputInfo, cpu.OutputInfo.Size, cpu.Weights, cpu.Biases); @@ -146,7 +146,7 @@ public void SoftmaxGradient() public unsafe void SoftmaxBackwardOutput() { float[,] - x = WeightsProvider.NewFullyConnectedWeights(400, 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250), + x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 250, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 250), y = new float[400, 127]; for (int i = 0; i < 400; i++) y[i, ThreadSafeRandom.NextInt(max: 127)] = 1; @@ -175,7 +175,7 @@ public unsafe void SoftmaxBackwardOutput() [TestMethod] public void ConvolutionForward() { - float[,] x = WeightsProvider.NewFullyConnectedWeights(127, 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 58 * 58 * 3); + float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(127), 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 58 * 58 * 3); ConvolutionalLayer cpu = new ConvolutionalLayer(new TensorInfo(58, 58, 3), ConvolutionInfo.Default, (5, 5), 20, ActivationFunctionType.LeakyReLU, BiasInitializationMode.Gaussian), gpu = new CuDnnConvolutionalLayer(cpu.InputInfo, ConvolutionInfo.Default, cpu.KernelInfo, cpu.OutputInfo, cpu.Weights, cpu.Biases, cpu.ActivationFunctionType); @@ -186,8 +186,8 @@ public void ConvolutionForward() public unsafe void ConvolutionBackward() { float[,] - delta_1 = WeightsProvider.NewFullyConnectedWeights(127, 54 * 54 * 20, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 54 * 54 * 20), - z = WeightsProvider.NewFullyConnectedWeights(127, 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 58 * 58 * 3); + delta_1 = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(127), 54 * 54 * 20, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 54 * 54 * 20), + z = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(127), 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(127, 58 * 58 * 3); ConvolutionalLayer cpu = new ConvolutionalLayer(new TensorInfo(58, 58, 3), ConvolutionInfo.Default, (5, 5), 20, ActivationFunctionType.LeCunTanh, BiasInitializationMode.Gaussian), gpu = new CuDnnConvolutionalLayer(cpu.InputInfo, ConvolutionInfo.Default, cpu.KernelInfo, cpu.OutputInfo, cpu.Weights, cpu.Biases, ActivationFunctionType.LeCunTanh); @@ -228,7 +228,7 @@ public void ConvolutionGradient() [TestMethod] public void PoolingForward() { - float[,] x = WeightsProvider.NewFullyConnectedWeights(400, 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 58 * 58 * 3); + float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 58 * 58 * 3); PoolingLayer cpu = new PoolingLayer(new TensorInfo(58, 58, 3), PoolingInfo.Default, ActivationFunctionType.LeakyReLU), gpu = new CuDnnPoolingLayer(cpu.InputInfo, PoolingInfo.Default, ActivationFunctionType.LeakyReLU); @@ -239,8 +239,8 @@ 
public void PoolingForward() public void PoolingBackward() { float[,] - delta_1 = WeightsProvider.NewFullyConnectedWeights(400, 29 * 29 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 29 * 29 * 3), - z = WeightsProvider.NewFullyConnectedWeights(400, 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 58 * 58 * 3); + delta_1 = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 29 * 29 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 29 * 29 * 3), + z = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 58 * 58 * 3); PoolingLayer cpu = new PoolingLayer(new TensorInfo(58, 58, 3), PoolingInfo.Default, ActivationFunctionType.LeakyReLU), gpu = new CuDnnPoolingLayer(cpu.InputInfo, PoolingInfo.Default, ActivationFunctionType.LeakyReLU); diff --git a/Unit/NeuralNetwork.NET.Unit/SerializationTest.cs b/Unit/NeuralNetwork.NET.Unit/SerializationTest.cs index 4ae03d4..f646e86 100644 --- a/Unit/NeuralNetwork.NET.Unit/SerializationTest.cs +++ b/Unit/NeuralNetwork.NET.Unit/SerializationTest.cs @@ -1,6 +1,4 @@ -using System; -using System.IO; -using System.Linq; +using System.IO; using Microsoft.VisualStudio.TestTools.UnitTesting; using NeuralNetworkNET.APIs; using NeuralNetworkNET.APIs.Enums; @@ -50,7 +48,7 @@ public void StreamSerialize() { using (MemoryStream stream = new MemoryStream()) { - float[] w = WeightsProvider.NewFullyConnectedWeights(784, 30, WeightsInitializationMode.GlorotNormal); + float[] w = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(784), 30, WeightsInitializationMode.GlorotNormal); stream.WriteShuffled(w); Assert.IsTrue(stream.Position == sizeof(float) * w.Length); stream.Seek(0, SeekOrigin.Begin); From de8a9e7c6e0acc458071869bd31a97aad043e3d6 Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Sun, 24 Dec 2017 12:47:20 +0100 Subject: [PATCH 07/30] Inception constructors and Clone method implemented --- .../Layers/CuDnnInceptionLayer.cs | 22 ++++++++++++++----- .../APIs/Structs/InceptionInfo.cs | 12 +++++++++- NeuralNetwork.NET/APIs/Structs/TensorInfo.cs | 11 ++++++++-- 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index 2b65f99..5cd550b 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -7,6 +7,7 @@ using NeuralNetworkNET.Networks.Activations; using NeuralNetworkNET.Networks.Activations.Delegates; using NeuralNetworkNET.Networks.Implementations.Layers.Abstract; +using NeuralNetworkNET.Networks.Implementations.Layers.Helpers; using System; using System.Runtime.CompilerServices; @@ -164,8 +165,21 @@ private void SetupCuDnnInfo() #endregion - protected CuDnnInceptionLayer(in TensorInfo input, in TensorInfo output, [NotNull] float[] w, [NotNull] float[] b, ActivationFunctionType activation) : base(input, output, w, b, activation) + internal CuDnnInceptionLayer(in TensorInfo input, in InceptionInfo info, BiasInitializationMode biasMode = BiasInitializationMode.Zero) + : base(input, new TensorInfo(input.Height, input.Width, info.OutputChannels), + WeightsProvider.NewInceptionWeights(input, info), + WeightsProvider.NewBiases(info.OutputChannels, biasMode), + ActivationFunctionType.ReLU) { + _OperationInfo = info; + SetupCuDnnInfo(); + } + + internal CuDnnInceptionLayer(in TensorInfo input, in InceptionInfo info, [NotNull] float[] w, 
[NotNull] float[] b) + : base(input, new TensorInfo(input.Height, input.Width, info.OutputChannels), w, b, ActivationFunctionType.ReLU) + { + _OperationInfo = info; + SetupCuDnnInfo(); } #region Implementation @@ -187,9 +201,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ #endregion - public override INetworkLayer Clone() - { - throw new NotImplementedException(); - } + /// + public override INetworkLayer Clone() => new CuDnnInceptionLayer(InputInfo, OperationInfo, Weights, Biases); } } diff --git a/NeuralNetwork.NET/APIs/Structs/InceptionInfo.cs b/NeuralNetwork.NET/APIs/Structs/InceptionInfo.cs index 27da1ab..676b86a 100644 --- a/NeuralNetwork.NET/APIs/Structs/InceptionInfo.cs +++ b/NeuralNetwork.NET/APIs/Structs/InceptionInfo.cs @@ -12,7 +12,7 @@ namespace NeuralNetworkNET.APIs.Structs [JsonObject(MemberSerialization.Fields)] public readonly struct InceptionInfo : IEquatable { - #region Fields + #region Fields and properties /// /// The number of 1x1 convolution kernels used in the first step of the forward pass @@ -39,6 +39,16 @@ namespace NeuralNetworkNET.APIs.Structs /// public readonly int Chained1x1AfterPoolingConvolutionKernels; + /// + /// Gets the number of output channels after the depth concatenation + /// + public int OutputChannels + { + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => Primary1x1ConvolutionKernels + Secondary3x3ConvolutionKernels + Secondary5x5ConvolutionKernels + Chained1x1AfterPoolingConvolutionKernels; + } + #endregion #region Constructors diff --git a/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs b/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs index b938fed..0853a92 100644 --- a/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs +++ b/NeuralNetwork.NET/APIs/Structs/TensorInfo.cs @@ -13,6 +13,8 @@ namespace NeuralNetworkNET.APIs.Structs [DebuggerDisplay("Height: {Height}, Width: {Width}, Channels: {Channels}, Size: {Size}")] public readonly struct TensorInfo : IEquatable { + #region Fields and parameters + /// /// The height of each 2D slice /// @@ -52,13 +54,16 @@ public int SliceSize get => Height * Width; } + #endregion + + #region Constructors + internal TensorInfo(int height, int width, int channels) { if (height * width <= 0) throw new ArgumentException("The height and width of the kernels must be positive values"); - if (channels < 1) throw new ArgumentOutOfRangeException(nameof(channels), "The number of channels must be at least equal to 1"); Height = height; Width = width; - Channels = channels; + Channels = channels >= 1 ? 
channels : throw new ArgumentOutOfRangeException(nameof(channels), "The number of channels must be at least equal to 1");
         }

         ///
@@ -87,6 +92,8 @@ internal TensorInfo(int height, int width, int channels)
         [Pure]
         public static TensorInfo CreateLinear(int size) => new TensorInfo(1, 1, size);

+        #endregion
+
         #region Equality

         ///

From f41a4a8efebce75c7abaeb5e8651bace90be96d5 Mon Sep 17 00:00:00 2001
From: Sergio0694
Date: Sun, 24 Dec 2017 15:18:16 +0100
Subject: [PATCH 08/30] Inception layer forward method implemented (WIP)

---
 .../Extensions/GpuExtensions.cs   |  18 +++
 .../Layers/CuDnnInceptionLayer.cs | 111 ++++++++++++++++--
 2 files changed, 122 insertions(+), 7 deletions(-)

diff --git a/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs b/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs
index 3426b0a..6576b2b 100644
--- a/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs
+++ b/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs
@@ -54,6 +54,24 @@ public static void CopyToHost([NotNull] this DeviceMemory<float> source, int n,
             source.CopyTo(result);
         }

+        ///
+        /// Copies the source data into the target Tensor, splitting each individual entry into its own row
+        ///
+        /// The source memory area with the concatenated data for each entry
+        /// The destination Tensor that will store the data
+        /// The column offset for the data for each entry
+        /// The number of values to copy for each entry
+        public static unsafe void CopyToRows([NotNull] this DeviceMemory<float> source, in Tensor destination, int offset, int length)
+        {
+            if (source.Length / length != destination.Entities) throw new ArgumentOutOfRangeException(nameof(length), "The input length doesn't match the given arguments");
+            if (destination.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid");
+            CUDAInterop.cudaError_enum result = CUDAInterop.cudaError_enum.CUDA_SUCCESS;
+            for (int i = 0; i < destination.Entities; i++)
+                result |= CUDAInterop.cuMemcpy(new IntPtr((float*)destination + i * destination.Length + offset), source.Handle + i * length, new IntPtr(sizeof(float) * length));
+            if (result != CUDAInterop.cudaError_enum.CUDA_SUCCESS)
+                throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}");
+        }
+
         #endregion

         ///
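(A quick usage sketch for the new CopyToRows extension, not part of the patch; the shapes and the Gpu.Default instance are illustrative assumptions.)

    // Hypothetical example: 10 samples, 4 values per sample from one pipeline,
    // copied into columns 8..12 of a 12-wide concatenated tensor
    Tensor.New(10, 12, out Tensor destination);
    using (DeviceMemory<float> source = Gpu.Default.AllocateDevice<float>(10 * 4))
    {
        // ... a kernel fills source with 10 * 4 values ...
        source.CopyToRows(destination, 8, 4);
    }

diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs
index 5cd550b..0392e33 100644
--- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs
+++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs
@@ -1,8 +1,10 @@
-using Alea.cuDNN;
+using Alea;
+using Alea.cuDNN;
 using JetBrains.Annotations;
 using NeuralNetworkNET.APIs.Enums;
 using NeuralNetworkNET.APIs.Structs;
+using NeuralNetworkNET.Cuda.Extensions;
 using NeuralNetworkNET.Cuda.Services;
@@ -120,16 +122,16 @@ public ref readonly InceptionInfo OperationInfo
         [NotNull]
         private readonly TensorDescriptor Secondary1x1BiasDescription = new TensorDescriptor();

-        // The first secondary 1x1 convolution info
-        [NotNull]
-        private readonly ConvolutionDescriptor Secondary1x1ConvolutionDescription = new ConvolutionDescriptor();
-
         // The info on the secondary 1x1 convolution outputs
         [NotNull]
         private readonly TensorDescriptor Secondary1x1OutputDescription = new TensorDescriptor();

         #endregion

+        // The shared ReLU activation description for the current layer
+        [NotNull]
+        private readonly 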
ActivationDescriptor ActivationDescription = new ActivationDescriptor(); + /// /// Gets the instance for the current layer /// @@ -158,9 +160,11 @@ private void SetupCuDnnInfo() PoolingDescription.Set2D(Alea.cuDNN.PoolingMode.AVERAGE_COUNT_EXCLUDE_PADDING, NanPropagation.PROPAGATE_NAN, 3, 3, 1, 1, 1, 1); // Secondary 1x1 convolution - Secondary1x1ConvolutionDescription.Set2D(0, 0, 1, 1, 1, 1, Alea.cuDNN.ConvolutionMode.CROSS_CORRELATION); Secondary1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, InputInfo.Channels, _OperationInfo.Chained1x1AfterPoolingConvolutionKernels, 1, 1); Secondary1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Chained1x1AfterPoolingConvolutionKernels, 1, 1); + + // Activation + ActivationDescription.Set(ActivationMode.RELU, NanPropagation.PROPAGATE_NAN, 0); } #endregion @@ -186,7 +190,100 @@ internal CuDnnInceptionLayer(in TensorInfo input, in InceptionInfo info, [NotNul public override void Forward(in Tensor x, out Tensor z, out Tensor a) { - throw new NotImplementedException(); + Tensor.New(x.Entities, OutputInfo.Size, out z); + Tensor.New(x.Entities, OutputInfo.Size, out a); + using (DeviceMemory + w_gpu = DnnInstance.Gpu.AllocateDevice(Weights), + b_gpu = DnnInstance.Gpu.AllocateDevice(Biases)) + { + using (DeviceMemory _1x1Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels)) + { + // First 1x1 convolution + using (DeviceMemory x_gpu = DnnInstance.Gpu.AllocateDevice(x)) + { + // Descriptors setup and first 1x1 convolution + InputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width); + _1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Primary1x1ConvolutionKernels, InputInfo.Height, InputInfo.Width); + DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, _1x1FilterDescription, _1x1ConvolutionDescription, _1x1OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); + DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, _1x1FilterDescription, _1x1ConvolutionDescription, _1x1OutputDescription, algorithm, out IntPtr size); + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, _1x1FilterDescription, w_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _1x1OutputDescription, _1x1Output_gpu.Ptr); + } + DnnInstance.AddTensor(1, _1x1BiasDescription, b_gpu.Ptr, 1, _1x1OutputDescription, _1x1Output_gpu.Ptr); + _1x1Output_gpu.CopyToRows(z, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels); + + // 1x1 convolution activation + DnnInstance.ActivationForward(ActivationDescription, 1, _1x1OutputDescription, _1x1Output_gpu.Ptr, 0, _1x1OutputDescription, _1x1Output_gpu.Ptr); + _1x1Output_gpu.CopyToRows(a, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels); + } + + // 3x3 convolution + using (DeviceMemory _3x3Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels)) + { + _3x3OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Secondary3x3ConvolutionKernels, InputInfo.Height, InputInfo.Width); + DnnInstance.GetConvolutionForwardAlgorithm(_1x1OutputDescription, _3x3FilterDescription, 
_3x3ConvolutionDescription, _3x3OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); + DnnInstance.GetConvolutionForwardWorkspaceSize(_1x1OutputDescription, _3x3FilterDescription, _3x3ConvolutionDescription, _3x3OutputDescription, algorithm, out IntPtr size); + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + DnnInstance.ConvolutionForward(1, _1x1OutputDescription, _1x1Output_gpu.Ptr, _3x3FilterDescription, w_gpu.Ptr + InputInfo.Channels * OperationInfo.Primary1x1ConvolutionKernels, _3x3ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3OutputDescription, _3x3Output_gpu.Ptr); + } + DnnInstance.AddTensor(1, _3x3BiasDescription, b_gpu.Ptr + OperationInfo.Primary1x1ConvolutionKernels, 1, _3x3OutputDescription, _3x3Output_gpu.Ptr); + _3x3Output_gpu.CopyToRows(z, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels); + + // 3x3 convolution activation + DnnInstance.ActivationForward(ActivationDescription, 1, _3x3OutputDescription, _3x3Output_gpu.Ptr, 0, _3x3OutputDescription, _3x3Output_gpu.Ptr); + _3x3Output_gpu.CopyToRows(a, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels); + } + + // 5x5 convolution + using (DeviceMemory _5x5Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels)) + { + _5x5OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Secondary5x5ConvolutionKernels, InputInfo.Height, InputInfo.Width); + DnnInstance.GetConvolutionForwardAlgorithm(_1x1OutputDescription, _5x5FilterDescription, _5x5ConvolutionDescription, _5x5OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); + DnnInstance.GetConvolutionForwardWorkspaceSize(_1x1OutputDescription, _5x5FilterDescription, _5x5ConvolutionDescription, _5x5OutputDescription, algorithm, out IntPtr size); + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + DnnInstance.ConvolutionForward(1, _1x1OutputDescription, _1x1Output_gpu.Ptr, _5x5FilterDescription, w_gpu.Ptr + InputInfo.Channels * OperationInfo.Primary1x1ConvolutionKernels + 3 * 3 * OperationInfo.Primary1x1ConvolutionKernels * OperationInfo.Secondary3x3ConvolutionKernels, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5OutputDescription, _5x5Output_gpu.Ptr); + } + DnnInstance.AddTensor(1, _5x5BiasDescription, b_gpu.Ptr + OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels, 1, _5x5OutputDescription, _5x5Output_gpu.Ptr); + _5x5Output_gpu.CopyToRows(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); + + // 3x3 convolution activation + DnnInstance.ActivationForward(ActivationDescription, 1, _5x5OutputDescription, _5x5Output_gpu.Ptr, 0, _5x5OutputDescription, _5x5Output_gpu.Ptr); + _5x5Output_gpu.CopyToRows(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); + } + } + + // Pooling pipeline + PoolingOutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, 
InputInfo.Height, InputInfo.Width); + using (DeviceMemory y_gpu = DnnInstance.Gpu.AllocateDevice(x.Size)) + { + // Pooling + using (DeviceMemory x_gpu = DnnInstance.Gpu.AllocateDevice(x)) + { + DnnInstance.PoolingForward(PoolingDescription, 1, InputDescription, x_gpu.Ptr, 0, InputDescription, y_gpu.Ptr); + } + + // 1x1 convolution + using (DeviceMemory _1x1Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Chained1x1AfterPoolingConvolutionKernels)) + { + _1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Chained1x1AfterPoolingConvolutionKernels, InputInfo.Height, InputInfo.Width); + DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, Secondary1x1FilterDescription, _1x1ConvolutionDescription, Secondary1x1OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); + DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, Secondary1x1FilterDescription, _1x1ConvolutionDescription, Secondary1x1OutputDescription, algorithm, out IntPtr size); + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + DnnInstance.ConvolutionForward(1, InputDescription, y_gpu.Ptr, Secondary1x1FilterDescription, w_gpu.Ptr + InputInfo.Channels * OperationInfo.Primary1x1ConvolutionKernels + 3 * 3 * OperationInfo.Primary1x1ConvolutionKernels * OperationInfo.Secondary3x3ConvolutionKernels + 5 * 5 * OperationInfo.Primary1x1ConvolutionKernels * OperationInfo.Secondary5x5ConvolutionKernels, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); + } + DnnInstance.AddTensor(1, Secondary1x1BiasDescription, b_gpu.Ptr + OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels, 1, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); + _1x1Output_gpu.CopyToRows(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Chained1x1AfterPoolingConvolutionKernels); + + // 1x1 convolution activation + DnnInstance.ActivationForward(ActivationDescription, 1, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr, 0, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); + _1x1Output_gpu.CopyToRows(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Chained1x1AfterPoolingConvolutionKernels); + } + } + } } public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime) From 127e241a844f8f4dae01cb9e90d4aecad37768f8 Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Sun, 24 Dec 2017 15:48:45 +0100 Subject: [PATCH 09/30] Minor code improvements to the CuDnn layers --- .../Layers/CuDnnConvolutionalLayer.cs | 90 +++++++++---------- .../Layers/CuDnnFullyConnectedLayer.cs | 42 ++++----- .../Layers/CuDnnSoftmaxLayer.cs | 16 ++-- 3 files changed, 64 insertions(+), 84 deletions(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs index 4aa0982..df31119 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs @@ -74,71 +74,63 @@ public CuDnnConvolutionalLayer( 
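
The theme of this patch: the CuDnn layers used to pin their managed weights array with fixed and wrap it in a reshaped Tensor purely to hand a pointer to AllocateDevice. Since AllocateDevice can upload a flat float[] directly and the filter descriptor already carries the NCHW shape, the pin-and-reshape step is dead weight. Condensed, the change in each layer is (a sketch, not the literal hunk):

    // Before: pin + reshape, only to copy the same floats to the device
    fixed (float* pw = Weights)
    {
        Tensor.Reshape(pw, OutputInfo.Channels, KernelInfo.Size, out Tensor wTensor);
        using (DeviceMemory<float> w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor)) { /* ... */ }
    }

    // After: upload the flat array as-is
    using (DeviceMemory<float> w_gpu = DnnInstance.Gpu.AllocateDevice(Weights)) { /* ... */ }
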
#region Implementation /// - public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a) + public override void Forward(in Tensor x, out Tensor z, out Tensor a) { - fixed (float* pw = Weights) + using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * OutputInfo.Size)) { - Tensor.Reshape(pw, OutputInfo.Channels, KernelInfo.Size, out Tensor wTensor); - using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * OutputInfo.Size)) + // Tensors info setup + InputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width); + OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OutputInfo.Channels, OutputInfo.Height, OutputInfo.Width); + + // Forward convolution + DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); + DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, algorithm, out IntPtr size); + using (DeviceMemory + x_gpu = DnnInstance.Gpu.AllocateDevice(x), + w_gpu = DnnInstance.Gpu.AllocateDevice(Weights)) + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) { - // Tensors info setup - InputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width); - OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OutputInfo.Channels, OutputInfo.Height, OutputInfo.Width); - - // Forward convolution - DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); - DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, FilterDescription, ConvolutionDescription, OutputDescription, algorithm, out IntPtr size); - using (DeviceMemory - x_gpu = DnnInstance.Gpu.AllocateDevice(x), - w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor)) - using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) - { - DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, FilterDescription, w_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, OutputDescription, z_gpu.Ptr); - } + DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, FilterDescription, w_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, OutputDescription, z_gpu.Ptr); + } - // Biases - using (DeviceMemory b_gpu = DnnInstance.Gpu.AllocateDevice(Biases)) - { - DnnInstance.AddTensor(1, BiasDescription, b_gpu.Ptr, 1, OutputDescription, z_gpu.Ptr); - } - z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z); + // Biases + using (DeviceMemory b_gpu = DnnInstance.Gpu.AllocateDevice(Biases)) + { + DnnInstance.AddTensor(1, BiasDescription, b_gpu.Ptr, 1, OutputDescription, z_gpu.Ptr); + } + z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z); - // Activation - if (ActivationFunctionType == ActivationFunctionType.Identity) z.Duplicate(out a); - else - { - DnnInstance.ActivationForward(z.Entities, z.Length, z_gpu.Ptr, z_gpu.Ptr, ActivationFunctions.Activation); - z_gpu.CopyToHost(z.Entities, z.Length, out a); - } + // Activation + if (ActivationFunctionType == ActivationFunctionType.Identity) z.Duplicate(out a); + else + { + 
DnnInstance.ActivationForward(z.Entities, z.Length, z_gpu.Ptr, z_gpu.Ptr, ActivationFunctions.Activation); + z_gpu.CopyToHost(z.Entities, z.Length, out a); } } } /// - public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime) + public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime) { - fixed (float* pw = Weights) + using (DeviceMemory delta_gpu = DnnInstance.Gpu.AllocateDevice(z.Size)) { - Tensor.Reshape(pw, OutputInfo.Channels, KernelInfo.Size, out Tensor wTensor); + // Convolution DnnInstance.GetConvolutionBackwardDataAlgorithm(FilterDescription, OutputDescription, ConvolutionDescription, InputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdDataAlgo algorithm); DnnInstance.GetConvolutionBackwardDataWorkspaceSize(FilterDescription, OutputDescription, ConvolutionDescription, InputDescription, algorithm, out IntPtr size); - using (DeviceMemory delta_gpu = DnnInstance.Gpu.AllocateDevice(z.Size)) + using (DeviceMemory + delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1), + w_gpu = DnnInstance.Gpu.AllocateDevice(Weights)) + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) { - // Backwards convolution - using (DeviceMemory - delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1), - w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor)) - using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) - { - DnnInstance.ConvolutionBackwardData(1, FilterDescription, w_gpu.Ptr, OutputDescription, delta_1_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, InputDescription, delta_gpu.Ptr); - } + DnnInstance.ConvolutionBackwardData(1, FilterDescription, w_gpu.Ptr, OutputDescription, delta_1_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, InputDescription, delta_gpu.Ptr); + } - // Activation - using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(z)) - { - DnnInstance.ActivationBackward(z.Entities, z.Length, z_gpu.Ptr, delta_gpu.Ptr, activationPrime); - z_gpu.CopyTo(z); - } + // Activation + using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(z)) + { + DnnInstance.ActivationBackward(z.Entities, z.Length, z_gpu.Ptr, delta_gpu.Ptr, activationPrime); + z_gpu.CopyTo(z); } } } diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs index 87685d3..f1c587c 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs @@ -30,39 +30,31 @@ public CuDnnFullyConnectedLayer(in TensorInfo input, int neurons, [NotNull] floa #region Implementation /// - public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a) + public override void Forward(in Tensor x, out Tensor z, out Tensor a) { - fixed (float* pw = Weights) + using (DeviceMemory + x_gpu = DnnInstance.Gpu.AllocateDevice(x), + w_gpu = DnnInstance.Gpu.AllocateDevice(Weights), + y_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * OutputInfo.Size), + b_gpu = DnnInstance.Gpu.AllocateDevice(Biases)) { - Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor); - using (DeviceMemory - x_gpu = DnnInstance.Gpu.AllocateDevice(x), - w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor), - y_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * OutputInfo.Size), - b_gpu = DnnInstance.Gpu.AllocateDevice(Biases)) - { - DnnInstance.FullyConnectedForward(x.Entities, x.Length, 
OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, y_gpu.Ptr); - y_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z); - DnnInstance.ActivationForward(z.Entities, z.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); - y_gpu.CopyToHost(z.Entities, z.Length, out a); - } + DnnInstance.FullyConnectedForward(x.Entities, x.Length, OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, y_gpu.Ptr); + y_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z); + DnnInstance.ActivationForward(z.Entities, z.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); + y_gpu.CopyToHost(z.Entities, z.Length, out a); } } /// - public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime) + public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime) { - fixed (float* pw = Weights) + using (DeviceMemory + delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1), + w_gpu = DnnInstance.Gpu.AllocateDevice(Weights), + z_gpu = DnnInstance.Gpu.AllocateDevice(z)) { - Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor); - using (DeviceMemory - delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1), - w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor), - z_gpu = DnnInstance.Gpu.AllocateDevice(z)) - { - DnnInstance.FullyConnectedBackwardData(z.Entities, InputInfo.Size, OutputInfo.Size, z_gpu.Ptr, delta_1_gpu.Ptr, w_gpu.Ptr, activationPrime); - z_gpu.CopyTo(z); - } + DnnInstance.FullyConnectedBackwardData(z.Entities, InputInfo.Size, OutputInfo.Size, z_gpu.Ptr, delta_1_gpu.Ptr, w_gpu.Ptr, activationPrime); + z_gpu.CopyTo(z); } } diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnSoftmaxLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnSoftmaxLayer.cs index abc7f07..be8a0cd 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnSoftmaxLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnSoftmaxLayer.cs @@ -42,17 +42,13 @@ public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a) using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * OutputInfo.Size)) { // Linear pass - fixed (float* pw = Weights) + using (DeviceMemory + x_gpu = DnnInstance.Gpu.AllocateDevice(x), + w_gpu = DnnInstance.Gpu.AllocateDevice(Weights), + b_gpu = DnnInstance.Gpu.AllocateDevice(Biases)) { - Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor); - using (DeviceMemory - x_gpu = DnnInstance.Gpu.AllocateDevice(x), - w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor), - b_gpu = DnnInstance.Gpu.AllocateDevice(Biases)) - { - DnnInstance.FullyConnectedForward(x.Entities, x.Length, OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, z_gpu.Ptr); - z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z); - } + DnnInstance.FullyConnectedForward(x.Entities, x.Length, OutputInfo.Size, x_gpu.Ptr, w_gpu.Ptr, b_gpu.Ptr, z_gpu.Ptr); + z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z); } // Activation From c16c249690380e34197c931db063b72ea6ec65c5 Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Sun, 24 Dec 2017 17:01:40 +0100 Subject: [PATCH 10/30] Minor bug fixes --- NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs b/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs index 6576b2b..79723f2 100644 --- a/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs +++ b/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs @@ -66,8 +66,14 @@ public static unsafe void CopyToRows([NotNull] this DeviceMemory source, if 
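
The hunk below replaces the broken single-destination copy with a properly strided loop. To make the stride arithmetic concrete, a worked example with small, illustrative numbers:

    // Worked example (illustrative values): destination with Entities = 2, Length = 6,
    // copying per-row chunks of length = 2 at offset = 3
    //   bytes     = sizeof(float) * 2 =  8   -> bytes copied per row
    //   lineBytes = sizeof(float) * 6 = 24   -> destination stride from one row to the next
    //   row 0: source floats [0..1] -> destination floats [3..4]
    //   row 1: source floats [2..3] -> destination floats [9..10]
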
(source.Length / length != destination.Entities) throw new ArgumentOutOfRangeException(nameof(length), "The input length doesn't match the given arguments"); if (destination.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid"); CUDAInterop.cudaError_enum result = CUDAInterop.cudaError_enum.CUDA_SUCCESS; + int + bytes = sizeof(float) * length, // Bytes to copy for each row + lineBytes = sizeof(float) * destination.Length; // Bytes to skip for each entry to jump to the line below at the same offset + IntPtr + start = destination.Ptr + sizeof(float) * offset, // Initial destination offset + size = new IntPtr(bytes); for (int i = 0; i < destination.Entities; i++) - result |= CUDAInterop.cuMemcpy(new IntPtr((float*)destination + offset), source.Handle + i * destination.Length, new IntPtr(sizeof(float) * length)); + result |= CUDAInterop.cuMemcpy(start + i * lineBytes, source.Handle + i * bytes, size); if (result != CUDAInterop.cudaError_enum.CUDA_SUCCESS) throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}"); } From 6a82479855107d41a645178793e2bf6b8cecefb0 Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Sun, 24 Dec 2017 18:10:12 +0100 Subject: [PATCH 11/30] Minor improvements to the Tensor struct --- NeuralNetwork.NET/APIs/Structs/Tensor.cs | 27 ++++++++++++++++++- .../Networks/Implementations/NeuralNetwork.cs | 4 +-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/NeuralNetwork.NET/APIs/Structs/Tensor.cs b/NeuralNetwork.NET/APIs/Structs/Tensor.cs index 9252f95..dee5e5e 100644 --- a/NeuralNetwork.NET/APIs/Structs/Tensor.cs +++ b/NeuralNetwork.NET/APIs/Structs/Tensor.cs @@ -34,7 +34,22 @@ public readonly struct Tensor /// /// The total size (the number of values) in the current /// - public int Size => Entities * Length; + public int Size + { + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => Entities * Length; + } + + /// + /// Gets whether or not the current instance points to a null, unallocated memory area + /// + public bool Null + { + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => Ptr == IntPtr.Zero; + } #region Initialization @@ -192,6 +207,16 @@ public float[] ToArray() [MethodImpl(MethodImplOptions.AggressiveInlining)] public void Free() => Marshal.FreeHGlobal(Ptr); + /// + /// Frees the memory associated with the current instance, if needed + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void TryFree() + { + if (Ptr != IntPtr.Zero) + Marshal.FreeHGlobal(Ptr); + } + // Implicit pointer conversion [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe implicit operator float*(in Tensor tensor) => (float*)tensor.Ptr.ToPointer(); diff --git a/NeuralNetwork.NET/Networks/Implementations/NeuralNetwork.cs b/NeuralNetwork.NET/Networks/Implementations/NeuralNetwork.cs index 493282f..6707591 100644 --- a/NeuralNetwork.NET/Networks/Implementations/NeuralNetwork.cs +++ b/NeuralNetwork.NET/Networks/Implementations/NeuralNetwork.cs @@ -248,7 +248,7 @@ internal unsafe void Backpropagate(in TrainingBatch batch, float dropout, [NotNu * Multiply the previous delta with the transposed weights of the following layer * Compute d(l), the Hadamard product of z'(l) and delta(l + 1) * W(l + 1)T */ _Layers[l + 1].Backpropagate(*deltas[l + 1], zList[l], _Layers[l].ActivationFunctions.ActivationPrime); - if (dropoutMasks[l].Ptr != IntPtr.Zero) zList[l].InPlaceHadamardProduct(dropoutMasks[l]); + if 
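
The new Null flag and TryFree pair up to make cleanup paths tolerant of tensors that were never allocated, as the dropout-mask handling in this hunk shows. A minimal usage sketch (values illustrative):

    Tensor.New(1, 4, out Tensor t);   // allocated: t.Null == false
    Tensor empty = default;           // no backing memory: empty.Null == true
    t.TryFree();                      // releases the allocation
    empty.TryFree();                  // skipped by the Ptr check, no work done
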
(!dropoutMasks[l].Null) zList[l].InPlaceHadamardProduct(dropoutMasks[l]); deltas[l] = zList + l; } @@ -285,7 +285,7 @@ internal unsafe void Backpropagate(in TrainingBatch batch, float dropout, [NotNu { zList[i].Free(); aList[i].Free(); - if (dropoutMasks[i].Ptr != IntPtr.Zero) dropoutMasks[i].Free(); + dropoutMasks[i].TryFree(); } zList[_Layers.Length - 1].Free(); aList[_Layers.Length - 1].Free(); From 657c277464d1c9740da7f5c8f9b32a4fac5f586c Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Mon, 25 Dec 2017 12:46:54 +0100 Subject: [PATCH 12/30] InceptionInfo struct improved, minor changes --- .../Layers/CuDnnInceptionLayer.cs | 59 +++++++++++++++---- .../APIs/Structs/InceptionInfo.cs | 44 +++++++++++--- .../Layers/Helpers/WeightsProvider.cs | 22 ++++--- 3 files changed, 99 insertions(+), 26 deletions(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index 0392e33..5875030 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -64,6 +64,22 @@ public ref readonly InceptionInfo OperationInfo #endregion + #region 3x3 reduce 1x1 convolution + + // The NCHW info for the 3x3 reduce 1x1 convolution weights + [NotNull] + private readonly FilterDescriptor _3x3Reduce1x1FilterDescription = new FilterDescriptor(); + + // The info on the 3x3 reduce 1x1 convolution bias (one value per output channel) + [NotNull] + private readonly TensorDescriptor _3x3Reduce1x1BiasDescription = new TensorDescriptor(); + + // The NCHW tensor info for the outputs of the 3x3 reduce 1x1 convolution + [NotNull] + private readonly TensorDescriptor _3x3Reduce1x1OutputDescription = new TensorDescriptor(); + + #endregion + #region 3x3 secondary convolution // The NCHW info for the 3x3 convolution weights @@ -84,6 +100,22 @@ public ref readonly InceptionInfo OperationInfo #endregion + #region 5x5 reduce 1x1 convolution + + // The NCHW info for the 5x5 reduce 1x1 convolution weights + [NotNull] + private readonly FilterDescriptor _5x5Reduce1x1FilterDescription = new FilterDescriptor(); + + // The info on the 5x5 reduce 1x1 convolution bias (one value per output channel) + [NotNull] + private readonly TensorDescriptor _5x5Reduce1x1BiasDescription = new TensorDescriptor(); + + // The NCHW tensor info for the outputs of the 5x5 reduce 1x1 convolution + [NotNull] + private readonly TensorDescriptor _5x5Reduce1x1OutputDescription = new TensorDescriptor(); + + #endregion + #region 5x5 secondary convolution // The NCHW info for the 5x5 convolution weights @@ -142,26 +174,33 @@ public ref readonly InceptionInfo OperationInfo private void SetupCuDnnInfo() { // First 1x1 convolution - _1x1ConvolutionDescription.Set2D(0, 0, 1, 1, 1, 1, Alea.cuDNN.ConvolutionMode.CROSS_CORRELATION); _1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Primary1x1ConvolutionKernels, InputInfo.Channels, 1, 1); _1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Primary1x1ConvolutionKernels, 1, 1); + // 3x3 reduce 1x1 convolution + _3x3Reduce1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, InputInfo.Channels, 1, 1); + _3x3Reduce1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, 1, 1); + // 3x3 convolution _3x3ConvolutionDescription.Set2D(1, 1, 1, 1, 1, 1, 
Alea.cuDNN.ConvolutionMode.CROSS_CORRELATION); // 1-padding to keep size - _3x3FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Secondary3x3ConvolutionKernels, _OperationInfo.Primary1x1ConvolutionKernels, 3, 3); + _3x3FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Secondary3x3ConvolutionKernels, _OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, 3, 3); _3x3BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Secondary3x3ConvolutionKernels, 1, 1); + // 5x5 reduce 1x1 convolution + _5x5Reduce1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, InputInfo.Channels, 1, 1); + _5x5Reduce1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, 1, 1); + // 5x5 convolution _5x5ConvolutionDescription.Set2D(2, 2, 1, 1, 1, 1, Alea.cuDNN.ConvolutionMode.CROSS_CORRELATION); - _5x5FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Secondary5x5ConvolutionKernels, _OperationInfo.Primary1x1ConvolutionKernels, 5, 5); + _5x5FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Secondary5x5ConvolutionKernels, _OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, 5, 5); _5x5BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Secondary5x5ConvolutionKernels, 1, 1); // Pooling PoolingDescription.Set2D(Alea.cuDNN.PoolingMode.AVERAGE_COUNT_EXCLUDE_PADDING, NanPropagation.PROPAGATE_NAN, 3, 3, 1, 1, 1, 1); // Secondary 1x1 convolution - Secondary1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, InputInfo.Channels, _OperationInfo.Chained1x1AfterPoolingConvolutionKernels, 1, 1); - Secondary1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Chained1x1AfterPoolingConvolutionKernels, 1, 1); + Secondary1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, InputInfo.Channels, _OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, 1, 1); + Secondary1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, 1, 1); // Activation ActivationDescription.Set(ActivationMode.RELU, NanPropagation.PROPAGATE_NAN, 0); @@ -172,7 +211,7 @@ private void SetupCuDnnInfo() internal CuDnnInceptionLayer(in TensorInfo input, in InceptionInfo info, BiasInitializationMode biasMode = BiasInitializationMode.Zero) : base(input, new TensorInfo(input.Height, input.Width, info.OutputChannels), WeightsProvider.NewInceptionWeights(input, info), - WeightsProvider.NewBiases(info.OutputChannels, biasMode), + WeightsProvider.NewBiases(info.ConvolutionKernels, biasMode), ActivationFunctionType.ReLU) { _OperationInfo = info; @@ -266,9 +305,9 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) } // 1x1 convolution - using (DeviceMemory _1x1Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Chained1x1AfterPoolingConvolutionKernels)) + using (DeviceMemory _1x1Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize)) // TODO { - _1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Chained1x1AfterPoolingConvolutionKernels, InputInfo.Height, InputInfo.Width); + _1x1OutputDescription.Set4D(DataType.FLOAT, 
TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, -1, InputInfo.Height, InputInfo.Width); DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, Secondary1x1FilterDescription, _1x1ConvolutionDescription, Secondary1x1OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, Secondary1x1FilterDescription, _1x1ConvolutionDescription, Secondary1x1OutputDescription, algorithm, out IntPtr size); using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) @@ -276,11 +315,11 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) DnnInstance.ConvolutionForward(1, InputDescription, y_gpu.Ptr, Secondary1x1FilterDescription, w_gpu.Ptr + InputInfo.Channels * OperationInfo.Primary1x1ConvolutionKernels + 3 * 3 * OperationInfo.Primary1x1ConvolutionKernels * OperationInfo.Secondary3x3ConvolutionKernels + 5 * 5 * OperationInfo.Primary1x1ConvolutionKernels * OperationInfo.Secondary5x5ConvolutionKernels, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); } DnnInstance.AddTensor(1, Secondary1x1BiasDescription, b_gpu.Ptr + OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels, 1, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); - _1x1Output_gpu.CopyToRows(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Chained1x1AfterPoolingConvolutionKernels); + _1x1Output_gpu.CopyToRows(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize); // TODO // 1x1 convolution activation DnnInstance.ActivationForward(ActivationDescription, 1, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr, 0, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); - _1x1Output_gpu.CopyToRows(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Chained1x1AfterPoolingConvolutionKernels); + _1x1Output_gpu.CopyToRows(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize); // TODO } } } diff --git a/NeuralNetwork.NET/APIs/Structs/InceptionInfo.cs b/NeuralNetwork.NET/APIs/Structs/InceptionInfo.cs index 676b86a..2a69aab 100644 --- a/NeuralNetwork.NET/APIs/Structs/InceptionInfo.cs +++ b/NeuralNetwork.NET/APIs/Structs/InceptionInfo.cs @@ -19,11 +19,21 @@ namespace NeuralNetworkNET.APIs.Structs /// public readonly int Primary1x1ConvolutionKernels; + /// + /// The number of 1x1 convolution kernels before the 3x3 convolution + /// + public readonly int Primary3x3Reduce1x1ConvolutionKernels; + /// /// The number of 3x3 convolution kernels /// public readonly int Secondary3x3ConvolutionKernels; + /// + /// The number of 1x1 convolution kernels before the 5x5 convolution + /// + public readonly int Primary5x5Reduce1x1ConvolutionKernels; + /// /// The number of 5x5 convolution kernels /// @@ -37,7 +47,7 @@ namespace NeuralNetworkNET.APIs.Structs /// /// The number of 1x1 convolution kernels after the pooling operation 
/// - public readonly int Chained1x1AfterPoolingConvolutionKernels; + public readonly int Secondary1x1AfterPoolingConvolutionKernels; /// /// Gets the number of output channels after the depth concatenation /// @@ -46,7 +56,17 @@ public int OutputChannels { [Pure] [MethodImpl(MethodImplOptions.AggressiveInlining)] - get => Primary1x1ConvolutionKernels + Secondary3x3ConvolutionKernels + Secondary5x5ConvolutionKernels + Chained1x1AfterPoolingConvolutionKernels; + get => Primary1x1ConvolutionKernels + Secondary3x3ConvolutionKernels + Secondary5x5ConvolutionKernels + Secondary1x1AfterPoolingConvolutionKernels; + } + + /// + /// Gets the total number of convolution kernels for the current instance + /// + public int ConvolutionKernels + { + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => Primary1x1ConvolutionKernels + Primary3x3Reduce1x1ConvolutionKernels + Secondary3x3ConvolutionKernels + Primary5x5Reduce1x1ConvolutionKernels + Secondary5x5ConvolutionKernels + Secondary1x1AfterPoolingConvolutionKernels; } #endregion @@ -54,12 +74,14 @@ public int OutputChannels #region Constructors // Internal constructor - private InceptionInfo(int _1x1Kernels, int _3x3Kernels, int _5x5Kernels, PoolingMode poolingMode, int _1x1SecondaryKernels) + private InceptionInfo(int _1x1Kernels, int _3x3Reduce1x1Kernels, int _3x3Kernels, int _5x5Reduce1x1Kernels, int _5x5Kernels, PoolingMode poolingMode, int _1x1SecondaryKernels) { Primary1x1ConvolutionKernels = _1x1Kernels >= 1 ? _1x1Kernels : throw new ArgumentOutOfRangeException(nameof(_1x1Kernels), "The number of 1x1 kernels must be at least 1"); + Primary3x3Reduce1x1ConvolutionKernels = _3x3Reduce1x1Kernels >= 1 ? _3x3Reduce1x1Kernels : throw new ArgumentOutOfRangeException(nameof(_3x3Reduce1x1Kernels), "The number of 3x3 reduction 1x1 kernels must be at least 1"); Secondary3x3ConvolutionKernels = _3x3Kernels >= 1 ? _3x3Kernels : throw new ArgumentOutOfRangeException(nameof(_3x3Kernels), "The number of 3x3 kernels must be at least 1"); + Primary5x5Reduce1x1ConvolutionKernels = _5x5Reduce1x1Kernels >= 1 ? _5x5Reduce1x1Kernels : throw new ArgumentOutOfRangeException(nameof(_5x5Reduce1x1Kernels), "The number of 5x5 reduction 1x1 kernels must be at least 1"); Secondary5x5ConvolutionKernels = _5x5Kernels >= 1 ? _5x5Kernels : throw new ArgumentOutOfRangeException(nameof(_5x5Kernels), "The number of 5x5 kernels must be at least 1"); - Chained1x1AfterPoolingConvolutionKernels = _1x1SecondaryKernels >= 1 ? _1x1SecondaryKernels : throw new ArgumentOutOfRangeException(nameof(_1x1SecondaryKernels), "The number of secondary 1x1 kernels must be at least 1"); + Secondary1x1AfterPoolingConvolutionKernels = _1x1SecondaryKernels >= 1 ? 
_1x1SecondaryKernels : throw new ArgumentOutOfRangeException(nameof(_1x1SecondaryKernels), "The number of secondary 1x1 kernels must be at least 1"); Pooling = poolingMode; } @@ -67,16 +89,18 @@ private InceptionInfo(int _1x1Kernels, int _3x3Kernels, int _5x5Kernels, Pooling /// Creates a new inception layer description with the input parameters /// /// The number of 1x1 primary convolution kernels + /// The number of 3x3 reduction 1x1 kernels /// The number of 3x3 convolution kernels + /// The number of 5x5 reduction 1x1 kernels /// The number of 5x5 convolution kernels /// The pooling mode for the pooling pipeline /// The number of secondary 1x1 convolution kernels [PublicAPI] [Pure] public static InceptionInfo New( - int _1x1Kernels, int _3x3Kernels, int _5x5Kernels, + int _1x1Kernels, int _3x3Reduce1x1Kernels, int _3x3Kernels, int _5x5Reduce1x1Kernels, int _5x5Kernels, PoolingMode poolingMode, int _1x1SecondaryKernels) - => new InceptionInfo(_1x1Kernels, _3x3Kernels, _5x5Kernels, poolingMode, _1x1SecondaryKernels); + => new InceptionInfo(_1x1Kernels, _3x3Reduce1x1Kernels, _3x3Kernels, _5x5Reduce1x1Kernels, _5x5Kernels, poolingMode, _1x1SecondaryKernels); #endregion @@ -95,9 +119,11 @@ public override int GetHashCode() unchecked { hash = hash * 31 + Primary1x1ConvolutionKernels; - hash = hash * 31 + Chained1x1AfterPoolingConvolutionKernels; + hash = hash * 31 + Primary3x3Reduce1x1ConvolutionKernels; hash = hash * 31 + Secondary3x3ConvolutionKernels; + hash = hash * 31 + Primary5x5Reduce1x1ConvolutionKernels; hash = hash * 31 + Secondary5x5ConvolutionKernels; + hash = hash * 31 + Secondary1x1AfterPoolingConvolutionKernels; hash = hash * 31 + (int)Pooling; } return hash; @@ -105,9 +131,11 @@ public override int GetHashCode() [MethodImpl(MethodImplOptions.AggressiveInlining)] public static bool operator ==(in InceptionInfo a, in InceptionInfo b) => a.Primary1x1ConvolutionKernels == b.Primary1x1ConvolutionKernels && - a.Chained1x1AfterPoolingConvolutionKernels == b.Chained1x1AfterPoolingConvolutionKernels && + a.Primary3x3Reduce1x1ConvolutionKernels == b.Primary3x3Reduce1x1ConvolutionKernels && a.Secondary3x3ConvolutionKernels == b.Secondary3x3ConvolutionKernels && + a.Primary5x5Reduce1x1ConvolutionKernels == b.Primary5x5Reduce1x1ConvolutionKernels && a.Secondary5x5ConvolutionKernels == b.Secondary5x5ConvolutionKernels && + a.Secondary1x1AfterPoolingConvolutionKernels == b.Secondary1x1AfterPoolingConvolutionKernels && a.Pooling == b.Pooling; [MethodImpl(MethodImplOptions.AggressiveInlining)] diff --git a/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs b/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs index 72a6722..572ef96 100644 --- a/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs +++ b/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs @@ -78,19 +78,25 @@ public static unsafe float[] NewInceptionWeights(in TensorInfo input, in Incepti { int _1x1Length = input.Channels * info.Primary1x1ConvolutionKernels, - _3x3Length = 3 * 3 * info.Primary1x1ConvolutionKernels * info.Secondary3x3ConvolutionKernels, - _5x5Length = 5 * 5 * info.Primary1x1ConvolutionKernels * info.Secondary5x5ConvolutionKernels, - secondary1x1Length = input.Channels * info.Chained1x1AfterPoolingConvolutionKernels; + _3x3Reduce1x1Length = input.Channels * info.Primary3x3Reduce1x1ConvolutionKernels, + _3x3Length = 3 * 3 * info.Primary3x3Reduce1x1ConvolutionKernels * info.Secondary3x3ConvolutionKernels, + 
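
With the two reduction parameters in place, a full GoogLeNet-style module can be described in one call. A sketch using the kernel counts of the paper's inception (3a) block; the PoolingMode member name is an assumption:

    // inception (3a): 64 1x1, 96 -> 128 3x3, 16 -> 32 5x5, pool -> 32 1x1
    InceptionInfo info = InceptionInfo.New(
        _1x1Kernels: 64,
        _3x3Reduce1x1Kernels: 96, _3x3Kernels: 128,
        _5x5Reduce1x1Kernels: 16, _5x5Kernels: 32,
        poolingMode: PoolingMode.AverageExcludingPadding, // assumed member name
        _1x1SecondaryKernels: 32);
    // info.OutputChannels     == 64 + 128 + 32 + 32 == 256
    // info.ConvolutionKernels == 64 + 96 + 128 + 16 + 32 + 32 == 368
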
_5x5Reduce1x1Length = input.Channels * info.Primary5x5Reduce1x1ConvolutionKernels, + _5x5Length = 5 * 5 * info.Primary5x5Reduce1x1ConvolutionKernels * info.Secondary5x5ConvolutionKernels, + secondary1x1Length = input.Channels * info.Secondary1x1AfterPoolingConvolutionKernels; - float[] weights = new float[_1x1Length + _3x3Length + _5x5Length + secondary1x1Length]; + float[] weights = new float[_1x1Length + _3x3Reduce1x1Length + _3x3Length + _5x5Reduce1x1Length + _5x5Length + secondary1x1Length]; fixed (float* pw = weights) { Tensor.Reshape(pw, 1, _1x1Length, out Tensor wTensor); KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels); - Tensor.Reshape(pw + _1x1Length, 1, _3x3Length, out wTensor); - KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, 3 * 3 * info.Primary1x1ConvolutionKernels); - Tensor.Reshape(pw + _1x1Length + _3x3Length, 1, _5x5Length, out wTensor); - KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, 5 * 5 * info.Primary1x1ConvolutionKernels); - Tensor.Reshape(pw + _1x1Length + _3x3Length + _5x5Length, 1, secondary1x1Length, out wTensor); + Tensor.Reshape(pw + _1x1Length, 1, _3x3Reduce1x1Length, out wTensor); + KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels); + Tensor.Reshape(pw + _1x1Length + _3x3Reduce1x1Length, 1, _3x3Length, out wTensor); + KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, 3 * 3 * info.Primary3x3Reduce1x1ConvolutionKernels); + Tensor.Reshape(pw + _1x1Length + _3x3Reduce1x1Length + _3x3Length, 1, _5x5Reduce1x1Length, out wTensor); + KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels); + Tensor.Reshape(pw + _1x1Length + _3x3Reduce1x1Length + _3x3Length + _5x5Reduce1x1Length, 1, _5x5Length, out wTensor); + KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, 5 * 5 * info.Primary5x5Reduce1x1ConvolutionKernels); + Tensor.Reshape(pw + _1x1Length + _3x3Reduce1x1Length + _3x3Length + _5x5Reduce1x1Length + _5x5Length, 1, secondary1x1Length, out wTensor); KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels); } return weights; From 801b4caa40dce467c18e201c717dd10e378e528c Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Mon, 25 Dec 2017 17:11:19 +0100 Subject: [PATCH 13/30] Inception layer forward method implemented --- .../Layers/CuDnnInceptionLayer.cs | 179 +++++++++++++----- 1 file changed, 130 insertions(+), 49 deletions(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index 5875030..714bfb0 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -36,6 +36,53 @@ public ref readonly InceptionInfo OperationInfo get => ref _OperationInfo; } + #endregion + + #region Weights info + + // 1x1 convolution weights on first pipeline + private int _1x1Weights + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => InputInfo.Channels * OperationInfo.Primary1x1ConvolutionKernels; + } + + // 1x1 convolution weights on 3x3 pipeline + private int _3x3Reduce1x1Weights + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => InputInfo.Channels * OperationInfo.Primary3x3Reduce1x1ConvolutionKernels; + } + + // 3x3 convolution weights + private int _3x3Weights + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => 3 * 3 * OperationInfo.Primary3x3Reduce1x1ConvolutionKernels * OperationInfo.Secondary3x3ConvolutionKernels; + } + + // 1x1 convolution weights on 5x5 pipeline + private int _5x5Reduce1x1Weights + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => InputInfo.Channels * OperationInfo.Primary5x5Reduce1x1ConvolutionKernels; + } + + // 5x5 convolution 
weights + private int _5x5Weights + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => 5 * 5 * OperationInfo.Primary5x5Reduce1x1ConvolutionKernels * OperationInfo.Secondary5x5ConvolutionKernels; + } + + // 1x1 convolution weights on pooling pipeline + private int Secondary1x1Weights + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + get => InputInfo.Channels * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels; + } + + #endregion #region cuDNN fields @@ -227,6 +274,7 @@ internal CuDnnInceptionLayer(in TensorInfo input, in InceptionInfo info, [NotNul #region Implementation + /// public override void Forward(in Tensor x, out Tensor z, out Tensor a) { Tensor.New(x.Entities, OutputInfo.Size, out z); @@ -235,63 +283,94 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) w_gpu = DnnInstance.Gpu.AllocateDevice(Weights), b_gpu = DnnInstance.Gpu.AllocateDevice(Biases)) { - using (DeviceMemory _1x1Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels)) + // Pointers + deviceptr pw_gpu = w_gpu.Ptr, pb_gpu = b_gpu.Ptr; + + // First 1x1 convolution + using (DeviceMemory y_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels)) { - // First 1x1 convolution + // Descriptors setup and first 1x1 convolution + InputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width); + _1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Primary1x1ConvolutionKernels, InputInfo.Height, InputInfo.Width); + DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, _1x1FilterDescription, _1x1ConvolutionDescription, _1x1OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); + DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, _1x1FilterDescription, _1x1ConvolutionDescription, _1x1OutputDescription, algorithm, out IntPtr size); using (DeviceMemory x_gpu = DnnInstance.Gpu.AllocateDevice(x)) + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) { - // Descriptors setup and first 1x1 convolution - InputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width); - _1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Primary1x1ConvolutionKernels, InputInfo.Height, InputInfo.Width); - DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, _1x1FilterDescription, _1x1ConvolutionDescription, _1x1OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); - DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, _1x1FilterDescription, _1x1ConvolutionDescription, _1x1OutputDescription, algorithm, out IntPtr size); - using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) - { - DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, _1x1FilterDescription, w_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _1x1OutputDescription, _1x1Output_gpu.Ptr); - } - DnnInstance.AddTensor(1, _1x1BiasDescription, b_gpu.Ptr, 1, _1x1OutputDescription, _1x1Output_gpu.Ptr); - _1x1Output_gpu.CopyToRows(z, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels); + DnnInstance.ConvolutionForward(1, InputDescription, 
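
These section sizes drive the pointer walking in the forward pass below: pw_gpu and pb_gpu start at the first 1x1 filter group and each ConvolutionForward/AddTensor call advances them past the group it consumes. The resulting layout of the flat buffers, as a sketch with illustrative local names:

    // Float offsets of each filter group inside the flat Weights buffer
    int w1x1       = 0;
    int w3x3Reduce = w1x1 + _1x1Weights;
    int w3x3       = w3x3Reduce + _3x3Reduce1x1Weights;
    int w5x5Reduce = w3x3 + _3x3Weights;
    int w5x5       = w5x5Reduce + _5x5Reduce1x1Weights;
    int wPool1x1   = w5x5 + _5x5Weights;
    // Biases follow the same group order, one float per kernel in each group
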
x_gpu.Ptr, _1x1FilterDescription, pw_gpu, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _1x1OutputDescription, y_gpu.Ptr); + } + DnnInstance.AddTensor(1, _1x1BiasDescription, pb_gpu, 1, _1x1OutputDescription, y_gpu.Ptr); + y_gpu.CopyToRows(z, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels); - // 1x1 convolution activation - DnnInstance.ActivationForward(ActivationDescription, 1, _1x1OutputDescription, _1x1Output_gpu.Ptr, 0, _1x1OutputDescription, _1x1Output_gpu.Ptr); - _1x1Output_gpu.CopyToRows(a, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels); + // 1x1 convolution activation + DnnInstance.ActivationForward(ActivationDescription, 1, _1x1OutputDescription, y_gpu.Ptr, 0, _1x1OutputDescription, y_gpu.Ptr); + y_gpu.CopyToRows(a, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels); + } + + // 1x1 + 3x3 convolution + using (DeviceMemory + y1x1_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Primary3x3Reduce1x1ConvolutionKernels), + y_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels)) + { + // 1x1 convolution + _3x3Reduce1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, InputInfo.Height, InputInfo.Width); + DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, _3x3Reduce1x1FilterDescription, _1x1ConvolutionDescription, _3x3Reduce1x1OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); + DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, _3x3Reduce1x1FilterDescription, _1x1ConvolutionDescription, _3x3Reduce1x1OutputDescription, algorithm, out IntPtr size); + using (DeviceMemory x_gpu = DnnInstance.Gpu.AllocateDevice(x)) + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, _3x3Reduce1x1FilterDescription, pw_gpu += _1x1Weights, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3Reduce1x1OutputDescription, y1x1_gpu.Ptr); } + DnnInstance.AddTensor(1, _3x3Reduce1x1BiasDescription, pb_gpu += OperationInfo.Primary1x1ConvolutionKernels, 1, _3x3Reduce1x1OutputDescription, y1x1_gpu.Ptr); + DnnInstance.ActivationForward(ActivationDescription, 1, _3x3Reduce1x1OutputDescription, y1x1_gpu.Ptr, 0, _3x3Reduce1x1OutputDescription, y1x1_gpu.Ptr); // 3x3 convolution - using (DeviceMemory _3x3Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels)) + _3x3OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Secondary3x3ConvolutionKernels, InputInfo.Height, InputInfo.Width); + DnnInstance.GetConvolutionForwardAlgorithm(_3x3Reduce1x1OutputDescription, _3x3FilterDescription, _3x3ConvolutionDescription, _3x3OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm); + DnnInstance.GetConvolutionForwardWorkspaceSize(_3x3Reduce1x1OutputDescription, _3x3FilterDescription, _3x3ConvolutionDescription, _3x3OutputDescription, algorithm, out size); + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) { - _3x3OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Secondary3x3ConvolutionKernels, InputInfo.Height, InputInfo.Width); - 
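
The reduce step is what keeps the wide kernels affordable: the 1x1 stage shrinks the channel count before the 3x3 and 5x5 filters see the data. With the inception (3a) counts from the earlier sketch and a 192-channel input:

    // Weight counts for the 5x5 pipeline, direct vs reduced:
    //   direct:  5 * 5 * 192 * 32           = 153,600
    //   reduced: 192 * 16 + 5 * 5 * 16 * 32 =  15,872
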
DnnInstance.GetConvolutionForwardAlgorithm(_1x1OutputDescription, _3x3FilterDescription, _3x3ConvolutionDescription, _3x3OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); - DnnInstance.GetConvolutionForwardWorkspaceSize(_1x1OutputDescription, _3x3FilterDescription, _3x3ConvolutionDescription, _3x3OutputDescription, algorithm, out IntPtr size); - using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) - { - DnnInstance.ConvolutionForward(1, _1x1OutputDescription, _1x1Output_gpu.Ptr, _3x3FilterDescription, w_gpu.Ptr + InputInfo.Channels * OperationInfo.Primary1x1ConvolutionKernels, _3x3ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3OutputDescription, _3x3Output_gpu.Ptr); - } - DnnInstance.AddTensor(1, _3x3BiasDescription, b_gpu.Ptr + OperationInfo.Primary1x1ConvolutionKernels, 1, _3x3OutputDescription, _3x3Output_gpu.Ptr); - _3x3Output_gpu.CopyToRows(z, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels); + DnnInstance.ConvolutionForward(1, _3x3Reduce1x1OutputDescription, y1x1_gpu.Ptr, _3x3FilterDescription, pw_gpu += _3x3Reduce1x1Weights, _3x3ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3OutputDescription, y_gpu.Ptr); + } + DnnInstance.AddTensor(1, _3x3BiasDescription, pb_gpu += OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, 1, _3x3OutputDescription, y_gpu.Ptr); + y_gpu.CopyToRows(z, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels); + + // Activation + DnnInstance.ActivationForward(ActivationDescription, 1, _3x3OutputDescription, y_gpu.Ptr, 0, _3x3OutputDescription, y_gpu.Ptr); + y_gpu.CopyToRows(a, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels); + } - // 3x3 convolution activation - DnnInstance.ActivationForward(ActivationDescription, 1, _3x3OutputDescription, _3x3Output_gpu.Ptr, 0, _3x3OutputDescription, _3x3Output_gpu.Ptr); - _3x3Output_gpu.CopyToRows(a, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels); + // 1x1 + 5x5 convolution + using (DeviceMemory + y1x1_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Primary5x5Reduce1x1ConvolutionKernels), + y_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels)) + { + // 1x1 convolution + _5x5Reduce1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, InputInfo.Height, InputInfo.Width); + DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, _5x5Reduce1x1FilterDescription, _1x1ConvolutionDescription, _5x5Reduce1x1OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); + DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, _5x5Reduce1x1FilterDescription, _1x1ConvolutionDescription, _5x5Reduce1x1OutputDescription, algorithm, out IntPtr size); + using (DeviceMemory x_gpu = DnnInstance.Gpu.AllocateDevice(x)) + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, _5x5Reduce1x1FilterDescription, pw_gpu += _3x3Weights, _5x5ConvolutionDescription, algorithm, 
workspace_gpu.Ptr, size, 0, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr); } + DnnInstance.AddTensor(1, _5x5Reduce1x1BiasDescription, pb_gpu += OperationInfo.Secondary3x3ConvolutionKernels, 1, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr); + DnnInstance.ActivationForward(ActivationDescription, 1, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr, 0, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr); // 5x5 convolution - using (DeviceMemory _5x5Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels)) + _5x5OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Secondary5x5ConvolutionKernels, InputInfo.Height, InputInfo.Width); + DnnInstance.GetConvolutionForwardAlgorithm(_5x5Reduce1x1OutputDescription, _5x5FilterDescription, _5x5ConvolutionDescription, _5x5OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm); + DnnInstance.GetConvolutionForwardWorkspaceSize(_5x5Reduce1x1OutputDescription, _5x5FilterDescription, _5x5ConvolutionDescription, _5x5OutputDescription, algorithm, out size); + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) { - _5x5OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Secondary5x5ConvolutionKernels, InputInfo.Height, InputInfo.Width); - DnnInstance.GetConvolutionForwardAlgorithm(_1x1OutputDescription, _5x5FilterDescription, _5x5ConvolutionDescription, _5x5OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); - DnnInstance.GetConvolutionForwardWorkspaceSize(_1x1OutputDescription, _5x5FilterDescription, _5x5ConvolutionDescription, _5x5OutputDescription, algorithm, out IntPtr size); - using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) - { - DnnInstance.ConvolutionForward(1, _1x1OutputDescription, _1x1Output_gpu.Ptr, _5x5FilterDescription, w_gpu.Ptr + InputInfo.Channels * OperationInfo.Primary1x1ConvolutionKernels + 3 * 3 * OperationInfo.Primary1x1ConvolutionKernels * OperationInfo.Secondary3x3ConvolutionKernels, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5OutputDescription, _5x5Output_gpu.Ptr); - } - DnnInstance.AddTensor(1, _5x5BiasDescription, b_gpu.Ptr + OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels, 1, _5x5OutputDescription, _5x5Output_gpu.Ptr); - _5x5Output_gpu.CopyToRows(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); - - // 3x3 convolution activation - DnnInstance.ActivationForward(ActivationDescription, 1, _5x5OutputDescription, _5x5Output_gpu.Ptr, 0, _5x5OutputDescription, _5x5Output_gpu.Ptr); - _5x5Output_gpu.CopyToRows(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); + DnnInstance.ConvolutionForward(1, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr, _5x5FilterDescription, pw_gpu += _5x5Reduce1x1Weights, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5OutputDescription, y_gpu.Ptr); } + DnnInstance.AddTensor(1, _5x5BiasDescription, pb_gpu += OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, 1, _5x5OutputDescription, y_gpu.Ptr); + y_gpu.CopyToRows(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + 
OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); + + // Activation + DnnInstance.ActivationForward(ActivationDescription, 1, _3x3OutputDescription, y_gpu.Ptr, 0, _3x3OutputDescription, y_gpu.Ptr); + y_gpu.CopyToRows(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); } // Pooling pipeline @@ -305,31 +384,33 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) } // 1x1 convolution - using (DeviceMemory _1x1Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize)) // TODO + using (DeviceMemory _1x1Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels)) { - _1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, -1, InputInfo.Height, InputInfo.Width); + Secondary1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width); DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, Secondary1x1FilterDescription, _1x1ConvolutionDescription, Secondary1x1OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, Secondary1x1FilterDescription, _1x1ConvolutionDescription, Secondary1x1OutputDescription, algorithm, out IntPtr size); using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) { - DnnInstance.ConvolutionForward(1, InputDescription, y_gpu.Ptr, Secondary1x1FilterDescription, w_gpu.Ptr + InputInfo.Channels * OperationInfo.Primary1x1ConvolutionKernels + 3 * 3 * OperationInfo.Primary1x1ConvolutionKernels * OperationInfo.Secondary3x3ConvolutionKernels + 5 * 5 * OperationInfo.Primary1x1ConvolutionKernels * OperationInfo.Secondary5x5ConvolutionKernels, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); + DnnInstance.ConvolutionForward(1, InputDescription, y_gpu.Ptr, Secondary1x1FilterDescription, pw_gpu += _5x5Weights, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); } - DnnInstance.AddTensor(1, Secondary1x1BiasDescription, b_gpu.Ptr + OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels, 1, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); - _1x1Output_gpu.CopyToRows(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize); // TODO + DnnInstance.AddTensor(1, Secondary1x1BiasDescription, pb_gpu += OperationInfo.Secondary5x5ConvolutionKernels, 1, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); + _1x1Output_gpu.CopyToRows(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels); // 1x1 convolution activation DnnInstance.ActivationForward(ActivationDescription, 1, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr, 0, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); - _1x1Output_gpu.CopyToRows(a, 
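
All four pipelines write disjoint column bands of the same output rows, which is what produces the depth concatenation. Per sample, the band offsets used by the CopyToRows calls are (a sketch, illustrative locals):

    int slice      = InputInfo.SliceSize;                                // height * width per channel
    int offset1x1  = 0;                                                  // first 1x1 band
    int offset3x3  = slice * OperationInfo.Primary1x1ConvolutionKernels; // 3x3 band
    int offset5x5  = offset3x3 + slice * OperationInfo.Secondary3x3ConvolutionKernels;
    int offsetPool = offset5x5 + slice * OperationInfo.Secondary5x5ConvolutionKernels;
    // offsetPool + slice * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels == OutputInfo.Size
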
InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize); // TODO + _1x1Output_gpu.CopyToRows(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels); } } } } + /// public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime) { throw new NotImplementedException(); } + /// public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJdw, out Tensor dJdb) { throw new NotImplementedException(); From 7e6366bbda65f29c71ed5815410a3d131b60228c Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Tue, 26 Dec 2017 00:13:35 +0100 Subject: [PATCH 14/30] Inception backpropagation 90% completed --- .../Layers/CuDnnInceptionLayer.cs | 188 ++++++++++++++++-- 1 file changed, 172 insertions(+), 16 deletions(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index 714bfb0..009b273 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -18,7 +18,7 @@ namespace NeuralNetworkNET.Cuda.Layers /// /// A simplified inception module, with 4 pipelines combining 1x1 convolution, 1x1 + 3x3, 1x1 + 5x5 and pooling + 1x1 /// - internal sealed class CuDnnInceptionLayer : WeightedLayerBase + internal sealed class CuDnnInceptionLayer : WeightedLayerBase, IDisposable { #region Parameters @@ -38,7 +38,7 @@ public ref readonly InceptionInfo OperationInfo #endregion - #region Weights info + #region Private fields and parameters // 1x1 convolution weights on first pipeline private int _1x1Weights @@ -82,6 +82,32 @@ private int Secondary1x1Weights get => InputInfo.Channels * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels; } + // 3x3 reduction 1x1 convolution activity + private Tensor _3x3Reduce1x1Z; + + // 3x3 reduction 1x1 convolution activation + private Tensor _3x3Reduce1x1A; + + // 3x3 reduction 1x1 convolution output delta + private Tensor _3x3Reduce1x1Delta; + + // 5x5 reduction 1x1 convolution activity + private Tensor _5x5Reduce1x1Z; + + // 5x5 reduction 1x1 convolution activation + private Tensor _5x5Reduce1x1A; + + // 5x5 reduction 1x1 convolution output delta + private Tensor _5x5Reduce1x1Delta; + + // Pooling output activity + private Tensor PoolingZ; + + // Pooling output activation + private Tensor PoolingA; + + // Pooling output delta + private Tensor PoolingDelta; #endregion @@ -207,10 +233,6 @@ private int Secondary1x1Weights #endregion - // The shared ReLU activation description for the current layer - [NotNull] - private readonly ActivationDescriptor ActivationDescription = new ActivationDescriptor(); - /// /// Gets the instance for the current layer /// @@ -248,9 +270,6 @@ private void SetupCuDnnInfo() // Secondary 1x1 convolution Secondary1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, InputInfo.Channels, _OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, 1, 1); Secondary1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, 1, 1); - - // Activation - ActivationDescription.Set(ActivationMode.RELU, NanPropagation.PROPAGATE_NAN, 0); } #endregion @@ -303,7 +322,7 @@ public override 
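
Backpropagating through the module needs the activity and activation of the inner reduce convolutions and of the pooling stage, which the shared layer infrastructure never sees, so the hunks below cache them in fields on every forward pass, freeing the previous buffers first. The new IDisposable declaration implies a matching cleanup; the patch cuts off before showing it, so the following is only an assumed sketch:

    // Assumed Dispose shape: release the cached per-pass tensors (not shown in this patch)
    public void Dispose()
    {
        _3x3Reduce1x1Z.TryFree(); _3x3Reduce1x1A.TryFree(); _3x3Reduce1x1Delta.TryFree();
        _5x5Reduce1x1Z.TryFree(); _5x5Reduce1x1A.TryFree(); _5x5Reduce1x1Delta.TryFree();
        PoolingZ.TryFree(); PoolingA.TryFree(); PoolingDelta.TryFree();
    }
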
void Forward(in Tensor x, out Tensor z, out Tensor a) y_gpu.CopyToRows(z, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels); // 1x1 convolution activation - DnnInstance.ActivationForward(ActivationDescription, 1, _1x1OutputDescription, y_gpu.Ptr, 0, _1x1OutputDescription, y_gpu.Ptr); + DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); y_gpu.CopyToRows(a, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels); } @@ -322,7 +341,11 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, _3x3Reduce1x1FilterDescription, pw_gpu += _1x1Weights, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3Reduce1x1OutputDescription, y1x1_gpu.Ptr); } DnnInstance.AddTensor(1, _3x3Reduce1x1BiasDescription, pb_gpu += OperationInfo.Primary1x1ConvolutionKernels, 1, _3x3Reduce1x1OutputDescription, y1x1_gpu.Ptr); - DnnInstance.ActivationForward(ActivationDescription, 1, _3x3Reduce1x1OutputDescription, y1x1_gpu.Ptr, 0, _3x3Reduce1x1OutputDescription, y1x1_gpu.Ptr); + _3x3Reduce1x1Z.TryFree(); + y1x1_gpu.CopyToHost(x.Entities, InputInfo.SliceSize * OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, out _3x3Reduce1x1Z); + DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, y1x1_gpu.Ptr, y1x1_gpu.Ptr, ActivationFunctions.Activation); + _3x3Reduce1x1A.TryFree(); + y1x1_gpu.CopyToHost(x.Entities, InputInfo.SliceSize * OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, out _3x3Reduce1x1A); // 3x3 convolution _3x3OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Secondary3x3ConvolutionKernels, InputInfo.Height, InputInfo.Width); @@ -336,7 +359,7 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) y_gpu.CopyToRows(z, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels); // Activation - DnnInstance.ActivationForward(ActivationDescription, 1, _3x3OutputDescription, y_gpu.Ptr, 0, _3x3OutputDescription, y_gpu.Ptr); + DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); y_gpu.CopyToRows(a, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels); } @@ -355,7 +378,11 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, _5x5Reduce1x1FilterDescription, pw_gpu += _3x3Weights, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr); } DnnInstance.AddTensor(1, _5x5Reduce1x1BiasDescription, pb_gpu += OperationInfo.Secondary3x3ConvolutionKernels, 1, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr); - DnnInstance.ActivationForward(ActivationDescription, 1, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr, 0, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr); + _5x5Reduce1x1Z.TryFree(); + y1x1_gpu.CopyToHost(x.Entities, InputInfo.SliceSize * OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, out _5x5Reduce1x1Z); + DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, y1x1_gpu.Ptr, y1x1_gpu.Ptr, 
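// In-place software activation: the pre-activation tensor was staged back to host as _5x5Reduce1x1Z just above, so the backward pass can evaluate the activation derivative without recomputing the reduction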
ActivationFunctions.Activation); + _5x5Reduce1x1A.TryFree(); + y1x1_gpu.CopyToHost(x.Entities, InputInfo.SliceSize * OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, out _5x5Reduce1x1A); // 5x5 convolution _5x5OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Secondary5x5ConvolutionKernels, InputInfo.Height, InputInfo.Width); @@ -369,7 +396,7 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) y_gpu.CopyToRows(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); // Activation - DnnInstance.ActivationForward(ActivationDescription, 1, _3x3OutputDescription, y_gpu.Ptr, 0, _3x3OutputDescription, y_gpu.Ptr); + DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); y_gpu.CopyToRows(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); } @@ -382,6 +409,11 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) { DnnInstance.PoolingForward(PoolingDescription, 1, InputDescription, x_gpu.Ptr, 0, InputDescription, y_gpu.Ptr); } + PoolingZ.TryFree(); + y_gpu.CopyToHost(x.Entities, InputInfo.Size, out PoolingZ); + DnnInstance.ActivationForward(x.Entities, x.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); + PoolingA.TryFree(); + y_gpu.CopyToHost(x.Entities, InputInfo.Size, out PoolingA); // 1x1 convolution using (DeviceMemory _1x1Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels)) @@ -397,7 +429,7 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) _1x1Output_gpu.CopyToRows(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels); // 1x1 convolution activation - DnnInstance.ActivationForward(ActivationDescription, 1, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr, 0, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); + DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, _1x1Output_gpu.Ptr, _1x1Output_gpu.Ptr, ActivationFunctions.Activation); _1x1Output_gpu.CopyToRows(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels); } } @@ -407,7 +439,104 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) /// public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime) { - throw new NotImplementedException(); + using (DeviceMemory + dx_gpu = DnnInstance.Gpu.AllocateDevice(z.Size), + dy_gpu = DnnInstance.Gpu.AllocateDevice(delta_1), + w_gpu = DnnInstance.Gpu.AllocateDevice(Weights)) + { + // Pointers + deviceptr pdy_gpu = dy_gpu.Ptr; // TODO: load rows + + // First 1x1 convolution + DnnInstance.GetConvolutionBackwardDataAlgorithm(_1x1FilterDescription, _1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, 
ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdDataAlgo algorithm); + DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_1x1FilterDescription, _1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, algorithm, out IntPtr size); + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + DnnInstance.ConvolutionBackwardData(1, _1x1FilterDescription, w_gpu.Ptr, _1x1OutputDescription, dy_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, InputDescription, dx_gpu.Ptr); + } + + // 1x1 + 3x3 convolution + using (DeviceMemory _3x3Reduce1x1z_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1Z)) + { + // 3x3 backward + DnnInstance.GetConvolutionBackwardDataAlgorithm(_3x3FilterDescription, _3x3OutputDescription, _3x3ConvolutionDescription, _3x3Reduce1x1OutputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm); + DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_3x3FilterDescription, _3x3OutputDescription, _3x3ConvolutionDescription, _3x3Reduce1x1OutputDescription, algorithm, out size); + using (DeviceMemory _3x3Reduce1x1dx_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1Z.Size)) + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + deviceptr p3x3Weights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights; + DnnInstance.ConvolutionBackwardData(1, _3x3FilterDescription, p3x3Weights_gpu, _3x3OutputDescription, pdy_gpu += InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, _3x3ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3Reduce1x1OutputDescription, _3x3Reduce1x1dx_gpu.Ptr); + DnnInstance.ActivationBackward(_3x3Reduce1x1Z.Entities, _3x3Reduce1x1Z.Length, _3x3Reduce1x1z_gpu.Ptr, _3x3Reduce1x1dx_gpu.Ptr, ActivationFunctions.ActivationPrime); + _3x3Reduce1x1Delta.TryFree(); + _3x3Reduce1x1z_gpu.CopyToHost(_3x3Reduce1x1Z.Entities, _3x3Reduce1x1Z.Length, out _3x3Reduce1x1Delta); + } + + // 3x3 reduce 1x1 backward + DnnInstance.GetConvolutionBackwardDataAlgorithm(_3x3Reduce1x1FilterDescription, _3x3Reduce1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm); + DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_3x3Reduce1x1FilterDescription, _3x3Reduce1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, algorithm, out size); + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + deviceptr p3x3Reduce1x1Weights_gpu = w_gpu.Ptr + _1x1Weights; + DnnInstance.ConvolutionBackwardData(1, _3x3Reduce1x1FilterDescription, p3x3Reduce1x1Weights_gpu, _3x3Reduce1x1OutputDescription, _3x3Reduce1x1z_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 1, InputDescription, dx_gpu.Ptr); + } + } + + // 1x1 + 5x5 convolution + using (DeviceMemory _5x5Reduce1x1z_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1Z)) + { + // 5x5 backward + DnnInstance.GetConvolutionBackwardDataAlgorithm(_5x5FilterDescription, _5x5OutputDescription, _5x5ConvolutionDescription, _5x5Reduce1x1OutputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm); + DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_5x5FilterDescription, _5x5OutputDescription, _5x5ConvolutionDescription, _5x5Reduce1x1OutputDescription, algorithm, out size); + using (DeviceMemory _5x5Reduce1x1dx_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1Z.Size)) + using (DeviceMemory workspace_gpu = 
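// Scratch workspace sized by the backward-data query above for the algorithm cuDNN selected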
DnnInstance.Gpu.AllocateDevice(size)) + { + deviceptr p5x5Weights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights; + DnnInstance.ConvolutionBackwardData(1, _5x5FilterDescription, p5x5Weights_gpu, _5x5OutputDescription, pdy_gpu += InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5Reduce1x1OutputDescription, _5x5Reduce1x1dx_gpu.Ptr); + DnnInstance.ActivationBackward(_5x5Reduce1x1Z.Entities, _5x5Reduce1x1Z.Length, _5x5Reduce1x1z_gpu.Ptr, _5x5Reduce1x1dx_gpu.Ptr, ActivationFunctions.ActivationPrime); + _5x5Reduce1x1Delta.TryFree(); + _5x5Reduce1x1z_gpu.CopyToHost(_5x5Reduce1x1Z.Entities, _5x5Reduce1x1Z.Length, out _5x5Reduce1x1Delta); + } + + // 5x5 reduce 1x1 backward + DnnInstance.GetConvolutionBackwardDataAlgorithm(_5x5Reduce1x1FilterDescription, _5x5Reduce1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm); + DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_5x5Reduce1x1FilterDescription, _5x5Reduce1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, algorithm, out size); + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + deviceptr p5x5Reduce1x1Weights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights; + DnnInstance.ConvolutionBackwardData(1, _5x5Reduce1x1FilterDescription, p5x5Reduce1x1Weights_gpu, _5x5Reduce1x1OutputDescription, _5x5Reduce1x1z_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 1, InputDescription, dx_gpu.Ptr); + } + } + + // Pooling + using (DeviceMemory poolDy_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ)) + { + // 1x1 backward + DnnInstance.GetConvolutionBackwardDataAlgorithm(Secondary1x1FilterDescription, Secondary1x1OutputDescription, _1x1ConvolutionDescription, PoolingOutputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm); + DnnInstance.GetConvolutionBackwardDataWorkspaceSize(Secondary1x1FilterDescription, Secondary1x1OutputDescription, _1x1ConvolutionDescription, PoolingOutputDescription, algorithm, out size); + using (DeviceMemory poolDx_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ.Size)) + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + deviceptr p1x1PoolingWeights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights + _5x5Weights; + DnnInstance.ConvolutionBackwardData(1, Secondary1x1FilterDescription, p1x1PoolingWeights_gpu, Secondary1x1OutputDescription, pdy_gpu, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, PoolingOutputDescription, poolDx_gpu.Ptr); + DnnInstance.ActivationBackward(PoolingZ.Entities, PoolingZ.Length, poolDy_gpu.Ptr, poolDx_gpu.Ptr, ActivationFunctions.ActivationPrime); + PoolingDelta.TryFree(); + poolDy_gpu.CopyToHost(PoolingDelta.Entities, PoolingDelta.Length, out PoolingDelta); + } + + // Pooling backward + using (DeviceMemory poolZ_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ)) + { + DnnInstance.PoolingBackward(PoolingDescription, 1, PoolingOutputDescription, poolZ_gpu.Ptr, PoolingOutputDescription, poolDy_gpu.Ptr, InputDescription, default, 1, InputDescription, dx_gpu.Ptr); + } + } + + // Activation backward + using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(z)) + { + DnnInstance.ActivationBackward(z.Entities, z.Length, z_gpu.Ptr, dx_gpu.Ptr, activationPrime); + z_gpu.CopyTo(z); + } + } } /// @@ 
-420,5 +549,32 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ /// public override INetworkLayer Clone() => new CuDnnInceptionLayer(InputInfo, OperationInfo, Weights, Biases); + + #region IDisposable + + ~CuDnnInceptionLayer() => Dispose(); + + /// + void IDisposable.Dispose() + { + GC.SuppressFinalize(this); + Dispose(); + } + + // Private Dispose method + private void Dispose() + { + _3x3Reduce1x1Z.TryFree(); + _3x3Reduce1x1A.TryFree(); + _3x3Reduce1x1Delta.TryFree(); + _5x5Reduce1x1Z.TryFree(); + _5x5Reduce1x1A.TryFree(); + _5x5Reduce1x1Delta.TryFree(); + PoolingZ.TryFree(); + PoolingA.TryFree(); + PoolingDelta.TryFree(); + } + + #endregion } } From d1de1abd52205f6bd3e77931d8608a33d0f37956 Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Tue, 26 Dec 2017 01:01:28 +0100 Subject: [PATCH 15/30] GPU copy to rows and rows allocation methods improved --- .../Extensions/GpuExtensions.cs | 60 +++++++++++++++---- .../GpuExtensionsTest.cs | 55 +++++++++++++++++ 2 files changed, 105 insertions(+), 10 deletions(-) create mode 100644 Unit/NeuralNetwork.NET.Cuda.Unit/GpuExtensionsTest.cs diff --git a/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs b/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs index 79723f2..6adb6cb 100644 --- a/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs +++ b/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs @@ -63,21 +63,61 @@ public static void CopyToHost([NotNull] this DeviceMemory source, int n, /// The number of values to copy for each entry public static unsafe void CopyToRows([NotNull] this DeviceMemory source, in Tensor destination, int offset, int length) { + // Checks if (source.Length / length != destination.Entities) throw new ArgumentOutOfRangeException(nameof(length), "The input length doesn't match the given arguments"); - if (destination.Length - offset > length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid"); - CUDAInterop.cudaError_enum result = CUDAInterop.cudaError_enum.CUDA_SUCCESS; - int - bytes = sizeof(float) * length, // Bytes to copy for each row - lineBytes = sizeof(float) * destination.Length; // Bytes to skip for each entry to jump to the line below at the same offset - IntPtr - start = destination.Ptr + sizeof(float) * offset, // Initial destination offset - size = new IntPtr(bytes); - for (int i = 0; i < destination.Entities; i++) - result |= CUDAInterop.cuMemcpy(start + i * bytes, source.Handle + i * lineBytes, size); + if (destination.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid"); + + // Memory copy + CUDAInterop.CUDA_MEMCPY2D_st* ptSt = stackalloc CUDAInterop.CUDA_MEMCPY2D_st[1]; + ptSt[0] = new CUDAInterop.CUDA_MEMCPY2D_st + { + srcMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE, + srcDevice = source.Handle, + srcPitch = new IntPtr(sizeof(float) * length), + dstMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_HOST, + dstHost = destination.Ptr + sizeof(float) * offset, + dstPitch = new IntPtr(sizeof(float) * destination.Length), + WidthInBytes = new IntPtr(sizeof(float) * length), + Height = new IntPtr(destination.Entities) + }; + CUDAInterop.cudaError_enum result = CUDAInterop.cuMemcpy2D(ptSt); if (result != CUDAInterop.cudaError_enum.CUDA_SUCCESS) throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}"); } + /// + /// Allocates a memory area on device memory, reading the target values at a given offset from 
the input + /// + /// The device to use + /// The source with the data to copy + /// The column offset for the data to read from each row + /// + [MustUseReturnValue, NotNull] + public static unsafe DeviceMemory AllocateDevice([NotNull] this Gpu gpu, in Tensor source, int offset, int length) + { + // Checks + if (source.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid"); + + // Memory copy + DeviceMemory result_gpu = gpu.AllocateDevice(source.Entities * length); + CUDAInterop.CUDA_MEMCPY2D_st* ptSt = stackalloc CUDAInterop.CUDA_MEMCPY2D_st[1]; + ptSt[0] = new CUDAInterop.CUDA_MEMCPY2D_st + { + srcMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_HOST, + srcHost = source.Ptr + sizeof(float) * offset, + srcPitch = new IntPtr(sizeof(float) * source.Length), + dstMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE, + dstDevice = result_gpu.Handle, + dstPitch = new IntPtr(sizeof(float) * length), + WidthInBytes = new IntPtr(sizeof(float) * length), + Height = new IntPtr(source.Entities) + }; + CUDAInterop.cudaError_enum result = CUDAInterop.cuMemcpy2D(ptSt); + return result == CUDAInterop.cudaError_enum.CUDA_SUCCESS + ? result_gpu + : throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}"); + } + #endregion /// diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/GpuExtensionsTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/GpuExtensionsTest.cs new file mode 100644 index 0000000..6676490 --- /dev/null +++ b/Unit/NeuralNetwork.NET.Cuda.Unit/GpuExtensionsTest.cs @@ -0,0 +1,55 @@ +using Alea; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using NeuralNetworkNET.APIs.Structs; +using NeuralNetworkNET.Cuda.Extensions; +using NeuralNetworkNET.Extensions; + +namespace NeuralNetworkNET.Cuda.Unit +{ + /// + /// Test class for the cuDNN GPU extension methods + /// + [TestClass] + [TestCategory(nameof(GpuExtensionsTest))] + public class GpuExtensionsTest + { + [TestMethod] + public void CopyToRows() + { + float[] test = {1,2,3,4,5,6,7,8,9}; + Tensor.NewZeroed(3, 10, out Tensor tensor); + Gpu gpu = Gpu.Default; + using (DeviceMemory m_gpu = gpu.AllocateDevice(test)) + { + m_gpu.CopyToRows(tensor, 5, 3); + } + float[,] expected = + { + { 0, 0, 0, 0, 0, 1, 2, 3, 0, 0 }, + { 0, 0, 0, 0, 0, 4, 5, 6, 0, 0 }, + { 0, 0, 0, 0, 0, 7, 8, 9, 0, 0 } + }; + Assert.IsTrue(tensor.ToArray2D().ContentEquals(expected)); + } + + [TestMethod] + public void AllocateDeviceRows() + { + float[,] source = + { + { 0, 0, 0, 0, 0, 1, 2, 3, 0, 0 }, + { 0, 0, 0, 0, 0, 4, 5, 6, 0, 0 }, + { 0, 0, 0, 0, 0, 7, 8, 9, 0, 0 } + }; + Tensor.From(source, out Tensor tensor); + Gpu gpu = Gpu.Default; + using (DeviceMemory m_gpu = gpu.AllocateDevice(tensor, 5, 3)) + { + float[] + copy = Gpu.CopyToHost(m_gpu), + expected = { 1, 2, 3, 4, 5, 6, 7, 8, 9 }; + Assert.IsTrue(copy.ContentEquals(expected)); + } + } + } +} From 07af79e57fd7cb910db56eb0ca412c88ad01655f Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Tue, 26 Dec 2017 01:13:56 +0100 Subject: [PATCH 16/30] Inception layer delta loading fixed --- .../Layers/CuDnnInceptionLayer.cs | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index 009b273..ef714cb 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -441,15 +441,13 @@ public override void 
Backpropagate(in Tensor delta_1, in Tensor z, ActivationFun { using (DeviceMemory dx_gpu = DnnInstance.Gpu.AllocateDevice(z.Size), - dy_gpu = DnnInstance.Gpu.AllocateDevice(delta_1), w_gpu = DnnInstance.Gpu.AllocateDevice(Weights)) { - // Pointers - deviceptr pdy_gpu = dy_gpu.Ptr; // TODO: load rows // First 1x1 convolution DnnInstance.GetConvolutionBackwardDataAlgorithm(_1x1FilterDescription, _1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdDataAlgo algorithm); DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_1x1FilterDescription, _1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, algorithm, out IntPtr size); + using (DeviceMemory dy_gpu = DnnInstance.Gpu.AllocateDevice(delta_1, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels)) using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) { DnnInstance.ConvolutionBackwardData(1, _1x1FilterDescription, w_gpu.Ptr, _1x1OutputDescription, dy_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, InputDescription, dx_gpu.Ptr); @@ -461,11 +459,13 @@ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFun // 3x3 backward DnnInstance.GetConvolutionBackwardDataAlgorithm(_3x3FilterDescription, _3x3OutputDescription, _3x3ConvolutionDescription, _3x3Reduce1x1OutputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm); DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_3x3FilterDescription, _3x3OutputDescription, _3x3ConvolutionDescription, _3x3Reduce1x1OutputDescription, algorithm, out size); - using (DeviceMemory _3x3Reduce1x1dx_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1Z.Size)) + using (DeviceMemory + dy_gpu = DnnInstance.Gpu.AllocateDevice(delta_1, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels), + _3x3Reduce1x1dx_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1Z.Size)) using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) { deviceptr p3x3Weights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights; - DnnInstance.ConvolutionBackwardData(1, _3x3FilterDescription, p3x3Weights_gpu, _3x3OutputDescription, pdy_gpu += InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, _3x3ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3Reduce1x1OutputDescription, _3x3Reduce1x1dx_gpu.Ptr); + DnnInstance.ConvolutionBackwardData(1, _3x3FilterDescription, p3x3Weights_gpu, _3x3OutputDescription, dy_gpu.Ptr, _3x3ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3Reduce1x1OutputDescription, _3x3Reduce1x1dx_gpu.Ptr); DnnInstance.ActivationBackward(_3x3Reduce1x1Z.Entities, _3x3Reduce1x1Z.Length, _3x3Reduce1x1z_gpu.Ptr, _3x3Reduce1x1dx_gpu.Ptr, ActivationFunctions.ActivationPrime); _3x3Reduce1x1Delta.TryFree(); _3x3Reduce1x1z_gpu.CopyToHost(_3x3Reduce1x1Z.Entities, _3x3Reduce1x1Z.Length, out _3x3Reduce1x1Delta); @@ -487,11 +487,13 @@ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFun // 5x5 backward DnnInstance.GetConvolutionBackwardDataAlgorithm(_5x5FilterDescription, _5x5OutputDescription, _5x5ConvolutionDescription, _5x5Reduce1x1OutputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm); DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_5x5FilterDescription, _5x5OutputDescription, _5x5ConvolutionDescription, 
_5x5Reduce1x1OutputDescription, algorithm, out size); - using (DeviceMemory _5x5Reduce1x1dx_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1Z.Size)) + using (DeviceMemory + dy_gpu = DnnInstance.Gpu.AllocateDevice(delta_1, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels), + _5x5Reduce1x1dx_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1Z.Size)) using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) { deviceptr p5x5Weights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights; - DnnInstance.ConvolutionBackwardData(1, _5x5FilterDescription, p5x5Weights_gpu, _5x5OutputDescription, pdy_gpu += InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5Reduce1x1OutputDescription, _5x5Reduce1x1dx_gpu.Ptr); + DnnInstance.ConvolutionBackwardData(1, _5x5FilterDescription, p5x5Weights_gpu, _5x5OutputDescription, dy_gpu.Ptr, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5Reduce1x1OutputDescription, _5x5Reduce1x1dx_gpu.Ptr); DnnInstance.ActivationBackward(_5x5Reduce1x1Z.Entities, _5x5Reduce1x1Z.Length, _5x5Reduce1x1z_gpu.Ptr, _5x5Reduce1x1dx_gpu.Ptr, ActivationFunctions.ActivationPrime); _5x5Reduce1x1Delta.TryFree(); _5x5Reduce1x1z_gpu.CopyToHost(_5x5Reduce1x1Z.Entities, _5x5Reduce1x1Z.Length, out _5x5Reduce1x1Delta); @@ -513,11 +515,13 @@ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFun // 1x1 backward DnnInstance.GetConvolutionBackwardDataAlgorithm(Secondary1x1FilterDescription, Secondary1x1OutputDescription, _1x1ConvolutionDescription, PoolingOutputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm); DnnInstance.GetConvolutionBackwardDataWorkspaceSize(Secondary1x1FilterDescription, Secondary1x1OutputDescription, _1x1ConvolutionDescription, PoolingOutputDescription, algorithm, out size); - using (DeviceMemory poolDx_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ.Size)) + using (DeviceMemory + dy_gpu = DnnInstance.Gpu.AllocateDevice(delta_1, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels), + poolDx_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ.Size)) using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) { deviceptr p1x1PoolingWeights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights + _5x5Weights; - DnnInstance.ConvolutionBackwardData(1, Secondary1x1FilterDescription, p1x1PoolingWeights_gpu, Secondary1x1OutputDescription, pdy_gpu, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, PoolingOutputDescription, poolDx_gpu.Ptr); + DnnInstance.ConvolutionBackwardData(1, Secondary1x1FilterDescription, p1x1PoolingWeights_gpu, Secondary1x1OutputDescription, dy_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, PoolingOutputDescription, poolDx_gpu.Ptr); DnnInstance.ActivationBackward(PoolingZ.Entities, PoolingZ.Length, poolDy_gpu.Ptr, poolDx_gpu.Ptr, ActivationFunctions.ActivationPrime); PoolingDelta.TryFree(); poolDy_gpu.CopyToHost(PoolingDelta.Entities, PoolingDelta.Length, out PoolingDelta); @@ -526,7 +530,7 @@ public override void Backpropagate(in Tensor delta_1, 
in Tensor z, ActivationFun // Pooling backward using (DeviceMemory poolZ_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ)) { - DnnInstance.PoolingBackward(PoolingDescription, 1, PoolingOutputDescription, poolZ_gpu.Ptr, PoolingOutputDescription, poolDy_gpu.Ptr, InputDescription, default, 1, InputDescription, dx_gpu.Ptr); + DnnInstance.PoolingBackward(PoolingDescription, 1, PoolingOutputDescription, poolZ_gpu.Ptr, PoolingOutputDescription, poolDy_gpu.Ptr, InputDescription, default, 1, InputDescription, dx_gpu.Ptr); // TODO: finish pooling backward } } From c6a84a69e5492e70b081f0a0a009dacc91270c26 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 26 Dec 2017 17:41:27 +0100 Subject: [PATCH 17/30] Inception layer implementation finished (WIP) --- .../Extensions/GpuExtensions.cs | 86 ++++----- .../Layers/CuDnnInceptionLayer.cs | 176 ++++++++++++++++-- .../GpuExtensionsTest.cs | 2 +- 3 files changed, 200 insertions(+), 64 deletions(-) diff --git a/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs b/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs index 6adb6cb..03d7ead 100644 --- a/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs +++ b/NeuralNetwork.NET.Cuda/Extensions/GpuExtensions.cs @@ -27,6 +27,39 @@ public static DeviceMemory AllocateDevice([NotNull] this Gpu gpu, in Tens : throw new InvalidOperationException($"Failed to copy the source data on the target GPU device, [CUDA ERROR] {result}"); } + /// + /// Allocates a memory area on device memory, reading the target values at a given offset from the input + /// + /// The device to use + /// The source with the data to copy + /// The column offset for the data to read from each row + /// + [MustUseReturnValue, NotNull] + public static unsafe DeviceMemory AllocateDevice([NotNull] this Gpu gpu, in Tensor source, int offset, int length) + { + // Checks + if (source.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid"); + + // Memory copy + DeviceMemory result_gpu = gpu.AllocateDevice(source.Entities * length); + CUDAInterop.CUDA_MEMCPY2D_st* ptSt = stackalloc CUDAInterop.CUDA_MEMCPY2D_st[1]; + ptSt[0] = new CUDAInterop.CUDA_MEMCPY2D_st + { + srcMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_HOST, + srcHost = source.Ptr + sizeof(float) * offset, + srcPitch = new IntPtr(sizeof(float) * source.Length), + dstMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE, + dstDevice = result_gpu.Handle, + dstPitch = new IntPtr(sizeof(float) * length), + WidthInBytes = new IntPtr(sizeof(float) * length), + Height = new IntPtr(source.Entities) + }; + CUDAInterop.cudaError_enum result = CUDAInterop.cuMemcpy2D(ptSt); + return result == CUDAInterop.cudaError_enum.CUDA_SUCCESS + ? 
result_gpu + : throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}"); + } + /// /// Copies the contents of the input instance to the target host memory area /// @@ -40,20 +73,6 @@ public static void CopyTo([NotNull] this DeviceMemory source, in Tensor d throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}"); } - /// - /// Copies the contents of the input to a new memory area on the unmanaged heap - /// - /// The source memory to copy - /// The height of the input memory area - /// The width of the input memory area - /// The resulting matrix - [MustUseReturnValue] - public static void CopyToHost([NotNull] this DeviceMemory source, int n, int chw, out Tensor result) - { - Tensor.New(n, chw, out result); - source.CopyTo(result); - } - /// /// Copies the source data into the target , splitting each individual entry into its own row /// @@ -61,7 +80,7 @@ public static void CopyToHost([NotNull] this DeviceMemory source, int n, /// The destination that will store the data /// The column offset for the data for each entry /// The number of values to copy for each entry - public static unsafe void CopyToRows([NotNull] this DeviceMemory source, in Tensor destination, int offset, int length) + public static unsafe void CopyTo([NotNull] this DeviceMemory source, in Tensor destination, int offset, int length) { // Checks if (source.Length / length != destination.Entities) throw new ArgumentOutOfRangeException(nameof(length), "The input length doesn't match the given arguments"); @@ -86,36 +105,17 @@ public static unsafe void CopyToRows([NotNull] this DeviceMemory source, } /// - /// Allocates a memory area on device memory, reading the target values at a given offset from the input + /// Copies the contents of the input to a new memory area on the unmanaged heap /// - /// The device to use - /// The source with the data to copy - /// The column offset for the data to read from each row - /// - [MustUseReturnValue, NotNull] - public static unsafe DeviceMemory AllocateDevice([NotNull] this Gpu gpu, in Tensor source, int offset, int length) + /// The source memory to copy + /// The height of the input memory area + /// The width of the input memory area + /// The resulting matrix + [MustUseReturnValue] + public static void CopyToHost([NotNull] this DeviceMemory source, int n, int chw, out Tensor result) { - // Checks - if (source.Length - offset < length) throw new ArgumentOutOfRangeException(nameof(offset), "The input offset isn't valid"); - - // Memory copy - DeviceMemory result_gpu = gpu.AllocateDevice(source.Entities * length); - CUDAInterop.CUDA_MEMCPY2D_st* ptSt = stackalloc CUDAInterop.CUDA_MEMCPY2D_st[1]; - ptSt[0] = new CUDAInterop.CUDA_MEMCPY2D_st - { - srcMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_HOST, - srcHost = source.Ptr + sizeof(float) * offset, - srcPitch = new IntPtr(sizeof(float) * source.Length), - dstMemoryType = CUDAInterop.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE, - dstDevice = result_gpu.Handle, - dstPitch = new IntPtr(sizeof(float) * length), - WidthInBytes = new IntPtr(sizeof(float) * length), - Height = new IntPtr(source.Entities) - }; - CUDAInterop.cudaError_enum result = CUDAInterop.cuMemcpy2D(ptSt); - return result == CUDAInterop.cudaError_enum.CUDA_SUCCESS - ? 
result_gpu - : throw new InvalidOperationException($"Failed to copy the source data on the given destination, [CUDA ERROR] {result}"); + Tensor.New(n, chw, out result); + source.CopyTo(result); } #endregion diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index ef714cb..08221df 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -82,6 +82,9 @@ private int Secondary1x1Weights get => InputInfo.Channels * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels; } + // A copy of the forward layer inputs + private Tensor _Inputs; + // 3x3 reduction 1x1 convolution activity private Tensor _3x3Reduce1x1Z; @@ -104,10 +107,10 @@ private int Secondary1x1Weights private Tensor PoolingZ; // Pooling output activation - private Tensor PoolingA; + private Tensor _PoolingA; // Pooling output delta - private Tensor PoolingDelta; + private Tensor _PoolingDelta; #endregion @@ -296,6 +299,8 @@ internal CuDnnInceptionLayer(in TensorInfo input, in InceptionInfo info, [NotNul /// public override void Forward(in Tensor x, out Tensor z, out Tensor a) { + _Inputs.TryFree(); + x.Duplicate(out _Inputs); Tensor.New(x.Entities, OutputInfo.Size, out z); Tensor.New(x.Entities, OutputInfo.Size, out a); using (DeviceMemory @@ -319,11 +324,11 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, _1x1FilterDescription, pw_gpu, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _1x1OutputDescription, y_gpu.Ptr); } DnnInstance.AddTensor(1, _1x1BiasDescription, pb_gpu, 1, _1x1OutputDescription, y_gpu.Ptr); - y_gpu.CopyToRows(z, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels); + y_gpu.CopyTo(z, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels); // 1x1 convolution activation DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); - y_gpu.CopyToRows(a, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels); + y_gpu.CopyTo(a, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels); } // 1x1 + 3x3 convolution @@ -356,11 +361,11 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) DnnInstance.ConvolutionForward(1, _3x3Reduce1x1OutputDescription, y1x1_gpu.Ptr, _3x3FilterDescription, pw_gpu += _3x3Reduce1x1Weights, _3x3ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3OutputDescription, y_gpu.Ptr); } DnnInstance.AddTensor(1, _3x3BiasDescription, pb_gpu += OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, 1, _3x3OutputDescription, y_gpu.Ptr); - y_gpu.CopyToRows(z, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels); + y_gpu.CopyTo(z, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels); // Activation DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); - y_gpu.CopyToRows(a, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels); + y_gpu.CopyTo(a, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * 
OperationInfo.Secondary3x3ConvolutionKernels); } // 1x1 + 5x5 convolution @@ -393,11 +398,11 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) DnnInstance.ConvolutionForward(1, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr, _5x5FilterDescription, pw_gpu += _5x5Reduce1x1Weights, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5OutputDescription, y_gpu.Ptr); } DnnInstance.AddTensor(1, _5x5BiasDescription, pb_gpu += OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, 1, _5x5OutputDescription, y_gpu.Ptr); - y_gpu.CopyToRows(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); + y_gpu.CopyTo(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); // Activation DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); - y_gpu.CopyToRows(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); + y_gpu.CopyTo(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); } // Pooling pipeline @@ -412,8 +417,8 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) PoolingZ.TryFree(); y_gpu.CopyToHost(x.Entities, InputInfo.Size, out PoolingZ); DnnInstance.ActivationForward(x.Entities, x.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); - PoolingA.TryFree(); - y_gpu.CopyToHost(x.Entities, InputInfo.Size, out PoolingA); + _PoolingA.TryFree(); + y_gpu.CopyToHost(x.Entities, InputInfo.Size, out _PoolingA); // 1x1 convolution using (DeviceMemory _1x1Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels)) @@ -426,11 +431,11 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) DnnInstance.ConvolutionForward(1, InputDescription, y_gpu.Ptr, Secondary1x1FilterDescription, pw_gpu += _5x5Weights, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); } DnnInstance.AddTensor(1, Secondary1x1BiasDescription, pb_gpu += OperationInfo.Secondary5x5ConvolutionKernels, 1, Secondary1x1OutputDescription, _1x1Output_gpu.Ptr); - _1x1Output_gpu.CopyToRows(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels); + _1x1Output_gpu.CopyTo(z, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels); // 1x1 convolution activation DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, _1x1Output_gpu.Ptr, _1x1Output_gpu.Ptr, ActivationFunctions.Activation); - _1x1Output_gpu.CopyToRows(a, InputInfo.SliceSize * 
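// Offset past the columns already written by the 1x1, 3x3 and 5x5 pipelines, so the pooling branch fills the last slot of the depth concatenation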
(OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels); + _1x1Output_gpu.CopyTo(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels); } } } @@ -443,7 +448,6 @@ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFun dx_gpu = DnnInstance.Gpu.AllocateDevice(z.Size), w_gpu = DnnInstance.Gpu.AllocateDevice(Weights)) { - // First 1x1 convolution DnnInstance.GetConvolutionBackwardDataAlgorithm(_1x1FilterDescription, _1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdDataAlgo algorithm); DnnInstance.GetConvolutionBackwardDataWorkspaceSize(_1x1FilterDescription, _1x1OutputDescription, _1x1ConvolutionDescription, InputDescription, algorithm, out IntPtr size); @@ -523,14 +527,16 @@ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFun deviceptr p1x1PoolingWeights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights + _5x5Weights; DnnInstance.ConvolutionBackwardData(1, Secondary1x1FilterDescription, p1x1PoolingWeights_gpu, Secondary1x1OutputDescription, dy_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, PoolingOutputDescription, poolDx_gpu.Ptr); DnnInstance.ActivationBackward(PoolingZ.Entities, PoolingZ.Length, poolDy_gpu.Ptr, poolDx_gpu.Ptr, ActivationFunctions.ActivationPrime); - PoolingDelta.TryFree(); - poolDy_gpu.CopyToHost(PoolingDelta.Entities, PoolingDelta.Length, out PoolingDelta); + _PoolingDelta.TryFree(); + poolDy_gpu.CopyToHost(_PoolingDelta.Entities, _PoolingDelta.Length, out _PoolingDelta); } // Pooling backward - using (DeviceMemory poolZ_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ)) + using (DeviceMemory + x_gpu = DnnInstance.Gpu.AllocateDevice(_Inputs), + poolZ_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ)) { - DnnInstance.PoolingBackward(PoolingDescription, 1, PoolingOutputDescription, poolZ_gpu.Ptr, PoolingOutputDescription, poolDy_gpu.Ptr, InputDescription, default, 1, InputDescription, dx_gpu.Ptr); // TODO: finish pooling backward + DnnInstance.PoolingBackward(PoolingDescription, 1, PoolingOutputDescription, poolZ_gpu.Ptr, PoolingOutputDescription, poolDy_gpu.Ptr, InputDescription, x_gpu.Ptr, 1, InputDescription, dx_gpu.Ptr); // TODO: finish pooling backward } } @@ -546,7 +552,136 @@ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFun /// public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJdw, out Tensor dJdb) { - throw new NotImplementedException(); + Tensor.New(1, Weights.Length, out dJdw); + Tensor.New(1, Biases.Length, out dJdb); + using (DeviceMemory a_gpu = DnnInstance.Gpu.AllocateDevice(a)) + { + // 1x1 weights + using (DeviceMemory dy1x1_gpu = DnnInstance.Gpu.AllocateDevice(delta, 0, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels)) + { + DnnInstance.GetConvolutionBackwardFilterAlgorithm(InputDescription, _1x1OutputDescription, _1x1ConvolutionDescription, _1x1FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm); + 
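// Each pipeline below repeats the same pattern: query the fastest backward-filter algorithm, size its workspace, compute dw with ConvolutionBackwardFilter and db with ConvolutionBackwardBias, then copy both into the flat dJdw and dJdb tensors at a running offset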
DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(InputDescription, _1x1OutputDescription, _1x1ConvolutionDescription, _1x1FilterDescription, algorithm, out IntPtr size); + using (DeviceMemory dw_gpu = DnnInstance.Gpu.AllocateDevice(_1x1Weights)) + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + DnnInstance.ConvolutionBackwardFilter(1, InputDescription, a_gpu.Ptr, _1x1OutputDescription, dy1x1_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _1x1FilterDescription, dw_gpu.Ptr); + dw_gpu.CopyTo(dJdw, 0, _1x1Weights); + } + + // 1x1 bias + using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Primary1x1ConvolutionKernels)) + { + DnnInstance.ConvolutionBackwardBias(1, _1x1OutputDescription, dy1x1_gpu.Ptr, 0, _1x1BiasDescription, db_gpu.Ptr); + db_gpu.CopyTo(dJdb, 0, OperationInfo.Primary1x1ConvolutionKernels); + } + } + + // 3x3 reduce 1x1 weights + using (DeviceMemory dy3x3Reduce1x1_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1A)) + { + DnnInstance.GetConvolutionBackwardFilterAlgorithm(InputDescription, _3x3Reduce1x1OutputDescription, _1x1ConvolutionDescription, _3x3Reduce1x1FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm); + DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(InputDescription, _3x3Reduce1x1OutputDescription, _1x1ConvolutionDescription, _3x3Reduce1x1FilterDescription, algorithm, out IntPtr size); + using (DeviceMemory dw_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1Weights)) + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + DnnInstance.ConvolutionBackwardFilter(1, InputDescription, a_gpu.Ptr, _3x3Reduce1x1OutputDescription, dy3x3Reduce1x1_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3Reduce1x1FilterDescription, dw_gpu.Ptr); + dw_gpu.CopyTo(dJdw, _1x1Weights, _3x3Reduce1x1Weights); + } + + // 3x3 reduce 1x1 bias + using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Primary3x3Reduce1x1ConvolutionKernels)) + { + DnnInstance.ConvolutionBackwardBias(1, _3x3Reduce1x1OutputDescription, dy3x3Reduce1x1_gpu.Ptr, 0, _3x3Reduce1x1BiasDescription, db_gpu.Ptr); + db_gpu.CopyTo(dJdb, OperationInfo.Primary1x1ConvolutionKernels, OperationInfo.Primary3x3Reduce1x1ConvolutionKernels); + } + } + + // 5x5 reduce 1x1 weights + using (DeviceMemory dy5x5Reduce1x1_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1A)) + { + DnnInstance.GetConvolutionBackwardFilterAlgorithm(InputDescription, _5x5Reduce1x1OutputDescription, _1x1ConvolutionDescription, _5x5Reduce1x1FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm); + DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(InputDescription, _5x5Reduce1x1OutputDescription, _1x1ConvolutionDescription, _5x5Reduce1x1FilterDescription, algorithm, out IntPtr size); + using (DeviceMemory dw_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1Weights)) + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + DnnInstance.ConvolutionBackwardFilter(1, InputDescription, a_gpu.Ptr, _5x5Reduce1x1OutputDescription, dy5x5Reduce1x1_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5Reduce1x1FilterDescription, dw_gpu.Ptr); + dw_gpu.CopyTo(dJdw, _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights, _5x5Reduce1x1Weights); + } + + // 5x5 reduce 1x1 bias + using (DeviceMemory db_gpu = 
DnnInstance.Gpu.AllocateDevice(OperationInfo.Primary5x5Reduce1x1ConvolutionKernels)) + { + DnnInstance.ConvolutionBackwardBias(1, _5x5Reduce1x1OutputDescription, dy5x5Reduce1x1_gpu.Ptr, 0, _5x5Reduce1x1BiasDescription, db_gpu.Ptr); + db_gpu.CopyTo(dJdb, OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Primary3x3Reduce1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels, OperationInfo.Primary5x5Reduce1x1ConvolutionKernels); + } + } + } + + // 3x3 weights + using (DeviceMemory dy3x3_gpu = DnnInstance.Gpu.AllocateDevice(delta, InputInfo.SliceSize * OperationInfo.Primary1x1ConvolutionKernels, InputInfo.SliceSize * OperationInfo.Secondary3x3ConvolutionKernels)) + { + DnnInstance.GetConvolutionBackwardFilterAlgorithm(_3x3Reduce1x1OutputDescription, _3x3OutputDescription, _3x3ConvolutionDescription, _3x3FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm); + DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(_3x3Reduce1x1OutputDescription, _3x3OutputDescription, _3x3ConvolutionDescription, _3x3FilterDescription, algorithm, out IntPtr size); + using (DeviceMemory + a3x3Reduce1x1_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1A), + dw_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Weights)) + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + DnnInstance.ConvolutionBackwardFilter(1, _3x3Reduce1x1OutputDescription, a3x3Reduce1x1_gpu.Ptr, _3x3OutputDescription, dy3x3_gpu.Ptr, _3x3ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _3x3FilterDescription, dw_gpu.Ptr); + dw_gpu.CopyTo(dJdw, _1x1Weights + _3x3Reduce1x1Weights, _3x3Weights); + } + + // 3x3 bias + using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Secondary3x3ConvolutionKernels)) + { + DnnInstance.ConvolutionBackwardBias(1, _3x3OutputDescription, dy3x3_gpu.Ptr, 0, _3x3BiasDescription, db_gpu.Ptr); + db_gpu.CopyTo(dJdb, OperationInfo.Primary1x1ConvolutionKernels, OperationInfo.Secondary3x3ConvolutionKernels); + } + } + + // 5x5 weights + using (DeviceMemory dy5x5_gpu = DnnInstance.Gpu.AllocateDevice(delta, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels)) + { + DnnInstance.GetConvolutionBackwardFilterAlgorithm(_5x5Reduce1x1OutputDescription, _5x5OutputDescription, _5x5ConvolutionDescription, _5x5FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm); + DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(_5x5Reduce1x1OutputDescription, _5x5OutputDescription, _5x5ConvolutionDescription, _5x5FilterDescription, algorithm, out IntPtr size); + using (DeviceMemory + a5x5Reduce1x1_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1A), + dw_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Weights)) + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + DnnInstance.ConvolutionBackwardFilter(1, _5x5Reduce1x1OutputDescription, a5x5Reduce1x1_gpu.Ptr, _5x5OutputDescription, dy5x5_gpu.Ptr, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5FilterDescription, dw_gpu.Ptr); + dw_gpu.CopyTo(dJdw, _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights, _5x5Weights); + } + + // 5x5 bias + using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Secondary5x5ConvolutionKernels)) + { + DnnInstance.ConvolutionBackwardBias(1, 
_5x5OutputDescription, dy5x5_gpu.Ptr, 0, _5x5BiasDescription, db_gpu.Ptr); + db_gpu.CopyTo(dJdb, OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Primary3x3Reduce1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, OperationInfo.Secondary5x5ConvolutionKernels); + } + } + + // Pooling 1x1 convolution + using (DeviceMemory dy1x1Pool_gpu = DnnInstance.Gpu.AllocateDevice(delta, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels)) + { + DnnInstance.GetConvolutionBackwardFilterAlgorithm(PoolingOutputDescription, Secondary1x1OutputDescription, _1x1ConvolutionDescription, Secondary1x1FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm); + DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(PoolingOutputDescription, Secondary1x1OutputDescription, _1x1ConvolutionDescription, Secondary1x1FilterDescription, algorithm, out IntPtr size); + using (DeviceMemory + aPool_gpu = DnnInstance.Gpu.AllocateDevice(_PoolingA), + dw_gpu = DnnInstance.Gpu.AllocateDevice(Secondary1x1Weights)) + using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) + { + DnnInstance.ConvolutionBackwardFilter(1, PoolingOutputDescription, aPool_gpu.Ptr, Secondary1x1OutputDescription, dy1x1Pool_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, Secondary1x1FilterDescription, dw_gpu.Ptr); + dw_gpu.CopyTo(dJdw, _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights + _5x5Weights, Secondary1x1Weights); + } + + // Pooling 1x1 bias + using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Secondary1x1AfterPoolingConvolutionKernels)) + { + DnnInstance.ConvolutionBackwardBias(1, PoolingOutputDescription, dy1x1Pool_gpu.Ptr, 0, Secondary1x1BiasDescription, db_gpu.Ptr); + db_gpu.CopyTo(dJdb, OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Primary3x3Reduce1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Primary5x5Reduce1x1ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels, OperationInfo.Secondary1x1AfterPoolingConvolutionKernels); + } + } } #endregion @@ -568,6 +703,7 @@ void IDisposable.Dispose() // Private Dispose method private void Dispose() { + _Inputs.TryFree(); _3x3Reduce1x1Z.TryFree(); _3x3Reduce1x1A.TryFree(); _3x3Reduce1x1Delta.TryFree(); @@ -575,8 +711,8 @@ private void Dispose() _5x5Reduce1x1A.TryFree(); _5x5Reduce1x1Delta.TryFree(); PoolingZ.TryFree(); - PoolingA.TryFree(); - PoolingDelta.TryFree(); + _PoolingA.TryFree(); + _PoolingDelta.TryFree(); } #endregion diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/GpuExtensionsTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/GpuExtensionsTest.cs index 6676490..9531146 100644 --- a/Unit/NeuralNetwork.NET.Cuda.Unit/GpuExtensionsTest.cs +++ b/Unit/NeuralNetwork.NET.Cuda.Unit/GpuExtensionsTest.cs @@ -21,7 +21,7 @@ public void CopyToRows() Gpu gpu = Gpu.Default; using (DeviceMemory m_gpu = gpu.AllocateDevice(test)) { - m_gpu.CopyToRows(tensor, 5, 3); + m_gpu.CopyTo(tensor, 5, 3); } float[,] expected = { From ceae801ab24c58d24e4b0bdb783de25019b8b390 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 26 Dec 2017 17:53:49 +0100 Subject: [PATCH 18/30] Inception layer public API and serialization methods added --- 
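A note for reviewers: the new factory mirrors the other CuDnnNetworkLayers entries, so an inception module slots into a network definition like any other layer. A minimal sketch with a hypothetical BuildInceptionModule helper (the input and info values are assumed to come from the caller, since the exact InceptionInfo.New argument list changed earlier in this series):

// Hypothetical helper, relying only on the factory added below; bias initialization keeps its default
public static INetworkLayer BuildInceptionModule(in TensorInfo input, in InceptionInfo info)
    => CuDnnNetworkLayers.Inception(input, info);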
.../APIs/CuDnnNetworkLayers.cs | 13 ++++++++ .../APIs/CuDnnNetworkLayersDeserializer.cs | 1 + .../Layers/CuDnnInceptionLayer.cs | 30 +++++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers.cs b/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers.cs index 13c6ab2..1b6f1d6 100644 --- a/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers.cs +++ b/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayers.cs @@ -67,5 +67,18 @@ public static INetworkLayer Convolutional( [PublicAPI] [Pure, NotNull] public static INetworkLayer Pooling(in TensorInfo input, in PoolingInfo info, ActivationFunctionType activation) => new CuDnnPoolingLayer(input, info, activation); + + /// + /// Creates a new inception layer with the given input and features + /// + /// The input volume to process + /// The info on the operations to execute inside the layer + /// Indicates the desired initialization mode to use for the layer bias values + [PublicAPI] + [Pure, NotNull] + public static INetworkLayer Inception( + in TensorInfo input, in InceptionInfo info, + BiasInitializationMode biasMode = BiasInitializationMode.Zero) + => new CuDnnInceptionLayer(input, info, biasMode); } } \ No newline at end of file diff --git a/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayersDeserializer.cs b/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayersDeserializer.cs index dfafc6a..fda4c29 100644 --- a/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayersDeserializer.cs +++ b/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayersDeserializer.cs @@ -31,6 +31,7 @@ private static INetworkLayer Deserialize([NotNull] Stream stream, LayerType type case LayerType.Convolutional: return CuDnnConvolutionalLayer.Deserialize(stream); case LayerType.Pooling: return CuDnnPoolingLayer.Deserialize(stream); case LayerType.Softmax: return CuDnnSoftmaxLayer.Deserialize(stream); + case LayerType.Inception: return CuDnnInceptionLayer.Deserialize(stream); default: return null; } } diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index 08221df..3b60619 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -6,6 +6,7 @@ using NeuralNetworkNET.APIs.Structs; using NeuralNetworkNET.Cuda.Extensions; using NeuralNetworkNET.Cuda.Services; +using NeuralNetworkNET.Extensions; using NeuralNetworkNET.Networks.Activations; using NeuralNetworkNET.Networks.Activations.Delegates; using NeuralNetworkNET.Networks.Implementations.Layers.Abstract; @@ -686,9 +687,38 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ #endregion + #region Misc + /// public override INetworkLayer Clone() => new CuDnnInceptionLayer(InputInfo, OperationInfo, Weights, Biases); + /// + public override void Serialize(System.IO.Stream stream) + { + base.Serialize(stream); + stream.Write(OperationInfo); + } + + /// + /// Tries to deserialize a new from the input + /// + /// The input to use to read the layer data + [MustUseReturnValue, CanBeNull] + public static INetworkLayer Deserialize([NotNull] System.IO.Stream stream) + { + if (!stream.TryRead(out TensorInfo input)) return null; + if (!stream.TryRead(out _)) return null; + if (!stream.TryRead(out _)) return null; + if (!stream.TryRead(out int wLength)) return null; + float[] weights = stream.ReadUnshuffled(wLength); + if (!stream.TryRead(out int bLength)) return null; + float[] biases = stream.ReadUnshuffled(bLength); + if (!stream.TryRead(out InceptionInfo info)) 
return null; + return new CuDnnInceptionLayer(input, info, weights, biases); + } + + #endregion + #region IDisposable ~CuDnnInceptionLayer() => Dispose(); From 393d01a73df9c2920fca4c679acfdc0b7c7d1001 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 26 Dec 2017 19:02:59 +0100 Subject: [PATCH 19/30] Minor fixes to the inception layer (WIP) --- NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index 3b60619..7394aaf 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -241,12 +241,13 @@ private int Secondary1x1Weights /// Gets the instance for the current layer /// [NotNull] - private readonly Dnn DnnInstance = DnnService.Instance; + private readonly Dnn DnnInstance = null; // cuDNN fields setup private void SetupCuDnnInfo() { // First 1x1 convolution + _1x1ConvolutionDescription.Set2D(0, 0, 1, 1, 1, 1, Alea.cuDNN.ConvolutionMode.CROSS_CORRELATION); _1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Primary1x1ConvolutionKernels, InputInfo.Channels, 1, 1); _1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Primary1x1ConvolutionKernels, 1, 1); @@ -272,7 +273,7 @@ private void SetupCuDnnInfo() PoolingDescription.Set2D(Alea.cuDNN.PoolingMode.AVERAGE_COUNT_EXCLUDE_PADDING, NanPropagation.PROPAGATE_NAN, 3, 3, 1, 1, 1, 1); // Secondary 1x1 convolution - Secondary1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, InputInfo.Channels, _OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, 1, 1); + Secondary1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, InputInfo.Channels, 1, 1); Secondary1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, 1, 1); } @@ -424,7 +425,7 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) // 1x1 convolution using (DeviceMemory _1x1Output_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels)) { - Secondary1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width); + Secondary1x1OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, InputInfo.Height, InputInfo.Width); DnnInstance.GetConvolutionForwardAlgorithm(InputDescription, Secondary1x1FilterDescription, _1x1ConvolutionDescription, Secondary1x1OutputDescription, ConvolutionFwdPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionFwdAlgo algorithm); DnnInstance.GetConvolutionForwardWorkspaceSize(InputDescription, Secondary1x1FilterDescription, _1x1ConvolutionDescription, Secondary1x1OutputDescription, algorithm, out IntPtr size); using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) From 84502d3f363945459572eb654dd34dba689816ab Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 26 Dec 2017 19:06:03 +0100 Subject: [PATCH 20/30] Ooops! 
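
Restores the DnnInstance field to DnnService.Instance after the previous WIP commit accidentally left it assigned to null.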
--- NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index 7394aaf..345edc2 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -241,7 +241,7 @@ private int Secondary1x1Weights /// Gets the instance for the current layer /// [NotNull] - private readonly Dnn DnnInstance = null; + private readonly Dnn DnnInstance = DnnService.Instance; // cuDNN fields setup private void SetupCuDnnInfo() From bfe4a04f8d3d6f961000d3d4a50a2d6cf779583a Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 26 Dec 2017 19:31:45 +0100 Subject: [PATCH 21/30] More fixes to the inception layer (WIP) --- .../Layers/CuDnnInceptionLayer.cs | 10 +++++----- .../Layers/Helpers/WeightsProvider.cs | 14 +++++++++++++- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index 345edc2..c3a12b8 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -516,7 +516,7 @@ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFun } // Pooling - using (DeviceMemory poolDy_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ)) + using (DeviceMemory pooldy_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ)) { // 1x1 backward DnnInstance.GetConvolutionBackwardDataAlgorithm(Secondary1x1FilterDescription, Secondary1x1OutputDescription, _1x1ConvolutionDescription, PoolingOutputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm); @@ -528,9 +528,9 @@ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFun { deviceptr p1x1PoolingWeights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights + _5x5Weights; DnnInstance.ConvolutionBackwardData(1, Secondary1x1FilterDescription, p1x1PoolingWeights_gpu, Secondary1x1OutputDescription, dy_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, PoolingOutputDescription, poolDx_gpu.Ptr); - DnnInstance.ActivationBackward(PoolingZ.Entities, PoolingZ.Length, poolDy_gpu.Ptr, poolDx_gpu.Ptr, ActivationFunctions.ActivationPrime); + DnnInstance.ActivationBackward(PoolingZ.Entities, PoolingZ.Length, pooldy_gpu.Ptr, poolDx_gpu.Ptr, ActivationFunctions.ActivationPrime); _PoolingDelta.TryFree(); - poolDy_gpu.CopyToHost(_PoolingDelta.Entities, _PoolingDelta.Length, out _PoolingDelta); + pooldy_gpu.CopyToHost(PoolingZ.Entities, PoolingZ.Length, out _PoolingDelta); } // Pooling backward @@ -538,7 +538,7 @@ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFun x_gpu = DnnInstance.Gpu.AllocateDevice(_Inputs), poolZ_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ)) { - DnnInstance.PoolingBackward(PoolingDescription, 1, PoolingOutputDescription, poolZ_gpu.Ptr, PoolingOutputDescription, poolDy_gpu.Ptr, InputDescription, x_gpu.Ptr, 1, InputDescription, dx_gpu.Ptr); // TODO: finish pooling backward + DnnInstance.PoolingBackward(PoolingDescription, 1, PoolingOutputDescription, poolZ_gpu.Ptr, PoolingOutputDescription, pooldy_gpu.Ptr, InputDescription, x_gpu.Ptr, 1, InputDescription, dx_gpu.Ptr); // TODO: finish pooling backward } } @@ -680,7 +680,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ // Pooling 
1x1 bias using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Secondary1x1AfterPoolingConvolutionKernels)) { - DnnInstance.ConvolutionBackwardBias(1, PoolingOutputDescription, dy1x1Pool_gpu.Ptr, 0, Secondary1x1BiasDescription, db_gpu.Ptr); + DnnInstance.ConvolutionBackwardBias(1, Secondary1x1OutputDescription, dy1x1Pool_gpu.Ptr, 0, Secondary1x1BiasDescription, db_gpu.Ptr); db_gpu.CopyTo(dJdb, OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Primary3x3Reduce1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Primary5x5Reduce1x1ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels, OperationInfo.Secondary1x1AfterPoolingConvolutionKernels); } } diff --git a/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs b/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs index 572ef96..23cd6be 100644 --- a/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs +++ b/NeuralNetwork.NET/Networks/Implementations/Layers/Helpers/WeightsProvider.cs @@ -76,6 +76,7 @@ public static unsafe float[] NewConvolutionalKernels(in TensorInfo input, int ke [Pure, NotNull] public static unsafe float[] NewInceptionWeights(in TensorInfo input, in InceptionInfo info) { + // Setup int _1x1Length = input.Channels * info.Primary1x1ConvolutionKernels, _3x3Reduce1x1Length = input.Channels * info.Primary3x3Reduce1x1ConvolutionKernels, @@ -83,19 +84,30 @@ public static unsafe float[] NewInceptionWeights(in TensorInfo input, in Incepti _5x5Reduce1x1Length = input.Channels * info.Primary5x5Reduce1x1ConvolutionKernels, _5x5Length = 5 * 5 * info.Primary5x5Reduce1x1ConvolutionKernels * info.Secondary5x5ConvolutionKernels, secondary1x1Length = input.Channels * info.Secondary1x1AfterPoolingConvolutionKernels; - float[] weights = new float[_1x1Length + _3x3Length + _5x5Length + secondary1x1Length]; + float[] weights = new float[_1x1Length + _3x3Reduce1x1Length + _3x3Length + _5x5Reduce1x1Length + _5x5Length + secondary1x1Length]; fixed (float* pw = weights) { + // 1x1 Tensor.Reshape(pw, 1, _1x1Length, out Tensor wTensor); KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels); + + // 3x3 reduce 1x1 Tensor.Reshape(pw + _1x1Length, 1, _3x3Reduce1x1Length, out wTensor); KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels); + + // 3x3 Tensor.Reshape(pw + _1x1Length + _3x3Reduce1x1Length, 1, _3x3Length, out wTensor); KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, 3 * 3 * info.Primary3x3Reduce1x1ConvolutionKernels); + + // 5x5 reduce 1x1 Tensor.Reshape(pw + _1x1Length + _3x3Reduce1x1Length + _3x3Length, 1, _5x5Reduce1x1Length, out wTensor); KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels); + + // 5x5 Tensor.Reshape(pw + _1x1Length + _3x3Reduce1x1Length + _3x3Length + _5x5Reduce1x1Length, 1, _5x5Length, out wTensor); KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, 5 * 5 * info.Primary5x5Reduce1x1ConvolutionKernels); + + // Pool 1x1 Tensor.Reshape(pw + _1x1Length + _3x3Reduce1x1Length + _3x3Length + _5x5Reduce1x1Length + _5x5Length, 1, secondary1x1Length, out wTensor); KerasWeightsProvider.FillWithHeEtAlUniform(wTensor, input.Channels); } From 096994ad605bca5aab0d2f92fc2860d38e330de8 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 26 Dec 2017 23:34:57 +0100 Subject: [PATCH 22/30] Fixed convolution output size --- NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs | 15 +++++++++++++++ .../Implementations/Layers/ConvolutionalLayer.cs | 2 +- 2 
files changed, 16 insertions(+), 1 deletion(-) diff --git a/NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs b/NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs index 49a34f8..cb8c7ad 100644 --- a/NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs +++ b/NeuralNetwork.NET/APIs/Structs/ConvolutionInfo.cs @@ -75,6 +75,21 @@ public static ConvolutionInfo New( #endregion + /// + /// Calculates the output size after applying a convolution operation to the input tensor + /// + /// The info on the input tensor + /// The size of the convolution kernels + /// The number of convolution kernels to be used + [Pure] + internal TensorInfo GetForwardOutputTensorInfo(in TensorInfo input, (int X, int Y) field, int kernels) + { + int + h = (input.Height - field.X + 2 * VerticalPadding) / VerticalStride + 1, + w = (input.Width - field.Y + 2 * HorizontalPadding) / HorizontalStride + 1; + return new TensorInfo(h, w, kernels); + } + #region Equality /// diff --git a/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs b/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs index 922cf50..4f28de0 100644 --- a/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs +++ b/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs @@ -58,7 +58,7 @@ public ref readonly TensorInfo KernelInfo #endregion public ConvolutionalLayer(in TensorInfo input, in ConvolutionInfo operation, (int X, int Y) kernelSize, int kernels, ActivationFunctionType activation, BiasInitializationMode biasMode) - : base(input, new TensorInfo(input.Height - kernelSize.X + 1, input.Width - kernelSize.Y + 1, kernels), + : base(input, operation.GetForwardOutputTensorInfo(input, kernelSize, kernels), WeightsProvider.NewConvolutionalKernels(input, kernelSize.X, kernelSize.Y, kernels), WeightsProvider.NewBiases(kernels, biasMode), activation) { From fa6c36d874f6a77cf2642702c7d5676d9ab80747 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Tue, 26 Dec 2017 23:35:34 +0100 Subject: [PATCH 23/30] Added initial inception layer tests --- .../Layers/CuDnnInceptionLayer.cs | 2 +- .../CuDnnLayersTest.cs | 77 +++++++++++++++++++ 2 files changed, 78 insertions(+), 1 deletion(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index c3a12b8..a49c6e5 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -406,7 +406,7 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); y_gpu.CopyTo(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); } - + // Pooling pipeline PoolingOutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width); using (DeviceMemory y_gpu = DnnInstance.Gpu.AllocateDevice(x.Size)) diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs index 790a697..dd473b5 100644 --- a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs +++ b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs @@ -8,6 +8,8 @@ using NeuralNetworkNET.Networks.Implementations.Layers; using 
NeuralNetworkNET.Networks.Implementations.Layers.Abstract; using NeuralNetworkNET.Networks.Implementations.Layers.Helpers; +using System; +using System.Runtime.CompilerServices; namespace NeuralNetworkNET.Cuda.Unit { @@ -248,5 +250,80 @@ public void PoolingBackward() } #endregion + + #region Inception + + [TestMethod] + public unsafe void InceptionForward1x1() + { + float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 32 * 32 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 32 * 32 * 3); + CuDnnConvolutionalLayer conv = new CuDnnConvolutionalLayer(TensorInfo.CreateForRgbImage(32, 32), ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian); + CuDnnInceptionLayer inception = new CuDnnInceptionLayer(conv.InputInfo, InceptionInfo.New(10, 10, 10, 10, 10, PoolingMode.Max, 10)); + Buffer.BlockCopy(conv.Weights, 0, inception.Weights, 0, sizeof(float) * conv.Weights.Length); + Buffer.BlockCopy(conv.Biases, 0, inception.Biases, 0, sizeof(float) * conv.Biases.Length); + fixed (float* px = x) + { + // Forward + Z + Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor); + conv.Forward(xTensor, out Tensor zConv, out Tensor aConv); + inception.Forward(xTensor, out Tensor zInc, out Tensor aInc); + Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped); + float* pzInc = (float*)zInc.Ptr.ToPointer(), preshaped = (float*)reshaped.Ptr.ToPointer(); + for (int i = 0; i < zConv.Entities; i++) + Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length); + Assert.IsTrue(reshaped.ContentEquals(zConv)); + + // A + float* paInc = (float*)aInc.Ptr.ToPointer(); + for (int i = 0; i < aConv.Entities; i++) + Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length); + Assert.IsTrue(reshaped.ContentEquals(aConv)); + zConv.Free(); + aConv.Free(); + zInc.Free(); + aInc.Free(); + reshaped.Free(); + } + } + + [TestMethod] + public unsafe void InceptionForward3x3Pipeline() + { + float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 32 * 32 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 32 * 32 * 3); + CuDnnConvolutionalLayer + conv1 = new CuDnnConvolutionalLayer(TensorInfo.CreateForRgbImage(32, 32), ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian), + conv2 = new CuDnnConvolutionalLayer(conv1.OutputInfo, ConvolutionInfo.New(ConvolutionMode.CrossCorrelation, 1, 1), (3, 3), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian); + CuDnnInceptionLayer inception = new CuDnnInceptionLayer(TensorInfo.CreateForRgbImage(32, 32), InceptionInfo.New(10, 10, 10, 10, 10, PoolingMode.Max, 10)); + fixed (float* pw = inception.Weights) + Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length)); + Buffer.BlockCopy(conv1.Weights, 0, inception.Weights, sizeof(float) * 3 * 10, sizeof(float) * conv1.Weights.Length); + Buffer.BlockCopy(conv2.Weights, 0, inception.Weights, sizeof(float) * 3 * 10 + sizeof(float) * conv1.Weights.Length, sizeof(float) * conv2.Weights.Length); + Buffer.BlockCopy(conv1.Biases, 0, inception.Biases, sizeof(float) * 10, sizeof(float) * conv1.Biases.Length); + Buffer.BlockCopy(conv2.Biases, 0, inception.Biases, sizeof(float) * 20, sizeof(float) * conv2.Biases.Length); + fixed (float* px = x) + { + 
Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor); + conv1.Forward(xTensor, out Tensor zTemp, out Tensor aTemp); + zTemp.Free(); + conv2.Forward(aTemp, out Tensor zConv, out Tensor aConv); + inception.Forward(xTensor, out Tensor zInc, out Tensor aInc); + Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped); + float* pzInc = (float*)zInc.Ptr.ToPointer() + 32 * 32 * 10, preshaped = (float*)reshaped.Ptr.ToPointer(); + for (int i = 0; i < zConv.Entities; i++) + Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length); + Assert.IsTrue(reshaped.ContentEquals(zConv)); + zConv.Free(); + zInc.Free(); + float* paInc = (float*)aInc.Ptr.ToPointer() + 32 * 32 * 10; + for (int i = 0; i < aConv.Entities; i++) + Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length); + Assert.IsTrue(reshaped.ContentEquals(aConv)); + aConv.Free(); + aInc.Free(); + reshaped.Free(); + } + } + + #endregion } } From b3e136c225057a76d4658ed0b230c0a132909bf1 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 27 Dec 2017 18:22:48 +0100 Subject: [PATCH 24/30] Minor bug fixes, inception 5x5 test added --- .../Layers/CuDnnInceptionLayer.cs | 6 +-- .../CuDnnLayersTest.cs | 39 +++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index a49c6e5..e336740 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -382,7 +382,7 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) using (DeviceMemory x_gpu = DnnInstance.Gpu.AllocateDevice(x)) using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) { - DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, _5x5Reduce1x1FilterDescription, pw_gpu += _3x3Weights, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr); + DnnInstance.ConvolutionForward(1, InputDescription, x_gpu.Ptr, _5x5Reduce1x1FilterDescription, pw_gpu += _3x3Weights, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr); } DnnInstance.AddTensor(1, _5x5Reduce1x1BiasDescription, pb_gpu += OperationInfo.Secondary3x3ConvolutionKernels, 1, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr); _5x5Reduce1x1Z.TryFree(); @@ -397,7 +397,7 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) DnnInstance.GetConvolutionForwardWorkspaceSize(_5x5Reduce1x1OutputDescription, _5x5FilterDescription, _5x5ConvolutionDescription, _5x5OutputDescription, algorithm, out size); using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) { - DnnInstance.ConvolutionForward(1, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr, _5x5FilterDescription, pw_gpu += _5x5Reduce1x1Weights, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5OutputDescription, y_gpu.Ptr); + DnnInstance.ConvolutionForward(1, _5x5Reduce1x1OutputDescription, y1x1_gpu.Ptr, _5x5FilterDescription, pw_gpu += _5x5Reduce1x1Weights, _5x5ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, _5x5OutputDescription, y_gpu.Ptr); } DnnInstance.AddTensor(1, _5x5BiasDescription, pb_gpu += OperationInfo.Primary5x5Reduce1x1ConvolutionKernels, 1, _5x5OutputDescription, y_gpu.Ptr); y_gpu.CopyTo(z, 
InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); @@ -406,7 +406,7 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); y_gpu.CopyTo(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); } - + // Pooling pipeline PoolingOutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width); using (DeviceMemory y_gpu = DnnInstance.Gpu.AllocateDevice(x.Size)) diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs index dd473b5..d7e4132 100644 --- a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs +++ b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs @@ -324,6 +324,45 @@ public unsafe void InceptionForward3x3Pipeline() } } + [TestMethod] + public unsafe void InceptionForward5x5Pipeline() + { + float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 12 * 12 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 12 * 12 * 3); + CuDnnConvolutionalLayer + conv1 = new CuDnnConvolutionalLayer(TensorInfo.CreateForRgbImage(12, 12), ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian), + conv2 = new CuDnnConvolutionalLayer(conv1.OutputInfo, ConvolutionInfo.New(ConvolutionMode.CrossCorrelation, 2, 2), (5, 5), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian); + CuDnnInceptionLayer inception = new CuDnnInceptionLayer(TensorInfo.CreateForRgbImage(12, 12), InceptionInfo.New(3, 2, 2, 10, 10, PoolingMode.Max, 2)); + fixed (float* pw = inception.Weights) + Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length)); + Buffer.BlockCopy(conv1.Weights, 0, inception.Weights, sizeof(float) * (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2), sizeof(float) * conv1.Weights.Length); + Buffer.BlockCopy(conv2.Weights, 0, inception.Weights, sizeof(float) * (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2 + conv1.Weights.Length), sizeof(float) * conv2.Weights.Length); + Buffer.BlockCopy(conv1.Biases, 0, inception.Biases, sizeof(float) * (3 + 2 + 2), sizeof(float) * conv1.Biases.Length); + Buffer.BlockCopy(conv2.Biases, 0, inception.Biases, sizeof(float) * (3 + 2 + 2 + 10), sizeof(float) * conv2.Biases.Length); + fixed (float* px = x) + { + Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor); + conv1.Forward(xTensor, out Tensor zTemp, out Tensor aTemp); + zTemp.Free(); + conv2.Forward(aTemp, out Tensor zConv, out Tensor aConv); + inception.Forward(xTensor, out Tensor zInc, out Tensor aInc); + Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped); + float* pzInc = (float*)zInc.Ptr.ToPointer() + 12 * 12 * (3 + 2), preshaped = (float*)reshaped.Ptr.ToPointer(); + for (int i = 0; i < zConv.Entities; i++) + Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length); + Assert.IsTrue(reshaped.ContentEquals(zConv)); + aTemp.Free(); + zConv.Free(); + zInc.Free(); + float* paInc = (float*)aInc.Ptr.ToPointer() + 12 * 12 * (3 + 2); + for (int i = 0; i < 
aConv.Entities; i++) + Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length); + Assert.IsTrue(reshaped.ContentEquals(aConv)); + aConv.Free(); + aInc.Free(); + reshaped.Free(); + } + } + #endregion } } From c4a3966b79e25619b3cce69a0b45ad4754e399a0 Mon Sep 17 00:00:00 2001 From: Sergio Pedri Date: Wed, 27 Dec 2017 19:03:36 +0100 Subject: [PATCH 25/30] Added inception layer pooling test, minor bug fixes --- .../Layers/CuDnnInceptionLayer.cs | 24 ++++++------- .../Layers/CuDnnPoolingLayer.cs | 2 +- NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs | 13 +++++++ .../Implementations/Layers/PoolingLayer.cs | 5 +-- .../CuDnnLayersTest.cs | 36 +++++++++++++++++++ 5 files changed, 63 insertions(+), 17 deletions(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index e336740..4999b46 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -105,7 +105,7 @@ private int Secondary1x1Weights private Tensor _5x5Reduce1x1Delta; // Pooling output activity - private Tensor PoolingZ; + private Tensor _PoolingZ; // Pooling output activation private Tensor _PoolingA; @@ -270,8 +270,8 @@ private void SetupCuDnnInfo() _5x5BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Secondary5x5ConvolutionKernels, 1, 1); // Pooling - PoolingDescription.Set2D(Alea.cuDNN.PoolingMode.AVERAGE_COUNT_EXCLUDE_PADDING, NanPropagation.PROPAGATE_NAN, 3, 3, 1, 1, 1, 1); - + PoolingDescription.Set2D((Alea.cuDNN.PoolingMode)OperationInfo.Pooling, NanPropagation.PROPAGATE_NAN, 3, 3, 1, 1, 1, 1); + // Secondary 1x1 convolution Secondary1x1FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, _OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, InputInfo.Channels, 1, 1); Secondary1x1BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, _OperationInfo.Secondary1x1AfterPoolingConvolutionKernels, 1, 1); @@ -406,7 +406,7 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) DnnInstance.ActivationForward(x.Entities, InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); y_gpu.CopyTo(a, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary5x5ConvolutionKernels); } - + // Pooling pipeline PoolingOutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, InputInfo.Channels, InputInfo.Height, InputInfo.Width); using (DeviceMemory y_gpu = DnnInstance.Gpu.AllocateDevice(x.Size)) @@ -416,8 +416,8 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) { DnnInstance.PoolingForward(PoolingDescription, 1, InputDescription, x_gpu.Ptr, 0, InputDescription, y_gpu.Ptr); } - PoolingZ.TryFree(); - y_gpu.CopyToHost(x.Entities, InputInfo.Size, out PoolingZ); + _PoolingZ.TryFree(); + y_gpu.CopyToHost(x.Entities, InputInfo.Size, out _PoolingZ); DnnInstance.ActivationForward(x.Entities, x.Length, y_gpu.Ptr, y_gpu.Ptr, ActivationFunctions.Activation); _PoolingA.TryFree(); y_gpu.CopyToHost(x.Entities, InputInfo.Size, out _PoolingA); @@ -516,27 +516,27 @@ public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFun } // Pooling - using (DeviceMemory pooldy_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ)) + using 
(DeviceMemory pooldy_gpu = DnnInstance.Gpu.AllocateDevice(_PoolingZ)) { // 1x1 backward DnnInstance.GetConvolutionBackwardDataAlgorithm(Secondary1x1FilterDescription, Secondary1x1OutputDescription, _1x1ConvolutionDescription, PoolingOutputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out algorithm); DnnInstance.GetConvolutionBackwardDataWorkspaceSize(Secondary1x1FilterDescription, Secondary1x1OutputDescription, _1x1ConvolutionDescription, PoolingOutputDescription, algorithm, out size); using (DeviceMemory dy_gpu = DnnInstance.Gpu.AllocateDevice(delta_1, InputInfo.SliceSize * (OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Secondary3x3ConvolutionKernels + OperationInfo.Secondary5x5ConvolutionKernels), InputInfo.SliceSize * OperationInfo.Secondary1x1AfterPoolingConvolutionKernels), - poolDx_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ.Size)) + poolDx_gpu = DnnInstance.Gpu.AllocateDevice(_PoolingZ.Size)) using (DeviceMemory workspace_gpu = DnnInstance.Gpu.AllocateDevice(size)) { deviceptr p1x1PoolingWeights_gpu = w_gpu.Ptr + _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights + _5x5Weights; DnnInstance.ConvolutionBackwardData(1, Secondary1x1FilterDescription, p1x1PoolingWeights_gpu, Secondary1x1OutputDescription, dy_gpu.Ptr, _1x1ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, PoolingOutputDescription, poolDx_gpu.Ptr); - DnnInstance.ActivationBackward(PoolingZ.Entities, PoolingZ.Length, pooldy_gpu.Ptr, poolDx_gpu.Ptr, ActivationFunctions.ActivationPrime); + DnnInstance.ActivationBackward(_PoolingZ.Entities, _PoolingZ.Length, pooldy_gpu.Ptr, poolDx_gpu.Ptr, ActivationFunctions.ActivationPrime); _PoolingDelta.TryFree(); - pooldy_gpu.CopyToHost(PoolingZ.Entities, PoolingZ.Length, out _PoolingDelta); + pooldy_gpu.CopyToHost(_PoolingZ.Entities, _PoolingZ.Length, out _PoolingDelta); } // Pooling backward using (DeviceMemory x_gpu = DnnInstance.Gpu.AllocateDevice(_Inputs), - poolZ_gpu = DnnInstance.Gpu.AllocateDevice(PoolingZ)) + poolZ_gpu = DnnInstance.Gpu.AllocateDevice(_PoolingZ)) { DnnInstance.PoolingBackward(PoolingDescription, 1, PoolingOutputDescription, poolZ_gpu.Ptr, PoolingOutputDescription, pooldy_gpu.Ptr, InputDescription, x_gpu.Ptr, 1, InputDescription, dx_gpu.Ptr); // TODO: finish pooling backward } @@ -741,7 +741,7 @@ private void Dispose() _5x5Reduce1x1Z.TryFree(); _5x5Reduce1x1A.TryFree(); _5x5Reduce1x1Delta.TryFree(); - PoolingZ.TryFree(); + _PoolingZ.TryFree(); _PoolingA.TryFree(); _PoolingDelta.TryFree(); } diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs index 0d5aced..0c5c4be 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs @@ -14,7 +14,7 @@ namespace NeuralNetworkNET.Cuda.Layers { /// - /// A pooling layer running on cuDNN, with a 2x2 window and a stride of 2 + /// A pooling layer running on cuDNN, with a custom pooling mode /// [JsonObject(MemberSerialization.OptIn)] internal sealed class CuDnnPoolingLayer : PoolingLayer diff --git a/NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs b/NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs index ea2166a..e497dd1 100644 --- a/NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs +++ b/NeuralNetwork.NET/APIs/Structs/PoolingInfo.cs @@ -89,6 +89,19 @@ public static PoolingInfo New( #endregion + /// + /// Calculates the output size after applying a pooling operation to the input tensor + /// + /// The info on the input tensor + [Pure] + 
internal TensorInfo GetForwardOutputTensorInfo(in TensorInfo input) + { + int + h = (input.Height - WindowHeight + 2 * VerticalPadding) / VerticalStride + 1, + w = (input.Width - WindowWidth + 2 * HorizontalPadding) / HorizontalStride + 1; + return new TensorInfo(h, w, input.Channels); + } + #region Equality /// diff --git a/NeuralNetwork.NET/Networks/Implementations/Layers/PoolingLayer.cs b/NeuralNetwork.NET/Networks/Implementations/Layers/PoolingLayer.cs index 6cba6bc..2b6c4ab 100644 --- a/NeuralNetwork.NET/Networks/Implementations/Layers/PoolingLayer.cs +++ b/NeuralNetwork.NET/Networks/Implementations/Layers/PoolingLayer.cs @@ -34,10 +34,7 @@ public ref readonly PoolingInfo OperationInfo } public PoolingLayer(in TensorInfo input, in PoolingInfo operation, ActivationFunctionType activation) - : base(input, new TensorInfo( - input.Height / 2 + (input.Height % 2 == 0 ? 0 : 1), - input.Width / 2 + (input.Width % 2 == 0 ? 0 : 1), - input.Channels), activation) + : base(input, operation.GetForwardOutputTensorInfo(input), activation) => _OperationInfo = operation; /// diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs index d7e4132..a661ad8 100644 --- a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs +++ b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs @@ -363,6 +363,42 @@ public unsafe void InceptionForward5x5Pipeline() } } + [TestMethod] + public unsafe void InceptionForwardPoolPipeline() + { + float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 12 * 12 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 12 * 12 * 3); + CuDnnPoolingLayer pool = new CuDnnPoolingLayer(TensorInfo.CreateForRgbImage(12, 12), PoolingInfo.New(PoolingMode.Max, 3, 3, 1, 1, 1, 1), ActivationFunctionType.ReLU); + CuDnnConvolutionalLayer conv = new CuDnnConvolutionalLayer(pool.OutputInfo, ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian); + CuDnnInceptionLayer inception = new CuDnnInceptionLayer(TensorInfo.CreateForRgbImage(12, 12), InceptionInfo.New(3, 2, 2, 2, 2, PoolingMode.Max, 10)); + fixed (float* pw = inception.Weights) + Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length)); + Buffer.BlockCopy(conv.Weights, 0, inception.Weights, sizeof(float) * (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2 + 3 * 2 + 5 * 5 * 2 * 2), sizeof(float) * conv.Weights.Length); + Buffer.BlockCopy(conv.Biases, 0, inception.Biases, sizeof(float) * (3 + 2 + 2 + 2 + 2), sizeof(float) * conv.Biases.Length); + fixed (float* px = x) + { + Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor); + pool.Forward(xTensor, out Tensor zTemp, out Tensor aTemp); + conv.Forward(aTemp, out Tensor zConv, out Tensor aConv); + inception.Forward(xTensor, out Tensor zInc, out Tensor aInc); + Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped); + float* pzInc = (float*)zInc.Ptr.ToPointer() + 12 * 12 * (3 + 2 + 2), preshaped = (float*)reshaped.Ptr.ToPointer(); + for (int i = 0; i < zConv.Entities; i++) + Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length); + Assert.IsTrue(reshaped.ContentEquals(zConv)); + zTemp.Free(); + aTemp.Free(); + zConv.Free(); + zInc.Free(); + float* paInc = (float*)aInc.Ptr.ToPointer() + 12 * 12 * (3 + 2 + 2); + for (int i = 0; i < aConv.Entities; i++) + Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, 
sizeof(float) * aConv.Length, sizeof(float) * aConv.Length); + Assert.IsTrue(reshaped.ContentEquals(aConv)); + aConv.Free(); + aInc.Free(); + reshaped.Free(); + } + } + #endregion } } From ffa603ebeb960a9b62597f828c09c530a1addacf Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Fri, 29 Dec 2017 11:07:53 +0100 Subject: [PATCH 26/30] Minor layer tweaks --- .../Layers/CuDnnConvolutionalLayer.cs | 2 +- .../Layers/CuDnnFullyConnectedLayer.cs | 2 +- NeuralNetwork.NET/APIs/Structs/Tensor.cs | 15 ++++++++++++++- .../Implementations/Layers/ConvolutionalLayer.cs | 3 ++- .../Implementations/Layers/FullyConnectedLayer.cs | 3 ++- 5 files changed, 20 insertions(+), 5 deletions(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs index a6133cb..db5feb5 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs @@ -149,7 +149,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ { DnnInstance.ConvolutionBackwardFilter(1, InputDescription, a_gpu.Ptr, OutputDescription, delta_gpu.Ptr, ConvolutionDescription, algorithm, workspace_gpu.Ptr, size, 0, FilterDescription, w_gpu.Ptr); } - w_gpu.CopyToHost(Kernels, KernelInfo.Size, out dJdw); + w_gpu.CopyToHost(1, Weights.Length, out dJdw); } // Bias diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs index f1c587c..c21c554 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs @@ -67,7 +67,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ w_gpu = DnnInstance.Gpu.AllocateDevice(a.Length * delta.Length)) { DnnInstance.FullyConnectedBackwardFilter(a.Entities, a.Length, delta.Length, a_gpu.Ptr, delta_gpu.Ptr, w_gpu.Ptr); - w_gpu.CopyToHost(a.Length, delta.Length, out dJdw); + w_gpu.CopyToHost(1, Weights.Length, out dJdw); } delta.CompressVertically(out dJdb); // Doing this on CPU is generally faster than launching the kernels } diff --git a/NeuralNetwork.NET/APIs/Structs/Tensor.cs b/NeuralNetwork.NET/APIs/Structs/Tensor.cs index dee5e5e..cbc8732 100644 --- a/NeuralNetwork.NET/APIs/Structs/Tensor.cs +++ b/NeuralNetwork.NET/APIs/Structs/Tensor.cs @@ -201,6 +201,19 @@ public float[] ToArray() #endregion + /// + /// Creates a new instance by wrapping the current memory area + /// + /// The height of the final matrix + /// The width of the final matrix + /// The resulting instance + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public void Reshape(int n, int chw, out Tensor tensor) + { + if (n * chw != Size) throw new ArgumentException("Invalid input resized shape"); + tensor = new Tensor(Ptr, n, chw); + } + /// /// Frees the memory associated with the current instance /// @@ -258,7 +271,7 @@ unsafe float[] ExtractRow(int i) // Spawn the sequence int max = MaximumItemsCount / obj.Length, - up = max.Min(MaximumRowsCount).Max(1); + up = max.Min(MaximumRowsCount).Max(1).Min(obj.Entities); for (int i = 0; i < up; i++) yield return ExtractRow(i); } diff --git a/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs b/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs index 4f28de0..476ac33 100644 --- a/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs +++ b/NeuralNetwork.NET/Networks/Implementations/Layers/ConvolutionalLayer.cs @@ -107,7 
+107,8 @@ public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, Activa public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJdw, out Tensor dJdb) { a.Rotate180(InputInfo.Channels, out Tensor a180); - a180.ConvoluteGradient(InputInfo, delta, OutputInfo, out dJdw); + a180.ConvoluteGradient(InputInfo, delta, OutputInfo, out Tensor dJdwM); + dJdwM.Reshape(1, Weights.Length, out dJdw); a180.Free(); delta.CompressVertically(OutputInfo.Channels, out dJdb); } diff --git a/NeuralNetwork.NET/Networks/Implementations/Layers/FullyConnectedLayer.cs b/NeuralNetwork.NET/Networks/Implementations/Layers/FullyConnectedLayer.cs index 59bff25..6fb8cc6 100644 --- a/NeuralNetwork.NET/Networks/Implementations/Layers/FullyConnectedLayer.cs +++ b/NeuralNetwork.NET/Networks/Implementations/Layers/FullyConnectedLayer.cs @@ -59,7 +59,8 @@ public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, Activa public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJdw, out Tensor dJdb) { a.Transpose(out Tensor at); - at.Multiply(delta, out dJdw); + at.Multiply(delta, out Tensor dJdwM); + dJdwM.Reshape(1, Weights.Length, out dJdw); at.Free(); delta.CompressVertically(out dJdb); } From 2ef571a88f4f9e47165bae5256e9b7030384008c Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Fri, 29 Dec 2017 15:01:11 +0100 Subject: [PATCH 27/30] ContentEquals method improved with relative threshold --- .../Extensions/MatrixExtensions.cs | 21 +++++++++++-------- .../Extensions/MiscExtensions.cs | 18 +++++++++++++--- 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/NeuralNetwork.NET/Extensions/MatrixExtensions.cs b/NeuralNetwork.NET/Extensions/MatrixExtensions.cs index 069ee72..148d897 100644 --- a/NeuralNetwork.NET/Extensions/MatrixExtensions.cs +++ b/NeuralNetwork.NET/Extensions/MatrixExtensions.cs @@ -706,8 +706,9 @@ public static unsafe float[] BlockCopy([NotNull] this float[] v) /// /// The first to test /// The second to test - /// The comparison threshold + /// The absolute comparison threshold + /// The relative comparison threshold public static unsafe bool ContentEquals(in this Tensor m, in Tensor o, float absolute = 1e-6f, float relative = 1e-6f) { if (m.Ptr == IntPtr.Zero && o.Ptr == IntPtr.Zero) return true; if (m.Ptr == IntPtr.Zero || o.Ptr == IntPtr.Zero) return false; @@ -715,7 +716,7 @@ public static unsafe bool ContentEquals(in this Tensor m, in Tensor o, float del float* pm = m, po = o; int items = m.Size; for (int i = 0; i < items; i++) - if (!pm[i].EqualsWithDelta(po[i], delta)) return false; + if (!pm[i].EqualsWithDelta(po[i], absolute, relative)) return false; return true; } @@ -724,8 +725,9 @@ public static unsafe bool ContentEquals(in this Tensor m, in Tensor o, float del /// /// The first matrix to test /// The second matrix to test - /// The comparison threshold + /// The absolute comparison threshold + /// The relative comparison threshold public static bool ContentEquals([CanBeNull] this float[,] m, [CanBeNull] float[,] o, float absolute = 1e-6f, float relative = 1e-6f) { if (m == null && o == null) return true; if (m == null || o == null) return false; @@ -733,7 +735,7 @@ public static bool ContentEquals([CanBeNull] this float[,] m, [CanBeNull] float[ m.GetLength(1) != o.GetLength(1)) return false; for (int i = 0; i
< m.GetLength(0); i++) for (int j = 0; j < m.GetLength(1); j++) - if (!m[i, j].EqualsWithDelta(o[i, j], delta)) return false; + if (!m[i, j].EqualsWithDelta(o[i, j], absolute, relative)) return false; return true; } @@ -742,14 +744,15 @@ public static bool ContentEquals([CanBeNull] this float[,] m, [CanBeNull] float[ /// /// The first vector to test /// The second vector to test - /// The comparison threshold + /// The absolute comparison threshold + /// The relative comparison threshold public static bool ContentEquals([CanBeNull] this float[] v, [CanBeNull] float[] o, float absolute = 1e-6f, float relative = 1e-6f) { if (v == null && o == null) return true; if (v == null || o == null) return false; if (v.Length != o.Length) return false; for (int i = 0; i < v.Length; i++) - if (!v[i].EqualsWithDelta(o[i], delta)) return false; + if (!v[i].EqualsWithDelta(o[i], absolute, relative)) return false; return true; } diff --git a/NeuralNetwork.NET/Extensions/MiscExtensions.cs b/NeuralNetwork.NET/Extensions/MiscExtensions.cs index 914dfde..d3b2c76 100644 --- a/NeuralNetwork.NET/Extensions/MiscExtensions.cs +++ b/NeuralNetwork.NET/Extensions/MiscExtensions.cs @@ -32,6 +32,15 @@ public static TOut To([NotNull] this TIn item) where TOut : class, TI [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int Max(this int a, int b) => a >= b ? a : b; + /// + /// Returns the maximum value between two numbers + /// + /// The first number + /// The second number + [Pure] + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static float Max(this float a, float b) => a >= b ? a : b; + /// /// Returns the minimum value between two numbers /// @@ -54,16 +63,19 @@ public static TOut To([NotNull] this TIn item) where TOut : class, TI /// /// The first value /// The second value - /// The comparison threshold + /// The absolute comparison threshold + /// The relative comparison threshold [Pure] - public static bool EqualsWithDelta(this float value, float other, float delta = 1e-6f) + public static bool EqualsWithDelta(this float value, float other, float absolute = 1e-6f, float relative = 1e-6f) { if (float.IsNaN(value) ^ float.IsNaN(other)) return false; if (float.IsNaN(value) && float.IsNaN(other)) return true; if (float.IsInfinity(value) ^ float.IsInfinity(other)) return false; if (float.IsPositiveInfinity(value) && float.IsPositiveInfinity(other)) return true; if (float.IsNegativeInfinity(value) && float.IsNegativeInfinity(other)) return true; - return (value - other).Abs() < delta; + float abs = (value - other).Abs(); + if (abs < absolute) return true; + return abs <= absolute.Max(relative * value.Abs().Max(other.Abs())); } /// From 968eba607997e5de297a7514588cb3276c127b1e Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Fri, 29 Dec 2017 15:01:26 +0100 Subject: [PATCH 28/30] Inception layer bug fixes, more tests added --- .../Layers/CuDnnInceptionLayer.cs | 10 +- .../CuDnnInceptionLayerTest.cs | 258 ++++++++++++++++++ .../CuDnnLayersTest.cs | 152 ----------- 3 files changed, 263 insertions(+), 157 deletions(-) create mode 100644 Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnInceptionLayerTest.cs diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs index 4999b46..27d7200 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnInceptionLayer.cs @@ -579,7 +579,7 @@ public
override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ } // 3x3 reduce 1x1 weights - using (DeviceMemory dy3x3Reduce1x1_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1A)) + using (DeviceMemory dy3x3Reduce1x1_gpu = DnnInstance.Gpu.AllocateDevice(_3x3Reduce1x1Delta)) { DnnInstance.GetConvolutionBackwardFilterAlgorithm(InputDescription, _3x3Reduce1x1OutputDescription, _1x1ConvolutionDescription, _3x3Reduce1x1FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm); DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(InputDescription, _3x3Reduce1x1OutputDescription, _1x1ConvolutionDescription, _3x3Reduce1x1FilterDescription, algorithm, out IntPtr size); @@ -599,7 +599,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ } // 5x5 reduce 1x1 weights - using (DeviceMemory dy5x5Reduce1x1_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1A)) + using (DeviceMemory dy5x5Reduce1x1_gpu = DnnInstance.Gpu.AllocateDevice(_5x5Reduce1x1Delta)) { DnnInstance.GetConvolutionBackwardFilterAlgorithm(InputDescription, _5x5Reduce1x1OutputDescription, _1x1ConvolutionDescription, _5x5Reduce1x1FilterDescription, ConvolutionBwdFilterPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdFilterAlgo algorithm); DnnInstance.GetConvolutionBackwardFilterWorkspaceSize(InputDescription, _5x5Reduce1x1OutputDescription, _1x1ConvolutionDescription, _5x5Reduce1x1FilterDescription, algorithm, out IntPtr size); @@ -610,7 +610,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ dw_gpu.CopyTo(dJdw, _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights, _5x5Reduce1x1Weights); } - // 3x3 reduce 1x1 bias + // 5x5 reduce 1x1 bias using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Primary5x5Reduce1x1ConvolutionKernels)) { DnnInstance.ConvolutionBackwardBias(1, _5x5Reduce1x1OutputDescription, dy5x5Reduce1x1_gpu.Ptr, 0, _5x5Reduce1x1BiasDescription, db_gpu.Ptr); @@ -637,7 +637,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Secondary3x3ConvolutionKernels)) { DnnInstance.ConvolutionBackwardBias(1, _3x3OutputDescription, dy3x3_gpu.Ptr, 0, _3x3BiasDescription, db_gpu.Ptr); - db_gpu.CopyTo(dJdb, OperationInfo.Primary1x1ConvolutionKernels, OperationInfo.Secondary3x3ConvolutionKernels); + db_gpu.CopyTo(dJdb, OperationInfo.Primary1x1ConvolutionKernels + OperationInfo.Primary3x3Reduce1x1ConvolutionKernels, OperationInfo.Secondary3x3ConvolutionKernels); } } @@ -655,7 +655,7 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ dw_gpu.CopyTo(dJdw, _1x1Weights + _3x3Reduce1x1Weights + _3x3Weights + _5x5Reduce1x1Weights, _5x5Weights); } - // 3x3 bias + // 5x5 bias using (DeviceMemory db_gpu = DnnInstance.Gpu.AllocateDevice(OperationInfo.Secondary5x5ConvolutionKernels)) { DnnInstance.ConvolutionBackwardBias(1, _5x5OutputDescription, dy5x5_gpu.Ptr, 0, _5x5BiasDescription, db_gpu.Ptr); diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnInceptionLayerTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnInceptionLayerTest.cs new file mode 100644 index 0000000..e12a674 --- /dev/null +++ b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnInceptionLayerTest.cs @@ -0,0 +1,258 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using NeuralNetworkNET.APIs.Enums; +using NeuralNetworkNET.APIs.Structs; +using NeuralNetworkNET.Cuda.Layers; +using 
NeuralNetworkNET.Extensions; +using NeuralNetworkNET.Networks.Activations; +using NeuralNetworkNET.Networks.Implementations.Layers.Helpers; +using System; +using System.Runtime.CompilerServices; + +namespace NeuralNetworkNET.Cuda.Unit +{ + /// + /// Test class for the cuDNN inception layer + /// + [TestClass] + [TestCategory(nameof(CuDnnInceptionLayerTest))] + public class CuDnnInceptionLayerTest + { + [TestMethod] + public unsafe void Inception1x1() + { + float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 32 * 32 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 32 * 32 * 3); + CuDnnConvolutionalLayer conv = new CuDnnConvolutionalLayer(TensorInfo.CreateForRgbImage(32, 32), ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian); + CuDnnInceptionLayer inception = new CuDnnInceptionLayer(conv.InputInfo, InceptionInfo.New(10, 10, 10, 10, 10, PoolingMode.Max, 10)); + fixed (float* pw = inception.Weights) + Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length)); + Buffer.BlockCopy(conv.Weights, 0, inception.Weights, 0, sizeof(float) * conv.Weights.Length); + Buffer.BlockCopy(conv.Biases, 0, inception.Biases, 0, sizeof(float) * conv.Biases.Length); + fixed (float* px = x) + { + // Forward + Z + Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor); + conv.Forward(xTensor, out Tensor zConv, out Tensor aConv); + inception.Forward(xTensor, out Tensor zInc, out Tensor aInc); + Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped); + float* pzInc = (float*)zInc.Ptr.ToPointer(), preshaped = (float*)reshaped.Ptr.ToPointer(); + for (int i = 0; i < zConv.Entities; i++) + Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length); + Assert.IsTrue(reshaped.ContentEquals(zConv)); + + // A + float* paInc = (float*)aInc.Ptr.ToPointer(); + for (int i = 0; i < aConv.Entities; i++) + Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length); + Assert.IsTrue(reshaped.ContentEquals(aConv)); + + // Backpropagate + Tensor.New(xTensor.Entities, xTensor.Length, out Tensor z1); + KerasWeightsProvider.FillWithHeEtAlUniform(z1, 10); + z1.Duplicate(out Tensor z2); + conv.Backpropagate(aConv, z1, ActivationFunctions.ReLUPrime); + inception.Backpropagate(aInc, z2, ActivationFunctions.ReLUPrime); + Assert.IsTrue(z1.ContentEquals(z2)); + + // Gradient + Tensor.New(xTensor.Entities, xTensor.Length, out Tensor a); + KerasWeightsProvider.FillWithHeEtAlUniform(a, 10); + conv.ComputeGradient(a, aConv, out Tensor dJdwConv, out Tensor dJdbConv); + inception.ComputeGradient(a, aInc, out Tensor dJdwInc, out Tensor dJdbInc); + Tensor.New(1, dJdwConv.Length, out Tensor dJdwInc0); + Buffer.MemoryCopy((float*)dJdwInc.Ptr.ToPointer(), (float*)dJdwInc0.Ptr.ToPointer(), sizeof(float) * dJdwInc0.Size, sizeof(float) * dJdwInc0.Size); + Tensor.New(1, dJdbConv.Length, out Tensor dJdbInc0); + Buffer.MemoryCopy((float*)dJdbInc.Ptr.ToPointer(), (float*)dJdbInc0.Ptr.ToPointer(), sizeof(float) * dJdbInc0.Size, sizeof(float) * dJdbInc0.Size); + Assert.IsTrue(dJdwConv.ContentEquals(dJdwInc0, 1e-5f)); + Assert.IsTrue(dJdbConv.ContentEquals(dJdbInc0, 1e-5f)); + + // Cleanup + dJdwConv.Free(); + dJdbConv.Free(); + dJdwInc.Free(); + dJdbInc.Free(); + dJdwInc0.Free(); + dJdbInc0.Free(); + z1.Free(); + z2.Free(); + zConv.Free(); + aConv.Free(); + 
zInc.Free(); + aInc.Free(); + reshaped.Free(); + } + } + + [TestMethod] + public unsafe void Inception3x3Pipeline() + { + float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 32 * 32 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 32 * 32 * 3); + CuDnnConvolutionalLayer + conv1 = new CuDnnConvolutionalLayer(TensorInfo.CreateForRgbImage(32, 32), ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian), + conv2 = new CuDnnConvolutionalLayer(conv1.OutputInfo, ConvolutionInfo.New(ConvolutionMode.CrossCorrelation, 1, 1), (3, 3), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian); + CuDnnInceptionLayer inception = new CuDnnInceptionLayer(TensorInfo.CreateForRgbImage(32, 32), InceptionInfo.New(10, 10, 10, 10, 10, PoolingMode.Max, 10)); + fixed (float* pw = inception.Weights) + Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length)); + Buffer.BlockCopy(conv1.Weights, 0, inception.Weights, sizeof(float) * 3 * 10, sizeof(float) * conv1.Weights.Length); + Buffer.BlockCopy(conv2.Weights, 0, inception.Weights, sizeof(float) * 3 * 10 + sizeof(float) * conv1.Weights.Length, sizeof(float) * conv2.Weights.Length); + Buffer.BlockCopy(conv1.Biases, 0, inception.Biases, sizeof(float) * 10, sizeof(float) * conv1.Biases.Length); + Buffer.BlockCopy(conv2.Biases, 0, inception.Biases, sizeof(float) * 20, sizeof(float) * conv2.Biases.Length); + fixed (float* px = x) + { + // Forward + Z + Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor); + conv1.Forward(xTensor, out Tensor zTemp, out Tensor aTemp); + conv2.Forward(aTemp, out Tensor zConv, out Tensor aConv); + inception.Forward(xTensor, out Tensor zInc, out Tensor aInc); + Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped); + float* pzInc = (float*)zInc.Ptr.ToPointer() + 32 * 32 * 10, preshaped = (float*)reshaped.Ptr.ToPointer(); + for (int i = 0; i < zConv.Entities; i++) + Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length); + Assert.IsTrue(reshaped.ContentEquals(zConv)); + + // A + float* paInc = (float*)aInc.Ptr.ToPointer() + 32 * 32 * 10; + for (int i = 0; i < aConv.Entities; i++) + Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length); + Assert.IsTrue(reshaped.ContentEquals(aConv)); + + // Backpropagation + Tensor.New(xTensor.Entities, xTensor.Length, out Tensor z1); + KerasWeightsProvider.FillWithHeEtAlUniform(z1, 10); + z1.Duplicate(out Tensor z2); + conv2.Backpropagate(aConv, zTemp, conv1.ActivationFunctions.ActivationPrime); + conv1.Backpropagate(zTemp, z1, ActivationFunctions.ReLUPrime); + inception.Backpropagate(aInc, z2, ActivationFunctions.ReLUPrime); + Assert.IsTrue(z1.ContentEquals(z2)); + + // Gradient + Tensor.New(xTensor.Entities, xTensor.Length, out Tensor a); + KerasWeightsProvider.FillWithHeEtAlUniform(a, 10); + conv1.ComputeGradient(a, zTemp, out Tensor dJdwConv1, out Tensor dJdbConv1); + conv2.ComputeGradient(aTemp, aConv, out Tensor dJdwConv2, out Tensor dJdbConv2); + inception.ComputeGradient(a, aInc, out Tensor dJdwInc, out Tensor dJdbInc); + Tensor.Reshape((float*)dJdwInc.Ptr.ToPointer() + 30, 1, dJdwConv1.Size, out Tensor dJdwInc0); + Tensor.Reshape((float*)dJdbInc.Ptr.ToPointer() + 10, 1, dJdbConv1.Size, out Tensor dJdbInc0); + Assert.IsTrue(dJdwConv1.ContentEquals(dJdwInc0, 1e-5f)); + 
Assert.IsTrue(dJdbConv1.ContentEquals(dJdbInc0, 1e-5f)); + Tensor.Reshape((float*)dJdwInc.Ptr.ToPointer() + 30 + dJdwConv1.Size, 1, dJdwConv2.Size, out Tensor dJdwInc1); + Tensor.Reshape((float*)dJdbInc.Ptr.ToPointer() + 20, 1, dJdbConv2.Size, out Tensor dJdbInc1); + Assert.IsTrue(dJdwConv2.ContentEquals(dJdwInc1, 1e-5f)); + Assert.IsTrue(dJdbConv2.ContentEquals(dJdbInc1, 1e-5f)); + + // Cleanup + z1.Free(); + z2.Free(); + zTemp.Free(); + zConv.Free(); + zInc.Free(); + aConv.Free(); + aInc.Free(); + reshaped.Free(); + } + } + + [TestMethod] + public unsafe void Inception5x5Pipeline() + { + float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 12 * 12 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 12 * 12 * 3); + CuDnnConvolutionalLayer + conv1 = new CuDnnConvolutionalLayer(TensorInfo.CreateForRgbImage(12, 12), ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian), + conv2 = new CuDnnConvolutionalLayer(conv1.OutputInfo, ConvolutionInfo.New(ConvolutionMode.CrossCorrelation, 2, 2), (5, 5), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian); + CuDnnInceptionLayer inception = new CuDnnInceptionLayer(TensorInfo.CreateForRgbImage(12, 12), InceptionInfo.New(3, 2, 2, 10, 10, PoolingMode.Max, 2)); + fixed (float* pw = inception.Weights) + Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length)); + Buffer.BlockCopy(conv1.Weights, 0, inception.Weights, sizeof(float) * (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2), sizeof(float) * conv1.Weights.Length); + Buffer.BlockCopy(conv2.Weights, 0, inception.Weights, sizeof(float) * (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2 + conv1.Weights.Length), sizeof(float) * conv2.Weights.Length); + Buffer.BlockCopy(conv1.Biases, 0, inception.Biases, sizeof(float) * (3 + 2 + 2), sizeof(float) * conv1.Biases.Length); + Buffer.BlockCopy(conv2.Biases, 0, inception.Biases, sizeof(float) * (3 + 2 + 2 + 10), sizeof(float) * conv2.Biases.Length); + fixed (float* px = x) + { + // Forward + Z + Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor); + conv1.Forward(xTensor, out Tensor zTemp, out Tensor aTemp); + conv2.Forward(aTemp, out Tensor zConv, out Tensor aConv); + inception.Forward(xTensor, out Tensor zInc, out Tensor aInc); + Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped); + float* pzInc = (float*)zInc.Ptr.ToPointer() + 12 * 12 * (3 + 2), preshaped = (float*)reshaped.Ptr.ToPointer(); + for (int i = 0; i < zConv.Entities; i++) + Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length); + Assert.IsTrue(reshaped.ContentEquals(zConv)); + + // A + float* paInc = (float*)aInc.Ptr.ToPointer() + 12 * 12 * (3 + 2); + for (int i = 0; i < aConv.Entities; i++) + Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length); + Assert.IsTrue(reshaped.ContentEquals(aConv)); + + // Backpropagation + Tensor.New(xTensor.Entities, xTensor.Length, out Tensor z1); + KerasWeightsProvider.FillWithHeEtAlUniform(z1, 10); + z1.Duplicate(out Tensor z2); + conv2.Backpropagate(aConv, zTemp, conv1.ActivationFunctions.ActivationPrime); + conv1.Backpropagate(zTemp, z1, ActivationFunctions.ReLUPrime); + inception.Backpropagate(aInc, z2, ActivationFunctions.ReLUPrime); + Assert.IsTrue(z1.ContentEquals(z2)); + + // Gradient + Tensor.New(xTensor.Entities, xTensor.Length, out Tensor a); +
KerasWeightsProvider.FillWithHeEtAlUniform(a, 10); + conv1.ComputeGradient(a, zTemp, out Tensor dJdwConv1, out Tensor dJdbConv1); + conv2.ComputeGradient(aTemp, aConv, out Tensor dJdwConv2, out Tensor dJdbConv2); + inception.ComputeGradient(a, aInc, out Tensor dJdwInc, out Tensor dJdbInc); + Tensor.Reshape((float*)dJdwInc.Ptr.ToPointer() + (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2), 1, dJdwConv1.Size, out Tensor dJdwInc0); + Tensor.Reshape((float*)dJdbInc.Ptr.ToPointer() + 7, 1, dJdbConv1.Size, out Tensor dJdbInc0); + Assert.IsTrue(dJdwConv1.ContentEquals(dJdwInc0, 1e-5f)); + Assert.IsTrue(dJdbConv1.ContentEquals(dJdbInc0, 1e-5f)); + Tensor.Reshape((float*)dJdwInc.Ptr.ToPointer() + (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2) + dJdwConv1.Size, 1, dJdwConv2.Size, out Tensor dJdwInc1); + Tensor.Reshape((float*)dJdbInc.Ptr.ToPointer() + 17, 1, dJdbConv2.Size, out Tensor dJdbInc1); + Assert.IsTrue(dJdwConv2.ContentEquals(dJdwInc1, 1e-5f)); + Assert.IsTrue(dJdbConv2.ContentEquals(dJdbInc1, 1e-5f)); + + // Cleanup + zTemp.Free(); + aTemp.Free(); + zConv.Free(); + zInc.Free(); + aConv.Free(); + aInc.Free(); + reshaped.Free(); + } + } + + [TestMethod] + public unsafe void InceptionPoolPipeline() + { + float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 12 * 12 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 12 * 12 * 3); + CuDnnPoolingLayer pool = new CuDnnPoolingLayer(TensorInfo.CreateForRgbImage(12, 12), PoolingInfo.New(PoolingMode.Max, 3, 3, 1, 1, 1, 1), ActivationFunctionType.ReLU); + CuDnnConvolutionalLayer conv = new CuDnnConvolutionalLayer(pool.OutputInfo, ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian); + CuDnnInceptionLayer inception = new CuDnnInceptionLayer(TensorInfo.CreateForRgbImage(12, 12), InceptionInfo.New(3, 2, 2, 2, 2, PoolingMode.Max, 10)); + fixed (float* pw = inception.Weights) + Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length)); + Buffer.BlockCopy(conv.Weights, 0, inception.Weights, sizeof(float) * (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2 + 3 * 2 + 5 * 5 * 2 * 2), sizeof(float) * conv.Weights.Length); + Buffer.BlockCopy(conv.Biases, 0, inception.Biases, sizeof(float) * (3 + 2 + 2 + 2 + 2), sizeof(float) * conv.Biases.Length); + fixed (float* px = x) + { + Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor); + pool.Forward(xTensor, out Tensor zTemp, out Tensor aTemp); + conv.Forward(aTemp, out Tensor zConv, out Tensor aConv); + inception.Forward(xTensor, out Tensor zInc, out Tensor aInc); + Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped); + float* pzInc = (float*)zInc.Ptr.ToPointer() + 12 * 12 * (3 + 2 + 2), preshaped = (float*)reshaped.Ptr.ToPointer(); + for (int i = 0; i < zConv.Entities; i++) + Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length); + Assert.IsTrue(reshaped.ContentEquals(zConv)); + zTemp.Free(); + aTemp.Free(); + zConv.Free(); + zInc.Free(); + float* paInc = (float*)aInc.Ptr.ToPointer() + 12 * 12 * (3 + 2 + 2); + for (int i = 0; i < aConv.Entities; i++) + Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length); + Assert.IsTrue(reshaped.ContentEquals(aConv)); + aConv.Free(); + aInc.Free(); + reshaped.Free(); + } + } + } +} diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs index a661ad8..790a697 
100644 --- a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs +++ b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs @@ -8,8 +8,6 @@ using NeuralNetworkNET.Networks.Implementations.Layers; using NeuralNetworkNET.Networks.Implementations.Layers.Abstract; using NeuralNetworkNET.Networks.Implementations.Layers.Helpers; -using System; -using System.Runtime.CompilerServices; namespace NeuralNetworkNET.Cuda.Unit { @@ -250,155 +248,5 @@ public void PoolingBackward() } #endregion - - #region Inception - - [TestMethod] - public unsafe void InceptionForward1x1() - { - float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 32 * 32 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 32 * 32 * 3); - CuDnnConvolutionalLayer conv = new CuDnnConvolutionalLayer(TensorInfo.CreateForRgbImage(32, 32), ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian); - CuDnnInceptionLayer inception = new CuDnnInceptionLayer(conv.InputInfo, InceptionInfo.New(10, 10, 10, 10, 10, PoolingMode.Max, 10)); - Buffer.BlockCopy(conv.Weights, 0, inception.Weights, 0, sizeof(float) * conv.Weights.Length); - Buffer.BlockCopy(conv.Biases, 0, inception.Biases, 0, sizeof(float) * conv.Biases.Length); - fixed (float* px = x) - { - // Forward + Z - Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor); - conv.Forward(xTensor, out Tensor zConv, out Tensor aConv); - inception.Forward(xTensor, out Tensor zInc, out Tensor aInc); - Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped); - float* pzInc = (float*)zInc.Ptr.ToPointer(), preshaped = (float*)reshaped.Ptr.ToPointer(); - for (int i = 0; i < zConv.Entities; i++) - Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length); - Assert.IsTrue(reshaped.ContentEquals(zConv)); - - // A - float* paInc = (float*)aInc.Ptr.ToPointer(); - for (int i = 0; i < aConv.Entities; i++) - Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length); - Assert.IsTrue(reshaped.ContentEquals(aConv)); - zConv.Free(); - aConv.Free(); - zInc.Free(); - aInc.Free(); - reshaped.Free(); - } - } - - [TestMethod] - public unsafe void InceptionForward3x3Pipeline() - { - float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 32 * 32 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 32 * 32 * 3); - CuDnnConvolutionalLayer - conv1 = new CuDnnConvolutionalLayer(TensorInfo.CreateForRgbImage(32, 32), ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian), - conv2 = new CuDnnConvolutionalLayer(conv1.OutputInfo, ConvolutionInfo.New(ConvolutionMode.CrossCorrelation, 1, 1), (3, 3), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian); - CuDnnInceptionLayer inception = new CuDnnInceptionLayer(TensorInfo.CreateForRgbImage(32, 32), InceptionInfo.New(10, 10, 10, 10, 10, PoolingMode.Max, 10)); - fixed (float* pw = inception.Weights) - Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length)); - Buffer.BlockCopy(conv1.Weights, 0, inception.Weights, sizeof(float) * 3 * 10, sizeof(float) * conv1.Weights.Length); - Buffer.BlockCopy(conv2.Weights, 0, inception.Weights, sizeof(float) * 3 * 10 + sizeof(float) * conv1.Weights.Length, sizeof(float) * conv2.Weights.Length); - Buffer.BlockCopy(conv1.Biases, 0, inception.Biases, 
sizeof(float) * 10, sizeof(float) * conv1.Biases.Length); - Buffer.BlockCopy(conv2.Biases, 0, inception.Biases, sizeof(float) * 20, sizeof(float) * conv2.Biases.Length); - fixed (float* px = x) - { - Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor); - conv1.Forward(xTensor, out Tensor zTemp, out Tensor aTemp); - zTemp.Free(); - conv2.Forward(aTemp, out Tensor zConv, out Tensor aConv); - inception.Forward(xTensor, out Tensor zInc, out Tensor aInc); - Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped); - float* pzInc = (float*)zInc.Ptr.ToPointer() + 32 * 32 * 10, preshaped = (float*)reshaped.Ptr.ToPointer(); - for (int i = 0; i < zConv.Entities; i++) - Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length); - Assert.IsTrue(reshaped.ContentEquals(zConv)); - zConv.Free(); - zInc.Free(); - float* paInc = (float*)aInc.Ptr.ToPointer() + 32 * 32 * 10; - for (int i = 0; i < aConv.Entities; i++) - Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length); - Assert.IsTrue(reshaped.ContentEquals(aConv)); - aConv.Free(); - aInc.Free(); - reshaped.Free(); - } - } - - [TestMethod] - public unsafe void InceptionForward5x5Pipeline() - { - float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 12 * 12 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 12 * 12 * 3); - CuDnnConvolutionalLayer - conv1 = new CuDnnConvolutionalLayer(TensorInfo.CreateForRgbImage(12, 12), ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian), - conv2 = new CuDnnConvolutionalLayer(conv1.OutputInfo, ConvolutionInfo.New(ConvolutionMode.CrossCorrelation, 2, 2), (5, 5), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian); - CuDnnInceptionLayer inception = new CuDnnInceptionLayer(TensorInfo.CreateForRgbImage(12, 12), InceptionInfo.New(3, 2, 2, 10, 10, PoolingMode.Max, 2)); - fixed (float* pw = inception.Weights) - Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length)); - Buffer.BlockCopy(conv1.Weights, 0, inception.Weights, sizeof(float) * (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2), sizeof(float) * conv1.Weights.Length); - Buffer.BlockCopy(conv2.Weights, 0, inception.Weights, sizeof(float) * (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2 + conv1.Weights.Length), sizeof(float) * conv2.Weights.Length); - Buffer.BlockCopy(conv1.Biases, 0, inception.Biases, sizeof(float) * (3 + 2 + 2), sizeof(float) * conv1.Biases.Length); - Buffer.BlockCopy(conv2.Biases, 0, inception.Biases, sizeof(float) * (3 + 2 + 2 + 10), sizeof(float) * conv2.Biases.Length); - fixed (float* px = x) - { - Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor); - conv1.Forward(xTensor, out Tensor zTemp, out Tensor aTemp); - zTemp.Free(); - conv2.Forward(aTemp, out Tensor zConv, out Tensor aConv); - inception.Forward(xTensor, out Tensor zInc, out Tensor aInc); - Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped); - float* pzInc = (float*)zInc.Ptr.ToPointer() + 12 * 12 * (3 + 2), preshaped = (float*)reshaped.Ptr.ToPointer(); - for (int i = 0; i < zConv.Entities; i++) - Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length); - Assert.IsTrue(reshaped.ContentEquals(zConv)); - aTemp.Free(); - zConv.Free(); - zInc.Free(); - float* paInc = (float*)aInc.Ptr.ToPointer() + 12 * 12 * 
(3 + 2); - for (int i = 0; i < aConv.Entities; i++) - Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length); - Assert.IsTrue(reshaped.ContentEquals(aConv)); - aConv.Free(); - aInc.Free(); - reshaped.Free(); - } - } - - [TestMethod] - public unsafe void InceptionForwardPoolPipeline() - { - float[,] x = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(10), 12 * 12 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(10, 12 * 12 * 3); - CuDnnPoolingLayer pool = new CuDnnPoolingLayer(TensorInfo.CreateForRgbImage(12, 12), PoolingInfo.New(PoolingMode.Max, 3, 3, 1, 1, 1, 1), ActivationFunctionType.ReLU); - CuDnnConvolutionalLayer conv = new CuDnnConvolutionalLayer(pool.OutputInfo, ConvolutionInfo.New(ConvolutionMode.CrossCorrelation), (1, 1), 10, ActivationFunctionType.ReLU, BiasInitializationMode.Gaussian); - CuDnnInceptionLayer inception = new CuDnnInceptionLayer(TensorInfo.CreateForRgbImage(12, 12), InceptionInfo.New(3, 2, 2, 2, 2, PoolingMode.Max, 10)); - fixed (float* pw = inception.Weights) - Unsafe.InitBlock(pw, 0, (uint)(sizeof(float) * inception.Weights.Length)); - Buffer.BlockCopy(conv.Weights, 0, inception.Weights, sizeof(float) * (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2 + 3 * 2 + 5 * 5 * 2 * 2), sizeof(float) * conv.Weights.Length); - Buffer.BlockCopy(conv.Biases, 0, inception.Biases, sizeof(float) * (3 + 2 + 2 + 2 + 2), sizeof(float) * conv.Biases.Length); - fixed (float* px = x) - { - Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor); - pool.Forward(xTensor, out Tensor zTemp, out Tensor aTemp); - conv.Forward(aTemp, out Tensor zConv, out Tensor aConv); - inception.Forward(xTensor, out Tensor zInc, out Tensor aInc); - Tensor.New(zConv.Entities, zConv.Length, out Tensor reshaped); - float* pzInc = (float*)zInc.Ptr.ToPointer() + 12 * 12 * (3 + 2 + 2), preshaped = (float*)reshaped.Ptr.ToPointer(); - for (int i = 0; i < zConv.Entities; i++) - Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length); - Assert.IsTrue(reshaped.ContentEquals(zConv)); - zTemp.Free(); - aTemp.Free(); - zConv.Free(); - zInc.Free(); - float* paInc = (float*)aInc.Ptr.ToPointer() + 12 * 12 * (3 + 2 + 2); - for (int i = 0; i < aConv.Entities; i++) - Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length); - Assert.IsTrue(reshaped.ContentEquals(aConv)); - aConv.Free(); - aInc.Free(); - reshaped.Free(); - } - } - - #endregion } } From fb738d119d742ef701fc7ecb82331e328f3531b0 Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Fri, 29 Dec 2017 20:47:42 +0100 Subject: [PATCH 29/30] CuDnnPoolingLayer backpropagation switched to cuDNN --- .../Layers/CuDnnPoolingLayer.cs | 58 ++++++++++++++++++- .../CuDnnLayersTest.cs | 36 ++++++++++-- 2 files changed, 86 insertions(+), 8 deletions(-) diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs index 0c5c4be..3cca675 100644 --- a/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs +++ b/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs @@ -1,4 +1,5 @@ -using Alea; +using System; +using Alea; using Alea.cuDNN; using NeuralNetworkNET.Extensions; using NeuralNetworkNET.Cuda.Extensions; @@ -17,7 +18,7 @@ namespace NeuralNetworkNET.Cuda.Layers /// A pooling layer running on cuDNN, with a custom pooling mode /// [JsonObject(MemberSerialization.OptIn)] - internal sealed 
class CuDnnPoolingLayer : PoolingLayer + internal sealed class CuDnnPoolingLayer : PoolingLayer, IDisposable { #region cuDNN fields @@ -41,6 +42,16 @@ internal sealed class CuDnnPoolingLayer : PoolingLayer #endregion + #region Fields + + // A copy of the layer inputs + private Tensor _X; + + // A copy of the layer output activity + private Tensor _Z; + + #endregion + public CuDnnPoolingLayer(in TensorInfo input, in PoolingInfo operation, ActivationFunctionType activation) : base(input, operation, activation) { PoolingDescription.Set2D((PoolingMode)operation.Mode, NanPropagation.PROPAGATE_NAN, operation.WindowHeight, operation.WindowWidth, operation.VerticalPadding, operation.HorizontalPadding, operation.VerticalStride, operation.HorizontalStride); @@ -49,6 +60,8 @@ public CuDnnPoolingLayer(in TensorInfo input, in PoolingInfo operation, Activati /// public override void Forward(in Tensor x, out Tensor z, out Tensor a) { + _X.TryFree(); + x.Duplicate(out _X); using (DeviceMemory x_gpu = DnnInstance.Gpu.AllocateDevice(x), z_gpu = DnnInstance.Gpu.AllocateDevice(x.Entities * OutputInfo.Size)) @@ -58,6 +71,8 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) OutputDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, x.Entities, OutputInfo.Channels, OutputInfo.Height, OutputInfo.Width); DnnInstance.PoolingForward(PoolingDescription, 1, InputDescription, x_gpu.Ptr, 0, OutputDescription, z_gpu.Ptr); z_gpu.CopyToHost(x.Entities, OutputInfo.Size, out z); + _Z.TryFree(); + z.Duplicate(out _Z); // Activation DnnInstance.ActivationForward(z.Entities, z.Length, z_gpu.Ptr, z_gpu.Ptr, ActivationFunctions.Activation); @@ -66,7 +81,24 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) } /// - public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime) => z.UpscalePool2x2(delta_1, InputInfo.Channels); + public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime) + { + using (DeviceMemory dx_gpu = DnnInstance.Gpu.AllocateDevice(z.Size)) + { + using (DeviceMemory + x_gpu = DnnInstance.Gpu.AllocateDevice(_X), + y_gpu = DnnInstance.Gpu.AllocateDevice(_Z), + dy_gpu = DnnInstance.Gpu.AllocateDevice(delta_1)) + { + DnnInstance.PoolingBackward(PoolingDescription, 1, OutputDescription, y_gpu.Ptr, OutputDescription, dy_gpu.Ptr, InputDescription, x_gpu.Ptr, 0, InputDescription, dx_gpu.Ptr); + } + using (DeviceMemory z_gpu = DnnInstance.Gpu.AllocateDevice(z)) + { + DnnInstance.ActivationBackward(z.Entities, z.Length, z_gpu.Ptr, dx_gpu.Ptr, activationPrime); + z_gpu.CopyTo(z); + } + } + } /// public override INetworkLayer Clone() => new CuDnnPoolingLayer(InputInfo, OperationInfo, ActivationFunctionType); @@ -84,5 +116,25 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a) if (!stream.TryRead(out PoolingInfo operation)) return null; return new CuDnnPoolingLayer(input, operation, activation); } + + #region IDisposable + + ~CuDnnPoolingLayer() => Dispose(); + + /// + void IDisposable.Dispose() + { + GC.SuppressFinalize(this); + Dispose(); + } + + // Private Dispose method + private void Dispose() + { + _X.TryFree(); + _Z.TryFree(); + } + + #endregion } } \ No newline at end of file diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs index 790a697..87e53d4 100644 --- a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs +++ b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnLayersTest.cs @@ -236,15 +236,41 @@ 
public void PoolingForward() } [TestMethod] - public void PoolingBackward() + public unsafe void PoolingBackward() { - float[,] - delta_1 = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 29 * 29 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 29 * 29 * 3), - z = WeightsProvider.NewFullyConnectedWeights(TensorInfo.CreateLinear(400), 58 * 58 * 3, WeightsInitializationMode.GlorotNormal).AsMatrix(400, 58 * 58 * 3); + // Setup + Tensor.New(400, 58 * 58 * 3, out Tensor x); + KerasWeightsProvider.FillWithHeEtAlUniform(x, 10); PoolingLayer cpu = new PoolingLayer(new TensorInfo(58, 58, 3), PoolingInfo.Default, ActivationFunctionType.LeakyReLU), gpu = new CuDnnPoolingLayer(cpu.InputInfo, PoolingInfo.Default, ActivationFunctionType.LeakyReLU); - TestBackward(cpu, gpu, delta_1, z); + gpu.Forward(x, out Tensor z, out Tensor a); + a.Free(); + x.Duplicate(out Tensor x2); + Tensor.New(z.Entities, z.Length, out Tensor delta); + KerasWeightsProvider.FillWithHeEtAlUniform(delta, 10); + + // Backward + cpu.Backpropagate(delta, x, ActivationFunctions.LeakyReLUPrime); + gpu.Backpropagate(delta, x2, ActivationFunctions.LeakyReLUPrime); + bool valid = true; + float* px = (float*)x.Ptr.ToPointer(), px2 = (float*)x2.Ptr.ToPointer(); + int count = 0; + for (int i = 0; i < x.Size; i++) + { + if (px[i].EqualsWithDelta(px2[i], 1e-5f)) continue; + if (px[i].EqualsWithDelta(px2[i] * 100f, 1e-5f)) count++; // The cuDNN pooling backward method occasionally returns a value scaled by 0.01 for unclear reasons (fewer than 2% of the entries) + else + { + valid = false; + break; + } + } + Assert.IsTrue(valid && count * 100f / x.Size < 2); + x.Free(); + x2.Free(); + z.Free(); + delta.Free(); } #endregion From b84d99172534dcf0c57c6c41b4c094ee6b62fdc5 Mon Sep 17 00:00:00 2001 From: Sergio0694 Date: Fri, 29 Dec 2017 21:57:57 +0100 Subject: [PATCH 30/30] Inception layer pool gradient test added --- .../CuDnnInceptionLayerTest.cs | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnInceptionLayerTest.cs b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnInceptionLayerTest.cs index e12a674..66ebcd5 100644 --- a/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnInceptionLayerTest.cs +++ b/Unit/NeuralNetwork.NET.Cuda.Unit/CuDnnInceptionLayerTest.cs @@ -232,6 +232,7 @@ public unsafe void InceptionPoolPipeline() Buffer.BlockCopy(conv.Biases, 0, inception.Biases, sizeof(float) * (3 + 2 + 2 + 2 + 2), sizeof(float) * conv.Biases.Length); fixed (float* px = x) { + // Forward + Z Tensor.Reshape(px, x.GetLength(0), x.GetLength(1), out Tensor xTensor); pool.Forward(xTensor, out Tensor zTemp, out Tensor aTemp); conv.Forward(aTemp, out Tensor zConv, out Tensor aConv); @@ -241,14 +242,35 @@ public unsafe void InceptionPoolPipeline() for (int i = 0; i < zConv.Entities; i++) Buffer.MemoryCopy(pzInc + i * zInc.Length, preshaped + i * zConv.Length, sizeof(float) * zConv.Length, sizeof(float) * zConv.Length); Assert.IsTrue(reshaped.ContentEquals(zConv)); - zTemp.Free(); - aTemp.Free(); - zConv.Free(); - zInc.Free(); + + // A float* paInc = (float*)aInc.Ptr.ToPointer() + 12 * 12 * (3 + 2 + 2); for (int i = 0; i < aConv.Entities; i++) Buffer.MemoryCopy(paInc + i * aInc.Length, preshaped + i * aConv.Length, sizeof(float) * aConv.Length, sizeof(float) * aConv.Length); Assert.IsTrue(reshaped.ContentEquals(aConv)); + + // Backpropagation + Tensor.New(xTensor.Entities, xTensor.Length, out Tensor z1); + KerasWeightsProvider.FillWithHeEtAlUniform(z1, 10); + z1.Duplicate(out Tensor z2);
+ conv.Backpropagate(aConv, zTemp, pool.ActivationFunctions.ActivationPrime); + pool.Backpropagate(zTemp, z1, ActivationFunctions.ReLUPrime); + inception.Backpropagate(aInc, z2, ActivationFunctions.ReLUPrime); + Assert.IsTrue(z1.ContentEquals(z2)); + + // Gradient + conv.ComputeGradient(aTemp, aConv, out Tensor dJdwConv, out Tensor dJdbConv); + inception.ComputeGradient(xTensor, aInc, out Tensor dJdwInc, out Tensor dJdbInc); + Tensor.Reshape((float*)dJdwInc.Ptr.ToPointer() + (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2 + 3 * 2 + 5 * 5 * 2 * 2), 1, dJdwConv.Size, out Tensor dJdwInc0); + Tensor.Reshape((float*)dJdbInc.Ptr.ToPointer() + 11, 1, dJdbConv.Size, out Tensor dJdbInc0); + Assert.IsTrue(dJdwConv.ContentEquals(dJdwInc0, 1e-5f)); + Assert.IsTrue(dJdbConv.ContentEquals(dJdbInc0, 1e-5f)); + + // Cleanup + zTemp.Free(); + aTemp.Free(); + zConv.Free(); + zInc.Free(); aConv.Free(); aInc.Free(); reshaped.Free();
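A note on the Buffer.BlockCopy offsets used throughout the inception tests above: the arithmetic is consistent with CuDnnInceptionLayer storing its parameters contiguously in branch order (primary 1x1, 1x1 reduction feeding the 3x3, 3x3, 1x1 reduction feeding the 5x5, 5x5, then the chained 1x1 after pooling), with InceptionInfo.New taking the kernel counts in that same order. The helper below is a hypothetical sketch, not part of the library; it reproduces those float offsets from the kernel counts, and the branch layout it assumes is inferred from the tests rather than from the layer's source.

    // Hypothetical helper: weight/bias float offsets of each inception branch,
    // assuming parameters are stored contiguously in branch order as the
    // Buffer.BlockCopy calls in the tests above imply.
    public static class InceptionOffsets
    {
        // branch: 0 = primary 1x1, 1 = 3x3 reduction 1x1, 2 = 3x3,
        //         3 = 5x5 reduction 1x1, 4 = 5x5, 5 = chained 1x1 after pooling
        public static (int Weights, int Biases) Get(
            int inputChannels, int primary1x1, int reduce3x3, int secondary3x3,
            int reduce5x5, int secondary5x5, int branch)
        {
            int[] weights =
            {
                inputChannels * primary1x1,       // primary 1x1 kernels
                inputChannels * reduce3x3,        // 1x1 reduction feeding the 3x3
                3 * 3 * reduce3x3 * secondary3x3, // 3x3 kernels
                inputChannels * reduce5x5,        // 1x1 reduction feeding the 5x5
                5 * 5 * reduce5x5 * secondary5x5  // 5x5 kernels
            };
            int[] biases = { primary1x1, reduce3x3, secondary3x3, reduce5x5, secondary5x5 };
            int w = 0, b = 0;
            for (int i = 0; i < branch; i++)
            {
                w += weights[i];
                b += biases[i];
            }
            return (w, b);
        }
    }

For the Inception5x5Pipeline setup (3 input channels, InceptionInfo.New(3, 2, 2, 10, 10, PoolingMode.Max, 2)), Get(3, 3, 2, 2, 10, 10, branch: 3) returns (3 * 3 + 3 * 2 + 3 * 3 * 2 * 2, 3 + 2 + 2) = (51, 7), matching the copy targets for conv1; with the InceptionPoolPipeline counts (3, 2, 2, 2, 2), branch 5 gives (157, 11), matching both the weight copy and the + 11 bias offset in the gradient check.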
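On the cuDNN-backed CuDnnPoolingLayer.Backpropagate introduced in patch 29: the pooling backward primitive needs the forward input and the pooled output in addition to the incoming delta, which is why Forward now caches copies of both in _X and _Z. Below is an annotated copy of the PoolingBackward invocation from the diff; the argument roles follow the cudnnPoolingBackward convention (pool, alpha, yDesc, y, dyDesc, dy, xDesc, x, beta, dxDesc, dx), with alpha scaling the computed gradient and beta scaling whatever dx already holds.

    DnnInstance.PoolingBackward(
        PoolingDescription,               // pooling window, stride and padding
        1,                                // alpha: scale for the computed gradient
        OutputDescription, y_gpu.Ptr,     // y: pooled activity cached in _Z
        OutputDescription, dy_gpu.Ptr,    // dy: incoming error delta (delta_1)
        InputDescription, x_gpu.Ptr,      // x: forward input cached in _X
        0,                                // beta: 0 overwrites dx instead of accumulating
        InputDescription, dx_gpu.Ptr);    // dx: gradient with respect to the input

Caching the two tensors on the managed side trades memory for correctness; the new IDisposable implementation releases both copies when the layer is disposed or finalized.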
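The elementwise comparison loop in the reworked PoolingBackward test could also be lifted into a small reusable helper. The sketch below is a hypothetical refactoring under the same assumptions as the test (the EqualsWithDelta float extension and the Tensor layout used above): entries must match within 1e-5, and the occasional 0.01-scaled values returned by the cuDNN backward pass are tolerated as long as they stay under the 2% threshold the test asserts.

    // Hypothetical refactoring of the PoolingBackward comparison loop:
    // a and b are equal when entries match within 1e-5, allowing a small
    // fraction (< 2%) of entries that cuDNN returns scaled by 0.01.
    private static unsafe bool EqualsWithScaleTolerance(in Tensor a, in Tensor b)
    {
        float* pa = (float*)a.Ptr.ToPointer(), pb = (float*)b.Ptr.ToPointer();
        int scaled = 0;
        for (int i = 0; i < a.Size; i++)
        {
            if (pa[i].EqualsWithDelta(pb[i], 1e-5f)) continue;        // regular match
            if (pa[i].EqualsWithDelta(pb[i] * 100f, 1e-5f)) scaled++; // 0.01-scaled entry
            else return false;                                        // genuine mismatch
        }
        return scaled * 100f / a.Size < 2;                            // under the 2% cap
    }

With this in place, the final check in the test reduces to Assert.IsTrue(EqualsWithScaleTolerance(x, x2)).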