Merge pull request #45 from Sergio0694/dev

Serialization and other changes
Sergio0694 · Dec 23, 2017 · 054fa90 · 054fa90
2 parents a6c808d + a7e494e
commit 054fa90
Show file tree

Hide file tree

Showing 54 changed files with 1,489 additions and 1,205 deletions.
diff --git a/NeuralNetwork.NET.Cuda/APIS/CuDnnNetworkLayers.cs b/NeuralNetwork.NET.Cuda/APIS/CuDnnNetworkLayers.cs
@@ -45,26 +45,27 @@ public static INetworkLayer Softmax(
         /// Creates a convolutional layer with the desired number of kernels
         /// </summary>
         /// <param name="input">The input volume to process</param>
+        /// <param name="info">The info on the convolution operation to perform</param>
         /// <param name="kernel">The volume information of the kernels used in the layer</param>
         /// <param name="kernels">The number of convolution kernels to apply to the input volume</param>
         /// <param name="activation">The desired activation function to use in the network layer</param>
-        /// <param name="mode">The desired convolution mode to use</param>
         /// <param name="biasMode">Indicates the desired initialization mode to use for the layer bias values</param>
         [PublicAPI]
         [Pure, NotNull]
         public static INetworkLayer Convolutional(
-            TensorInfo input, (int X, int Y) kernel, int kernels, ActivationFunctionType activation, 
-            ConvolutionMode mode = ConvolutionMode.Convolution, 
+            in TensorInfo input, 
+            in ConvolutionInfo info, (int X, int Y) kernel, int kernels, ActivationFunctionType activation,
             BiasInitializationMode biasMode = BiasInitializationMode.Zero) 
-            => new CuDnnConvolutionalLayer(input, kernel, kernels, activation, mode, biasMode);
+            => new CuDnnConvolutionalLayer(input, info, kernel, kernels, activation, biasMode);
 
         /// <summary>
         /// Creates a pooling layer that performs the pooling operation described by the given info
         /// </summary>
         /// <param name="input">The input volume to pool</param>
+        /// <param name="info">The info on the pooling operation to perform</param>
         /// <param name="activation">The desired activation function to use in the network layer</param>
         [PublicAPI]
         [Pure, NotNull]
-        public static INetworkLayer Pooling(TensorInfo input, ActivationFunctionType activation) => new CuDnnPoolingLayer(input, activation);
+        public static INetworkLayer Pooling(in TensorInfo input, in PoolingInfo info, ActivationFunctionType activation) => new CuDnnPoolingLayer(input, info, activation);
     }
 }
diff --git a/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayersDeserializer.cs b/NeuralNetwork.NET.Cuda/APIs/CuDnnNetworkLayersDeserializer.cs
@@ -0,0 +1,38 @@
+using JetBrains.Annotations;
+using NeuralNetworkNET.APIs.Delegates;
+using NeuralNetworkNET.APIs.Enums;
+using NeuralNetworkNET.APIs.Interfaces;
+using NeuralNetworkNET.Cuda.Layers;
+using System.IO;
+
+namespace NeuralNetworkNET.APIs
+{
+    /// <summary>
+    /// A static class that exposes a single deserialization method that can be used to load a saved network using the cuDNN layers
+    /// </summary>
+    public static class CuDnnNetworkLayersDeserializer
+    {
+        /// <summary>
+        /// Gets the <see cref="LayerDeserializer"/> instance to load cuDNN network layers
+        /// </summary>
+        [PublicAPI]
+        public static LayerDeserializer Deserializer { get; } = Deserialize;
+
+        /// <summary>
+        /// Deserializes a layer of the given type from the input <see cref="Stream"/>, dispatching to the matching cuDNN layer type
+        /// </summary>
+        /// <param name="stream">The <see cref="Stream"/> to use to load the layer data</param>
+        /// <param name="type">The type of network layer to return</param>
+        /// <returns>
+        /// The layer returned by the selected cuDNN layer deserializer (which can itself be <see langword="null"/>),
+        /// or <see langword="null"/> if <paramref name="type"/> is not one of the layer types handled here
+        /// </returns>
+        [CanBeNull]
+        private static INetworkLayer Deserialize([NotNull] Stream stream, LayerType type)
+        {
+            switch (type)
+            {
+                case LayerType.FullyConnected: return CuDnnFullyConnectedLayer.Deserialize(stream);
+                case LayerType.Convolutional: return CuDnnConvolutionalLayer.Deserialize(stream);
+                case LayerType.Pooling: return CuDnnPoolingLayer.Deserialize(stream);
+                case LayerType.Softmax: return CuDnnSoftmaxLayer.Deserialize(stream);
+
+                // Unhandled layer type: return without touching the stream
+                default: return null;
+            }
+        }
+    }
+}
diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnConvolutionalLayer.cs
@@ -9,6 +9,8 @@
 using NeuralNetworkNET.Networks.Implementations.Layers;
 using NeuralNetworkNET.APIs.Structs;
 using NeuralNetworkNET.APIs.Enums;
+using NeuralNetworkNET.Extensions;
+using NeuralNetworkNET.APIs.Interfaces;
 
 namespace NeuralNetworkNET.Cuda.Layers
 {
@@ -48,35 +50,35 @@ internal sealed class CuDnnConvolutionalLayer : ConvolutionalLayer
         /// <summary>
         /// Sets the cuDNN fields that will be used during future forward/backwards operations
         /// </summary>
-        /// <param name="mode">The desired convolution mode</param>
-        private void SetupCuDnnInfo(APIs.Enums.ConvolutionMode mode)
+        private void SetupCuDnnInfo()
         {
-            ConvolutionDescription.Set2D(0, 0, 1, 1, 1, 1, (Alea.cuDNN.ConvolutionMode)mode);
+            ConvolutionDescription.Set2D(OperationInfo.VerticalPadding, OperationInfo.HorizontalPadding, OperationInfo.VerticalStride, OperationInfo.HorizontalStride, 1, 1, (Alea.cuDNN.ConvolutionMode)OperationInfo.Mode);
             FilterDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, OutputInfo.Channels, KernelInfo.Channels, KernelInfo.Height, KernelInfo.Width);
             BiasDescription.Set4D(DataType.FLOAT, TensorFormat.CUDNN_TENSOR_NCHW, 1, OutputInfo.Channels, 1, 1);
         }
 
         #endregion
 
         public CuDnnConvolutionalLayer(
-            TensorInfo input, (int X, int Y) kernelSize, int kernels,
-            ActivationFunctionType activation, APIs.Enums.ConvolutionMode mode, BiasInitializationMode biasMode)
-            : base(input, kernelSize, kernels, activation, biasMode)
-            => SetupCuDnnInfo(mode);
+            in TensorInfo input, in ConvolutionInfo operation, (int X, int Y) kernelSize, int kernels,
+            ActivationFunctionType activation, BiasInitializationMode biasMode)
+            : base(input, operation, kernelSize, kernels, activation, biasMode)
+            => SetupCuDnnInfo();
 
         public CuDnnConvolutionalLayer(
-            TensorInfo input, TensorInfo kernels, TensorInfo output,
-            [NotNull] float[,] weights, [NotNull] float[] biases,
-            ActivationFunctionType activation, APIs.Enums.ConvolutionMode mode)
-            : base(input, kernels, output, weights, biases, activation)
-            => SetupCuDnnInfo(mode);
+            in TensorInfo input, in ConvolutionInfo operation, TensorInfo kernels, TensorInfo output,
+            [NotNull] float[] weights, [NotNull] float[] biases, ActivationFunctionType activation)
+            : base(input, operation, kernels, output, weights, biases, activation)
+            => SetupCuDnnInfo();
+
+        #region Implementation
 
         /// <inheritdoc/>
         public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a)
         {
             fixed (float* pw = Weights)
             {
-                Tensor.Fix(pw, OutputInfo.Channels, KernelInfo.Size, out Tensor wTensor);
+                Tensor.Reshape(pw, OutputInfo.Channels, KernelInfo.Size, out Tensor wTensor);
                 using (DeviceMemory<float> z_gpu = DnnInstance.Gpu.AllocateDevice<float>(x.Entities * OutputInfo.Size))
                 {
                     // Tensors info setup
@@ -117,7 +119,7 @@ public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, Activa
         {
             fixed (float* pw = Weights)
             {
-                Tensor.Fix(pw, OutputInfo.Channels, KernelInfo.Size, out Tensor wTensor);
+                Tensor.Reshape(pw, OutputInfo.Channels, KernelInfo.Size, out Tensor wTensor);
                 DnnInstance.GetConvolutionBackwardDataAlgorithm(FilterDescription, OutputDescription, ConvolutionDescription, InputDescription, ConvolutionBwdDataPreference.PREFER_FASTEST, IntPtr.Zero, out ConvolutionBwdDataAlgo algorithm);
                 DnnInstance.GetConvolutionBackwardDataWorkspaceSize(FilterDescription, OutputDescription, ConvolutionDescription, InputDescription, algorithm, out IntPtr size);
                 using (DeviceMemory<float> delta_gpu = DnnInstance.Gpu.AllocateDevice<float>(z.Size))
@@ -168,5 +170,33 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ
                 }
             }
         }
+
+        #endregion
+
+        #region Misc
+
+        /// <inheritdoc/>
+        public override INetworkLayer Clone()
+        {
+            // The new layer receives the results of BlockCopy rather than this instance's own Weights/Biases arrays
+            var weightsCopy = Weights.BlockCopy();
+            var biasesCopy = Biases.BlockCopy();
+            return new CuDnnConvolutionalLayer(InputInfo, OperationInfo, KernelInfo, OutputInfo, weightsCopy, biasesCopy, ActivationFunctionType);
+        }
+
+        /// <summary>
+        /// Attempts to load a <see cref="CuDnnConvolutionalLayer"/> from the data stored in the given <see cref="System.IO.Stream"/>
+        /// </summary>
+        /// <param name="stream">The source <see cref="System.IO.Stream"/> that contains the serialized layer data</param>
+        [MustUseReturnValue, CanBeNull]
+        public new static INetworkLayer Deserialize([NotNull] System.IO.Stream stream)
+        {
+            // Read order: input info, output info, activation type, weights length (the chain stops at the first failed read)
+            if (!stream.TryRead(out TensorInfo input) ||
+                !stream.TryRead(out TensorInfo output) ||
+                !stream.TryRead(out ActivationFunctionType activation) ||
+                !stream.TryRead(out int wLength))
+            {
+                return null;
+            }
+            float[] weights = stream.ReadUnshuffled(wLength);
+
+            // Biases length, then the biases themselves
+            if (!stream.TryRead(out int bLength))
+            {
+                return null;
+            }
+            float[] biases = stream.ReadUnshuffled(bLength);
+
+            // Convolution info and kernels info come last
+            if (!stream.TryRead(out ConvolutionInfo operation) ||
+                !stream.TryRead(out TensorInfo kernels))
+            {
+                return null;
+            }
+            return new CuDnnConvolutionalLayer(input, operation, kernels, output, weights, biases, activation);
+        }
+
+        #endregion
     }
 }
diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnFullyConnectedLayer.cs
@@ -9,6 +9,7 @@
 using NeuralNetworkNET.Networks.Implementations.Layers;
 using NeuralNetworkNET.APIs.Structs;
 using NeuralNetworkNET.APIs.Enums;
+using NeuralNetworkNET.APIs.Interfaces;
 
 namespace NeuralNetworkNET.Cuda.Layers
 {
@@ -20,18 +21,20 @@ internal class CuDnnFullyConnectedLayer : FullyConnectedLayer
         [NotNull]
         private readonly Dnn DnnInstance = DnnService.Instance;
 
-        public CuDnnFullyConnectedLayer(in TensorInfo input, int outputs, ActivationFunctionType activation, WeightsInitializationMode weightsMode, BiasInitializationMode biasMode) 
-            : base(input, outputs, activation, weightsMode, biasMode) { }
+        public CuDnnFullyConnectedLayer(in TensorInfo input, int neurons, ActivationFunctionType activation, WeightsInitializationMode weightsMode, BiasInitializationMode biasMode) 
+            : base(input, neurons, activation, weightsMode, biasMode) { }
 
-        public CuDnnFullyConnectedLayer([NotNull] float[,] weights, [NotNull] float[] biases, ActivationFunctionType activation) 
-            : base(weights, biases, activation) { }
+        public CuDnnFullyConnectedLayer(in TensorInfo input, int neurons, [NotNull] float[] weights, [NotNull] float[] biases, ActivationFunctionType activation) 
+            : base(input, neurons, weights, biases, activation) { }
+
+        #region Implementation
 
         /// <inheritdoc/>
         public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a)
         {
             fixed (float* pw = Weights)
             {
-                Tensor.Fix(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
+                Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
                 using (DeviceMemory<float>
                     x_gpu = DnnInstance.Gpu.AllocateDevice(x),
                     w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor),
@@ -51,7 +54,7 @@ public override unsafe void Backpropagate(in Tensor delta_1, in Tensor z, Activa
         {
             fixed (float* pw = Weights)
             {
-                Tensor.Fix(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
+                Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
                 using (DeviceMemory<float>
                     delta_1_gpu = DnnInstance.Gpu.AllocateDevice(delta_1),
                     w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor),
@@ -76,5 +79,24 @@ public override void ComputeGradient(in Tensor a, in Tensor delta, out Tensor dJ
             }
             delta.CompressVertically(out dJdb); // Doing this on CPU is generally faster than launching the kernels
         }
+
+        #endregion
+
+        /// <summary>
+        /// Attempts to load a <see cref="CuDnnFullyConnectedLayer"/> from the data stored in the given <see cref="System.IO.Stream"/>
+        /// </summary>
+        /// <param name="stream">The source <see cref="System.IO.Stream"/> that contains the serialized layer data</param>
+        [MustUseReturnValue, CanBeNull]
+        public new static INetworkLayer Deserialize([NotNull] System.IO.Stream stream)
+        {
+            // Read order: input info, output info, activation type, weights length (the chain stops at the first failed read)
+            if (!stream.TryRead(out TensorInfo input) ||
+                !stream.TryRead(out TensorInfo output) ||
+                !stream.TryRead(out ActivationFunctionType activation) ||
+                !stream.TryRead(out int wLength))
+            {
+                return null;
+            }
+            float[] weights = stream.ReadUnshuffled(wLength);
+
+            // Biases length, then the biases themselves
+            if (!stream.TryRead(out int bLength))
+            {
+                return null;
+            }
+            float[] biases = stream.ReadUnshuffled(bLength);
+
+            // Only the size of the stored output info is passed on to the constructor
+            return new CuDnnFullyConnectedLayer(input, output.Size, weights, biases, activation);
+        }
     }
 }
diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnPoolingLayer.cs
@@ -41,9 +41,9 @@ internal sealed class CuDnnPoolingLayer : PoolingLayer
 
         #endregion
 
-        public CuDnnPoolingLayer(TensorInfo input, ActivationFunctionType activation) : base(input, activation)
+        public CuDnnPoolingLayer(in TensorInfo input, in PoolingInfo operation, ActivationFunctionType activation) : base(input, operation, activation)
         {
-            PoolingDescription.Set2D(PoolingMode.MAX, NanPropagation.PROPAGATE_NAN, 2, 2, 0, 0, 2, 2);
+            PoolingDescription.Set2D((PoolingMode)operation.Mode, NanPropagation.PROPAGATE_NAN, operation.WindowHeight, operation.WindowWidth, operation.VerticalPadding, operation.HorizontalPadding, operation.VerticalStride, operation.HorizontalStride);
         }
 
         /// <inheritdoc/>
@@ -69,6 +69,20 @@ public override void Forward(in Tensor x, out Tensor z, out Tensor a)
         public override void Backpropagate(in Tensor delta_1, in Tensor z, ActivationFunction activationPrime) => z.UpscalePool2x2(delta_1, InputInfo.Channels);
 
         /// <inheritdoc/>
-        public override INetworkLayer Clone() => new PoolingLayer(InputInfo, ActivationFunctionType);
+        public override INetworkLayer Clone() => new CuDnnPoolingLayer(InputInfo, OperationInfo, ActivationFunctionType);
+
+        /// <summary>
+        /// Attempts to load a <see cref="CuDnnPoolingLayer"/> from the data stored in the given <see cref="System.IO.Stream"/>
+        /// </summary>
+        /// <param name="stream">The source <see cref="System.IO.Stream"/> that contains the serialized layer data</param>
+        [MustUseReturnValue, CanBeNull]
+        public new static INetworkLayer Deserialize([NotNull] System.IO.Stream stream)
+        {
+            // The second TensorInfo is consumed from the stream but its value is not used
+            if (stream.TryRead(out TensorInfo input) &&
+                stream.TryRead(out TensorInfo _) &&
+                stream.TryRead(out ActivationFunctionType activation) &&
+                stream.TryRead(out PoolingInfo operation))
+            {
+                return new CuDnnPoolingLayer(input, operation, activation);
+            }
+
+            // One of the reads failed
+            return null;
+        }
     }
 }
diff --git a/NeuralNetwork.NET.Cuda/Layers/CuDnnSoftmaxLayer.cs b/NeuralNetwork.NET.Cuda/Layers/CuDnnSoftmaxLayer.cs
@@ -1,11 +1,15 @@
 using Alea;
 using Alea.cuDNN;
 using JetBrains.Annotations;
+using NeuralNetworkNET.Extensions;
 using NeuralNetworkNET.Cuda.Services;
 using NeuralNetworkNET.Cuda.Extensions;
 using NeuralNetworkNET.Networks.Implementations.Layers;
 using NeuralNetworkNET.APIs.Structs;
 using NeuralNetworkNET.APIs.Enums;
+using NeuralNetworkNET.APIs.Interfaces;
+using NeuralNetworkNET.Networks.Activations;
+using NeuralNetworkNET.Networks.Cost;
 
 namespace NeuralNetworkNET.Cuda.Layers
 {
@@ -30,7 +34,7 @@ internal sealed class CuDnnSoftmaxLayer : SoftmaxLayer
 
         public CuDnnSoftmaxLayer(in TensorInfo input, int outputs, WeightsInitializationMode weightsMode, BiasInitializationMode biasMode) : base(input, outputs, weightsMode, biasMode) { }
 
-        public CuDnnSoftmaxLayer([NotNull] float[,] weights, [NotNull] float[] biases) : base(weights, biases) { }
+        public CuDnnSoftmaxLayer(in TensorInfo input, int outputs, [NotNull] float[] weights, [NotNull] float[] biases) : base(input, outputs, weights, biases) { }
 
         /// <inheritdoc/>
         public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a)
@@ -40,7 +44,7 @@ public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a)
                 // Linear pass
                 fixed (float* pw = Weights)
                 {
-                    Tensor.Fix(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
+                    Tensor.Reshape(pw, InputInfo.Size, OutputInfo.Size, out Tensor wTensor);
                     using (DeviceMemory<float>
                         x_gpu = DnnInstance.Gpu.AllocateDevice(x),
                         w_gpu = DnnInstance.Gpu.AllocateDevice(wTensor),
@@ -60,5 +64,23 @@ public override unsafe void Forward(in Tensor x, out Tensor z, out Tensor a)
                 }
             }
         }
+
+        /// <summary>
+        /// Tries to deserialize a new <see cref="CuDnnSoftmaxLayer"/> from the input <see cref="System.IO.Stream"/>
+        /// </summary>
+        /// <param name="stream">The input <see cref="System.IO.Stream"/> to use to read the layer data</param>
+        /// <returns>
+        /// The loaded layer, or <see langword="null"/> if any read fails, if the stored activation function is not
+        /// <see cref="ActivationFunctionType.Softmax"/>, or if the stored cost function is not <see cref="CostFunctionType.LogLikelyhood"/>
+        /// </returns>
+        [MustUseReturnValue, CanBeNull]
+        public new static INetworkLayer Deserialize([NotNull] System.IO.Stream stream)
+        {
+            if (!stream.TryRead(out TensorInfo input)) return null;
+            if (!stream.TryRead(out TensorInfo output)) return null;
+
+            // Reject the data when the read fails OR when the stored activation is not softmax
+            if (!stream.TryRead(out ActivationFunctionType activation) || activation != ActivationFunctionType.Softmax) return null;
+            if (!stream.TryRead(out int wLength)) return null;
+            float[] weights = stream.ReadUnshuffled(wLength);
+            if (!stream.TryRead(out int bLength)) return null;
+            float[] biases = stream.ReadUnshuffled(bLength);
+
+            // Same check for the cost function: a failed read or an unexpected value both abort the load
+            if (!stream.TryRead(out CostFunctionType cost) || cost != CostFunctionType.LogLikelyhood) return null;
+            return new CuDnnSoftmaxLayer(input, output.Size, weights, biases);
+        }
     }
 }
diff --git a/NeuralNetwork.NET/APIs/Delegates/LayerDeserializer.cs b/NeuralNetwork.NET/APIs/Delegates/LayerDeserializer.cs
@@ -0,0 +1,15 @@
+using System.IO;
+using JetBrains.Annotations;
+using NeuralNetworkNET.APIs.Enums;
+using NeuralNetworkNET.APIs.Interfaces;
+
+namespace NeuralNetworkNET.APIs.Delegates
+{
+    /// <summary>
+    /// A <see langword="delegate"/> that tries to deserialize a network layer from the input <see cref="Stream"/>, assuming the layer is of the given <see cref="LayerType"/>. The returned layer can be <see langword="null"/>
+    /// </summary>
+    /// <param name="stream">The source <see cref="Stream"/> to load data from. If the layer type is not supported, the <see cref="Stream"/> should not be read at all</param>
+    /// <param name="type">The type of network layer to deserialize from the <see cref="Stream"/></param>
+    [CanBeNull]
+    public delegate INetworkLayer LayerDeserializer([NotNull] Stream stream, LayerType type);
+}
diff --git a/...rk.NET.Cuda/APIs/Enums/ConvolutionMode.cs → ...Network.NET/APIs/Enums/ConvolutionMode.cs b/...rk.NET.Cuda/APIs/Enums/ConvolutionMode.cs → ...Network.NET/APIs/Enums/ConvolutionMode.cs
@@ -1,7 +1,7 @@
 namespace NeuralNetworkNET.APIs.Enums
 {
     /// <summary>
-    /// A simple wrapper over the <see cref="Alea.cuDNN.ConvolutionMode"/> <see cref="enum"/>
+    /// A simple <see langword="enum"/> indicating the type of convolution operation to perform
     /// </summary>
     public enum ConvolutionMode
     {

diff --git a/NeuralNetwork.NET/APIs/Enums/LayerType.cs b/NeuralNetwork.NET/APIs/Enums/LayerType.cs
@@ -1,4 +1,4 @@
-namespace NeuralNetworkNET.APIs.Misc
+namespace NeuralNetworkNET.APIs.Enums
 {
     /// <summary>
     /// Indicates the type of a neural network layer (for serialization purposes only)