Commit aad0c5b
Working on GH-7: Make the first example for a Sequential model pass
cesarsouza committed Aug 14, 2017
1 parent fe0ecf7 commit aad0c5b
Showing 12 changed files with 321 additions and 132 deletions.
52 changes: 52 additions & 0 deletions Sources/Backends/BackendBase.cs
@@ -0,0 +1,52 @@
// Keras-Sharp: C# port of the Keras library
// https://github.com/cesarsouza/keras-sharp
//
// Based on the Keras library for Python. See LICENSE text for more details.
//
// The MIT License(MIT)
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//

namespace KerasSharp.Backends
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using KerasSharp.Engine.Topology;
using KerasSharp.Losses;
using KerasSharp.Models;
using TensorFlow;
using Accord.Math;
using static KerasSharp.Python;

public abstract class BackendBase
{
/// <summary>
/// Returns the value of the fuzz factor used in numeric expressions.
/// </summary>
///
public float epsilon()
{
return 1e-8f;
}
}
}
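For context, a minimal sketch of how a fuzz factor like this is typically consumed — clamping a probability away from 0 and 1 before a logarithm so a log-loss stays finite. The surrounding program and variable names are illustrative, not part of this commit:

    using System;

    class EpsilonDemo
    {
        static void Main()
        {
            float eps = 1e-8f;  // the value BackendBase.epsilon() returns in this commit
            double raw = 0.0;   // a degenerate predicted probability
            // clamp into [eps, 1 - eps] so Math.Log never receives 0
            double p = Math.Min(1.0 - eps, Math.Max(eps, raw));
            Console.WriteLine(-Math.Log(p));  // finite loss instead of Infinity
        }
    }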
11 changes: 6 additions & 5 deletions Sources/Backends/Base/IBackend.cs
@@ -48,7 +48,7 @@ public interface IBackend : IDisposable

Tensor clip(Tensor norms, int v, int maxValue);

- Tensor epsilon();
+ float epsilon();

TFDataType floatx();

@@ -141,7 +141,7 @@ public interface IBackend : IDisposable

Tensor abs(Tensor input);

- Tensor categorical_crossentropy(Tensor expected, Tensor actual);
+ Tensor categorical_crossentropy(Tensor target, Tensor output, bool from_logits = false);

Tensor sum(Tensor tensor, int axis);

@@ -189,7 +189,10 @@ public interface IBackend : IDisposable

void batch_set_value(List<Tuple<Tensor, Array>> weight_value_tuples);

- Tensor placeholder(int?[] shape, TFDataType? dtype = Utils.DEFAULT_DTYPE, bool sparse = false, string name = null);
+ Tensor placeholder(int?[] shape = null, int? ndim = null, TFDataType? dtype = Utils.DEFAULT_DTYPE, bool sparse = false, string name = null);

+ // Tensor placeholder(int ndim, TFDataType? dtype = Utils.DEFAULT_DTYPE, bool sparse = false, string name = null);


int?[] int_shape(TFTensor input_tensor);

@@ -201,7 +204,6 @@ public interface IBackend : IDisposable

Tensor update_add(Tensor iterations, int v);

- Tensor placeholder(int ndim, string name);

object get_variable_shape(Tensor p);

Expand All @@ -211,7 +213,6 @@ public interface IBackend : IDisposable

bool is_sparse(Tensor tensor);

- Tensor placeholder(int ndim, string name, bool sparse, TFDataType? dtype);

object learning_phase();

64 changes: 41 additions & 23 deletions Sources/Backends/TensorFlowBackend.cs
@@ -36,10 +36,11 @@ namespace KerasSharp.Backends
using KerasSharp.Models;
using TensorFlow;
using Accord.Math;
+ using static KerasSharp.Python;

// TODO:

- public class TensorFlowBackend : IBackend
+ public class TensorFlowBackend : BackendBase, IBackend
{
internal TFGraph tf;

@@ -169,8 +170,34 @@ public Tensor cast(object v1, object v2)
throw new NotImplementedException();
}

- public Tensor categorical_crossentropy(Tensor expected, Tensor actual)
+ /// <summary>
+ ///   Categorical crossentropy between an output tensor and a target tensor.
+ /// </summary>
+ ///
+ /// <param name="target">A tensor of the same shape as `output`.</param>
+ /// <param name="output">A tensor resulting from a softmax (unless `from_logits` is True, in which case `output` is expected to be the logits).</param>
+ /// <param name="from_logits">Boolean, whether `output` is the result of a softmax, or is a tensor of logits.</param>
+ ///
+ /// <returns>Output tensor.</returns>
+ ///
+ public Tensor categorical_crossentropy(Tensor target, Tensor output, bool from_logits = false)
{
+     // Note: tf.nn.softmax_cross_entropy_with_logits
+     // expects logits, Keras expects probabilities.
+     if (!from_logits)
+     {
+         // scale preds so that the class probas of each sample sum to 1
+         var shape = output.shape;
+         var o = output.output;
+         o = tf.Div(o, tf.ReduceSum(output.output, axis: tf.Const(new TFTensor(shape.Length - 1)), keep_dims: true));
+         // manual computation of crossentropy
+         var _epsilon = constant(epsilon(), dtype: output.dtype);
+         //output = tf.clip_by_value(output, _epsilon, 1.0 - _epsilon);
+         //return -tf.reduce_sum(target * tf.log(output), axis: len(output.get_shape()) - 1);
+     }

+     //return tf.softmax_cross_entropy_with_logits(labels: target, logits: output);

throw new NotImplementedException();
}
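As committed, the method still ends in NotImplementedException. For orientation, a hedged sketch of how the two commented-out Python lines might translate, assuming TensorFlowSharp's TFGraph exposes ClipByValue, Sub, Mul, Log, Neg and ReduceSum as generated operations (only Div, ReduceSum, Add, Mul and Const are exercised elsewhere in this commit) and that tensor(...) wraps a TFOutput as in placeholder below:

    // Hypothetical completion of the non-logits branch; a sketch, not the commit's code.
    // Clip to [eps, 1 - eps] to avoid log(0), then compute -sum(target * log(output))
    // over the last axis, mirroring the Keras Python backend.
    var eps = tf.Const(new TFTensor(epsilon()));
    var one = tf.Const(new TFTensor(1.0f));
    var clipped = tf.ClipByValue(o, eps, tf.Sub(one, eps));
    var axis = tf.Const(new TFTensor(shape.Length - 1));
    var ce = tf.Neg(tf.ReduceSum(tf.Mul(target.output, tf.Log(clipped)), axis: axis));
    return tensor(ce);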

@@ -246,11 +273,6 @@ public Tensor elu(object x)
throw new NotImplementedException();
}

- public Tensor epsilon()
- {
-     throw new NotImplementedException();
- }

public Tensor exp(object v)
{
throw new NotImplementedException();
@@ -367,7 +389,7 @@ public Tensor in_train_phase(Func<Tensor> dropped_inputs, Tensor inputs, bool? t

public bool is_sparse(Tensor tensor)
{
-     throw new NotImplementedException();
+     return false;
}

public Tensor l2_normalize(Tensor expected, int axis)
@@ -503,34 +525,30 @@ public IDisposable name_scope(string name)
return tf.GetTensorNumDims(x.output);
}

- public Tensor placeholder(int?[] shape, TFDataType? dtype = Utils.DEFAULT_DTYPE, bool sparse = false, string name = null)
+ public Tensor placeholder(int?[] shape = null, int? ndim = null, TFDataType? dtype = Utils.DEFAULT_DTYPE, bool sparse = false, string name = null)
{
// https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/backend/tensorflow_backend.py#L397

if (sparse)
throw new NotImplementedException();

if (dtype == null)
dtype = floatx();

+     if (shape == null)
+     {
+         if (ndim != null)
+             shape = new int?[ndim.Value];
+     }

var tfshape = this.shape(shape);

-     Tensor x = new Tensor(this);
-     x.output = tf.Placeholder(dtype.Value, tfshape, operName: name);
+     Tensor x = tensor(tf.Placeholder(dtype.Value, tfshape, operName: name));
x._keras_shape = shape;
x._uses_learning_phase = false;
return x;
}

- public Tensor placeholder(int ndim, string name)
- {
-     throw new NotImplementedException();
- }
-
- public Tensor placeholder(int ndim, string name, bool sparse, TFDataType? dtype)
- {
-     throw new NotImplementedException();
- }
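A quick usage sketch of the consolidated overload (K stands for a TensorFlowBackend instance; the variable names and shapes are illustrative, not from this commit):

    // shape given explicitly: a batch (unknown size) of 784-dimensional rows
    Tensor byShape = K.placeholder(shape: new int?[] { null, 784 });
    // only the rank is known: ndim: 2 expands to new int?[2], i.e. { null, null },
    // before the TFGraph.Placeholder node is created
    Tensor byNdim = K.placeholder(ndim: 2);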

/// <summary>
/// Returns a tensor with uniform distribution of values.
/// </summary>
@@ -550,8 +568,8 @@ public Tensor random_uniform(int?[] shape, double minval = 0.0, double maxval =
var tf_shape = tf.Const(shape.Apply(x => (long)x));
TFOutput u = tf.RandomUniform(tf_shape, dtype: dtype, seed: seed, operName: name);

- return tensor (tf.Add(tf.Mul(u, tf.Const(new TFTensor(maxval - minval), dtype: dtype)),
+ return tensor(tf.Add(tf.Mul(u, tf.Const(new TFTensor(maxval - minval), dtype: dtype)),
      tf.Const(new TFTensor(minval), dtype: dtype)));
}
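The returned node is the affine rescaling u * (maxval - minval) + minval of a unit-uniform sample u; for instance, with minval = -0.05 and maxval = 0.05, a draw of u = 0.5 maps to 0.0.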

69 changes: 36 additions & 33 deletions Sources/Engine/Topology/Container.cs
@@ -55,32 +55,32 @@ namespace KerasSharp.Engine.Topology
///
public class Container : Layer
{
- object _per_input_losses;
- object _per_input_updates;
-
- public List<Tensor> inputs;
- public List<Tensor> masks;
- public List<Tensor> outputs;
- List<Layer> input_layers;
- private List<int> input_layers_node_indices;
- private List<int> input_layers_tensor_indices;
- List<Layer> output_layers;
- private List<int> output_layers_node_indices;
- private List<int> output_layers_tensor_indices;
- private Dictionary<string, List<Tensor>> _output_tensor_cache;
- private Dictionary<string, List<int?[]>> _output_shape_cache;
- Dictionary<string, List<Tensor>> _output_mask_cache;
- private List<string> input_names;
- private List<string> output_names;
- private List<string> _feed_input_names;
- private List<List<Tensor>> _feed_inputs;
- public List<int?[]> _feed_input_shapes;
- private int?[][] internal_input_shapes;
- public int?[][] internal_output_shapes;
- private List<Layer> layers;
- private Dictionary<int, List<Layer>> layers_by_depth;
- private HashSet<string> container_nodes;
- private Dictionary<int, List<Node>> nodes_by_depth;
+ protected internal object _per_input_losses;
+ protected internal object _per_input_updates;
+
+ protected List<Tensor> inputs;
+ protected List<Tensor> masks;
+ protected List<Tensor> outputs;
+ public List<Layer> input_layers;
+ public List<int> input_layers_node_indices;
+ public List<int> input_layers_tensor_indices;
+ public List<Layer> output_layers;
+ public List<int> output_layers_node_indices;
+ public List<int> output_layers_tensor_indices;
+ protected internal Dictionary<string, List<Tensor>> _output_tensor_cache;
+ protected internal Dictionary<string, List<int?[]>> _output_shape_cache;
+ protected internal Dictionary<string, List<Tensor>> _output_mask_cache;
+ public List<string> input_names;
+ public List<string> output_names;
+ protected internal List<string> _feed_input_names;
+ protected internal List<Tensor> _feed_inputs;
+ protected internal List<int?[]> _feed_input_shapes;
+ protected int?[][] internal_input_shapes;
+ protected int?[][] internal_output_shapes;
+ protected List<Layer> layers;
+ protected Dictionary<int, List<Layer>> layers_by_depth;
+ public HashSet<string> container_nodes;
+ public Dictionary<int, List<Node>> nodes_by_depth;


public Container()
@@ -203,7 +203,7 @@ public Container(List<Tensor> inputs, List<Tensor> outputs, string name = null)
this.input_names = new List<string>();
this.output_names = new List<string>();
this._feed_input_names = new List<string>();
-     this._feed_inputs = new List<List<Tensor>>();
+     this._feed_inputs = new List<Tensor>();
this._feed_input_shapes = new List<int?[]>();

for (int i = 0; i < this.input_layers.Count; i++)
@@ -219,7 +219,7 @@ public Container(List<Tensor> inputs, List<Tensor> outputs, string name = null)
if (layer.is_placeholder)
{
this._feed_input_names.Add(layer.name);
-     this._feed_inputs.Add(layer.input);
+     this._feed_inputs.AddRange(layer.input);
this._feed_input_shapes.Add(this.inputs[i]._keras_shape);
}
}
@@ -274,7 +274,7 @@ void build_map_of_graph(Tensor tensor, HashSet<Node> finished_nodes = null, Hash
container_nodes.Add(node_key);

// Store the traversal order for layer sorting.
-     if (layer_indices.ContainsKey(layer))
+     if (!layer_indices.ContainsKey(layer))
layer_indices[layer] = layer_indices.Count;

nodes_in_progress.Add(node);
@@ -293,6 +293,7 @@ void build_map_of_graph(Tensor tensor, HashSet<Node> finished_nodes = null, Hash
nodes_in_progress.Remove(node);

nodes_in_decreasing_depth.Add(node);
+     return;
}

{
@@ -311,7 +312,8 @@ void build_map_of_graph(Tensor tensor, HashSet<Node> finished_nodes = null, Hash

// Update the depth of the corresponding layer
int previous_depth = 0;
-     layers_depths.TryGetValue(node.outbound_layer, out previous_depth);
+     if (layers_depths.ContainsKey(node.outbound_layer))
+         previous_depth = layers_depths[node.outbound_layer];

// If we've seen this layer before at a higher depth, we should use that depth instead
// of the node depth. This is necessary for shared layers that have inputs at different
@@ -326,6 +328,7 @@ void build_map_of_graph(Tensor tensor, HashSet<Node> finished_nodes = null, Hash
var inbound_layer = node.inbound_layers[i];
int node_index = node.node_indices[i].Value;
var inbound_node = inbound_layer.inbound_nodes[node_index];
+     previous_depth = 0;
nodes_depths.TryGetValue(inbound_node, out previous_depth);
nodes_depths[inbound_node] = Math.Max(depth + 1, previous_depth);
}
@@ -793,7 +796,7 @@ public List<Tensor> call(List<Tensor> inputs, List<Tensor> mask = null)
public override List<Tensor> compute_mask(List<Tensor> inputs, List<Tensor> mask)
{
if (mask == null)
-     masks = inputs.Select(x => (Tensor)x).ToList();
+     masks = inputs.Select(x => (Tensor)null).ToList();

string cache_key = String.Join(",", inputs.Select(x => str(id(x))));
cache_key += '_' + String.Join(",", masks.Select(x => str(id(x))));
@@ -1104,13 +1107,13 @@ public List<Tensor> get_source_inputs(Tensor tensor, Layer layer = null, int? no
if (layer == null || node_index == null)
(layer, node_index, _) = tensor._keras_history.Value;

-     if (layer.inbound_nodes.Count > 0)
+     if (layer.inbound_nodes.Count == 0)
return new List<Tensor>() { tensor };


var node = layer.inbound_nodes[node_index.Value];

-     if (node.inbound_layers.Count > 0)
+     if (node.inbound_layers.Count == 0)
{
// Reached an Input layer, stop recursion.
return node.input_tensors;
4 changes: 2 additions & 2 deletions Sources/Engine/Topology/Input.cs
@@ -67,8 +67,8 @@ public static List<Tensor> Input(int?[] shape = null, int?[] batch_shape = null,

if (batch_shape == null && tensor == null)
{
throw new ArgumentException("Please provide to Input either a `shape` or a `batch_shape` argument. Note that " +
"`shape` does not include the batch dimension.");
throw new ArgumentException("Please provide to Input either a 'shape' or a 'batch_shape' argument. Note that " +
"'shape' does not include the batch dimension.");
}

if (shape != null && batch_shape != null)
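For orientation, a sketch of how this factory is typically invoked (argument values are illustrative; per the signature above, the method returns a list of tensors):

    // shape omits the batch dimension...
    List<Tensor> a = Input(shape: new int?[] { 784 });
    // ...while batch_shape states it explicitly (null = unknown batch size)
    List<Tensor> b = Input(batch_shape: new int?[] { null, 784 });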