diff --git a/Backends/CNTK.CPU/CNTKBackend.cs b/Backends/CNTK.CPU/CNTKBackend.cs
index 5542143..bf67f4a 100644
--- a/Backends/CNTK.CPU/CNTKBackend.cs
+++ b/Backends/CNTK.CPU/CNTKBackend.cs
@@ -387,8 +387,11 @@ public Tensor add(Tensor a, Tensor b)
             return Out(new Variable(In(a).function) + new Variable(In(b).function));
         }
 
-        public Tensor bias_add(Tensor output, Tensor bias, string name = null)
+        public Tensor bias_add(Tensor output, Tensor bias, DataFormatType? data_format = null, string name = null)
         {
+            if (data_format != null)
+                throw new NotImplementedException();
+
             using (this.name_scope("bias_add"))
             {
                 CNTKTensor _x = In(output);
@@ -474,6 +477,21 @@ public Tensor transpose(Tensor tensor)
             return Out(C.Transpose(In(tensor)));
         }
 
+        /// <summary>
+        ///   Turns an nD tensor into a 2D tensor with the same 0th dimension. In other words, it flattens each data sample of a batch.
+        /// </summary>
+        /// 
+        public Tensor batch_flatten(Tensor x)
+        {
+            // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/backend/cntk_backend.py#L1460
+            // cntk's batch axis is not in shape,
+            // so just flatten all the dim in x.shape
+            int dim = Matrix.Product(x.shape.Select(s => s.Value).ToArray());
+            x = Out(C.Reshape(In(x), NDShape.CreateNDShape(new[] { -1 })));
+            x._keras_shape = new int?[] { null, dim };
+            return x;
+        }
+
         public object eval(Tensor tensor)
         {
             log(new { tensor });
@@ -504,7 +522,7 @@ public Tensor clip(Tensor norms, double minval, double maxval)
             throw new NotImplementedException();
         }
 
-        public Tensor random_uniform(int?[] shape, double minval = 0, double maxval = 1, DataType? dtype = null, int? seed = null, string name = null)
+        public Tensor random_uniform(int[] shape, double minval = 0, double maxval = 1, DataType? dtype = null, int? seed = null, string name = null)
         {
             if (dtype == null)
                 dtype = floatx();
@@ -694,7 +712,7 @@ public Tensor in_train_phase(Func<Tensor> x, Func<Tensor> alt, bool? training)
             return Out(In(input_tensor).function.Output.DataType);
         }
 
-        public Tensor constant<T>(T value, int?[] shape = null, KerasSharp.DataType? dtype = null, string name = null)
+        public Tensor constant<T>(T value, int[] shape = null, KerasSharp.DataType? dtype = null, string name = null)
         {
             log(new { value, shape, dtype, name });
 
@@ -705,7 +723,7 @@ public Tensor constant<T>(T value, int?[] shape = null, KerasSharp.DataType? dty
             return Out(_const, shape);
         }
 
-        public Constant InGeneric<T>(T value, int?[] shape = null, KerasSharp.DataType? dtype = null, string name = null)
+        public Constant InGeneric<T>(T value, int[] shape = null, KerasSharp.DataType? dtype = null, string name = null)
         {
             if (dtype == null)
                 dtype = floatx();
@@ -726,7 +744,7 @@ public Constant InGeneric<T>(T value, int?[] shape = null, KerasSharp.DataType?
             }
             else
             {
-                _shape = shape.Select(x => x.Value).ToArray();
+                _shape = shape;
             }
 
             Constant c = _constant(value, _shape, _dtype, name);
@@ -979,6 +997,20 @@ public Tensor reshape(Tensor x, int[] shape)
         }
 
 
+        public Tensor conv1d(Tensor inputs, Tensor kernel, int strides, PaddingType padding, DataFormatType? data_format = null, int dilation_rate = 1, string name = null)
+        {
+            throw new NotImplementedException();
+        }
+
+        public Tensor conv2d(Tensor inputs, Tensor kernel, int[] strides, PaddingType padding, DataFormatType? data_format = null, int[] dilation_rate = null, string name = null)
+        {
+            throw new NotImplementedException();
+        }
+
+        public Tensor conv3d(Tensor inputs, Tensor kernel, int[] strides, PaddingType padding, DataFormatType? data_format = null, int[] dilation_rate = null, string name = null)
+        {
+            throw new NotImplementedException();
+        }
 
 
 
@@ -1084,6 +1116,11 @@ public NDShape InShape(int[] shape)
             return s;
         }
 
+        public Tensor Out(CNTK.Function function, int[] keras_shape)
+        {   // A null shape means "no Keras shape": forward null to the int?[] overload instead of dereferencing it.
+            return Out(function, keras_shape == null ? null : keras_shape.Select(x => (int?)x).ToArray());
+        }
+
         public Tensor Out(CNTK.Function function, int?[] keras_shape = null)
         {
             var t = new CNTKTensor(this)
@@ -1207,7 +1244,6 @@ public void Dispose()
             // TODO: uncomment the following line if the finalizer is overridden above.
             // GC.SuppressFinalize(this);
         }
-
         #endregion
     }
 }
diff --git a/Backends/TensorFlow/TensorFlowBackend.cs b/Backends/TensorFlow/TensorFlowBackend.cs
index 79c8827..560d031 100644
--- a/Backends/TensorFlow/TensorFlowBackend.cs
+++ b/Backends/TensorFlow/TensorFlowBackend.cs
@@ -52,7 +52,7 @@ public class TensorFlowBackend : BackendBase, IBackend
         // This dictionary holds a mapping {graph: learning_phase}.
         // A learning phase is a bool tensor used to run Keras models in
         // either train mode (learning_phase == 1) or test mode (learning_phase == 0).
-        private Dictionary<TFGraph, TFOutput> _GRAPH_LEARNING_PHASES = new Dictionary<TFGraph, TFOutput>();
+        private Dictionary<TFGraph, object> _GRAPH_LEARNING_PHASES = new Dictionary<TFGraph, object>();
 
         // This dictionary holds a mapping {graph: UID_DICT}.
         // each UID_DICT is a dictionary mapping name prefixes to a current index,
@@ -115,7 +115,7 @@ public void clear_session()
             //
             reset_uids();
             TFOutput phase = tf.Placeholder(dtype: TFDataType.Bool, operName: "keras_learning_phase");
-            _GRAPH_LEARNING_PHASES = new Dictionary<TFGraph, TFOutput>();
+            _GRAPH_LEARNING_PHASES = new Dictionary<TFGraph, object>();
             _GRAPH_LEARNING_PHASES[tf] = phase;
         }
 
@@ -254,7 +254,7 @@ public Tensor clip_norm(Tensor g, double clipnorm, Tensor norm)
             throw new NotImplementedException();
         }
 
-        public Tensor constant<T>(T value, int?[] shape = null, DataType? dtype = null, string name = null)
+        public Tensor constant<T>(T value, int[] shape = null, DataType? dtype = null, string name = null)
         {
             if (dtype == null)
                 dtype = floatx();
@@ -266,11 +266,11 @@ public Tensor constant<T>(T value, int?[] shape = null, DataType? dtype = null,
                 if (arr != null)
                     _shape = arr.GetLength();
                 else _shape = new int[0];
-                shape = _shape.Select(x => (int?)x).ToArray();
+                shape = _shape;
             }
             else
             {
-                _shape = shape.Select(x => x.Value).ToArray();
+                _shape = shape;
             }
 
             TFOutput o;
@@ -444,7 +444,9 @@ public Tensor in_train_phase(Func<Tensor> x, Func<Tensor> alt, bool? training)
 
             if (training == null)
             {
-                training = (bool)learning_phase();
+                var t = learning_phase();
+                if (t is bool)
+                    training = (bool)t;
                 uses_learning_phase = true;
             }
             else
@@ -463,14 +465,21 @@ public Tensor in_train_phase(Func<Tensor> x, Func<Tensor> alt, bool? training)
             else
             {
                 //else: assume learning phase is a placeholder tensor.
-                throw new NotImplementedException();
-            }
 
-            // Tensor xx = @switch(training, x, alt);
+                Tensor xx = @switch(Out((TFOutput)learning_phase()), x, alt); // learning_phase() hands back the boxed TFOutput placeholder, not a Tensor
 
-            if (uses_learning_phase)
-                x()._uses_learning_phase = true;
-            return x();
+                if (uses_learning_phase)
+                    xx._uses_learning_phase = true;
+                return xx;
+            }
+        }
+
+        /// <summary>
+        ///   Selects `x` in test phase, and `alt` otherwise. Note that `alt` should have the *same shape* as `x`.
+        /// </summary>
+        public Tensor in_test_phase(Func<Tensor> x, Func<Tensor> alt, bool? training = null)
+        {
+            return in_train_phase(alt, x, training: training);
         }
 
         /// <summary>
@@ -491,12 +500,10 @@ public Tensor @switch(Tensor condition, Func<Tensor> then_expression, Func<Tenso
             if (_condition.dtype != TFDataType.Bool)
                 condition = Out(tf.Cast(_condition, TFDataType.Bool));
 
-            throw new NotImplementedException();
-
-            //TFOutput x = tf.cond(condition,
-            //            () => then_expression().output,
-            //            () => else_expression().output);
-            //return tensor(x);
+            TFOutput x = tf.Cond(In(condition),
+                        () => In(then_expression()),
+                        () => In(else_expression()));
+            return Out(x);
         }
 
         public bool is_sparse(Tensor tensor)
@@ -530,7 +537,15 @@ public object learning_phase()
                 _GRAPH_LEARNING_PHASES[graph] = phase;
             }
 
-            return Out(_GRAPH_LEARNING_PHASES[graph]);
+            return _GRAPH_LEARNING_PHASES[graph];
+        }
+
+        /// <summary>
+        ///   Sets the learning phase to a fixed value.
+        /// </summary>
+        public void set_learning_phase(bool value)
+        {
+            _GRAPH_LEARNING_PHASES[tf] = value;
         }
 
         public Tensor max(Tensor x, int v, object p)
@@ -553,6 +568,17 @@ public Tensor maximum(double v, Tensor tensor)
             throw new NotImplementedException();
         }
 
+        /// <summary>
+        ///   Turns an nD tensor into a 2D tensor with the same 0th dimension. In other words, it flattens each data sample of a batch.
+        /// </summary>
+        /// <returns>The input reshaped to (-1, product of every dimension after the first).</returns>
+        public Tensor batch_flatten(Tensor x)
+        {
+            TFOutput shape = tf.Shape(In(x));
+            // shape[1:] -- Slice needs 1-D begin/size vectors; a size of -1 takes every remaining element.
+            TFOutput dim = tf.Prod(tf.Slice(shape, _constant(new[] { 1 }), _constant(new[] { -1 })), reduction_indices: tf.ReduceDims(shape, null));
+            return Out(tf.Reshape(In(x), tf.Stack(new TFOutput[] { tf.Const(-1), dim } )));
+        }
 
 
         public TFOutput _normalize_axis(int[] axis, int? ndim)
@@ -664,9 +690,25 @@ public Tensor add(Tensor a, Tensor b)
             return Out(tf.Add(In(a).output, In(b).output));
         }
 
-        public Tensor bias_add(Tensor a, Tensor b, string name = null)
+        public Tensor bias_add(Tensor a, Tensor b, DataFormatType? data_format = null, string name = null)
+        {
+            return Out(tf.BiasAdd(In(a), In(b), data_format: In(data_format), operName: name));
+        }
+
+        private string In(DataFormatType? data_format)
         {
-            return Out(tf.BiasAdd(In(a), In(b), operName: name));
+            if (data_format == null)
+                return null;
+            // TensorFlow ops name layouts "NCHW" / "NHWC"; the Keras names are not valid attribute values.
+            switch (data_format.Value)
+            {
+                case DataFormatType.ChannelsFirst:
+                    return "NCHW";
+                case DataFormatType.ChannelsLast:
+                    return "NHWC";
+                default:
+                    throw new ArgumentException($"Invalid data_format: {data_format}");
+            }
         }
 
         public Tensor add<T>(T a, Tensor b)
@@ -766,7 +808,7 @@ public Tensor placeholder(int?[] shape = null, int? ndim = null, DataType? dtype
         /// 
         /// <returns>A tensor.</returns>
         /// 
-        public Tensor random_uniform(int?[] shape, double minval = 0.0, double maxval = 1.0, DataType? dtype = null, int? seed = null, string name = null)
+        public Tensor random_uniform(int[] shape, double minval = 0.0, double maxval = 1.0, DataType? dtype = null, int? seed = null, string name = null)
         {
             if (dtype == null)
                 dtype = floatx();
@@ -989,6 +1031,11 @@ public Tensor transpose(Tensor tensor)
             return Out(tf.Transpose(In(tensor).output));
         }
 
+        public Tensor transpose(Tensor tensor, int[] perm)
+        {
+            return Out(tf.Transpose(In(tensor).output, _constant(perm)));
+        }
+
 
         public object eval(Tensor tensor)
         {
@@ -1021,6 +1068,94 @@ public object eval(TFOutput output)
 
 
 
+        public Tensor conv1d(Tensor inputs, Tensor kernel, int strides, PaddingType padding, DataFormatType? data_format = null, int dilation_rate = 1, string name = null)
+        {
+            throw new NotImplementedException();
+        }
+
+        public Tensor conv2d(Tensor inputs, Tensor kernel, int[] strides, PaddingType padding, DataFormatType? data_format = null, int[] dilation_rate = null, string name = null)
+        {
+            // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/backend/tensorflow_backend.py#L3102
+            if (data_format == null)
+                data_format = image_data_format();
+
+            if (dilation_rate != null && !dilation_rate.IsEqual(new[] { 1, 1 })) // null (the default) means "no dilation"
+                throw new NotImplementedException(); // dilated convolutions are not wired up yet
+
+            TFOutput x = In(inputs).output;
+            TFOutput _kernel = In(kernel).output;
+
+            // With 4d inputs, tf.nn.convolution only supports
+            // data_format NHWC, so we transpose the inputs
+            // in case we are in data_format channels_first.
+            x = _preprocess_conv2d_input(x, data_format.Value);
+            string _padding = _preprocess_padding(padding);
+            x = tf.Conv2D(
+                input: x,
+                filter: _kernel,
+                //dilation_rate: dilation_rate,
+                strides: strides.Length == 4 ? strides.Select(i => (long)i).ToArray() : new long[] { 1, strides[0], strides[1], 1 }, // the op wants one stride per NHWC axis
+                padding: _padding,
+                data_format: "NHWC");
+            return Out(_postprocess_conv2d_output(x, data_format.Value));
+        }
+
+        /// <summary>
+        ///   Transpose and cast the output from conv2d if needed.
+        /// </summary>
+        private TFOutput _postprocess_conv2d_output(TFOutput x, DataFormatType data_format)
+        {
+            if (data_format == DataFormatType.ChannelsFirst)
+                x = tf.Transpose(x, _constant(new[] { 0, 3, 1, 2 }));
+
+            if (floatx() == DataType.Double)
+                x = tf.Cast(x, TFDataType.Double);
+            return x;
+        }
+
+        /// <summary>
+        ///   Maps a Keras padding mode onto the padding string TensorFlow expects.
+        /// </summary>
+        /// <returns>"SAME" for PaddingType.Same and "VALID" for PaddingType.Valid.</returns>
+        public string _preprocess_padding(PaddingType padding)
+        {
+            if (padding == PaddingType.Same)
+                return "SAME";
+
+            if (padding == PaddingType.Valid)
+                return "VALID";
+
+            // Any other value has no TensorFlow counterpart.
+
+            throw new ArgumentException($"Invalid padding: {padding}");
+        }
+
+        /// <summary>
+        ///   Transpose and cast the input before the conv2d.
+        /// </summary>
+        private TFOutput _preprocess_conv2d_input(TFOutput x, DataFormatType data_format)
+        {
+            if (x.OutputType == TFDataType.Double)
+                x = tf.Cast(x, TFDataType.Float);
+
+            if (data_format == DataFormatType.ChannelsFirst)
+            {
+                // TF uses the last dimension as channel dimension,
+                // instead of the 2nd one.
+                // TH input shape: (samples, input_depth, rows, cols)
+                // TF input shape: (samples, rows, cols, input_depth)
+                x = tf.Transpose(x, _constant(new[] { 0, 2, 3, 1 }));
+            }
+
+            return x;
+        }
+
+        public Tensor conv3d(Tensor inputs, Tensor kernel, int[] strides, PaddingType padding, DataFormatType? data_format = null, int[] dilation_rate = null, string name = null)
+        {
+            throw new NotImplementedException();
+        }
+
+
 
         /// <summary>
         ///   Instantiates an all-zeros variable and returns it.
diff --git a/Sources/Backends/Base/IBackend.cs b/Sources/Backends/Base/IBackend.cs
index b421fbb..e2fbc13 100644
--- a/Sources/Backends/Base/IBackend.cs
+++ b/Sources/Backends/Base/IBackend.cs
@@ -48,7 +48,7 @@ public interface IBackend : IDisposable
         Tensor round(Tensor x);
         Tensor argmax(Tensor x, int axis = -1);
         Tensor sum(Tensor x, int axis, bool keepdims = false, string name = null);
-
+        Tensor batch_flatten(Tensor inputs);
 
 
         Tensor clip(Tensor norms, int v, int maxValue);
@@ -65,6 +65,7 @@ public interface IBackend : IDisposable
         void clear_session();
 
         Tensor cast(Tensor x, DataType dataType);
+        
 
         Tensor dropout(object p, double retain_prob, object noise_shape, object seed);
 
@@ -137,6 +138,8 @@ public interface IBackend : IDisposable
 
         Tensor print_tensor(Tensor x, string message);
 
+        DataFormatType image_data_format();
+
         Tensor softsign(Tensor x);
 
         Tensor tanh(Tensor x);
@@ -152,7 +155,7 @@ public interface IBackend : IDisposable
 
 
 
-        Tensor random_uniform(int?[] shape, double minval = 0.0, double maxval = 1.0, DataType? dtype = null, int? seed = null, string name = null);
+        Tensor random_uniform(int[] shape, double minval = 0.0, double maxval = 1.0, DataType? dtype = null, int? seed = null, string name = null);
 
         Tensor l2_normalize(Tensor expected, int axis);
 
@@ -185,7 +188,7 @@ public interface IBackend : IDisposable
 
         DataType? dtype(Tensor input_tensor);
 
-        Tensor constant<T>(T value, int?[] shape = null, DataType? dtype = null, string name = null);
+        Tensor constant<T>(T value, int[] shape = null, DataType? dtype = null, string name = null);
 
         Tensor transpose(Tensor tensor);
 
@@ -236,6 +239,12 @@ public interface IBackend : IDisposable
 
         Tensor not_equal<T>(Tensor weights, T v) where T : struct;
 
-        Tensor bias_add(Tensor output, Tensor bias, string name = null);
+        Tensor bias_add(Tensor output, Tensor bias, DataFormatType? data_format = null, string name = null);
+
+        Tensor conv1d(Tensor inputs, Tensor kernel, int strides, PaddingType padding, DataFormatType? data_format, int dilation_rate, string name = null);
+
+        Tensor conv2d(Tensor inputs, Tensor kernel, int[] strides, PaddingType padding, DataFormatType? data_format, int[] dilation_rate, string name = null);
+
+        Tensor conv3d(Tensor inputs, Tensor kernel, int[] strides, PaddingType padding, DataFormatType? data_format, int[] dilation_rate, string name = null);
     }
 }
diff --git a/Sources/Engine/Topology/Layer.cs b/Sources/Engine/Topology/Layer.cs
index e7f9fd2..eb75621 100644
--- a/Sources/Engine/Topology/Layer.cs
+++ b/Sources/Engine/Topology/Layer.cs
@@ -279,7 +279,7 @@ public virtual List<Tensor> non_trainable_weights
         /// 
         /// <return>The created weight variable.</return>
         /// 
-        public Tensor add_weight(string name, int?[] shape, DataType? dtype = null,
+        public Tensor add_weight(string name, int[] shape, DataType? dtype = null,
             IWeightInitializer initializer = null, IWeightRegularizer regularizer = null,
                    bool trainable = true, IWeightConstraint constraint = null)
         {
diff --git a/Sources/Keras Sharp.csproj b/Sources/Keras Sharp.csproj
index 9dc45f5..aac0172 100644
--- a/Sources/Keras Sharp.csproj	
+++ b/Sources/Keras Sharp.csproj	
@@ -132,6 +132,9 @@
     <Compile Include="Initializers\Ones.cs" />
     <Compile Include="Initializers\VarianceScaling.cs" />
     <Compile Include="Initializers\Zeros.cs" />
+    <Compile Include="Utils\ConvUtils.cs" />
+    <Compile Include="Layers\Convolutional\DataFormatType.cs" />
+    <Compile Include="Layers\Convolutional\PaddingType.cs" />
     <Compile Include="Layers\Core\Activation.cs" />
     <Compile Include="Layers\Core\Merge.cs" />
     <Compile Include="Layers\Embeddings\Embedding.cs" />
diff --git a/Sources/Layers/Convolutional/Conv2D.cs b/Sources/Layers/Convolutional/Conv2D.cs
index b9dcfd6..e4c3596 100644
--- a/Sources/Layers/Convolutional/Conv2D.cs
+++ b/Sources/Layers/Convolutional/Conv2D.cs
@@ -31,7 +31,7 @@ namespace KerasSharp
     using System.Linq;
     using System.Text;
     using System.Threading.Tasks;
-    
+
     using System.Runtime.Serialization;
     using KerasSharp.Constraints;
     using KerasSharp.Regularizers;
@@ -41,14 +41,389 @@ namespace KerasSharp
 
     using static KerasSharp.Backends.Current;
 
+    /// <summary>
+    ///   Abstract nD convolution layer (private, used as implementation base).
+    ///   This layer creates a convolution kernel that is convolved
+    ///   with the layer input to produce a tensor of outputs.
+    ///   If `use_bias` is True, a bias vector is created and added to the outputs.
+    ///   Finally, if `activation` is not `None`,
+    ///   it is applied to the outputs as well.
+    /// </summary>
+    /// 
+    public class _Conv : Layer
+    {
+        private int rank;
+        private int filters;
+        private int[] kernel_size;
+        private int[] strides;
+        private PaddingType padding;
+        private DataFormatType? data_format;
+        private int[] dilation_rate;
+        private IActivationFunction activation;
+        private bool use_bias;
+        private IWeightInitializer kernel_initializer;
+        private IWeightInitializer bias_initializer;
+        private IWeightRegularizer kernel_regularizer;
+        private IWeightRegularizer bias_regularizer;
+        private IWeightConstraint kernel_constraint;
+        private IWeightConstraint bias_constraint;
+        private Tensor kernel;
+        private Tensor bias;
+
+        /// <summary>
+        /// Initializes a new instance of the <see cref="_Conv" /> class.
+        /// </summary>
+        /// <param name="rank">rank: An integer, the rank of the convolution, e.g. "2" for 2D convolution.</param>
+        /// <param name="filters">Integer, the dimensionality of the output space (i.e. the number of output filters in the convolution).</param>
+        /// <param name="kernel_size">An integer or tuple/list of n integers, specifying the dimensions of the convolution window.</param>
+        /// <param name="strides">An integer or tuple/list of n integers, specifying the strides of the convolution. Specifying any stride value != 1 is incompatible with specifying any `dilation_rate` value != 1.</param>
+        /// <param name="padding">One of `"valid"` or `"same"` (case-insensitive).</param>
+        /// <param name="data_format">A string, one of `channels_last` (default) or `channels_first`. The ordering of the dimensions in the inputs. 
+        ///   `channels_last` corresponds to inputs with shape `(batch, ..., channels)` while `channels_first` corresponds to inputs with shape 
+        ///   `(batch, channels, ...)`. It defaults to the `image_data_format` value found in your Keras config file at `~/.keras/keras.json`. 
+        ///   If you never set it, then it will be "channels_last".</param>
+        /// <param name="dilation_rate">An integer or tuple/list of n integers, specifying the dilation rate to use for dilated convolution. Currently, specifying any `dilation_rate` value != 1 is incompatible with specifying any `strides` value != 1.</param>
+        /// <param name="activation">Activation function to use (see [activations](../activations.md)). If you don't specify anything, no activation is applied (i.e. "linear" activation: `a(x) = x`).</param>
+        /// <param name="use_bias">Boolean, whether the layer uses a bias vector.</param>
+        /// <param name="kernel_initializer">Initializer for the `kernel` weights matrix (see [initializers](../initializers.md)).</param>
+        /// <param name="bias_initializer">Initializer for the bias vector (see [initializers](../initializers.md)).</param>
+        /// <param name="kernel_regularizer">Regularizer function applied to the `kernel` weights matrix (see [regularizer](../regularizers.md)).</param>
+        /// <param name="bias_regularizer">Regularizer function applied to the bias vector (see [regularizer](../regularizers.md)).</param>
+        /// <param name="activity_regularizer">Regularizer function applied to the output of the layer (its "activation") (see [regularizer](../regularizers.md)).</param>
+        /// <param name="kernel_constraint">Constraint function applied to the kernel matrix (see [constraints](../constraints.md)).</param>
+        /// <param name="bias_constraint">Constraint function applied to the bias vector (see [constraints](../constraints.md)).</param>
+        public _Conv(int rank,
+                         int filters,
+                         int[] kernel_size,
+                         int[] strides = null,
+                         PaddingType padding = PaddingType.Valid,
+                         DataFormatType? data_format = null,
+                         int[] dilation_rate = null,
+                         IActivationFunction activation = null,
+                         bool use_bias = true,
+                         IWeightInitializer kernel_initializer = null,
+                         IWeightInitializer bias_initializer = null,
+                         IWeightRegularizer kernel_regularizer = null,
+                         IWeightRegularizer bias_regularizer = null,
+                         IWeightRegularizer activity_regularizer = null,
+                         IWeightConstraint kernel_constraint = null,
+                         IWeightConstraint bias_constraint = null,
+                         int?[] input_shape = null)
+            : base(input_shape: input_shape)
+        {
+            if (kernel_initializer == null)
+                kernel_initializer = new GlorotUniform();
+
+            if (bias_initializer == null)
+                bias_initializer = new Zeros();
+
+            if (strides == null)
+                strides = Vector.Create<int>(size: rank, value: 1);
+
+            if (dilation_rate == null)
+                dilation_rate = Vector.Create<int>(size: rank, value: 1);
+
+            if (data_format == null)
+                data_format = K.image_data_format();
+
+            if (kernel_size.Length != rank)
+                throw new ArgumentException("kernel_size");
+
+            if (strides.Length != rank)
+                throw new ArgumentException("strides");
+
+            if (dilation_rate.Length != rank)
+                throw new ArgumentException("dilation_rate");
+
+            // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/layers/convolutional.py#L101
+
+            this.rank = rank;
+            this.filters = filters;
+            this.kernel_size = kernel_size;
+            this.strides = strides;
+            this.padding = padding;
+            this.data_format = data_format;
+            this.dilation_rate = dilation_rate;
+            this.activation = activation;
+            this.use_bias = use_bias;
+            this.kernel_initializer = kernel_initializer;
+            this.bias_initializer = bias_initializer;
+            this.kernel_regularizer = kernel_regularizer;
+            this.bias_regularizer = bias_regularizer;
+            this.activity_regularizer = activity_regularizer;
+            this.kernel_constraint = kernel_constraint;
+            this.bias_constraint = bias_constraint;
+            this.input_spec = new List<InputSpec> { new InputSpec(ndim: this.rank + 2) };
+        }
+
+        protected override void build(List<int?[]> input_shapes)
+        {
+            // Creates the kernel (and, when use_bias is set, the bias) weights for the single input shape.
+            if (input_shapes.Count != 1)
+                throw new Exception("Expected a single input.");
+
+            var input_shape = input_shapes[0];
+            // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/layers/convolutional.py#L119
+
+            int channel_axis;
+            if (this.data_format == DataFormatType.ChannelsFirst)
+                channel_axis = 1;
+            else
+                channel_axis = -1;
+
+            if (input_shape.Get(channel_axis) == null)
+                throw new Exception("The channel dimension of the inputs should be defined. Found `None`.");
+
+            int input_dim = input_shape.Get(channel_axis).Value;
+            int[] kernel_shape = this.kernel_size.Concat(new[] { input_dim, this.filters }).ToArray();
+
+            this.kernel = this.add_weight(shape: kernel_shape,
+                                      initializer: this.kernel_initializer,
+                                      name: "kernel",
+                                      regularizer: this.kernel_regularizer,
+                                      constraint: this.kernel_constraint);
+            if (this.use_bias)
+            {
+                this.bias = this.add_weight(shape: new int[] { this.filters },
+                                            initializer: this.bias_initializer,
+                                            name: "bias",
+                                            regularizer: this.bias_regularizer,
+                                            constraint: this.bias_constraint);
+            }
+            else
+            {
+                this.bias = null;
+            }
+
+            // Set input spec.
+            this.input_spec = new List<InputSpec> { new InputSpec(ndim: this.rank + 2, axes: new Dictionary<int, int> { { channel_axis, input_dim } }) };
+            this.built = true;
+        }
 
+        protected override Tensor InnerCall(Tensor inputs, Tensor mask = null, bool? training = null)
+        {
+            // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/layers/convolutional.py#L149
+
+            if (mask != null)
+                throw new Exception();
+
+            if (training != null)
+                throw new Exception();
+
+            Tensor outputs = null;
+
+            if (this.rank == 1)
+            {
+                outputs = K.conv1d(
+                    inputs,
+                    this.kernel,
+                    strides: this.strides[0],
+                    padding: this.padding,
+                    data_format: this.data_format,
+                    dilation_rate: this.dilation_rate[0]);
+            }
+            if (this.rank == 2)
+            {
+                outputs = K.conv2d(
+                    inputs,
+                    this.kernel,
+                    strides: this.strides,
+                    padding: this.padding,
+                    data_format: this.data_format,
+                    dilation_rate: this.dilation_rate);
+            }
+            if (this.rank == 3)
+            {
+                outputs = K.conv3d(
+                    inputs,
+                    this.kernel,
+                    strides: this.strides,
+                    padding: this.padding,
+                    data_format: this.data_format,
+                    dilation_rate: this.dilation_rate);
+            }
+            if (this.use_bias)
+            {
+                outputs = K.bias_add(
+                    outputs,
+                    this.bias,
+                    data_format: this.data_format);
+            }
+
+            if (this.activation != null)
+                return this.activation.Call(outputs, null);
+            return outputs;
+        }
+
+        /// <summary>
+        ///   Computes the output shape of the convolution for a single input shape.
+        /// </summary>
+        /// <param name="input_shapes">A list that must contain exactly one input shape.</param>
+        /// <returns>
+        ///   A one-element list holding the first input dimension, the converted spatial dimensions and
+        ///   <c>filters</c>; the filters entry comes last for channels-last and second for channels-first.
+        /// </returns>
+        /// <exception cref="Exception">More than one input shape was given, or the data format is not supported.</exception>
+        public override List<int?[]> compute_output_shape(List<int?[]> input_shapes)
+        {
+            if (input_shapes.Count != 1)
+                throw new Exception("Expected a single input.");
+            int?[] input_shape = input_shapes[0];
+
+            // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/layers/convolutional.py#L185
+
+            // Reject unknown formats up front so the two supported layouts can share one code path.
+            bool channels_last = this.data_format == DataFormatType.ChannelsLast;
+            if (!channels_last && this.data_format != DataFormatType.ChannelsFirst)
+                throw new Exception($"Unsupported data format: {this.data_format}.");
+
+            // Intended to mirror input_shape[1:-1] (channels-last) and input_shape[2:] (channels-first) of the
+            // Keras original; assumes Get treats a non-positive end index as relative to the end - TODO confirm.
+            var space = channels_last ? input_shape.Get(1, -1) : input_shape.Get(2, 0);
+
+            var new_space = new List<int?>();
+            for (int i = 0; i < space.Length; i++)
+            {
+                int? new_dim = conv_utils.conv_output_length(
+                    space[i],
+                    this.kernel_size[i],
+                    padding: this.padding,
+                    stride: this.strides[i],
+                    dilation: this.dilation_rate[i]);
+                new_space.Add(new_dim);
+            }
+
+            // Only the position of the filters entry differs between the two layouts.
+            var leading = new int?[] { input_shape[0] };
+            var channels = new int?[] { this.filters };
+            IEnumerable<int?> output_shape = channels_last
+                ? leading.Concat(new_space).Concat(channels)
+                : leading.Concat(channels).Concat(new_space);
+
+            return new List<int?[]> { output_shape.ToArray() };
+        }
+
+        //override Dictionary<string, object> get_config()
+        //{
+        //    return new Dictionary<string, object>
+        //    {
+        //        { "rank",  this.rank },
+        //        { "filters",  this.filters },
+        //        { "kernel_size",  this.kernel_size },
+        //        { "strides",  this.strides },
+        //        { "padding",  this.padding },
+        //        { "data_format",  this.data_format },
+        //        { "dilation_rate",  this.dilation_rate },
+        //        { "activation",  activations.serialize(this.activation) },
+        //        { "use_bias",  this.use_bias },
+        //        { "kernel_initializer",  initializers.serialize(this.kernel_initializer) },
+        //        { "bias_initializer",  initializers.serialize(this.bias_initializer) },
+        //        { "kernel_regularizer",  regularizers.serialize(this.kernel_regularizer) },
+        //        { "bias_regularizer",  regularizers.serialize(this.bias_regularizer) },
+        //        { "activity_regularizer",  regularizers.serialize(this.activity_regularizer) },
+        //        { "kernel_constraint",  constraints.serialize(this.kernel_constraint) },
+        //        { "bias_constraint",  constraints.serialize(this.bias_constraint) },
+        //    };
+
+        //base_config = super(_Conv, self).get_config()
+        //    return dict(list(base_config.items()) + list(config.items()))
+    }
+
+
+    /// <summary>
+    ///   2D convolution layer (e.g. spatial convolution over images).
+    /// </summary>
+    /// <remarks>
+    ///   This layer creates a convolution kernel that is convolved with the layer input to produce a tensor of
+    ///   outputs. If `use_bias` is true, a bias vector is created and added to the outputs. Finally, if
+    ///   `activation` is not null, it is applied to the outputs as well. When using this layer as the first layer
+    ///   in a model, provide the argument `input_shape` (integers, does not include the sample axis),
+    ///   e.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures in `data_format="channels_last"`.
+    /// </remarks>
+    /// <seealso cref="KerasSharp.Engine.Topology.Layer" />
     [DataContract]
-    public class Conv2D : Layer
+    public class Conv2D : _Conv
     {
 
-        public Conv2D(int v1, int[] v2, string activation, int?[] input_shape = null)
+        /// <summary>
+        ///   Creates the layer from an activation function instance. All arguments are passed to the base
+        ///   constructor together with rank 2; afterwards the input spec is set to one InputSpec with ndim 4.
+        /// </summary>
+        public Conv2D(int filters,
+                 int[] kernel_size = null,
+                 int[] strides = null,
+                 PaddingType padding = PaddingType.Valid,
+                 DataFormatType? data_format = null,
+                 int[] dilation_rate = null,
+                 IActivationFunction activation = null,
+                 bool use_bias = true,
+                 IWeightInitializer kernel_initializer = null,
+                 IWeightInitializer bias_initializer = null,
+                 IWeightRegularizer kernel_regularizer = null,
+                 IWeightRegularizer bias_regularizer = null,
+                 IWeightRegularizer activity_regularizer = null,
+                 IWeightConstraint kernel_constraint = null,
+                 IWeightConstraint bias_constraint = null,
+                 int?[] input_shape = null)
+         : base(rank: 2,
+            filters: filters,
+            kernel_size: kernel_size,
+            strides: strides,
+            padding: padding,
+            data_format: data_format,
+            dilation_rate: dilation_rate,
+            activation: activation,
+            use_bias: use_bias,
+            kernel_initializer: kernel_initializer,
+            bias_initializer: bias_initializer,
+            kernel_regularizer: kernel_regularizer,
+            bias_regularizer: bias_regularizer,
+            activity_regularizer: activity_regularizer,
+            kernel_constraint: kernel_constraint,
+            bias_constraint: bias_constraint,
+            input_shape: input_shape)
         {
-            throw new NotImplementedException();
+            this.input_spec = new List<InputSpec> { new InputSpec(ndim: 4) };
         }
+
+        // NOTE(review): this overload and the one above differ only in the type of 'activation', and both default it to null, so a call that omits 'activation' (or passes a bare null) looks ambiguous (CS0121) - confirm.
+        /// <summary>
+        ///   Creates the layer from an activation given by name. The name is converted with
+        ///   <c>Activation.Create</c> and every argument is forwarded to the IActivationFunction overload.
+        /// </summary>
+        public Conv2D(int filters,
+         int[] kernel_size = null,
+         int[] strides = null,
+         PaddingType padding = PaddingType.Valid,
+         DataFormatType? data_format = null,
+         int[] dilation_rate = null,
+         string activation = null,
+         bool use_bias = true,
+         IWeightInitializer kernel_initializer = null,
+         IWeightInitializer bias_initializer = null,
+         IWeightRegularizer kernel_regularizer = null,
+         IWeightRegularizer bias_regularizer = null,
+         IWeightRegularizer activity_regularizer = null,
+         IWeightConstraint kernel_constraint = null,
+         IWeightConstraint bias_constraint = null,
+         int?[] input_shape = null)
+     : this(filters: filters,
+        kernel_size: kernel_size,
+        strides: strides,
+        padding: padding,
+        data_format: data_format,
+        dilation_rate: dilation_rate,
+        activation: Activation.Create(activation),
+        use_bias: use_bias,
+        kernel_initializer: kernel_initializer,
+        bias_initializer: bias_initializer,
+        kernel_regularizer: kernel_regularizer,
+        bias_regularizer: bias_regularizer,
+        activity_regularizer: activity_regularizer,
+        kernel_constraint: kernel_constraint,
+        bias_constraint: bias_constraint,
+        input_shape: input_shape)
+        {
+        }
     }
 }
diff --git a/Sources/Layers/Core/Dense.cs b/Sources/Layers/Core/Dense.cs
index 36520b9..83387de 100644
--- a/Sources/Layers/Core/Dense.cs
+++ b/Sources/Layers/Core/Dense.cs
@@ -157,7 +157,7 @@ protected override void build(List<int?[]> input_shape)
 
             int input_dim = Matrix.Get(input_shape[0], -1).Value;
 
-            this.kernel = add_weight(shape: new int?[] { input_dim, this.units },
+            this.kernel = add_weight(shape: new int[] { input_dim, this.units },
                 initializer: this.kernel_initializer,
                 regularizer: this.kernel_regularizer,
                 constraint: this.kernel_constraint,
@@ -165,7 +165,7 @@ protected override void build(List<int?[]> input_shape)
 
             if (this.use_bias)
             {
-                this.bias = base.add_weight(shape: new int?[] { this.units },
+                this.bias = base.add_weight(shape: new int[] { this.units },
                     name: "bias",
                     initializer: bias_initializer,
                     regularizer: bias_regularizer,
diff --git a/Sources/Layers/Core/Dropout.cs b/Sources/Layers/Core/Dropout.cs
index 8e5e741..29be283 100644
--- a/Sources/Layers/Core/Dropout.cs
+++ b/Sources/Layers/Core/Dropout.cs
@@ -82,8 +82,10 @@ protected override Tensor InnerCall(Tensor inputs, Tensor mask, bool? training =
             if (0.0 < this.rate && this.rate < 1.0)
             {
                 var noise_shape = this._get_noise_shape(inputs);
-                Func<Tensor> dropped_inputs = () => K.dropout(inputs, this.rate, noise_shape, seed: this.seed);
-                return K.in_train_phase(dropped_inputs, () => inputs, training: training);
+                return K.in_train_phase(
+                    () => K.dropout(inputs, this.rate, noise_shape, seed: this.seed),
+                    () => inputs, 
+                    training: training);
             }
 
             return inputs;
diff --git a/Sources/Layers/Core/Flatten.cs b/Sources/Layers/Core/Flatten.cs
index f05ce54..cfe2cdc 100644
--- a/Sources/Layers/Core/Flatten.cs
+++ b/Sources/Layers/Core/Flatten.cs
@@ -31,7 +31,7 @@ namespace KerasSharp
     using System.Linq;
     using System.Text;
     using System.Threading.Tasks;
-    
+
     using System.Runtime.Serialization;
     using KerasSharp.Constraints;
     using KerasSharp.Regularizers;
@@ -41,10 +41,33 @@ namespace KerasSharp
 
     using static KerasSharp.Backends.Current;
 
-
+    /// <summary>
+    ///   Flattens the input. Does not affect the batch size.
+    /// </summary>
+    /// <seealso cref="KerasSharp.Engine.Topology.Layer" />
     [DataContract]
     public class Flatten : Layer
     {
+        /// <summary>Flattens <paramref name="inputs"/> with <c>K.batch_flatten</c>; the mask and training arguments are not used.</summary>
+        protected override Tensor InnerCall(Tensor inputs, Tensor mask = null, bool? training = null)
+        {
+            return K.batch_flatten(inputs);
+        }
+
+        /// <summary>Returns one shape: the first input dimension followed by the product of all remaining dimensions.</summary>
+        public override List<int?[]> compute_output_shape(List<int?[]> input_shapes)
+        {
+            // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/layers/core.py#L473
+            if (input_shapes.Count != 1)
+                throw new Exception("Expected a single input.");
+
+            // Slice off the first (batch) dimension before reading .Value: it may be null, the remaining ones may not.
+            int?[] features = input_shapes[0].Get(1, 0);
+            if (!features.All(x => x > 0))
+                throw new Exception($"The shape of the input to 'Flatten' is not fully defined (got [{string.Join(", ", features.Select(x => x?.ToString() ?? "null"))}]). " +
+                    "Make sure to pass a complete 'input_shape' or 'batch_input_shape' argument to the first layer in your model.");
 
+            return new List<int?[]> { new int?[] { input_shapes[0][0], Matrix.Product(features.Select(x => x.Value).ToArray()) } };
+        }
     }
 }
diff --git a/Sources/Utils/ConvUtils.cs b/Sources/Utils/ConvUtils.cs
new file mode 100644
index 0000000..9d13870
--- /dev/null
+++ b/Sources/Utils/ConvUtils.cs
@@ -0,0 +1,98 @@
+﻿// Keras-Sharp: C# port of the Keras library
+// https://github.com/cesarsouza/keras-sharp
+//
+// Based under the Keras library for Python. See LICENSE text for more details.
+//
+//    The MIT License(MIT)
+//    
+//    Permission is hereby granted, free of charge, to any person obtaining a copy
+//    of this software and associated documentation files (the "Software"), to deal
+//    in the Software without restriction, including without limitation the rights
+//    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+//    copies of the Software, and to permit persons to whom the Software is
+//    furnished to do so, subject to the following conditions:
+//    
+//    The above copyright notice and this permission notice shall be included in all
+//    copies or substantial portions of the Software.
+//    
+//    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+//    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+//    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+//    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+//    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+//    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+//    SOFTWARE.
+//
+
+namespace KerasSharp
+{
+    using System;
+    using static Python;
+    using Accord.Math;
+
+    using static KerasSharp.Backends.Current;
+    public class conv_utils
+    {
+        /// <summary>
+        ///   Transforms a single int into an int array by repeating it <paramref name="n"/> times.
+        /// </summary>
+        /// <param name="value">The value to repeat.</param>
+        /// <param name="n">The size of the array to be returned.</param>
+        /// <param name="name">The name of the argument being normalized, e.g. "strides" or "kernel_size". Not used by this overload.</param>
+        /// <returns>An array created with size <paramref name="n"/> and fill value <paramref name="value"/>.</returns>
+        internal int[] normalize_tuple(int value, int n, string name)
+        {
+            return Vector.Create<int>(size: n, value: value);
+        }
+
+        /// <summary>
+        ///   Validates that an int array has exactly <paramref name="n"/> elements and returns it unchanged.
+        /// </summary>
+        /// <param name="value_tuple">The array to validate.</param>
+        /// <param name="n">The required number of elements.</param>
+        /// <param name="name">The name of the argument being validated, e.g. "strides" or "kernel_size". This is only used to format error messages.</param>
+        /// <returns>The <paramref name="value_tuple"/> instance that was passed in.</returns>
+        internal int[] normalize_tuple(int[] value_tuple, int n, string name)
+        {
+            // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/utils/conv_utils.py#L23
+            if (len(value_tuple) != n)
+                throw new Exception($"The {name} argument must be a tuple of {n} integers. Received: ({string.Join(", ", value_tuple)})");
+            return value_tuple;
+        }
+
+        /// <summary>
+        ///   Returns <paramref name="value"/>, or the result of <c>K.image_data_format()</c> when it is null.
+        /// </summary>
+        /// <returns>The resolved data format, typed as <see cref="object"/>.</returns>
+        internal object normalize_data_format(DataFormatType? value)
+        {
+            // https://github.com/fchollet/keras/blob/f65a56fb65062c8d14d215c9f4b1015b97cc5bf3/keras/utils/conv_utils.py#L46
+            if (value == null)
+                value = K.image_data_format();
+            return value;
+        }
+
+        /// <summary>
+        ///   Determines output length of a convolution given input length.
+        /// </summary>
+        /// <param name="padding">Same and Causal keep the input length; Valid subtracts (dilated filter size - 1) from it; Full adds that amount.</param>
+        /// <returns>The padded length divided by <paramref name="stride"/>, rounded up; null when <paramref name="input_length"/> is null.</returns>
+        public static int? conv_output_length(int? input_length, int filter_size, PaddingType padding, int stride, int dilation = 1)
+        {
+            if (input_length == null)
+                return null;
+            int dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1);
+            int output_length;
+            if (padding == PaddingType.Same || padding == PaddingType.Causal)
+                output_length = input_length.Value;
+            else if (padding == PaddingType.Valid)
+                output_length = input_length.Value - dilated_filter_size + 1;
+            else if (padding == PaddingType.Full)
+                output_length = input_length.Value + dilated_filter_size - 1;
+            else
+                throw new Exception($"Unsupported padding type: {padding}.");
+            // Ceiling division: the Python original is "(output_length + stride - 1) // stride", where "//" is floor division.
+            return (output_length + stride - 1) / stride;
+        }
+    }
+}