From d8f7a06dfcc592773521732dbad418981826228c Mon Sep 17 00:00:00 2001 From: chavicoski Date: Thu, 2 Dec 2021 16:51:49 +0100 Subject: [PATCH 01/35] Refactored the implementation of the ONNX functions for distributed training --- examples/onnx/4_onnx_test_gradients.cpp | 304 +++++++++--------- .../eddl/serialization/onnx/import_helpers.h | 4 + .../onnx/layers/conv/conv_onnx.h | 8 + .../onnx/layers/core/dense_onnx.h | 8 + .../serialization/onnx/layers/layers_onnx.h | 8 + .../onnx/net/eddl_onnx_import.cpp | 217 +------------ src/serialization/onnx/net/import_helpers.cpp | 60 ++++ .../onnx/net/layers/conv/conv_onnx.cpp | 20 ++ .../onnx/net/layers/core/dense_onnx.cpp | 20 ++ .../onnx/net/layers/layers_onnx.cpp | 38 +++ 10 files changed, 330 insertions(+), 357 deletions(-) diff --git a/examples/onnx/4_onnx_test_gradients.cpp b/examples/onnx/4_onnx_test_gradients.cpp index 013f1c458..1e0e66351 100644 --- a/examples/onnx/4_onnx_test_gradients.cpp +++ b/examples/onnx/4_onnx_test_gradients.cpp @@ -1,16 +1,14 @@ /* -* EDDL Library - European Distributed Deep Learning Library. -* Version: 1.0 -* copyright (c) 2021, Universitat Politècnica de València (UPV), PRHLT Research Centre -* Date: November 2021 -* Author: PRHLT Research Centre, UPV, (rparedes@prhlt.upv.es), (jon@prhlt.upv.es) -* All rights reserved -*/ + * EDDL Library - European Distributed Deep Learning Library. + * Version: 1.0 + * copyright (c) 2021, Universitat Politècnica de València (UPV), PRHLT Research + * Centre Date: November 2021 Author: PRHLT Research Centre, UPV, + * (rparedes@prhlt.upv.es), (jon@prhlt.upv.es) All rights reserved + */ +#include #include #include -#include - #include "eddl/apis/eddl.h" @@ -18,147 +16,149 @@ using namespace eddl; -////////////////////////////////// -// mnist_mlp.cpp: -// A very basic MLP for mnist -// Using fit for training -////////////////////////////////// - - -int main(int argc, char **argv) { - - // Download mnist - download_mnist(); - - // Settings - int epochs = 1; - int batch_size = 100; - int num_classes = 10; - CompServ* export_CS = CS_GPU({1}); - CompServ* import_CS = CS_GPU({1}); - - // Define network - layer in = Input({784}); - layer l = in; // Aux var - - l=Reshape(l,{1,28, 28}); - l=ReLu(Conv(l,32,{3,3},{1,1})); - //l=BatchNormalization(l, true); - l=MaxPool(l,{2,2}); - l=ReLu(Conv(l,32,{3,3},{1,1})); - //l=BatchNormalization(l, true); - l=MaxPool(l,{2,2}); - - l=Reshape(l,{-1}); - - layer out = Activation(Dense(l, num_classes), "softmax"); - - cout << "Creating model" << endl; - model net = Model({in}, {out}); - cout << "Model created" << endl; - - // Build model - cout << "Building the model" << endl; - build(net, - rmsprop(0.01), // Optimizer - {"soft_cross_entropy"}, // Losses - {"categorical_accuracy"}, // Metrics - export_CS, // Computing service - true // Enable parameters initialization - ); - cout << "Model is correctly built" << endl; - - cout << "Enabling distributed training" << endl; - net->enable_distributed(); - cout << "Distributed training enabled" << endl; - - // Export the net before training - void* serialized_net; - cout << "Serializing net (without training) to pointer" << endl; - size_t model_size = serialize_net_to_onnx_pointer(net, serialized_net, false); - cout << "Net serialized to pointer" << endl; - - // View model - summary(net); - - // Load dataset - Tensor* x_train = Tensor::load("mnist_trX.bin"); - Tensor* y_train = Tensor::load("mnist_trY.bin"); - Tensor* x_test = Tensor::load("mnist_tsX.bin"); - Tensor* y_test = Tensor::load("mnist_tsY.bin"); - - // 
Preprocessing - x_train->div_(255.0f); - x_test->div_(255.0f); - - // Train model - cout << "Training the first model" << endl; - fit(net, {x_train}, {y_train}, batch_size, epochs); - - // Evaluate - cout << "Evaluating the first model" << endl; - evaluate(net, {x_test}, {y_test}); - - // Export gradients - void* serialized_gradients; - string path("mnist.onnx"); - cout << "Exporting gradients" << endl; - size_t gradients_size = serialize_net_to_onnx_pointer(net, serialized_gradients, true); - cout << "Gradients exported" << endl; - - // Export trained model - void * serialized_net_once_trained; - cout << "Exporting trained weights" << endl; - size_t snot_size = serialize_net_to_onnx_pointer(net, serialized_net_once_trained, false); - cout << "Trained weights exported" << endl; - - // Reset the counter of the layers index - LConv::reset_name_counter(); - LDense::reset_name_counter(); - - // Import net topology without trained weights - cout << "Importing original net topology (without training)" << endl; - Net* imported_net = import_net_from_onnx_pointer(serialized_net, model_size); - cout << "Untrained net imported" << endl; - - // Build model - cout << "Building the loaded topology" << endl; - build(imported_net, - rmsprop(0.01), // Optimizer - {"soft_cross_entropy"}, // Losses - {"categorical_accuracy"}, // Metrics - import_CS, // Computing service - false // Disable parameters initialization - ); - cout << "Model is correctly built" << endl; - - // Resize the net to the desired batch size - imported_net->resize(batch_size); - - // View loaded model - summary(imported_net); - - // Evaluate with untrained model - cout << "Evaluating test with the untrained weights" << endl; - evaluate(imported_net, {x_test}, {y_test}); - - // Apply grads - cout << "Applying grads from training" << endl; - apply_grads_from_onnx_pointer(imported_net, serialized_gradients, gradients_size); - cout << "Grads applied" << endl; - - // Evaluate net with accumulated gradients applied - cout << "Evaluating test after applying gradients" << endl; - evaluate(imported_net, {x_test}, {y_test}); - - // Set trained weights - cout << "Putting the trained weights" << endl; - set_weights_from_onnx_pointer(imported_net, serialized_net_once_trained, snot_size); - cout << "Trained weights set" << endl; - - // Evaluate with trained weights - cout << "Evaluating test after putting the trained weights" << endl; - evaluate(imported_net, {x_test}, {y_test}); - - return 0; +/////////////////////////////////////////// +// 4_onnx_test_gradients.cpp: +// An example on how to use the functions +// for exporting weights and gradients +// using the ONNX format +/////////////////////////////////////////// + +int main(int argc, char **argv) { + // Read arguments + bool export_cpu = false; + bool import_cpu = false; + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "--export-cpu") == 0) export_cpu = true; + else if (strcmp(argv[i], "--import-cpu") == 0) import_cpu = true; + } + + // Download mnist + download_mnist(); + + // Settings + int epochs = 1; + int batch_size = 100; + int num_classes = 10; + CompServ *export_CS = export_cpu ? CS_CPU() : CS_GPU({1}); + CompServ *import_CS = import_cpu ? 
CS_CPU() : CS_GPU({1}); + + // Define network + layer in = Input({784}); + layer l = in; // Aux var + + l = Reshape(l, {1, 28, 28}); + l = ReLu(Conv(l, 32, {3, 3}, {1, 1}, "same", false)); + l = MaxPool(l, {2, 2}); + l = ReLu(Conv(l, 32, {3, 3}, {1, 1})); + l = MaxPool(l, {2, 2}); + + l = Reshape(l, {-1}); + l = Dense(l, 128, false); + layer out = Activation(Dense(l, num_classes), "softmax"); + + cout << "Creating model" << endl; + model net = Model({in}, {out}); + cout << "Model created" << endl; + + // Build model + cout << "Building the model" << endl; + build(net, + rmsprop(0.01), // Optimizer + {"soft_cross_entropy"}, // Losses + {"categorical_accuracy"}, // Metrics + export_CS, // Computing service + true // Enable parameters initialization + ); + cout << "Model is correctly built" << endl; + + cout << "Enabling distributed training" << endl; + net->enable_distributed(); + cout << "Distributed training enabled" << endl; + + // Export the net before training + void *serialized_net; + cout << "Serializing net (without training) to pointer" << endl; + size_t model_size = serialize_net_to_onnx_pointer(net, serialized_net, false); + cout << "Net serialized to pointer" << endl; + + // View model + summary(net); + + // Load dataset + Tensor *x_train = Tensor::load("mnist_trX.bin"); + Tensor *y_train = Tensor::load("mnist_trY.bin"); + Tensor *x_test = Tensor::load("mnist_tsX.bin"); + Tensor *y_test = Tensor::load("mnist_tsY.bin"); + + // Preprocessing + x_train->div_(255.0f); + x_test->div_(255.0f); + + // Train model + cout << "Training the first model" << endl; + fit(net, {x_train}, {y_train}, batch_size, epochs); + + // Evaluate + cout << "Evaluating the first model" << endl; + evaluate(net, {x_test}, {y_test}, batch_size); + + // Export gradients + void *serialized_gradients; + string path("mnist.onnx"); + cout << "Exporting gradients" << endl; + size_t gradients_size = serialize_net_to_onnx_pointer(net, serialized_gradients, true); + cout << "Gradients exported" << endl; + + // Export trained model + void *serialized_net_once_trained; + cout << "Exporting trained weights" << endl; + size_t snot_size = serialize_net_to_onnx_pointer(net, serialized_net_once_trained, false); + cout << "Trained weights exported" << endl; + + // Reset the counter of the layers index + LConv::reset_name_counter(); + LDense::reset_name_counter(); + + // Import net topology without trained weights + cout << "Importing original net topology (without training)" << endl; + Net *imported_net = import_net_from_onnx_pointer(serialized_net, model_size); + cout << "Untrained net imported" << endl; + + // Build model + cout << "Building the loaded topology" << endl; + build(imported_net, + rmsprop(0.01), // Optimizer + {"soft_cross_entropy"}, // Losses + {"categorical_accuracy"}, // Metrics + import_CS, // Computing service + false // Disable parameters initialization + ); + cout << "Model is correctly built" << endl; + + // View loaded model + summary(imported_net); + + // Evaluate with untrained model + cout << "Evaluating test with the untrained weights" << endl; + evaluate(imported_net, {x_test}, {y_test}, batch_size); + + // Apply grads + cout << "Applying grads from training" << endl; + apply_grads_from_onnx_pointer(imported_net, serialized_gradients, gradients_size); + cout << "Grads applied" << endl; + + // Evaluate net with accumulated gradients applied + cout << "Evaluating test after applying gradients" << endl; + evaluate(imported_net, {x_test}, {y_test}, batch_size); + + // Set trained weights + cout << 
"Putting the trained weights" << endl; + set_weights_from_onnx_pointer(imported_net, serialized_net_once_trained, snot_size); + cout << "Trained weights set" << endl; + + // Evaluate with trained weights + cout << "Evaluating test after putting the trained weights" << endl; + evaluate(imported_net, {x_test}, {y_test}, batch_size); + + return 0; } diff --git a/include/eddl/serialization/onnx/import_helpers.h b/include/eddl/serialization/onnx/import_helpers.h index a298de4f7..7462dfcaa 100644 --- a/include/eddl/serialization/onnx/import_helpers.h +++ b/include/eddl/serialization/onnx/import_helpers.h @@ -76,5 +76,9 @@ void process_node_queue(queue &nodeQueue, Net *build_net_onnx(onnx::ModelProto model, vector input_shape, int mem, LOG_LEVEL log_level); +void set_weights_from_model_proto(Net *net, onnx::ModelProto model_proto); + +void apply_grads_from_model_proto(Net *net, onnx::ModelProto model_proto); + #endif // EDDL_IMPORT_HELPERS_H #endif // cPROTO diff --git a/include/eddl/serialization/onnx/layers/conv/conv_onnx.h b/include/eddl/serialization/onnx/layers/conv/conv_onnx.h index 0146bb91f..0b3962e8e 100644 --- a/include/eddl/serialization/onnx/layers/conv/conv_onnx.h +++ b/include/eddl/serialization/onnx/layers/conv/conv_onnx.h @@ -27,5 +27,13 @@ Layer* build_conv_layer(onnx::NodeProto *node, // OPSET: 11, 1 void build_conv_node(LConv *layer, onnx::GraphProto *graph, bool gradients); +/* + * DISTRIBUTED TRAINING + */ + +void update_conv_weights(LConv *layer, vector weights); + +void apply_grads_to_conv(LConv *layer, vector grads); + #endif // EDDL_CONV_ONNX_H #endif // cPROTO diff --git a/include/eddl/serialization/onnx/layers/core/dense_onnx.h b/include/eddl/serialization/onnx/layers/core/dense_onnx.h index 05b011919..f8b64c6ad 100644 --- a/include/eddl/serialization/onnx/layers/core/dense_onnx.h +++ b/include/eddl/serialization/onnx/layers/core/dense_onnx.h @@ -30,5 +30,13 @@ void build_gemm_node(LDense *layer, onnx::GraphProto *graph, bool gradients); // followed by an additon in case of using bias void build_dense_with_matmul_node(LDense *layer, onnx::GraphProto *graph, bool gradients); +/* + * DISTRIBUTED TRAINING + */ + +void update_dense_weights(LDense *layer, vector weights); + +void apply_grads_to_dense(LDense *layer, vector grads); + #endif // EDDL_DENSE_ONNX_H #endif // cPROTO diff --git a/include/eddl/serialization/onnx/layers/layers_onnx.h b/include/eddl/serialization/onnx/layers/layers_onnx.h index 6c4cacc0e..25a53910b 100644 --- a/include/eddl/serialization/onnx/layers/layers_onnx.h +++ b/include/eddl/serialization/onnx/layers/layers_onnx.h @@ -95,5 +95,13 @@ Layer* build_layer_from_node(onnx::NodeProto *node, void build_node_from_layer(Layer *layer, onnx::GraphProto *graph, bool gradients, bool is_recurrent); +/* + * DISTRIBUTED TRAINING + */ + +void update_layer_weights(Layer *layer, vector weights); + +void apply_grads_to_layer(Layer *layer, vector grads); + #endif // EDDL_LAYERS_ONNX_H #endif // cPROTO diff --git a/src/serialization/onnx/net/eddl_onnx_import.cpp b/src/serialization/onnx/net/eddl_onnx_import.cpp index f42ca724e..8a70cf442 100644 --- a/src/serialization/onnx/net/eddl_onnx_import.cpp +++ b/src/serialization/onnx/net/eddl_onnx_import.cpp @@ -8,9 +8,6 @@ #include "eddl/serialization/onnx/utils_onnx.h" #include "eddl/serialization/onnx/import_helpers.h" -#include "eddl/layers/core/layer_core.h" -#include "eddl/layers/conv/layer_conv.h" - #if defined(cPROTO) #include "eddl/serialization/onnx/onnx.pb.h" #endif @@ -102,231 +99,41 @@ Net 
*import_net_from_onnx_string(string *model_string, int mem) // Sets the weights of a input Net to the ones stored in the onnx net inside the pointer void set_weights_from_onnx_pointer(Net *net, void *ptr_model, size_t model_size) { - // TODO: Change implementation and use a generic one changing the params vectors - onnx::ModelProto model; - if (!model.ParseFromArray(ptr_model, model_size)) + onnx::ModelProto model_proto; + if (!model_proto.ParseFromArray(ptr_model, model_size)) cerr << "Failed to parse model." << endl; - map> tensors = get_tensors_from_onnx(model); - LConv *conv; - LDense *dense; - for (Layer *l : net->layers) - { - if (!tensors.count(l->name)) - { - //cout << "Layer with name " << l->name << " is not trainable " << endl; - continue; - } - vector layer_tensors = tensors[l->name]; - if ((conv = dynamic_cast(l))) - { - if (layer_tensors.size() > 1) - conv->update_weights(layer_tensors[0], layer_tensors[1]); - else - { - cerr << "EDDL has not implemented convolutional without bias " << endl; - //conv.update_weights(layer_tensors[0]); - } - } - else if ((dense = dynamic_cast(l))) - { - if (layer_tensors.size() > 1) - dense->update_weights(layer_tensors[0], layer_tensors[1]); - else - dense->update_weights(layer_tensors[0]); - } - else - cerr << "not implemented layer type" << endl; - } - - // copy the new weights to devices - share_weights(net); - - // erase the map we used to free the memory - map>::iterator it; - vector delete_tensors; - for (it = tensors.begin(); it != tensors.end(); ++it) - { - delete_tensors = it->second; - for (int i = 0; i < delete_tensors.size(); ++i) - { - delete delete_tensors[i]; - } - } + set_weights_from_model_proto(net, model_proto); } // Sets the weights of a input Net to the ones stored in the onnx net inside the c++ string void set_weights_from_onnx(Net *net, std::string *model_string) { - // TODO: Change implementation and use a generic one changing the params vectors - onnx::ModelProto model; - if (!model.ParseFromString(*model_string)) - { + onnx::ModelProto model_proto; + if (!model_proto.ParseFromString(*model_string)) cerr << "Failed to parse model." 
<< endl; - } - map> tensors = get_tensors_from_onnx(model); - LConv *conv; - LDense *dense; - for (Layer *l : net->layers) - { - if (!tensors.count(l->name)) - { - //cout << "Layer with name " << l->name << " is not trainable " << endl; - continue; - } - vector layer_tensors = tensors[l->name]; - if ((conv = dynamic_cast(l))) - { - if (layer_tensors.size() > 1) - conv->update_weights(layer_tensors[0], layer_tensors[1]); - else - { - cerr << "EDDL has not implemented convolutional without bias " << endl; - //conv.update_weights(layer_tensors[0]); - } - } - else if ((dense = dynamic_cast(l))) - { - if (layer_tensors.size() > 1) - dense->update_weights(layer_tensors[0], layer_tensors[1]); - else - dense->update_weights(layer_tensors[0]); - } - else - cerr << "not implemented layer type" << endl; - } - - // copy the new weights to devices - share_weights(net); - - // erase the map we used to free the memory - map>::iterator it; - vector delete_tensors; - for (it = tensors.begin(); it != tensors.end(); ++it) - { - delete_tensors = it->second; - for (int i = 0; i < delete_tensors.size(); ++i) - { - delete delete_tensors[i]; - } - } + set_weights_from_model_proto(net, model_proto); } // Accumulates the gradients stored in the pointer to the input net void apply_grads_from_onnx_pointer(Net *net, void *ptr_onnx, size_t count) { - // TODO: Change implementation and use a generic one adding the grads to the params vectors - onnx::ModelProto model; - if (!model.ParseFromArray(ptr_onnx, count)) - { + onnx::ModelProto model_proto; + if (!model_proto.ParseFromArray(ptr_onnx, count)) cerr << "Failed to parse model." << endl; - } - map> tensors = get_tensors_from_onnx(model); - LConv *conv; - LDense *dense; - for (Layer *l : net->layers) - { - if (!tensors.count(l->name)) - { - continue; - } - vector layer_tensors = tensors[l->name]; - if ((conv = dynamic_cast(l))) - { - if (layer_tensors.size() > 1) - { - conv->accumulate_accumulated_gradients(layer_tensors[0], layer_tensors[1]); - } - else - { - cerr << "EDDL has not implemented convolutional without bias." << endl; - //conv.update_weights(layer_tensors[0]); - } - } - else if ((dense = dynamic_cast(l))) - { - if (layer_tensors.size() > 1) - { - dense->accumulate_accumulated_gradients(layer_tensors[0], layer_tensors[1]); - } - else - { - dense->accumulate_accumulated_gradients(layer_tensors[0]); - } - } - else - cerr << "not implemented layer type" << endl; - } - // erase the map we used to free the memory - map>::iterator it; - vector delete_tensors; - for (it = tensors.begin(); it != tensors.end(); ++it) - { - delete_tensors = it->second; - for (int i = 0; i < delete_tensors.size(); ++i) - { - delete delete_tensors[i]; - } - } - - // copy the new weights to devices - share_weights(net); + apply_grads_from_model_proto(net, model_proto); } // Accumulates the gradients stored in the c++ string to the input net void apply_grads_from_onnx(Net *net, std::string *model_string) { - // TODO: Change implementation and use a generic one adding the grads to the params vectors - onnx::ModelProto model; - if (!model.ParseFromString(*model_string)) - { + onnx::ModelProto model_proto; + if (!model_proto.ParseFromString(*model_string)) cerr << "Failed to parse model." 
<< endl; - } - - map> tensors = get_tensors_from_onnx(model); - LConv *conv; - LDense *dense; - for (Layer *l : net->layers) - { - if (!tensors.count(l->name)) - continue; - vector layer_tensors = tensors[l->name]; - if ((conv = dynamic_cast(l))) - { - if (layer_tensors.size() > 1) - conv->accumulate_accumulated_gradients(layer_tensors[0], layer_tensors[1]); - else - { - cerr << "EDDL has not implemented convolutional without bias " << endl; - //conv.update_weights(layer_tensors[0]); - } - } - else if ((dense = dynamic_cast(l))) - { - if (layer_tensors.size() > 1) - dense->accumulate_accumulated_gradients(layer_tensors[0], layer_tensors[1]); - else - dense->accumulate_accumulated_gradients(layer_tensors[0]); - } - else - cerr << "not implemented layer type" << endl; - } - // erase the map we used to free the memory - map>::iterator it; - vector delete_tensors; - for (it = tensors.begin(); it != tensors.end(); ++it) - { - delete_tensors = it->second; - for (int i = 0; i < delete_tensors.size(); ++i) - { - delete delete_tensors[i]; - } - } - // copy the new weights to devices - share_weights(net); + apply_grads_from_model_proto(net, model_proto); } #else // If protobuf is not enabled diff --git a/src/serialization/onnx/net/import_helpers.cpp b/src/serialization/onnx/net/import_helpers.cpp index 2d5bcbdf6..993a91c92 100644 --- a/src/serialization/onnx/net/import_helpers.cpp +++ b/src/serialization/onnx/net/import_helpers.cpp @@ -680,4 +680,64 @@ Net *build_net_onnx(onnx::ModelProto model, vector input_shape, int mem, LO return imported_net; } +void set_weights_from_model_proto(Net *net, onnx::ModelProto model_proto) +{ + map> tensors = get_tensors_from_onnx(model_proto); + for (Layer *l : net->layers) + { + // Check if we have tensors with weights for the current layer + if (!tensors.count(l->name)) + continue; + + // Get the layer weights + vector layer_tensors = tensors[l->name]; + + // Apply the new weights + update_layer_weights(l, layer_tensors); + } + + // Copy the new weights to devices + share_weights(net); + + // Erase the map we used to free the memory + map>::iterator it; + vector delete_tensors; + for (it = tensors.begin(); it != tensors.end(); ++it) + { + delete_tensors = it->second; + for (int i = 0; i < delete_tensors.size(); ++i) + delete delete_tensors[i]; + } +} + +void apply_grads_from_model_proto(Net *net, onnx::ModelProto model_proto) +{ + map> tensors = get_tensors_from_onnx(model_proto); + for (Layer *l : net->layers) + { + // Check if we have tensors with gradients for the current layer + if (!tensors.count(l->name)) + continue; + + // Get the layer gradients + vector layer_tensors = tensors[l->name]; + + // Apply the gradients + apply_grads_to_layer(l, layer_tensors); + } + + // Erase the map we used to free the memory + map>::iterator it; + vector delete_tensors; + for (it = tensors.begin(); it != tensors.end(); ++it) + { + delete_tensors = it->second; + for (int i = 0; i < delete_tensors.size(); ++i) + delete delete_tensors[i]; + } + + // Copy the new weights to devices + share_weights(net); +} + #endif // defined(cPROTO) diff --git a/src/serialization/onnx/net/layers/conv/conv_onnx.cpp b/src/serialization/onnx/net/layers/conv/conv_onnx.cpp index 01a153bfa..bfd89f57e 100644 --- a/src/serialization/onnx/net/layers/conv/conv_onnx.cpp +++ b/src/serialization/onnx/net/layers/conv/conv_onnx.cpp @@ -321,4 +321,24 @@ void build_conv_node(LConv *layer, onnx::GraphProto *graph, bool gradients) } } +/* + * DISTRIBUTED TRAINING + */ + +void update_conv_weights(LConv *layer, 
vector weights) +{ + if (weights.size() > 1) + layer->update_weights(weights[0], weights[1]); + else + layer->update_weights(weights[0]); +} + +void apply_grads_to_conv(LConv *layer, vector grads) +{ + if (grads.size() > 1) + layer->accumulate_accumulated_gradients(grads[0], grads[1]); + else + layer->accumulate_accumulated_gradients(grads[0]); +} + #endif // defined(cPROTO) diff --git a/src/serialization/onnx/net/layers/core/dense_onnx.cpp b/src/serialization/onnx/net/layers/core/dense_onnx.cpp index 5847bdd4d..cbd9c8636 100644 --- a/src/serialization/onnx/net/layers/core/dense_onnx.cpp +++ b/src/serialization/onnx/net/layers/core/dense_onnx.cpp @@ -236,4 +236,24 @@ void build_dense_with_matmul_node(LDense *layer, onnx::GraphProto *graph, bool g } } +/* + * DISTRIBUTED TRAINING + */ + +void update_dense_weights(LDense *layer, vector weights) +{ + if (weights.size() > 1) + layer->update_weights(weights[0], weights[1]); + else + layer->update_weights(weights[0]); +} + +void apply_grads_to_dense(LDense *layer, vector grads) +{ + if (grads.size() > 1) + layer->accumulate_accumulated_gradients(grads[0], grads[1]); + else + layer->accumulate_accumulated_gradients(grads[0]); +} + #endif // defined(cPROTO) diff --git a/src/serialization/onnx/net/layers/layers_onnx.cpp b/src/serialization/onnx/net/layers/layers_onnx.cpp index 576b17b73..573f4004d 100644 --- a/src/serialization/onnx/net/layers/layers_onnx.cpp +++ b/src/serialization/onnx/net/layers/layers_onnx.cpp @@ -501,4 +501,42 @@ void build_node_from_layer(Layer *layer, onnx::GraphProto *graph, bool gradients } } +/* + * DISTRIBUTED TRAINING + */ + +void update_layer_weights(Layer *layer, vector weights) +{ + if (weights.size() == 0) + { + cerr << "[ONNX::WARNING] Trying to update the weights of the layer \"" + << layer->name << "\" with an empty list of tensors." << endl; + return; + } + + if (LConv *l = dynamic_cast(layer)) + update_conv_weights(l, weights); + else if (LDense *l = dynamic_cast(layer)) + update_dense_weights(l, weights); + else + cerr << "The layer " << l->name << " has no support for setting weights" << endl; +} + +void apply_grads_to_layer(Layer *layer, vector grads) +{ + if (grads.size() == 0) + { + cerr << "[ONNX::WARNING] Trying to apply gradients to the layer \"" + << layer->name << "\" with an empty list of tensors." 
<< endl; + return; + } + + if (LConv *l = dynamic_cast(layer)) + apply_grads_to_conv(l, grads); + else if (LDense *l = dynamic_cast(layer)) + apply_grads_to_dense(l, grads); + else + cerr << "The layer " << l->name << " has no support for applying gradients" << endl; +} + #endif // defined(cPROTO) From 1b73a9920ebac31d84825a633f59b9255f279b9b Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Thu, 2 Dec 2021 19:32:04 +0100 Subject: [PATCH 02/35] Add tile descriptor (tensor) --- include/eddl/descriptors/tensor_descriptors.h | 13 +++ src/descriptors/descriptor_tile.cpp | 85 +++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 src/descriptors/descriptor_tile.cpp diff --git a/include/eddl/descriptors/tensor_descriptors.h b/include/eddl/descriptors/tensor_descriptors.h index 50c470526..57e763bf0 100644 --- a/include/eddl/descriptors/tensor_descriptors.h +++ b/include/eddl/descriptors/tensor_descriptors.h @@ -107,6 +107,19 @@ class RepeatDescriptor : public SelDescriptor { void build_indices() override; }; + +class TileDescriptor : public SelDescriptor { +public: + vector vrepeats; + int elem_repeats = 0; + + TileDescriptor(vector vrepeats, int dev); + + void build(vector ishape) override; + void resize(int b) override; + void build_indices() override; +}; + class ReduceDescriptor2 : public TensorDescriptor { private: diff --git a/src/descriptors/descriptor_tile.cpp b/src/descriptors/descriptor_tile.cpp new file mode 100644 index 000000000..8553bcc2c --- /dev/null +++ b/src/descriptors/descriptor_tile.cpp @@ -0,0 +1,85 @@ + +/* +* EDDL Library - European Distributed Deep Learning Library. +* Version: 1.0 +* copyright (c) 2021, Universitat Politècnica de València (UPV), PRHLT Research Centre +* Date: November 2021 +* Author: PRHLT Research Centre, UPV, (rparedes@prhlt.upv.es), (jon@prhlt.upv.es) +* All rights reserved +*/ + + +#include +#include "eddl/descriptors/tensor_descriptors.h" +#include "eddl/utils.h" + +TileDescriptor::TileDescriptor(vector vrepeats, int dev) : SelDescriptor(dev) { + this->vrepeats = vector (vrepeats); + + // Compute repeats per elements + this->elem_repeats = 1; + for(int i=0; ivrepeats.size(); i++){ this->elem_repeats *= vrepeats[i]; } +} + + +void TileDescriptor::build(vector ishape){ + // Clear shapes + this->ishape.clear(); + this->oshape.clear(); + + // Set input shape + this->ishape = ishape; + + // Set output shape + for(int i=0; iishape.size(); i++){ + oshape.push_back(this->ishape[i] * this->vrepeats[i]); + } + + // Build indices + this->build_indices(); +} + +void TileDescriptor::resize(int b){ + this->build_indices(); +} + +void TileDescriptor::build_indices(){ + // Get struct data + int isize = shape2size(this->ishape); + int osize = shape2size(this->oshape); + vector A_strides = shape2stride(this->ishape); + vector B_strides = shape2stride(this->oshape); + int ndim = this->ishape.size(); + + // Delete previous allocations + this->free_memory(); + + // Reserve memory + this->cpu_addresses = new int[osize]; + + // Compute index translation (output=>input) + for (int A_address=0; A_address A_indices(ndim, 0); + vector B_indices(ndim, 0); + + // Get A Indices + fast_address2indices(A_address, A_indices.data(), this->ishape.data(),A_strides.data(), ndim); + + vector> axis_idxs; + for(int axis=0; axis tmp_axis_idxs; + tmp_axis_idxs.reserve(vrepeats[axis]); + for (int i_rep = 0; i_rep < vrepeats[axis]; i_rep++) { // (2, 2) => 4 + tmp_axis_idxs.push_back(this->ishape[axis]*i_rep + A_indices[axis]); + } + axis_idxs.push_back(tmp_axis_idxs); + } + 
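+        // e.g. (values as in the tile test added later in this series): with ishape={2,1} and
+        // vrepeats={2,2}, the input element at A_indices=(1,0) yields axis_idxs={{1,3},{0,1}};
+        // the cartesian product of these per-axis lists (computed below) enumerates every output
+        // position ((1,0),(1,1),(3,0),(3,1)) whose address must map back to this A_address.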
+ vector> tmp_indices = cartesian_product(axis_idxs); + for(int i=0; i ("; for(int k=0; kcpu_addresses[B_address] = A_address; + } + } +} From c1f6a158035050dceacfba2bb55d50f785815640 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Thu, 2 Dec 2021 19:32:26 +0100 Subject: [PATCH 03/35] Add tile operation (tensor) --- include/eddl/tensor/tensor.h | 13 ++++++++++++- src/tensor/tensor_core.cpp | 29 ++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/include/eddl/tensor/tensor.h b/include/eddl/tensor/tensor.h index e97ce63c8..0da8e86a5 100644 --- a/include/eddl/tensor/tensor.h +++ b/include/eddl/tensor/tensor.h @@ -2933,6 +2933,17 @@ class Tensor { static Tensor* repeat_desc(Tensor* A, const vector& repeats, unsigned int axis=0, Tensor* output=nullptr); static Tensor* repeat_desc(Tensor* A, unsigned int repeats, unsigned int axis=0, Tensor* output=nullptr); + /** + * @brief Construct an array by repeating A the number of times given by reps. + * + * @param A Input tensor. + * @param repeats The number of repetitions of A along each axis. + * @param output Output tensor + * @param derivative Apply derivative for: output = repeat(A) + */ + static Tensor* tile(Tensor* A, const vector& repeats); + + /** * @brief Returns an array with the selected indices of the tensor. * @@ -3173,7 +3184,7 @@ class Tensor { static void fill(Tensor *A, int aini, int aend, Tensor *B, int bini, int bend, int inc); // TODO DEPRECATED static void select(Tensor *A, Tensor *B, vector sind, int ini, int end, bool mask_zeros=false); // TODO DEPRECATED static void deselect(Tensor *A, Tensor *B, vector sind, int ini, int end,int inc=0, bool mask_zeros=false); // TODO DEPRECATED - static void tile(Tensor *A, Tensor *B); + static void tile_deprecated(Tensor *A, Tensor *B); // TODO: REFACTOR!!! 
************************ diff --git a/src/tensor/tensor_core.cpp b/src/tensor/tensor_core.cpp index 4010c8a3f..15e2bd4f8 100644 --- a/src/tensor/tensor_core.cpp +++ b/src/tensor/tensor_core.cpp @@ -737,6 +737,33 @@ Tensor* Tensor::repeat_desc(Tensor* A, unsigned int repeats, unsigned int axis, return Tensor::repeat_desc(A, vrepeats, axis, output); } +Tensor* Tensor::tile(Tensor* A, const vector& repeats){ + // Check dimensions + if(A->ndim != repeats.size()){ + msg("The number of dimensions in tensor 'A' must match the size of 'repeats'", "Tensor::tile"); + } + + // Dimensions must be positive + for(int i=0; idevice); + td->build(A->shape); + + // Initialize tensor + auto* new_t = new Tensor(td->oshape, A->device); + + // Perform select + Tensor::select(A, new_t, td); + + delete td; + return new_t; +} + Tensor* Tensor::select(const vector& indices){ // Build descriptor auto *sd = new SelDescriptor(indices, this->device); @@ -1050,7 +1077,7 @@ void Tensor::deselect(Tensor *A, Tensor *B, vector sind, int ini, int end,i } -void Tensor::tile(Tensor *A, Tensor *B) +void Tensor::tile_deprecated(Tensor *A, Tensor *B) { int Asize=A->shape[0]; From b0490816668d20c374b1680d403508e5ade7b216 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Thu, 2 Dec 2021 19:32:50 +0100 Subject: [PATCH 04/35] Add cartesian_product function to utils --- include/eddl/utils.h | 5 +++-- src/utils.cpp | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/include/eddl/utils.h b/include/eddl/utils.h index b98438dc3..e6ba4f91d 100755 --- a/include/eddl/utils.h +++ b/include/eddl/utils.h @@ -79,7 +79,6 @@ vector compute_unsqueeze(vector shape, int axis, bool ignore_batch=fal vector address2indices(int address, const vector& shape, const vector& strides); unsigned int indices2address(const vector& indices, const vector& strides); - // https://isocpp.org/wiki/faq/inline-functions#inline-member-fns inline int fast_indices2address(const int* indices, const int* strides, int ndim){ int address = 0; @@ -89,15 +88,17 @@ inline int fast_indices2address(const int* indices, const int* strides, int ndim return address; } - inline void fast_address2indices( int address, int* indices, const int* shape, const int* strides, int ndim){ for(int i=0; i padding); +vector> cartesian_product(const vector>& vectors); + template string printVector(vector myvector){ diff --git a/src/utils.cpp b/src/utils.cpp index 09cb95ed1..0129a603a 100755 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -633,6 +633,23 @@ bool isPaddingAsymmetric(vector padding){ return false; } +vector> cartesian_product(const vector>& vectors){ + vector> results = {{}}; + for (auto &vec : vectors){ // Vectors: {0, 1}, {5, 6, 7}, {8, 9} + vector> temp; + + for(auto &res : results){ // Previous solution: {{0}, {1}} + for(auto &elem : vec){ // Elements 1, 2, 3,... 
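+                // Copy each partial combination and extend it with the current element, e.g.
+                // after consuming {0,1} and {5,6,7}: results = {{0,5},{0,6},{0,7},{1,5},{1,6},{1,7}}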
+ vector new_vec = res; + new_vec.push_back(elem); + temp.push_back(new_vec); + } + } + results.clear(); + results = temp; + } + return results; +} WrappingMode getWrappingMode(string mode){ if(mode == "constant"){ From 5d738c8e31b5170c2352bf382ab28ebcca39d339 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Thu, 2 Dec 2021 19:33:14 +0100 Subject: [PATCH 05/35] Add test for tile operation --- tests/tensor/test_tensor_core.cpp | 46 ++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/tests/tensor/test_tensor_core.cpp b/tests/tensor/test_tensor_core.cpp index e74eb6270..b5c24b359 100644 --- a/tests/tensor/test_tensor_core.cpp +++ b/tests/tensor/test_tensor_core.cpp @@ -187,9 +187,6 @@ TEST(TensorTestSuite, tensor_repeat_timed){ clock_t begin = clock(); for(int i=0; iprint(); -// t1_out->print(); ASSERT_TRUE(elapsed_secs<1000); // Dummy test. Just for debugging - - } @@ -243,4 +234,39 @@ TEST(TensorTestSuite, tensor_repeat){ delete t1_cpu; delete t1_cpu_res; delete t1_gpu; delete t1_gpu_res; #endif -} \ No newline at end of file +} + +TEST(TensorTestSuite, tensor_tile){ + Tensor* t1 = new Tensor({1, 2}, {2, 1}, DEV_CPU); + Tensor* t1_ref = new Tensor({1,1, 2,2, 1,1, 2,2}, {4, 2}, DEV_CPU); + +// t1->print(0); + Tensor* t1_res = Tensor::tile(t1, {2, 2}); +// t1_res->print(0); + ASSERT_TRUE(Tensor::equivalent(t1_res, t1_ref, 1e-3f, 0.0f, true, true)); + + Tensor* t2 = new Tensor({1,2,3}, {3, 1}, DEV_CPU); + Tensor* t2_ref = new Tensor({1,1,1, 2,2,2, 3,3,3}, {3, 3}, DEV_CPU); + +// t2->print(0); + Tensor* t2_res = Tensor::tile(t2, {1, 3}); +// t2_res->print(0); + + ASSERT_TRUE(Tensor::equivalent(t2_res, t2_ref, 1e-3f, 0.0f, true, true)); + + // Test GPU +#ifdef cGPU + // Test #1 + Tensor* t1_cpu = Tensor::randn({1, 3, 1, 1}); + Tensor* t1_gpu = t1_cpu->clone(); t1_gpu->toGPU(); + + Tensor* t1_cpu_res = Tensor::tile(t1_cpu, {1, 1, 128, 128}); + Tensor* t1_gpu_res = Tensor::tile(t1_gpu, {1, 1, 128, 128});; t1_gpu_res->toCPU(); + ASSERT_TRUE(Tensor::equivalent(t1_cpu_res, t1_gpu_res, 1e-3f, 0.0f, true, true)); + + delete t1_cpu; delete t1_cpu_res; + delete t1_gpu; delete t1_gpu_res; +#endif +} + +//vector> res = cartesian_product({{0,1}, {0,1}, {0,1}}); From 9625c8a29ccce3cadba565f84fcfe79641b9eac7 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Thu, 2 Dec 2021 20:18:31 +0100 Subject: [PATCH 06/35] Add broadcast function to tensor --- include/eddl/tensor/tensor.h | 14 +++++------ src/tensor/tensor_core.cpp | 45 ++++++++++++++++++++++++++++-------- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/include/eddl/tensor/tensor.h b/include/eddl/tensor/tensor.h index 0da8e86a5..23e5729b9 100644 --- a/include/eddl/tensor/tensor.h +++ b/include/eddl/tensor/tensor.h @@ -2943,6 +2943,13 @@ class Tensor { */ static Tensor* tile(Tensor* A, const vector& repeats); + /** + * @brief Returns a new tensor A to be broadcasted into B + * @param A Input tensor. + * @param B Input tensor. + * @return A new tensor C based on A, but prepared to be broadcasted in to B + */ + static Tensor* broadcast(Tensor* A, Tensor* B); /** * @brief Returns an array with the selected indices of the tensor. @@ -3307,13 +3314,6 @@ class Tensor { */ static string max_accelerator_supported(); - /** - * @brief Returns a new tensor A to be broadcasted into B - * @param A Input tensor. - * @param B Input tensor. 
- * @return A new tensor C based on A, but prepared to be broadcasted in to B - */ - static Tensor* broadcast(Tensor* A, Tensor* B); }; diff --git a/src/tensor/tensor_core.cpp b/src/tensor/tensor_core.cpp index 15e2bd4f8..ff85b86ad 100644 --- a/src/tensor/tensor_core.cpp +++ b/src/tensor/tensor_core.cpp @@ -1107,26 +1107,51 @@ void Tensor::tile_deprecated(Tensor *A, Tensor *B) Tensor* Tensor::broadcast(Tensor* A, Tensor* B){ // Source: https://numpy.org/doc/stable/user/basics.broadcasting.html + bool isCompatible= false; + vector old_shape = A->shape; + vector broadcast_shape(B->ndim, 1); + // Check dimensions - if (A->ndim != B->ndim){ - msg("The dimensions of both tensors must be equal", "Tensor::broadcast"); + if (A->ndim == B->ndim){ // fine + isCompatible = true; + broadcast_shape = A->shape; + }else if(A->ndim==1){ // Check compatibility + // Broadcast: [3] AND [12, 3, 7, 7] => [1, 3, 1, 1] + for(int i = 0; i < B->shape.size(); ++i) { + if (A->shape[0] == B->shape[i]) { + isCompatible = true; + broadcast_shape[i] = B->shape[i]; // [1, 3, 1, 1] + break; + } + } + } + + // Check compatibility + if(!isCompatible){ + msg("The dimensions of both tensors must be equal or compatible (i.e [3] + [12, 3, 7, 7])", "Tensor::broadcast"); } // Check if the shapes can be broadcasted - for(int i=0; indim; i++){ - if(A->shape[i]==B->shape[i] || (A->shape[i]==1) || B->shape[i]==1){ - // okay. Do nothing + vector tile_repetitions; // Tile repetitions + for(int i=0; indim; i++){ + if(broadcast_shape[i]==B->shape[i]) { + tile_repetitions.push_back(1); + }else if(broadcast_shape[i]==1){ // || B->shape[i]==1){ + tile_repetitions.push_back(B->shape[i]); }else{ - msg("These tensors cannot be broadcasted. Two dimensions are compatible when: 1) they are equal, or 2) one of them is 1", "Tensor::broadcast"); + // msg("These tensors cannot be broadcasted. Two dimensions are compatible when: 1) they are equal, or 2) one of them is 1", "Tensor::broadcast"); + msg("These tensors cannot be broadcasted. Two dimensions are compatible when: 1) they are equal, or 2) if the broadcasted dimension is 1", "Tensor::broadcast"); } } - std::cerr << "[Experimental function]: Broadcast" << std::endl; + // Change dimension (temporarily). I don't like this... + A->reshape_(broadcast_shape); // Do broadcast - Tensor* new_t; -// new_t = Tensor::repeat(A, target_size[0]*target_size[1], 1); -// new_t->reshape_(B->shape); + Tensor* new_t = Tensor::tile(A, tile_repetitions); + + // Return to original dimension. I don't like this... 
+ A->reshape_(old_shape); return new_t; } From 7cfb534c9daa566213ab2a378c72a835f9d1f840 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Thu, 2 Dec 2021 20:19:02 +0100 Subject: [PATCH 07/35] Update application examples with broadcasting --- .../1_image_classification_resnet34.cpp | 11 +++---- examples/applications/app_test_model.cpp | 33 +++++++++---------- 2 files changed, 19 insertions(+), 25 deletions(-) diff --git a/examples/applications/1_image_classification_resnet34.cpp b/examples/applications/1_image_classification_resnet34.cpp index 2d55a5e17..32a6184a9 100644 --- a/examples/applications/1_image_classification_resnet34.cpp +++ b/examples/applications/1_image_classification_resnet34.cpp @@ -8,8 +8,8 @@ using namespace eddl; Tensor* preprocess_input_resnet34(Tensor* input, const vector &target_size){ // Define preprocessing constants - auto* mean_vec = new Tensor( {0.485, 0.456, 0.406}, {3, 1}, input->device); - auto* std_vec = new Tensor( {0.229, 0.224, 0.225}, {3, 1}, input->device); + auto* mean_vec = new Tensor( {0.485, 0.456, 0.406}, {3}, input->device); + auto* std_vec = new Tensor( {0.229, 0.224, 0.225}, {3}, input->device); // ========================================================================== // ====== SANITY CHECKS ===================================================== @@ -37,11 +37,8 @@ Tensor* preprocess_input_resnet34(Tensor* input, const vector &target_size) new_input->mult_(1/255.0f); // Standarization: (X-mean)/std -// Tensor* mean = Tensor::broadcast(mean_vec, new_input); -// Tensor* std = Tensor::broadcast(std_vec, new_input); - // 1) [There is no broadcasting...] Repeat dimensions => Temp! - Tensor* mean = Tensor::repeat(mean_vec, target_size[0]*target_size[1], 1); mean->reshape_(new_input->shape); - Tensor* std = Tensor::repeat(std_vec, target_size[0]*target_size[1], 1); std->reshape_(new_input->shape); + Tensor* mean = Tensor::broadcast(mean_vec, new_input); + Tensor* std = Tensor::broadcast(std_vec, new_input); new_input->sub_(mean); new_input->div_(std); // ========================================================================== diff --git a/examples/applications/app_test_model.cpp b/examples/applications/app_test_model.cpp index 68994a096..8d9b4aabc 100644 --- a/examples/applications/app_test_model.cpp +++ b/examples/applications/app_test_model.cpp @@ -8,8 +8,8 @@ using namespace eddl; Tensor* preprocess_input(Tensor* input, const vector &target_size, bool normalize=true, bool standarize=true){ // Define preprocessing constants - auto* mean_vec = new Tensor( {0.485, 0.456, 0.406}, {3, 1}, input->device); - auto* std_vec = new Tensor( {0.229, 0.224, 0.225}, {3, 1}, input->device); + auto* mean_vec = new Tensor( {0.485, 0.456, 0.406}, {3}, input->device); + auto* std_vec = new Tensor( {0.229, 0.224, 0.225}, {3}, input->device); // ========================================================================== // ====== SANITY CHECKS ===================================================== @@ -40,11 +40,8 @@ Tensor* preprocess_input(Tensor* input, const vector &target_size, bool nor // Standarization: (X-mean)/std if(standarize){ -// Tensor* mean = Tensor::broadcast(mean_vec, new_input); -// Tensor* std = Tensor::broadcast(std_vec, new_input); - // 1) [There is no broadcasting...] Repeat dimensions => Temp! 
- Tensor* mean = Tensor::repeat(mean_vec, target_size[0]*target_size[1], 1); mean->reshape_(new_input->shape); - Tensor* std = Tensor::repeat(std_vec, target_size[0]*target_size[1], 1); std->reshape_(new_input->shape); + Tensor* mean = Tensor::broadcast(mean_vec, new_input); + Tensor* std = Tensor::broadcast(std_vec, new_input); new_input->sub_(mean); new_input->div_(std); @@ -77,7 +74,7 @@ int main(int argc, char **argv) { // string model_path = "models/vgg16-7.onnx"; // 3xHxW // okay // string model_path = "models/bvlcalexnet-3.onnx"; // 3x224x224 // The onnx node 'LRN' is not supported yet // string model_path = "models/bvlcalexnet-12.onnx"; // 3x224x224 // The onnx node 'LRN' is not supported yet -// string model_path = "models/googlenet-3.onnx"; // 3x224x224 // The onnx node 'LRN' is not supported yet +// string model_path = "models/googlenet-3_simp.onnx"; // 3x224x224 // The onnx node 'LRN' is not supported yet // string model_path = "models/densenet-3.onnx"; // 3x224x224 // okay // string model_path = "models/inception-v1-3.onnx"; // 3x224x224 // The onnx node 'LRN' is not supported yet // string model_path = "models/efficientnet-lite4-11.onnx"; // 224x224x3 // The onnx node 'LRN' is not supported yet @@ -102,9 +99,9 @@ int main(int argc, char **argv) { int in_height = 224; int in_width = 224; vector input_shape = {in_channels, in_height, in_width}; - vector dimensions_order = {0, 1, 2, 3}; - // ========================================================================== - +// vector dimensions_order = {0, 3, 1, 2}; +// // ========================================================================== +// input_shape = {input_shape[dimensions_order[1]], input_shape[dimensions_order[2]], input_shape[dimensions_order[3]]}; // ========================================================================== // ====== LOAD ONNX MODEL =================================================== @@ -125,12 +122,12 @@ int main(int argc, char **argv) { // Step 4 (optional): Add a softmax layer to get probabilities directly from the model, since it // does not include the softmax layer. -// layer input = net->lin[0]; // getLayer(net,"input_layer_name"); -// layer output = net->lout[0]; // getLayer(net,"output_layer_name"); -// layer new_output = Softmax(output); -// -// // Create model -// net = Model({input},{new_output}); + layer input = net->lin[0]; // getLayer(net,"input_layer_name"); + layer output = net->lout[0]; // getLayer(net,"output_layer_name"); + layer new_output = Softmax(output); + + // Create model + net = Model({input},{new_output}); // ========================================================================== // ========================================================================== @@ -156,7 +153,7 @@ int main(int argc, char **argv) { // Step 3: Preprocess input. (Look up the preprocessing required at the model's page) Tensor* image_preprocessed = preprocess_input(image, {in_height, in_width}); - image_preprocessed->permute_(dimensions_order); +// image_preprocessed->permute_(dimensions_order); // Predict image. Returns a vector of tensors (here one). 
vector outputs = net->predict({image_preprocessed}); From b925696708e29a32be1c642ba27883c4726ec474 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Thu, 2 Dec 2021 20:19:23 +0100 Subject: [PATCH 08/35] Update markdown with tile and broadcast functions for tensor --- docs/markdown/eddl_progress_tensor.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/markdown/eddl_progress_tensor.md b/docs/markdown/eddl_progress_tensor.md index 5759f82a0..326f4745c 100644 --- a/docs/markdown/eddl_progress_tensor.md +++ b/docs/markdown/eddl_progress_tensor.md @@ -128,8 +128,8 @@ Numpy-like operations over a raw-tensor object | Functionality | CPU | GPU | Comments | | ------------- | ---- | ---- | -------- | | repeat | 🟢️ | 🟢️️️ ️ | Repeats the elements of a tensor along the specified dimension | -| tile | 🔴️️ | 🔴️️️ ️ | Repeats the elements of a tensor along the specified dimensions | -| broadcast | 🔴️️ | 🔴️️️ ️ | Produce an object that mimics broadcasting. | +| tile | 🟢️ | 🟢️️️ ️ | Repeats the elements of a tensor along the specified dimensions | +| broadcast | 🟢️ | 🟢️️️ ️ | Produce an object that mimics broadcasting. | ### Adding and removing elements From e72090e7a9d15698589d87cc74dcf73ed6a21617 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Thu, 2 Dec 2021 20:19:54 +0100 Subject: [PATCH 09/35] Update docs with tile function for tensor --- docs/sphinx/source/tensor/manipulation.rst | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/docs/sphinx/source/tensor/manipulation.rst b/docs/sphinx/source/tensor/manipulation.rst index a759196c2..1f01fb18d 100644 --- a/docs/sphinx/source/tensor/manipulation.rst +++ b/docs/sphinx/source/tensor/manipulation.rst @@ -230,6 +230,49 @@ repeat // ] + +tile +^^^^^^^^^^^^^^^ + +.. doxygenfunction:: Tensor::tile(Tensor* A, const vector& repeats) + +.. code-block:: c++ + + // New tensor + Tensor* t1 = new Tensor({1, 2}, {2, 1}); + // [ + // [1] + // [2] + // ] + + // Repeat all rows and columns 2 times each + Tensor* t1_res = Tensor::tile(t1, {2, 2}); + // [ + // [1 1] + // [2 2] + // [1 1] + // [2 2] + // ] + + + // New tensor + Tensor* t2 = new Tensor({1, 2, 3}, {3, 1}); + // [ + // [1] + // [2] + // [3] + // ] + + // Repeat columns three times but not rows + Tensor* t2_res = Tensor::tile(t2, {1, 3}); + // [ + // [1 1 1] + // [2 2 2] + // [3 3 3] + // ] + + + Transpose-like operations -------------------------- From c89ef0fcea3df233cf3b838b8cf49d99583bbc5e Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Thu, 2 Dec 2021 20:26:34 +0100 Subject: [PATCH 10/35] Update application example with broadcasted function --- docs/sphinx/source/usage/examples/pretrained_resnet34.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sphinx/source/usage/examples/pretrained_resnet34.rst b/docs/sphinx/source/usage/examples/pretrained_resnet34.rst index b33520a17..4bcc16987 100644 --- a/docs/sphinx/source/usage/examples/pretrained_resnet34.rst +++ b/docs/sphinx/source/usage/examples/pretrained_resnet34.rst @@ -146,8 +146,8 @@ and standarize it with the known mean and std of the ImageNet dataset. 
Tensor* preprocess_input_resnet34(Tensor* input, const vector &target_size){ // Define preprocessing constants - auto* mean_vec = new Tensor( {0.485, 0.456, 0.406}, {3, 1}, input->device); - auto* std_vec = new Tensor( {0.229, 0.224, 0.225}, {3, 1}, input->device); + auto* mean_vec = new Tensor( {0.485, 0.456, 0.406}, {3}, input->device); + auto* std_vec = new Tensor( {0.229, 0.224, 0.225}, {3}, input->device); // ========================================================================== // ====== SANITY CHECKS ===================================================== From ec2ddfcc6c77a057c6c2cfabf8ce0f4f20385a51 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Thu, 2 Dec 2021 20:27:07 +0100 Subject: [PATCH 11/35] Add broadcasting to documentation --- docs/sphinx/source/tensor/manipulation.rst | 48 ++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/docs/sphinx/source/tensor/manipulation.rst b/docs/sphinx/source/tensor/manipulation.rst index 1f01fb18d..4fa66ee4b 100644 --- a/docs/sphinx/source/tensor/manipulation.rst +++ b/docs/sphinx/source/tensor/manipulation.rst @@ -272,6 +272,54 @@ tile // ] +broadcast +^^^^^^^^^^^^^^^ + +.. doxygenfunction:: Tensor* broadcast(Tensor* A, Tensor* B); + +.. code-block:: c++ + + // Example: Image - constant RGB + + // Define mean + auto* mean = new Tensor( {0.485, 0.456, 0.406}, {3}, DEV_CPU); + // [0.485 0.456 0.406] + + // Fake image + auto* image = Tensor::ones( {3, 224, 244}); + // ------------------------------- + // class: Tensor + // ndim: 3 + // shape: (3, 224, 244) + // strides: (54656, 244, 1) + // itemsize: 163968 + // contiguous: 1 + // order: C + // data pointer: 0x56305561baa8 + // is shared: 0 + // type: float (4 bytes) + // device: CPU (code = 0) + // ------------------------------- + + // Compute broadcast for mean + Tensor* mean_broadcasted = Tensor::broadcast(mean, image); + // ------------------------------- + // class: Tensor + // ndim: 3 + // shape: (3, 224, 244) + // strides: (54656, 244, 1) + // itemsize: 163968 + // contiguous: 1 + // order: C + // data pointer: 0x56305561f2c8 + // is shared: 0 + // type: float (4 bytes) + // device: CPU (code = 0) + // ------------------------------- + + // Apply: X-mean + image->sub_(mean_broadcasted); + Transpose-like operations -------------------------- From 2aa8c91e2843650f23c4905e53c278553d6a922e Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Thu, 2 Dec 2021 20:32:14 +0100 Subject: [PATCH 12/35] Fix broadcast in sphinx docs --- docs/sphinx/source/tensor/manipulation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sphinx/source/tensor/manipulation.rst b/docs/sphinx/source/tensor/manipulation.rst index 4fa66ee4b..0ee76d650 100644 --- a/docs/sphinx/source/tensor/manipulation.rst +++ b/docs/sphinx/source/tensor/manipulation.rst @@ -275,7 +275,7 @@ tile broadcast ^^^^^^^^^^^^^^^ -.. doxygenfunction:: Tensor* broadcast(Tensor* A, Tensor* B); +.. doxygenfunction:: Tensor::broadcast(Tensor* A, Tensor* B); .. 
code-block:: c++ From 6f998507bae1b48a913e0a41bfc79bb05191acb6 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Fri, 3 Dec 2021 13:56:45 +0100 Subject: [PATCH 13/35] Add Bypass layer --- include/eddl/layers/core/layer_core.h | 25 ++++++++ src/layers/core/layer_bypass.cpp | 85 +++++++++++++++++++++++++++ 2 files changed, 110 insertions(+) create mode 100644 src/layers/core/layer_bypass.cpp diff --git a/include/eddl/layers/core/layer_core.h b/include/eddl/layers/core/layer_core.h index 7d172e74d..dbf809c54 100644 --- a/include/eddl/layers/core/layer_core.h +++ b/include/eddl/layers/core/layer_core.h @@ -252,6 +252,31 @@ class LRepeat : public LinLayer { }; +class LBypass : public LinLayer { +public: + static int total_layers; + string bypass_name; + + // constructors and clones + LBypass(Layer *parent, string bypass_name, string name, int dev, int mem); + ~LBypass() override; + + Layer *share(int c, int bs, vector p) override; + + Layer *clone(int c, int bs, vector p, int todev) override; + + // implementation + void mem_delta() override; + void free_delta() override; + + void forward() override; + + void backward() override; + + string plot(int c) override; + +}; + /// UpSampling3D Layer class LUpSampling3D : public LinLayer { public: diff --git a/src/layers/core/layer_bypass.cpp b/src/layers/core/layer_bypass.cpp new file mode 100644 index 000000000..808d85d5b --- /dev/null +++ b/src/layers/core/layer_bypass.cpp @@ -0,0 +1,85 @@ +/* +* EDDL Library - European Distributed Deep Learning Library. +* Version: 1.0 +* copyright (c) 2021, Universitat Politècnica de València (UPV), PRHLT Research Centre +* Date: November 2021 +* Author: PRHLT Research Centre, UPV, (rparedes@prhlt.upv.es), (jon@prhlt.upv.es) +* All rights reserved +*/ + + +#include +#include +#include + +#include "eddl/layers/core/layer_core.h" + + +using namespace std; + +int LBypass::total_layers = 0; + +LBypass::LBypass(Layer *parent, string bypass_name, string name, int dev, int mem) : LinLayer(name, dev, mem, "bypass") { + // The bypass layer cannot simply copy the output and delta, but needs to handle mem_delta() and free_delta(). + if(name.empty()) this->name = "bypass" + to_string(++total_layers); + + // Some vars + this->bypass_name = bypass_name; + + // Set input + input = parent->output; + output= parent->output; + + // Set parent + parent->addchild(this); + addparent(parent); +} + +LBypass::~LBypass(){ + // All of these pointers point to the parent layer so let the parent layer do the work + this->input = nullptr; + this->output = nullptr; + this->delta = nullptr; +} + +void LBypass::mem_delta() { + parent[0]->mem_delta(); + delta = parent[0]->delta; +} + + +void LBypass::free_delta() { + // This is de parent's delta. 
Do not delete it + delta = nullptr; +} + + +void LBypass::forward(){ + +} + +void LBypass::backward(){ + +} + +Layer *LBypass::share(int c, int bs, vector p) { + auto *n = new LBypass(p[0], this->bypass_name, "share_"+to_string(c)+this->name, this->dev, this->mem_level); + n->orig = this; + return n; +} + +Layer *LBypass::clone(int c, int bs, vector p, int todev) { + auto *n = new LBypass(p[0], this->bypass_name, this->name, todev, this->mem_level); + n->orig = this; + return n; +} + + +string LBypass::plot(int c) { + string s; + + if (c) s = name + " [label=" + "\"" + name + "\",style=filled,fontsize=12,fillcolor=bisque4,shape=box]"; + else s = name + " [label=" + "\"" + name + "\",style=filled,fontsize=12,fillcolor=White,shape=box]"; + + return s; +} From 51b860396da86c3d4aa3b10718a80893936e38b0 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Fri, 3 Dec 2021 13:59:23 +0100 Subject: [PATCH 14/35] Add Bypass layer API --- include/eddl/apis/eddl.h | 8 ++++++++ src/apis/eddl.cpp | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/include/eddl/apis/eddl.h b/include/eddl/apis/eddl.h index e345d1141..a3926df9d 100644 --- a/include/eddl/apis/eddl.h +++ b/include/eddl/apis/eddl.h @@ -1074,6 +1074,14 @@ namespace eddl { layer Repeat(layer parent, const vector& repeats, unsigned int axis, string name=""); layer Repeat(layer parent, unsigned int repeats, unsigned int axis, string name=""); + /** + * @brief Virtual layer. Propagates the output of the parent as their own. Used internally for ONNX. + * + * @param parent Parent layer + * @param bypass_name Name of the layer being bypassed (e.g.: "bypass_unknown_layer) + * @return Output of repeat operation + */ + layer Bypass(layer parent, string bypass_name="", string name=""); /** * @brief Dimension of size one is removed at the specified position. 
(Batch dimension is ignored) diff --git a/src/apis/eddl.cpp b/src/apis/eddl.cpp index 1e081543d..305e6ff08 100644 --- a/src/apis/eddl.cpp +++ b/src/apis/eddl.cpp @@ -737,6 +737,10 @@ namespace eddl { return new LRepeat(parent, vrepeats, axis, name, DEV_CPU, 0); } + layer Bypass(layer parent, string bypass_name, string name){ + return new LBypass(parent, bypass_name, name, DEV_CPU, 0); + } + layer Squeeze(layer parent, const int axis, string name){ return new LSqueeze(parent, axis, name, DEV_CPU, 0); } From 980e89783d2d71e239b8b1e7215d32a656e92df2 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Fri, 3 Dec 2021 14:24:54 +0100 Subject: [PATCH 15/35] Minor changes (improve reading) --- src/layers/core/layer_repeat.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/layers/core/layer_repeat.cpp b/src/layers/core/layer_repeat.cpp index 445e67ee0..228b62252 100644 --- a/src/layers/core/layer_repeat.cpp +++ b/src/layers/core/layer_repeat.cpp @@ -35,11 +35,13 @@ LRepeat::LRepeat(Layer *parent, const vector& repeats, unsigned in msg("The size of 'repeats' (" + std::to_string(repeats.size()) + ") must equal the size the the dimension to repeat " + std::to_string(input->shape[axis]) + ")", "LRepeat::LRepeat"); } + // Get input shape, but making sure the batch dimension is 1 + vector input_shape_single_batch(input->shape.begin()+1, input->shape.end()); + input_shape_single_batch.insert(input_shape_single_batch.begin(), 1); + // Build descriptor - vector shape_single_batch(input->shape.begin()+1, input->shape.end()); - shape_single_batch.insert(shape_single_batch.begin(), 1); this->rd = new RepeatDescriptor(repeats, axis, dev); - this->rd->build(shape_single_batch); + this->rd->build(input_shape_single_batch); // Set output tensors output=new Tensor(this->rd->oshape, dev); From e3e5dd1f3202ba3d79f56426e88ab80cbd391146 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Fri, 3 Dec 2021 14:45:07 +0100 Subject: [PATCH 16/35] Add Tile layer --- include/eddl/layers/core/layer_core.h | 23 +++++++ src/layers/core/layer_tile.cpp | 90 +++++++++++++++++++++++++++ 2 files changed, 113 insertions(+) create mode 100644 src/layers/core/layer_tile.cpp diff --git a/include/eddl/layers/core/layer_core.h b/include/eddl/layers/core/layer_core.h index dbf809c54..ed8401e5d 100644 --- a/include/eddl/layers/core/layer_core.h +++ b/include/eddl/layers/core/layer_core.h @@ -252,6 +252,29 @@ class LRepeat : public LinLayer { }; +class LTile : public LinLayer { +public: + static int total_layers; + TileDescriptor *td; + + // constructors and clones + LTile(Layer *parent, const vector& repeats, string name, int dev, int mem); + + Layer *share(int c, int bs, vector p) override; + + Layer *clone(int c, int bs, vector p, int todev) override; + + ~LTile() override; + + void forward() override; + + void backward() override; + + string plot(int c) override; + +}; + + class LBypass : public LinLayer { public: static int total_layers; diff --git a/src/layers/core/layer_tile.cpp b/src/layers/core/layer_tile.cpp new file mode 100644 index 000000000..524f79041 --- /dev/null +++ b/src/layers/core/layer_tile.cpp @@ -0,0 +1,90 @@ +/* +* EDDL Library - European Distributed Deep Learning Library. 
+* Version: 1.0
+* copyright (c) 2021, Universitat Politècnica de València (UPV), PRHLT Research Centre
+* Date: November 2021
+* Author: PRHLT Research Centre, UPV, (rparedes@prhlt.upv.es), (jon@prhlt.upv.es)
+* All rights reserved
+*/
+
+
+#include
+#include
+#include
+
+#include "eddl/layers/core/layer_core.h"
+
+
+using namespace std;
+
+int LTile::total_layers = 0;
+
+LTile::LTile(Layer *parent, const vector& repeats, string name, int dev, int mem) : LinLayer(name, dev, mem, "tile") {
+    if(name.empty()) this->name = "tile" + to_string(++total_layers);
+
+    // Check dimensions
+    if(parent->output->ndim != repeats.size()){
+        msg("The number of dimensions of the output layer must match the size of 'repeats'", "LTile::LTile");
+    }
+
+    // Dimensions must be positive
+    for(int i=0; i<repeats.size(); i++){
+        if(repeats[i] <= 0){
+            msg("The values in 'repeats' must be positive", "LTile::LTile");
+        }
+    }
+
+    input = parent->output;
+
+    // Get input shape, but making sure the batch dimension is 1
+    vector input_shape_single_batch(input->shape.begin()+1, input->shape.end());
+    input_shape_single_batch.insert(input_shape_single_batch.begin(), 1);
+
+    // Build descriptor (batch must be equal to "1")
+    this->td = new TileDescriptor(repeats, dev);
+    this->td->build(input_shape_single_batch);
+
+    // Set output tensors
+    output=new Tensor(this->td->oshape, dev);
+
+    // Set parent
+    parent->addchild(this);
+    addparent(parent);
+}
+
+LTile::~LTile(){
+    delete td;
+}
+
+void LTile::forward(){
+    tensorNN::select(this->input, this->output, this->td);
+}
+
+void LTile::backward(){
+    tensorNN::select_back(this->delta, this->parent[0]->delta, this->td);
+}
+
+Layer *LTile::share(int c, int bs, vector p) {
+    auto *n = new LTile(p[0], this->td->vrepeats, "share_"+to_string(c)+this->name, this->dev, this->mem_level);
+    n->orig = this;
+
+    return n;
+}
+
+Layer *LTile::clone(int c, int bs, vector p, int todev) {
+    auto *n = new LTile(p[0], this->td->vrepeats, this->name, todev, this->mem_level);
+    n->orig = this;
+
+    return n;
+}
+
+
+string LTile::plot(int c) {
+    string s;
+
+    if (c) s = name + " [label=" + "\"" + name + "\",style=filled,fontsize=12,fillcolor=bisque4,shape=box]";
+    else s = name + " [label=" + "\"" + name + "\",style=filled,fontsize=12,fillcolor=White,shape=box]";
+
+    return s;
+}

From b767727dacf10b4dd9c2df3597ffa862e1fe7d94 Mon Sep 17 00:00:00 2001
From: salvacarrion
Date: Fri, 3 Dec 2021 14:45:28 +0100
Subject: [PATCH 17/35] Add tile layer to the API

---
 include/eddl/apis/eddl.h |  9 +++++++++
 src/apis/eddl.cpp        | 13 +++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/include/eddl/apis/eddl.h b/include/eddl/apis/eddl.h
index a3926df9d..8f646d203 100644
--- a/include/eddl/apis/eddl.h
+++ b/include/eddl/apis/eddl.h
@@ -1074,6 +1074,15 @@ namespace eddl {
     layer Repeat(layer parent, const vector& repeats, unsigned int axis, string name="");
     layer Repeat(layer parent, unsigned int repeats, unsigned int axis, string name="");

+    /**
+     * @brief Constructs a tensor by repeating the elements of input. The repeats argument specifies the number of repetitions in each dimension.
+     *
+     * @param parent Parent layer
+     * @param repeats The number of repetitions per dimension.
+     * @return Output of the tile operation
+     */
+    layer Tile(layer parent, const vector& repeats, string name="");
+
     /**
      * @brief Virtual layer. Propagates the output of the parent as its own. Used internally for ONNX.
* diff --git a/src/apis/eddl.cpp b/src/apis/eddl.cpp index 305e6ff08..c4b413183 100644 --- a/src/apis/eddl.cpp +++ b/src/apis/eddl.cpp @@ -737,6 +737,19 @@ namespace eddl { return new LRepeat(parent, vrepeats, axis, name, DEV_CPU, 0); } + layer Tile(layer parent, const vector& repeats, string name){ + // Inset one at the batch dimension + vector repeats_with_single_batch = repeats; + repeats_with_single_batch.insert(repeats_with_single_batch.begin(), 1); + + // Check dimensions + if(parent->output->ndim != repeats_with_single_batch.size()){ + msg("The number of dimensions of the output layer must match the size of 'repeats' (excluding batch)", "LTile::LTile"); + } + + return new LTile(parent, repeats_with_single_batch, name, DEV_CPU, 0); + } + layer Bypass(layer parent, string bypass_name, string name){ return new LBypass(parent, bypass_name, name, DEV_CPU, 0); } From bd9aa1dbc7fdee8f88e4b48f2a02424a26c01196 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Fri, 3 Dec 2021 14:46:25 +0100 Subject: [PATCH 18/35] Remove transpose layer --- include/eddl/layers/core/layer_core.h | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/include/eddl/layers/core/layer_core.h b/include/eddl/layers/core/layer_core.h index ed8401e5d..d1c7bca02 100644 --- a/include/eddl/layers/core/layer_core.h +++ b/include/eddl/layers/core/layer_core.h @@ -402,27 +402,6 @@ class LUnsqueeze : public LinLayer { }; -/// Transpose Layer -class LTranspose : public LinLayer { -public: - static int total_layers; - vector dims; - vector rdims; - - // constructors and clones - LTranspose(Layer *parent, vector dims, string name, int dev, int mem); - - Layer *share(int c, int bs, vector p) override; - - Layer *clone(int c, int bs, vector p, int todev) override; - - void forward() override; - - void backward() override; - - string plot(int c) override; - -}; /// Drop-out Layer class LDropout : public LinLayer { From 2673728e84a91205f5ed431b69e6561cc45ed55f Mon Sep 17 00:00:00 2001 From: chavicoski Date: Fri, 3 Dec 2021 15:30:34 +0100 Subject: [PATCH 19/35] Refactored get_tensors function in ONNX distributed module --- .../onnx/layers/conv/conv_onnx.h | 4 + .../onnx/layers/core/dense_onnx.h | 4 + .../serialization/onnx/layers/layers_onnx.h | 3 + src/serialization/onnx/net/import_helpers.cpp | 90 ++----------------- .../onnx/net/layers/conv/conv_onnx.cpp | 28 ++++++ .../onnx/net/layers/core/dense_onnx.cpp | 27 ++++++ .../onnx/net/layers/layers_onnx.cpp | 36 ++++++++ 7 files changed, 109 insertions(+), 83 deletions(-) diff --git a/include/eddl/serialization/onnx/layers/conv/conv_onnx.h b/include/eddl/serialization/onnx/layers/conv/conv_onnx.h index 0b3962e8e..5460b2d60 100644 --- a/include/eddl/serialization/onnx/layers/conv/conv_onnx.h +++ b/include/eddl/serialization/onnx/layers/conv/conv_onnx.h @@ -35,5 +35,9 @@ void update_conv_weights(LConv *layer, vector weights); void apply_grads_to_conv(LConv *layer, vector grads); +vector get_conv_tensors(onnx::NodeProto &node, + map> &map_init_values, + map> &map_init_dims); + #endif // EDDL_CONV_ONNX_H #endif // cPROTO diff --git a/include/eddl/serialization/onnx/layers/core/dense_onnx.h b/include/eddl/serialization/onnx/layers/core/dense_onnx.h index f8b64c6ad..f941d6312 100644 --- a/include/eddl/serialization/onnx/layers/core/dense_onnx.h +++ b/include/eddl/serialization/onnx/layers/core/dense_onnx.h @@ -38,5 +38,9 @@ void update_dense_weights(LDense *layer, vector weights); void apply_grads_to_dense(LDense *layer, vector grads); +vector 
get_dense_tensors(onnx::NodeProto &node, + map> &map_init_values, + map> &map_init_dims); + #endif // EDDL_DENSE_ONNX_H #endif // cPROTO diff --git a/include/eddl/serialization/onnx/layers/layers_onnx.h b/include/eddl/serialization/onnx/layers/layers_onnx.h index 25a53910b..793be9c0e 100644 --- a/include/eddl/serialization/onnx/layers/layers_onnx.h +++ b/include/eddl/serialization/onnx/layers/layers_onnx.h @@ -103,5 +103,8 @@ void update_layer_weights(Layer *layer, vector weights); void apply_grads_to_layer(Layer *layer, vector grads); +map> get_tensors_from_onnx_nodes(vector &nodes, + map> &map_init_values, + map> &map_init_dims); #endif // EDDL_LAYERS_ONNX_H #endif // cPROTO diff --git a/src/serialization/onnx/net/import_helpers.cpp b/src/serialization/onnx/net/import_helpers.cpp index 993a91c92..a95b90aff 100644 --- a/src/serialization/onnx/net/import_helpers.cpp +++ b/src/serialization/onnx/net/import_helpers.cpp @@ -324,93 +324,17 @@ void share_weights(Net *net) // Returns a map containing the name of the layer as key and a tensor with the values of the model as value map> get_tensors_from_onnx(onnx::ModelProto model) { - // TODO: This method should be removed after changing the distibuted functions of ONNX by - // a more generic ones (using params vector) - map> tensors; + onnx::GraphProto graph = model.graph(); // Get the graph of the model - onnx::GraphProto graph = model.graph(); // Get the graph of the model. - - vector initializers = get_initializers(graph); // Retrieves the initializers from the graph. - // The weights for the layers can be found in the initializers. - map> map_init_values; - map> map_init_dims; + // The weights for the layers can be found in the initializers + vector initializers = get_initializers(graph); + map> map_init_values; // Key: Layer weights name - Value: Weights + map> map_init_dims; // Key: Layer weights name - Value: Shape of the weights get_initializers_maps(initializers, map_init_values, map_init_dims); // Creates 2 maps - // Key: Input Name . Value: Weights - // Key: Input Name . 
Value: Dims - vector nodes = get_graph_nodes(graph); - - map map_layers = create_enum_map(); - int dev = DEV_CPU; - - for (onnx::NodeProto node : nodes) - { - string layer_type_name = node.op_type(); - ONNX_LAYERS layer_type = map_layers[layer_type_name]; - string name = node.name(); - - switch (layer_type) - { - case ONNX_LAYERS::CONV: - { - vector conv_tensors; - - string weights_name = node.input(1); // Get weights and dims - vector *weights = &(map_init_values[weights_name]); - vector dims = map_init_dims[weights_name]; - - Tensor * temp = new Tensor(dims, nullptr, dev); - COPY_FROM_VECTOR_PTR_TO_TENSOR(weights, temp); - conv_tensors.push_back(temp); - - if (node.input_size() > 2) - { // This means we also have a bias - string bias_name = node.input(2); - vector *bias = &(map_init_values[bias_name]); - vector bias_shape; - bias_shape.push_back(bias->size()); - temp = new Tensor(bias_shape, nullptr, dev); - COPY_FROM_VECTOR_PTR_TO_TENSOR(bias, temp); - conv_tensors.push_back(temp); - } - - tensors[name] = conv_tensors; - break; - } - case ONNX_LAYERS::DENSE: - { - vector dense_tensors; - - string weights_name = node.input(1); // Get weights and dims - vector *weights = &(map_init_values[weights_name]); - vector dims = map_init_dims[weights_name]; - - Tensor * temp = new Tensor(dims, nullptr, dev); - COPY_FROM_VECTOR_PTR_TO_TENSOR(weights, temp); - dense_tensors.push_back(temp); - - if (node.input_size() > 2) - { - string bias_name = node.input(2); - vector *bias = &(map_init_values[bias_name]); - vector bias_dims = map_init_dims[bias_name]; - temp = new Tensor(bias_dims, nullptr, dev); - COPY_FROM_VECTOR_PTR_TO_TENSOR(bias, temp); - dense_tensors.push_back(temp); - } - - tensors[name] = dense_tensors; - break; - } - - default: - //cout << "The layer with type " << layer_type_name << " has no trainable parameters " << endl; - continue; - break; - } - } + vector nodes = get_graph_nodes(graph); // Nodes == model layers - return tensors; + return get_tensors_from_onnx_nodes(nodes, map_init_values, map_init_dims); } // Shows basic metadata of the model if the log level is DEBUG or lower diff --git a/src/serialization/onnx/net/layers/conv/conv_onnx.cpp b/src/serialization/onnx/net/layers/conv/conv_onnx.cpp index bfd89f57e..3df132c40 100644 --- a/src/serialization/onnx/net/layers/conv/conv_onnx.cpp +++ b/src/serialization/onnx/net/layers/conv/conv_onnx.cpp @@ -341,4 +341,32 @@ void apply_grads_to_conv(LConv *layer, vector grads) layer->accumulate_accumulated_gradients(grads[0]); } +vector get_conv_tensors(onnx::NodeProto &node, + map> &map_init_values, + map> &map_init_dims) +{ + vector conv_tensors; + + string weights_name = node.input(1); // Get weights and dims + vector *weights = &(map_init_values[weights_name]); + vector dims = map_init_dims[weights_name]; + + Tensor * temp = new Tensor(dims, nullptr, DEV_CPU); + COPY_FROM_VECTOR_PTR_TO_TENSOR(weights, temp); + conv_tensors.push_back(temp); + + if (node.input_size() > 2) + { // This means we also have a bias + string bias_name = node.input(2); + vector *bias = &(map_init_values[bias_name]); + vector bias_shape; + bias_shape.push_back(bias->size()); + temp = new Tensor(bias_shape, nullptr, DEV_CPU); + COPY_FROM_VECTOR_PTR_TO_TENSOR(bias, temp); + conv_tensors.push_back(temp); + } + + return conv_tensors; +} + #endif // defined(cPROTO) diff --git a/src/serialization/onnx/net/layers/core/dense_onnx.cpp b/src/serialization/onnx/net/layers/core/dense_onnx.cpp index cbd9c8636..b2342707d 100644 --- 
a/src/serialization/onnx/net/layers/core/dense_onnx.cpp +++ b/src/serialization/onnx/net/layers/core/dense_onnx.cpp @@ -256,4 +256,31 @@ void apply_grads_to_dense(LDense *layer, vector grads) layer->accumulate_accumulated_gradients(grads[0]); } +vector get_dense_tensors(onnx::NodeProto &node, + map> &map_init_values, + map> &map_init_dims) +{ + vector dense_tensors; + + string weights_name = node.input(1); // Get weights and dims + vector *weights = &(map_init_values[weights_name]); + vector dims = map_init_dims[weights_name]; + + Tensor * temp = new Tensor(dims, nullptr, DEV_CPU); + COPY_FROM_VECTOR_PTR_TO_TENSOR(weights, temp); + dense_tensors.push_back(temp); + + if (node.input_size() > 2) + { + string bias_name = node.input(2); + vector *bias = &(map_init_values[bias_name]); + vector bias_dims = map_init_dims[bias_name]; + temp = new Tensor(bias_dims, nullptr, DEV_CPU); + COPY_FROM_VECTOR_PTR_TO_TENSOR(bias, temp); + dense_tensors.push_back(temp); + } + + return dense_tensors; +} + #endif // defined(cPROTO) diff --git a/src/serialization/onnx/net/layers/layers_onnx.cpp b/src/serialization/onnx/net/layers/layers_onnx.cpp index 573f4004d..361ea172b 100644 --- a/src/serialization/onnx/net/layers/layers_onnx.cpp +++ b/src/serialization/onnx/net/layers/layers_onnx.cpp @@ -539,4 +539,40 @@ void apply_grads_to_layer(Layer *layer, vector grads) cerr << "The layer " << l->name << " has no support for applying gradients" << endl; } +map> get_tensors_from_onnx_nodes(vector &nodes, + map> &map_init_values, + map> &map_init_dims) +{ + map map_layers = create_enum_map(); // To indentify the layers types + int dev = DEV_CPU; + + map> tensors; // To store the layers weights tensors + for (onnx::NodeProto node : nodes) + { + string layer_type_name = node.op_type(); + ONNX_LAYERS layer_type = map_layers[layer_type_name]; + string name = node.name(); + + switch (layer_type) + { + case ONNX_LAYERS::CONV: + { + tensors[name] = get_conv_tensors(node, map_init_values, map_init_dims); + break; + } + case ONNX_LAYERS::DENSE: + { + tensors[name] = get_dense_tensors(node, map_init_values, map_init_dims); + break; + } + default: + // This layer has no trainable parameters + continue; + break; + } + } + + return tensors; +} + #endif // defined(cPROTO) From e6cac85f3d022af5a671ff48c4bba0dd2f2e4ddf Mon Sep 17 00:00:00 2001 From: chavicoski Date: Fri, 3 Dec 2021 16:38:03 +0100 Subject: [PATCH 20/35] Improved internal functions of distributed training --- include/eddl/layers/conv/layer_conv.h | 38 +++++++++---------- include/eddl/layers/core/layer_core.h | 4 +- include/eddl/layers/fused/layer_fused.h | 4 +- include/eddl/layers/layer.h | 8 ++-- src/layers/conv/layer_conv1D.cpp | 28 +++++++++++--- src/layers/conv/layer_conv2D.cpp | 28 +++++++++++--- src/layers/conv/layer_conv3D.cpp | 28 +++++++++++--- src/layers/conv/layer_convT2D.cpp | 28 +++++++++++--- src/layers/conv/layer_convT3D.cpp | 28 +++++++++++--- src/layers/core/layer_dense.cpp | 28 +++++++++++--- src/layers/fused/layer_fused.cpp | 28 +++++++++++--- .../onnx/net/layers/conv/conv_onnx.cpp | 10 +---- .../onnx/net/layers/core/dense_onnx.cpp | 10 +---- 13 files changed, 185 insertions(+), 85 deletions(-) diff --git a/include/eddl/layers/conv/layer_conv.h b/include/eddl/layers/conv/layer_conv.h index a4be979cd..fd4d061f6 100644 --- a/include/eddl/layers/conv/layer_conv.h +++ b/include/eddl/layers/conv/layer_conv.h @@ -58,19 +58,19 @@ class LConv : public LinLayer { void initialize() override; - void update_weights(Tensor* w, Tensor* bias=nullptr) override; + void 
update_weights(vector weights) override; - void accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias=nullptr) override; + void accumulate_accumulated_gradients(vector grads) override; - void reset_accumulated_gradients() override; + void reset_accumulated_gradients() override; - void apply_accumulated_gradients() override; + void apply_accumulated_gradients() override; string plot(int c) override; - static void reset_name_counter(); + static void reset_name_counter(); - void enable_distributed() override; + void enable_distributed() override; }; @@ -78,7 +78,7 @@ class LConv : public LinLayer { class LConv1D : public LinLayer { public: static int total_layers; - bool distributed_training; + bool distributed_training; Tensor* input_reshaped; ConvolDescriptor *cd; @@ -107,19 +107,19 @@ class LConv1D : public LinLayer { void initialize() override; - void update_weights(Tensor* w, Tensor* bias=nullptr) override; + void update_weights(vector weights) override; - void accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias=nullptr) override; + void accumulate_accumulated_gradients(vector grads) override; - void reset_accumulated_gradients() override; + void reset_accumulated_gradients() override; - void apply_accumulated_gradients() override; + void apply_accumulated_gradients() override; string plot(int c) override; - static void reset_name_counter(); + static void reset_name_counter(); - void enable_distributed() override; + void enable_distributed() override; }; @@ -160,9 +160,9 @@ class LConv3D : public LinLayer { void initialize() override; - void update_weights(Tensor* w, Tensor* bias=nullptr) override; + void update_weights(vector weights) override; - void accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias=nullptr) override; + void accumulate_accumulated_gradients(vector grads) override; void reset_accumulated_gradients() override; @@ -207,9 +207,9 @@ class LConvT2D : public LinLayer { void initialize() override; - void update_weights(Tensor* w, Tensor* bias=nullptr) override; + void update_weights(vector weights) override; - void accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias=nullptr) override; + void accumulate_accumulated_gradients(vector grads) override; void reset_accumulated_gradients() override; @@ -255,9 +255,9 @@ class LConvT3D : public LinLayer { void initialize() override; - void update_weights(Tensor* w, Tensor* bias=nullptr) override; + void update_weights(vector weights) override; - void accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias=nullptr) override; + void accumulate_accumulated_gradients(vector grads) override; void reset_accumulated_gradients() override; diff --git a/include/eddl/layers/core/layer_core.h b/include/eddl/layers/core/layer_core.h index ed8401e5d..725eca788 100644 --- a/include/eddl/layers/core/layer_core.h +++ b/include/eddl/layers/core/layer_core.h @@ -136,10 +136,10 @@ class LDense : public LinLayer { void backward() override; // Sets the weights to the values of the parameter w - void update_weights(Tensor* w, Tensor* bias=nullptr) override; + void update_weights(vector weights) override; // Adds the values of gw to the current weights of the layer - void accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias=nullptr) override; + void accumulate_accumulated_gradients(vector grads) override; // Sets to 0.0 the tensors with the accumulated gradients for W and bias void reset_accumulated_gradients() override; diff --git a/include/eddl/layers/fused/layer_fused.h b/include/eddl/layers/fused/layer_fused.h index 
3f12cffbc..f4694bd2b 100644 --- a/include/eddl/layers/fused/layer_fused.h +++ b/include/eddl/layers/fused/layer_fused.h @@ -56,9 +56,9 @@ class LConv2dActivation : public LinLayer { void initialize() override; - void update_weights(Tensor* w, Tensor* bias=nullptr) override; + void update_weights(vector weights) override; - void accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias=nullptr) override; + void accumulate_accumulated_gradients(vector grads) override; void reset_accumulated_gradients() override; diff --git a/include/eddl/layers/layer.h b/include/eddl/layers/layer.h index 2c380eefa..7f421424d 100644 --- a/include/eddl/layers/layer.h +++ b/include/eddl/layers/layer.h @@ -124,9 +124,9 @@ class Layer { virtual void backward() {} - virtual void update_weights(Tensor* w, Tensor* bias) {} + virtual void update_weights(vector weights) {} - virtual void accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias) {} + virtual void accumulate_accumulated_gradients(vector grads) {} virtual void reset_accumulated_gradients() {} @@ -161,9 +161,9 @@ class LinLayer : public Layer { void backward() override {} - void update_weights(Tensor* w, Tensor* bias) override {} + void update_weights(vector weights) override {} - void accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias) override {} + void accumulate_accumulated_gradients(vector grads) override {} void reset_accumulated_gradients() override {} diff --git a/src/layers/conv/layer_conv1D.cpp b/src/layers/conv/layer_conv1D.cpp index e2578805b..776d48411 100644 --- a/src/layers/conv/layer_conv1D.cpp +++ b/src/layers/conv/layer_conv1D.cpp @@ -132,14 +132,30 @@ void LConv1D::backward() { if (trainable) if(reg!= nullptr) {reg->apply(cd->K);} } -void LConv1D::update_weights(Tensor* w, Tensor* bias) { - Tensor::copy( w, cd->K ); - if ( bias != nullptr ) Tensor::copy( bias, cd->bias ); +void LConv1D::update_weights(vector weights) { + if (weights.size() == 2) { + Tensor::copy(weights[0], cd->K); + Tensor::copy(weights[1], cd->bias); + } else if (weights.size() == 1) { + Tensor::copy(weights[0], cd->K); + } else { + cerr << "[WARNING - LConv1D::update_weights] " + << "Unexpected number of weights tensors recieved " + << "(weights.size()=" << weights.size() << ")" << endl; + } } -void LConv1D::accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias) { - cd->K->add_( gw ); - if ( gbias != nullptr ) cd->bias->add_( gbias ); +void LConv1D::accumulate_accumulated_gradients(vector grads) { + if (grads.size() == 2) { + cd->K->add_(grads[0]); + cd->bias->add_(grads[1]); + } else if (grads.size() == 1) { + cd->K->add_(grads[0]); + } else { + cerr << "[WARNING - LConv1D::accumulate_accumulated_gradients] " + << "Unexpected number of gradient tensors recieved " + << "(grads.size()=" << grads.size() << ")" << endl; + } } void LConv1D::reset_accumulated_gradients() { diff --git a/src/layers/conv/layer_conv2D.cpp b/src/layers/conv/layer_conv2D.cpp index 657aa50bd..871b215a7 100644 --- a/src/layers/conv/layer_conv2D.cpp +++ b/src/layers/conv/layer_conv2D.cpp @@ -109,14 +109,30 @@ void LConv::initialize() { params[1]->fill_(0.0f); // Bias } -void LConv::update_weights(Tensor* w, Tensor* bias) { - Tensor::copy( w, cd->K ); - if ( bias != nullptr ) Tensor::copy( bias, cd->bias ); +void LConv::update_weights(vector weights) { + if (weights.size() == 2) { + Tensor::copy(weights[0], cd->K); + Tensor::copy(weights[1], cd->bias); + } else if (weights.size() == 1) { + Tensor::copy(weights[0], cd->K); + } else { + cerr << "[WARNING - LConv::update_weights] " + 
<< "Unexpected number of weights tensors recieved " + << "(weights.size()=" << weights.size() << ")" << endl; + } } -void LConv::accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias) { - cd->K->add_( gw ); - if ( gbias != nullptr ) cd->bias->add_( gbias ); +void LConv::accumulate_accumulated_gradients(vector grads) { + if (grads.size() == 2) { + cd->K->add_(grads[0]); + cd->bias->add_(grads[1]); + } else if (grads.size() == 1) { + cd->K->add_(grads[0]); + } else { + cerr << "[WARNING - LConv::accumulate_accumulated_gradients] " + << "Unexpected number of gradient tensors recieved " + << "(grads.size()=" << grads.size() << ")" << endl; + } } void LConv::reset_accumulated_gradients() { diff --git a/src/layers/conv/layer_conv3D.cpp b/src/layers/conv/layer_conv3D.cpp index 0e88053de..e78c21fe0 100644 --- a/src/layers/conv/layer_conv3D.cpp +++ b/src/layers/conv/layer_conv3D.cpp @@ -109,14 +109,30 @@ void LConv3D::backward() { if (trainable) if(reg!= nullptr) {reg->apply(cd->K);} } -void LConv3D::update_weights(Tensor* w, Tensor* bias) { - Tensor::copy( w, cd->K ); - if ( bias != nullptr ) Tensor::copy( bias, cd->bias ); +void LConv3D::update_weights(vector weights) { + if (weights.size() == 2) { + Tensor::copy(weights[0], cd->K); + Tensor::copy(weights[1], cd->bias); + } else if (weights.size() == 1) { + Tensor::copy(weights[0], cd->K); + } else { + cerr << "[WARNING - LConv3D::update_weights] " + << "Unexpected number of weights tensors recieved " + << "(weights.size()=" << weights.size() << ")" << endl; + } } -void LConv3D::accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias) { - cd->K->add_( gw ); - if ( gbias != nullptr ) cd->bias->add_( gbias ); +void LConv3D::accumulate_accumulated_gradients(vector grads) { + if (grads.size() == 2) { + cd->K->add_(grads[0]); + cd->bias->add_(grads[1]); + } else if (grads.size() == 1) { + cd->K->add_(grads[0]); + } else { + cerr << "[WARNING - LConv3D::accumulate_accumulated_gradients] " + << "Unexpected number of gradient tensors recieved " + << "(grads.size()=" << grads.size() << ")" << endl; + } } void LConv3D::reset_accumulated_gradients() { diff --git a/src/layers/conv/layer_convT2D.cpp b/src/layers/conv/layer_convT2D.cpp index f629f6f00..9d70a2bf2 100644 --- a/src/layers/conv/layer_convT2D.cpp +++ b/src/layers/conv/layer_convT2D.cpp @@ -107,14 +107,30 @@ void LConvT2D::initialize() { params[1]->fill_(0.0f); // Bias } -void LConvT2D::update_weights(Tensor* w, Tensor* bias) { - Tensor::copy( w, cd->K ); - if ( bias != nullptr ) Tensor::copy( bias, cd->bias ); +void LConvT2D::update_weights(vector weights) { + if (weights.size() == 2) { + Tensor::copy(weights[0], cd->K); + Tensor::copy(weights[1], cd->bias); + } else if (weights.size() == 1) { + Tensor::copy(weights[0], cd->K); + } else { + cerr << "[WARNING - LConvT2D::update_weights] " + << "Unexpected number of weights tensors recieved " + << "(weights.size()=" << weights.size() << ")" << endl; + } } -void LConvT2D::accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias) { - cd->K->add_( gw ); - if ( gbias != nullptr ) cd->bias->add_( gbias ); +void LConvT2D::accumulate_accumulated_gradients(vector grads) { + if (grads.size() == 2) { + cd->K->add_(grads[0]); + cd->bias->add_(grads[1]); + } else if (grads.size() == 1) { + cd->K->add_(grads[0]); + } else { + cerr << "[WARNING - LConvT2D::accumulate_accumulated_gradients] " + << "Unexpected number of gradient tensors recieved " + << "(grads.size()=" << grads.size() << ")" << endl; + } } void LConvT2D::reset_accumulated_gradients() { 
diff --git a/src/layers/conv/layer_convT3D.cpp b/src/layers/conv/layer_convT3D.cpp index 2385e45dd..2504dd202 100644 --- a/src/layers/conv/layer_convT3D.cpp +++ b/src/layers/conv/layer_convT3D.cpp @@ -107,14 +107,30 @@ void LConvT3D::initialize() { params[1]->fill_(0.0f); // Bias } -void LConvT3D::update_weights(Tensor* w, Tensor* bias) { - Tensor::copy( w, cd->K ); - if ( bias != nullptr ) Tensor::copy( bias, cd->bias ); +void LConvT3D::update_weights(vector weights) { + if (weights.size() == 2) { + Tensor::copy(weights[0], cd->K); + Tensor::copy(weights[1], cd->bias); + } else if (weights.size() == 1) { + Tensor::copy(weights[0], cd->K); + } else { + cerr << "[WARNING - LConvT3D::update_weights] " + << "Unexpected number of weights tensors recieved " + << "(weights.size()=" << weights.size() << ")" << endl; + } } -void LConvT3D::accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias) { - cd->K->add_( gw ); - if ( gbias != nullptr ) cd->bias->add_( gbias ); +void LConvT3D::accumulate_accumulated_gradients(vector grads) { + if (grads.size() == 2) { + cd->K->add_(grads[0]); + cd->bias->add_(grads[1]); + } else if (grads.size() == 1) { + cd->K->add_(grads[0]); + } else { + cerr << "[WARNING - LConvT3D::accumulate_accumulated_gradients] " + << "Unexpected number of gradient tensors recieved " + << "(grads.size()=" << grads.size() << ")" << endl; + } } void LConvT3D::reset_accumulated_gradients() { diff --git a/src/layers/core/layer_dense.cpp b/src/layers/core/layer_dense.cpp index 37cdf340b..ed2f3f09a 100644 --- a/src/layers/core/layer_dense.cpp +++ b/src/layers/core/layer_dense.cpp @@ -70,14 +70,30 @@ void LDense::backward() { if (trainable) if(reg != nullptr) {reg->apply(this->W);} } -void LDense::update_weights(Tensor* w, Tensor* bias) { - Tensor::copy( w, this->W ); - if ( bias != nullptr ) Tensor::copy( bias, this->bias ); +void LDense::update_weights(vector weights) { + if (weights.size() == 2) { + Tensor::copy(weights[0], this->W); + Tensor::copy(weights[1], this->bias); + } else if (weights.size() == 1) { + Tensor::copy(weights[0], this->W); + } else { + cerr << "[WARNING - LDense::update_weights] " + << "Unexpected number of weights tensors recieved " + << "(weights.size()=" << weights.size() << ")" << endl; + } } -void LDense::accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias) { - W->add_( gw ); - if ( gbias != nullptr ) bias->add_( gbias ); +void LDense::accumulate_accumulated_gradients(vector grads) { + if (grads.size() == 2) { + W->add_(grads[0]); + bias->add_(grads[1]); + } else if (grads.size() == 1) { + W->add_(grads[0]); + } else { + cerr << "[WARNING - LDense::accumulate_accumulated_gradients] " + << "Unexpected number of gradient tensors recieved " + << "(grads.size()=" << grads.size() << ")" << endl; + } // Regularizer if(reg != nullptr) { reg->apply(this->W); } diff --git a/src/layers/fused/layer_fused.cpp b/src/layers/fused/layer_fused.cpp index 05cce165d..708ed8f5b 100644 --- a/src/layers/fused/layer_fused.cpp +++ b/src/layers/fused/layer_fused.cpp @@ -103,14 +103,30 @@ void LConv2dActivation::initialize() { params[1]->fill_(0.0f); // Bias } -void LConv2dActivation::update_weights(Tensor* w, Tensor* bias) { - Tensor::copy( w, cd->K ); - if ( bias != nullptr ) Tensor::copy( bias, cd->bias ); +void LConv2dActivation::update_weights(vector weights) { + if (weights.size() == 2) { + Tensor::copy(weights[0], cd->K); + Tensor::copy(weights[1], cd->bias); + } else if (weights.size() == 1) { + Tensor::copy(weights[0], cd->K); + } else { + cerr << "[WARNING - 
LConv2dActivation::update_weights] " + << "Unexpected number of weights tensors recieved " + << "(weights.size()=" << weights.size() << ")" << endl; + } } -void LConv2dActivation::accumulate_accumulated_gradients(Tensor* gw, Tensor* gbias) { - cd->K->add_( gw ); - if ( gbias != nullptr ) cd->bias->add_( gbias ); +void LConv2dActivation::accumulate_accumulated_gradients(vector grads) { + if (grads.size() == 2) { + cd->K->add_(grads[0]); + cd->bias->add_(grads[1]); + } else if (grads.size() == 1) { + cd->K->add_(grads[0]); + } else { + cerr << "[WARNING - LConv2dActivation::accumulate_accumulated_gradients] " + << "Unexpected number of gradient tensors recieved " + << "(grads.size()=" << grads.size() << ")" << endl; + } } void LConv2dActivation::reset_accumulated_gradients() { diff --git a/src/serialization/onnx/net/layers/conv/conv_onnx.cpp b/src/serialization/onnx/net/layers/conv/conv_onnx.cpp index 3df132c40..1dc7e2a38 100644 --- a/src/serialization/onnx/net/layers/conv/conv_onnx.cpp +++ b/src/serialization/onnx/net/layers/conv/conv_onnx.cpp @@ -327,18 +327,12 @@ void build_conv_node(LConv *layer, onnx::GraphProto *graph, bool gradients) void update_conv_weights(LConv *layer, vector weights) { - if (weights.size() > 1) - layer->update_weights(weights[0], weights[1]); - else - layer->update_weights(weights[0]); + layer->update_weights(weights); } void apply_grads_to_conv(LConv *layer, vector grads) { - if (grads.size() > 1) - layer->accumulate_accumulated_gradients(grads[0], grads[1]); - else - layer->accumulate_accumulated_gradients(grads[0]); + layer->accumulate_accumulated_gradients(grads); } vector get_conv_tensors(onnx::NodeProto &node, diff --git a/src/serialization/onnx/net/layers/core/dense_onnx.cpp b/src/serialization/onnx/net/layers/core/dense_onnx.cpp index b2342707d..4c6e73b5f 100644 --- a/src/serialization/onnx/net/layers/core/dense_onnx.cpp +++ b/src/serialization/onnx/net/layers/core/dense_onnx.cpp @@ -242,18 +242,12 @@ void build_dense_with_matmul_node(LDense *layer, onnx::GraphProto *graph, bool g void update_dense_weights(LDense *layer, vector weights) { - if (weights.size() > 1) - layer->update_weights(weights[0], weights[1]); - else - layer->update_weights(weights[0]); + layer->update_weights(weights); } void apply_grads_to_dense(LDense *layer, vector grads) { - if (grads.size() > 1) - layer->accumulate_accumulated_gradients(grads[0], grads[1]); - else - layer->accumulate_accumulated_gradients(grads[0]); + layer->accumulate_accumulated_gradients(grads); } vector get_dense_tensors(onnx::NodeProto &node, From dd280c9eb279bf2c2fffceabf6aa935d4a50c2c1 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Fri, 3 Dec 2021 17:08:49 +0100 Subject: [PATCH 21/35] Add getBroadcastShape and getTilesRepetitions functions to utils --- include/eddl/utils.h | 4 ++++ src/utils.cpp | 45 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/include/eddl/utils.h b/include/eddl/utils.h index e6ba4f91d..7086553e2 100755 --- a/include/eddl/utils.h +++ b/include/eddl/utils.h @@ -64,6 +64,10 @@ int* ranges2indices(vector ishape, vector> ranges); vector expand_shape(const vector& ishape, int size); int* expand_indices(const vector& ishape, int size); +vector getBroadcastShape(vector shape1, vector shape2); + +vector getTilesRepetitions(const vector& broadcast_from, const vector& broadcast_to); + bool is_number(const std::string& s); bool pathExists(const std::string &s); diff --git a/src/utils.cpp b/src/utils.cpp index 0129a603a..3a1ffc71e 100755 --- 
a/src/utils.cpp
+++ b/src/utils.cpp
@@ -490,6 +490,51 @@ int* expand_indices(const vector& ishape, int size){
     return addresses;  // Be careful! It's easy to forget about this pointer and have a memory leak
 }

+vector getBroadcastShape(vector shape1, vector shape2){
+    // Normalize broadcast shape: (3)*(1,3), (3)*(6,2,5,3,5), (5, 3)*(5, 3),...
+    vector broadcast_shape;
+
+    // Check dimensions
+    if (shape1.size() == shape2.size()){ // Same shape
+        broadcast_shape = shape2;
+
+    }else if(shape1.size()==1){  // Shape1 has 1 dimension
+        // Broadcast: [3] AND [12, 3, 7, 7] => [1, 3, 1, 1]
+        for(int i = 0; i < shape2.size(); ++i) {
+            if (shape1[0] == shape2[i]) {
+                broadcast_shape.push_back(shape2[i]);
+            }else{ broadcast_shape.push_back(1); }
+        }
+    }else{
+        // None
+    }
+
+    return broadcast_shape;
+}
+
+vector getTilesRepetitions(const vector& broadcast_from, const vector& broadcast_to){
+    // Compute repetitions to perform a given broadcast: (3, 1, 1)*(3, 28, 28) => (1, 28, 28)
+    vector tile_repetitions;  // Tile repetitions
+
+    // Check dimensions
+    if(broadcast_from.size()!=broadcast_to.size()){
+        msg("'broadcast_from' and 'broadcast_to' must have the same number of dimensions", "utils::getTilesRepetitions");
+    }
+
+    // Compute repetitions
+    for(int i=0; i<broadcast_to.size(); i++){
+        if(broadcast_from[i]==broadcast_to[i]){  // 3=>3: No repeat
+            tile_repetitions.push_back(1);
+        }else if(broadcast_from[i]==1){ // 1=>5: Repeat 5 times
+            tile_repetitions.push_back(broadcast_to[i]);
+        }else{
+            // Error
+        }
+    }
+
+    return tile_repetitions;
+}
+
 bool is_number(const std::string& s){
     return !s.empty() && std::find_if(s.begin(), s.end(), [](char c) { return !std::isdigit(c); }) == s.end();
 }

From 7dadd1bbd9c3fe127a0d8ae682f97dad0f670365 Mon Sep 17 00:00:00 2001
From: salvacarrion
Date: Fri, 3 Dec 2021 17:11:47 +0100
Subject: [PATCH 22/35] Improve tensor broadcasting. Now supports both directions and has more checks

---
 include/eddl/tensor/tensor.h |  3 +-
 src/tensor/tensor_core.cpp   | 69 ++++++++++++++++--------------------
 2 files changed, 33 insertions(+), 39 deletions(-)

diff --git a/include/eddl/tensor/tensor.h b/include/eddl/tensor/tensor.h
index 23e5729b9..137fe4cb7 100644
--- a/include/eddl/tensor/tensor.h
+++ b/include/eddl/tensor/tensor.h
@@ -2947,9 +2947,10 @@ class Tensor {
      * @brief Returns a new tensor A to be broadcasted into B
      * @param A Input tensor.
      * @param B Input tensor.
+     * @param output Output tensor (optional).
      * @return A new tensor C based on A, but prepared to be broadcasted in to B
      */
-    static Tensor* broadcast(Tensor* A, Tensor* B);
+    static Tensor* broadcast(Tensor* A, Tensor* B, Tensor *output=nullptr);

     /**
      * @brief Returns an array with the selected indices of the tensor.
diff --git a/src/tensor/tensor_core.cpp b/src/tensor/tensor_core.cpp index ff85b86ad..660c378db 100644 --- a/src/tensor/tensor_core.cpp +++ b/src/tensor/tensor_core.cpp @@ -1104,54 +1104,47 @@ void Tensor::tile_deprecated(Tensor *A, Tensor *B) } } -Tensor* Tensor::broadcast(Tensor* A, Tensor* B){ +Tensor* Tensor::broadcast(Tensor* A, Tensor* B, Tensor *output){ // Source: https://numpy.org/doc/stable/user/basics.broadcasting.html - bool isCompatible= false; - vector old_shape = A->shape; - vector broadcast_shape(B->ndim, 1); + vector shape1 = A->shape; + vector shape2 = B->shape; + bool shapes_swapped = false; - // Check dimensions - if (A->ndim == B->ndim){ // fine - isCompatible = true; - broadcast_shape = A->shape; - }else if(A->ndim==1){ // Check compatibility - // Broadcast: [3] AND [12, 3, 7, 7] => [1, 3, 1, 1] - for(int i = 0; i < B->shape.size(); ++i) { - if (A->shape[0] == B->shape[i]) { - isCompatible = true; - broadcast_shape[i] = B->shape[i]; // [1, 3, 1, 1] - break; - } - } - } + // Shape1 must be smaller (or equal) than Shape2 + if(shape1.size()>shape2.size()){ shape1.swap(shape2); shapes_swapped = true; } - // Check compatibility - if(!isCompatible){ - msg("The dimensions of both tensors must be equal or compatible (i.e [3] + [12, 3, 7, 7])", "Tensor::broadcast"); + // Get shape to broadcast (normalized) + vector broadcast_from = getBroadcastShape(shape1, shape2); + if(broadcast_from.empty()){ + msg("The dimensions of both tensors must be equal or compatible (i.e (3)*(1,3), (3)*(6,2,5,3,5), (5, 3)*(5, 3),...)", "Tensor::broadcast"); } - // Check if the shapes can be broadcasted - vector tile_repetitions; // Tile repetitions - for(int i=0; indim; i++){ - if(broadcast_shape[i]==B->shape[i]) { - tile_repetitions.push_back(1); - }else if(broadcast_shape[i]==1){ // || B->shape[i]==1){ - tile_repetitions.push_back(B->shape[i]); - }else{ - // msg("These tensors cannot be broadcasted. Two dimensions are compatible when: 1) they are equal, or 2) one of them is 1", "Tensor::broadcast"); - msg("These tensors cannot be broadcasted. Two dimensions are compatible when: 1) they are equal, or 2) if the broadcasted dimension is 1", "Tensor::broadcast"); - } + // Get repetitions to perform a given broadcast + vector tile_repetitions = getTilesRepetitions(broadcast_from, shape2); + if(tile_repetitions.empty()){ + msg("These tensors cannot be broadcasted. Two dimensions are compatible when: 1) they are equal, or 2) one of them is 1", "Tensor::broadcast"); } - // Change dimension (temporarily). I don't like this... - A->reshape_(broadcast_shape); + // Build descriptor + auto *td = new TileDescriptor(tile_repetitions, A->device); + td->build(broadcast_from); - // Do broadcast - Tensor* new_t = Tensor::tile(A, tile_repetitions); + // Create new tensor (if needed) + Tensor* new_t; + if (output== nullptr){ + new_t = new Tensor(td->oshape, A->device); + }else{ + new_t = output; + } - // Return to original dimension. I don't like this... 
- A->reshape_(old_shape); + // Do broadcast (tile repetitions) + if(!shapes_swapped){ + Tensor::select(A, new_t, td); + }else{ + Tensor::select(B, new_t, td); + } + delete td; return new_t; } From 5834c774ad803b1ae28d43a63cf6b798769121ad Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Fri, 3 Dec 2021 17:12:07 +0100 Subject: [PATCH 23/35] Add tests for tensor broadcasting --- tests/tensor/test_tensor_core.cpp | 33 +++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tests/tensor/test_tensor_core.cpp b/tests/tensor/test_tensor_core.cpp index b5c24b359..701c67e3a 100644 --- a/tests/tensor/test_tensor_core.cpp +++ b/tests/tensor/test_tensor_core.cpp @@ -269,4 +269,37 @@ TEST(TensorTestSuite, tensor_tile){ #endif } + +TEST(TensorTestSuite, tensor_broadcast){ + Tensor* t1 = new Tensor({1, 2, 3}, {3}, DEV_CPU); + Tensor* t1_t2_ref = new Tensor({1,1,1,1, 2,2,2,2, 3,3,3,3}, {3, 4}, DEV_CPU); + Tensor* t2 = Tensor::zeros(t1_t2_ref->shape, DEV_CPU); + +// t1->print(0); +// t2->print(0); + Tensor* t1_res = Tensor::broadcast(t1, t2); +// t1_res->print(0); + ASSERT_TRUE(Tensor::equivalent(t1_res, t1_t2_ref, 1e-3f, 0.0f, true, true)); + + Tensor* t2_res = Tensor::broadcast(t2, t1); // Should be commutative +// t1_res->print(0); + ASSERT_TRUE(Tensor::equivalent(t2_res, t1_t2_ref, 1e-3f, 0.0f, true, true)); + + // Test GPU +#ifdef cGPU + // Test #1 + Tensor* t1_cpu = Tensor::randn({3}); + Tensor* t2_cpu = Tensor::randn({1, 3, 224, 224}); + Tensor* t1_gpu = t1_cpu->clone(); t1_gpu->toGPU(); + Tensor* t2_gpu = t2_cpu->clone(); t2_gpu->toGPU(); + + Tensor* t1_cpu_res = Tensor::broadcast(t1_cpu, t2_cpu); + Tensor* t1_gpu_res = Tensor::broadcast(t1_gpu, t2_gpu); t1_gpu_res->toCPU(); + ASSERT_TRUE(Tensor::equivalent(t1_cpu_res, t1_gpu_res, 1e-3f, 0.0f, true, true)); + + delete t1_cpu; delete t1_cpu_res; + delete t1_gpu; delete t1_gpu_res; +#endif +} + //vector> res = cartesian_product({{0,1}, {0,1}, {0,1}}); From b6c34e2b4c744f45dd6f4024c550a5f2ec8cf1e3 Mon Sep 17 00:00:00 2001 From: chavicoski Date: Fri, 3 Dec 2021 17:28:21 +0100 Subject: [PATCH 24/35] Minor bug fix in new ONNX distributed functions --- src/serialization/onnx/net/layers/layers_onnx.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/serialization/onnx/net/layers/layers_onnx.cpp b/src/serialization/onnx/net/layers/layers_onnx.cpp index 361ea172b..2949cd15b 100644 --- a/src/serialization/onnx/net/layers/layers_onnx.cpp +++ b/src/serialization/onnx/net/layers/layers_onnx.cpp @@ -519,7 +519,7 @@ void update_layer_weights(Layer *layer, vector weights) else if (LDense *l = dynamic_cast(layer)) update_dense_weights(l, weights); else - cerr << "The layer " << l->name << " has no support for setting weights" << endl; + cerr << "The layer " << layer->name << " has no support for setting weights" << endl; } void apply_grads_to_layer(Layer *layer, vector grads) @@ -536,7 +536,7 @@ void apply_grads_to_layer(Layer *layer, vector grads) else if (LDense *l = dynamic_cast(layer)) apply_grads_to_dense(l, grads); else - cerr << "The layer " << l->name << " has no support for applying gradients" << endl; + cerr << "The layer " << layer->name << " has no support for applying gradients" << endl; } map> get_tensors_from_onnx_nodes(vector &nodes, From 4a0250b30335879da21d157f7a03386f85189c6b Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Fri, 3 Dec 2021 18:23:54 +0100 Subject: [PATCH 25/35] Fix broadcast tensor function in sphinx docs --- docs/sphinx/source/tensor/manipulation.rst | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/docs/sphinx/source/tensor/manipulation.rst b/docs/sphinx/source/tensor/manipulation.rst index 0ee76d650..e3f15e5af 100644 --- a/docs/sphinx/source/tensor/manipulation.rst +++ b/docs/sphinx/source/tensor/manipulation.rst @@ -275,7 +275,7 @@ tile broadcast ^^^^^^^^^^^^^^^ -.. doxygenfunction:: Tensor::broadcast(Tensor* A, Tensor* B); +.. doxygenfunction:: Tensor::broadcast(Tensor* A, Tensor* B, Tensor *output=nullptr); .. code-block:: c++ From 83b92eb3bb945560e85662e392e725327a00460a Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Fri, 3 Dec 2021 18:24:16 +0100 Subject: [PATCH 26/35] Add broadcast layer --- include/eddl/layers/core/layer_core.h | 25 ++++++ src/layers/core/layer_broadcast.cpp | 124 ++++++++++++++++++++++++++ 2 files changed, 149 insertions(+) create mode 100644 src/layers/core/layer_broadcast.cpp diff --git a/include/eddl/layers/core/layer_core.h b/include/eddl/layers/core/layer_core.h index 93c18ac3e..037b8efa0 100644 --- a/include/eddl/layers/core/layer_core.h +++ b/include/eddl/layers/core/layer_core.h @@ -275,6 +275,31 @@ class LTile : public LinLayer { }; +class LBroadcast : public LinLayer { +public: + static int total_layers; + TileDescriptor *td; + bool shapes_swapped; + Layer *p1; // Small + Layer *p2; // Big + + // constructors and clones + LBroadcast(Layer *parent1, Layer *parent2, string name, int dev, int mem); + + Layer *share(int c, int bs, vector p) override; + + Layer *clone(int c, int bs, vector p, int todev) override; + + ~LBroadcast() override; + + void forward() override; + + void backward() override; + + string plot(int c) override; + +}; + class LBypass : public LinLayer { public: static int total_layers; diff --git a/src/layers/core/layer_broadcast.cpp b/src/layers/core/layer_broadcast.cpp new file mode 100644 index 000000000..4ccb6929c --- /dev/null +++ b/src/layers/core/layer_broadcast.cpp @@ -0,0 +1,124 @@ +/* +* EDDL Library - European Distributed Deep Learning Library. +* Version: 1.0 +* copyright (c) 2021, Universitat Politècnica de València (UPV), PRHLT Research Centre +* Date: November 2021 +* Author: PRHLT Research Centre, UPV, (rparedes@prhlt.upv.es), (jon@prhlt.upv.es) +* All rights reserved +*/ + + +#include +#include +#include + +#include "eddl/layers/core/layer_core.h" + + +using namespace std; + +int LBroadcast::total_layers = 0; + +LBroadcast::LBroadcast(Layer *parent1, Layer *parent2, string name, int dev, int mem) : LinLayer(name, dev, mem, "broadcast") { + // Important: + // There are two parents, but one of them is "x" and the other provides the constant shape to which broadcast + // To give more flexibility, the "x" is going to be the parent from which we are going to broadcast. + // Example: + // - f(P1(3), P2(4,2,3,5)) => P1 is x. (P2 has no delta) + // - f(P1(4,2,3,5), P2(3)) => P2 is x. 
(P1 has no delta) + if(name.empty()) this->name = "broadcast" + to_string(++total_layers); + + // Get shapes + vector shape1 = parent1->output->shape; + vector shape2 = parent2->output->shape; + this->shapes_swapped = false; + + // Remove batch dimension (some checks are done at shape[0] + shape1 = vector(shape1.begin()+1, shape1.end()); + shape2 = vector(shape2.begin()+1, shape2.end()); + + // Determine which parent is the smaller one + if(shape1.size()<=shape2.size()) { + this->input = parent1->output; // Input must be the output of the smaller + }else{ + this->input = parent2->output; // Input must be the output of the smaller + shape1.swap(shape2); + this->shapes_swapped = true; + } + + // Get shape to broadcast (normalized) + vector broadcast_from = getBroadcastShape(shape1, shape2); + if(broadcast_from.empty()){ + msg("The dimensions of both tensors must be equal or compatible (i.e (3)*(1,3), (3)*(6,2,5,3,5), (5, 3)*(5, 3),...)", "Tensor::broadcast"); + } + + // Get repetitions to perform a given broadcast + vector tile_repetitions = getTilesRepetitions(broadcast_from, shape2); + if(tile_repetitions.empty()){ + msg("These tensors cannot be broadcasted. Two dimensions are compatible when: 1) they are equal, or 2) one of them is 1", "Tensor::broadcast"); + } + + // Build descriptor (without batch) + this->td = new TileDescriptor(tile_repetitions, dev); + td->build(broadcast_from); + + // Set output tensor (add batch) + vector oshape(this->td->oshape); oshape.insert(oshape.begin(), 1); // Add batch with dimension "1" (not needed, but this is for consistency) + output = new Tensor(oshape, dev); + + // Set parents + p1=parent1; p2=parent2; // Keep references in the same order + if(!this->shapes_swapped){ // parent1 smaller than parent2 + parent1->addchild(this); + addparent(parent1); + }else{ // parent2 smaller than parent1 + parent2->addchild(this); + addparent(parent2); + } +} + +LBroadcast::~LBroadcast(){ + this->p1 = nullptr; // There are simply to store a reference + this->p2 = nullptr; // There are simply to store a reference + delete td; +} + +void LBroadcast::forward(){ + if(!this->shapes_swapped){ + tensorNN::select(this->p1->input, this->output, this->td); + }else{ + tensorNN::select(this->p2->input, this->output, this->td); + } +} + +void LBroadcast::backward(){ + if(!this->shapes_swapped){ + tensorNN::select_back(this->delta, this->p1->delta, this->td); + }else{ + tensorNN::select_back(this->delta, this->p2->delta, this->td); + } +} + +Layer *LBroadcast::share(int c, int bs, vector p){ + auto *n = new LBroadcast(this->p1, this->p2, "share_"+to_string(c)+this->name, this->dev, this->mem_level); + n->orig = this; + + return n; +} + +Layer *LBroadcast::clone(int c, int bs, vector p, int todev) { + auto *n = new LBroadcast(this->p1, this->p2, this->name, todev, this->mem_level); + n->orig = this; + + return n; +} + + +string LBroadcast::plot(int c) { + string s; + + if (c) s = name + " [label=" + "\"" + name + "\",style=filled,fontsize=12,fillcolor=bisque4,shape=box]"; + else s = name + " [label=" + "\"" + name + "\",style=filled,fontsize=12,fillcolor=White,shape=box]"; + + return s; +} From 1f5d3802c0deafbad63c736d92cc6c9c210c3ea2 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Fri, 3 Dec 2021 18:24:32 +0100 Subject: [PATCH 27/35] Add broadcast layer to the API --- include/eddl/apis/eddl.h | 12 ++++++++++++ src/apis/eddl.cpp | 4 ++++ 2 files changed, 16 insertions(+) diff --git a/include/eddl/apis/eddl.h b/include/eddl/apis/eddl.h index 8f646d203..37fa3e7c5 100644 --- 
a/include/eddl/apis/eddl.h +++ b/include/eddl/apis/eddl.h @@ -1083,6 +1083,18 @@ namespace eddl { */ layer Tile(layer parent, const vector& repeats, string name=""); + /** + * @brief Prepares the output of the smaller layer to be broadcasted into the bigger one (parent1 or parent2) + * Example: + * - f(P1(3), P2(4,2,3,5)) => P1 is x. (P2 has no delta) + * - f(P1(4,2,3,5), P2(3)) => P2 is x. (P1 has no delta) + * + * @param parent1 Parent layer + * @param parent2 Parent layer + * @return Output of repeat operation + */ + layer Broadcast(layer parent1, layer parent2, string name=""); + /** * @brief Virtual layer. Propagates the output of the parent as their own. Used internally for ONNX. * diff --git a/src/apis/eddl.cpp b/src/apis/eddl.cpp index c4b413183..366e712e4 100644 --- a/src/apis/eddl.cpp +++ b/src/apis/eddl.cpp @@ -750,6 +750,10 @@ namespace eddl { return new LTile(parent, repeats_with_single_batch, name, DEV_CPU, 0); } + layer Broadcast(layer parent1, layer parent2, string name){ + return new LBroadcast(parent1, parent2, name, DEV_CPU, 0); + } + layer Bypass(layer parent, string bypass_name, string name){ return new LBypass(parent, bypass_name, name, DEV_CPU, 0); } From 6c435523f425e63317d9e7c5b47c8a337201d49c Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Fri, 3 Dec 2021 18:25:03 +0100 Subject: [PATCH 28/35] Remove debugging code + comments --- src/layers/core/layer_repeat.cpp | 2 +- src/utils.cpp | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/layers/core/layer_repeat.cpp b/src/layers/core/layer_repeat.cpp index 228b62252..932e917f0 100644 --- a/src/layers/core/layer_repeat.cpp +++ b/src/layers/core/layer_repeat.cpp @@ -39,7 +39,7 @@ LRepeat::LRepeat(Layer *parent, const vector& repeats, unsigned in vector input_shape_single_batch(input->shape.begin()+1, input->shape.end()); input_shape_single_batch.insert(input_shape_single_batch.begin(), 1); - // Build descriptor + // Build descriptor (batch must equal to "1") this->rd = new RepeatDescriptor(repeats, axis, dev); this->rd->build(input_shape_single_batch); diff --git a/src/utils.cpp b/src/utils.cpp index 3a1ffc71e..43bab009a 100755 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -480,9 +480,6 @@ int* expand_indices(const vector& ishape, int size){ B_pos += B_idx * ostride[d]; } - if(B_pos!=i){ - int asd = 3; - } // Save address translation addresses[i] = A_pos; } From ed17b4428a659416d9bfd4a431253b6db3535ed5 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Fri, 3 Dec 2021 19:27:41 +0100 Subject: [PATCH 29/35] Fix layer shape --- include/eddl/layers/auxiliar/layer_auxiliar.h | 4 ++- src/layers/auxiliar/layer_shape.cpp | 30 ++++++++++++++----- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/include/eddl/layers/auxiliar/layer_auxiliar.h b/include/eddl/layers/auxiliar/layer_auxiliar.h index ecd20c7f9..9c183ac0f 100644 --- a/include/eddl/layers/auxiliar/layer_auxiliar.h +++ b/include/eddl/layers/auxiliar/layer_auxiliar.h @@ -24,8 +24,10 @@ using namespace std; class LShape : public LinLayer { public: static int total_layers; + vector data; + bool include_batch; - LShape(Layer *parent, string name, int dev, int mem); + LShape(Layer *parent, bool include_batch, string name, int dev, int mem); Layer *share(int c, int bs, vector p) override; diff --git a/src/layers/auxiliar/layer_shape.cpp b/src/layers/auxiliar/layer_shape.cpp index a7faa3a3a..ff473349a 100644 --- a/src/layers/auxiliar/layer_shape.cpp +++ b/src/layers/auxiliar/layer_shape.cpp @@ -19,14 +19,22 @@ using namespace std; int 
LShape::total_layers = 0; -LShape::LShape(Layer *parent, string name, int dev, int mem) : LinLayer(name, dev, mem) { +LShape::LShape(Layer *parent, bool include_batch, string name, int dev, int mem) : LinLayer(name, dev, mem, "shape") { if(name.empty()) this->name = "shape" + to_string(++total_layers); + // Set input input = parent->output; - // Copy and cast parent's output shape - vector data(input->shape.begin(), input->shape.end()); - output = new Tensor(data, input->shape, dev); + // Set vars + this->include_batch = include_batch; + + // [DATA]: Copy data, but ignore batch optionally + int offset = int(!include_batch); + this->data = vector(input->shape.begin() + offset, input->shape.end()); + + // [Output]: Data=data, Shape=(1batch, data length) + vector oshape; oshape.push_back(1); oshape.push_back(data.size()); + output = new Tensor(this->data, oshape, this->dev); // Must include batch parent->addchild(this); addparent(parent); @@ -35,28 +43,34 @@ LShape::LShape(Layer *parent, string name, int dev, int mem) : LinLayer(name, de // virtual void LShape::resize(int batch){ + delete this->output; + // Create tensor with batch 1 + vector oshape; oshape.push_back(1); oshape.push_back(data.size()); + auto tmp = new Tensor(this->data, oshape, this->dev); // batch 1 + + // Create output tensor + this->output = Tensor::repeat(tmp, batch, 0); + delete tmp; } void LShape::forward() { - } void LShape::backward() { - msg("NotImplementedError", "LShape::backward"); } Layer *LShape::share(int c, int bs, vector p) { - auto *n = new LShape(p[0], "share_"+to_string(c)+this->name, this->dev, this->mem_level); + auto *n = new LShape(p[0], this->include_batch, "share_"+to_string(c)+this->name, this->dev, this->mem_level); n->orig = this; return n; } Layer *LShape::clone(int c, int bs, vector p, int todev) { - auto *n = new LShape(p[0], name, todev, this->mem_level); + auto *n = new LShape(p[0], this->include_batch, name, todev, this->mem_level); n->orig = this; return n; From 5cc0156bce27d44cda4bdd9a9293690e9dc97386 Mon Sep 17 00:00:00 2001 From: salvacarrion Date: Fri, 3 Dec 2021 19:28:03 +0100 Subject: [PATCH 30/35] Fix broadcast layer --- src/layers/core/layer_broadcast.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/layers/core/layer_broadcast.cpp b/src/layers/core/layer_broadcast.cpp index 4ccb6929c..3410c798d 100644 --- a/src/layers/core/layer_broadcast.cpp +++ b/src/layers/core/layer_broadcast.cpp @@ -67,13 +67,14 @@ LBroadcast::LBroadcast(Layer *parent1, Layer *parent2, string name, int dev, int output = new Tensor(oshape, dev); // Set parents - p1=parent1; p2=parent2; // Keep references in the same order if(!this->shapes_swapped){ // parent1 smaller than parent2 parent1->addchild(this); addparent(parent1); + p1=parent1; p2=parent2; }else{ // parent2 smaller than parent1 parent2->addchild(this); addparent(parent2); + p1=parent2; p2=parent1; } } @@ -100,14 +101,14 @@ void LBroadcast::backward(){ } Layer *LBroadcast::share(int c, int bs, vector p){ - auto *n = new LBroadcast(this->p1, this->p2, "share_"+to_string(c)+this->name, this->dev, this->mem_level); + auto *n = new LBroadcast(p[0], this->p2, "share_"+to_string(c)+this->name, this->dev, this->mem_level); n->orig = this; return n; } Layer *LBroadcast::clone(int c, int bs, vector p, int todev) { - auto *n = new LBroadcast(this->p1, this->p2, this->name, todev, this->mem_level); + auto *n = new LBroadcast(p[0], this->p2, this->name, todev, this->mem_level); n->orig = this; return n; From 
f6322719bf7d5f3365ee8dd08f5764368536c765 Mon Sep 17 00:00:00 2001
From: salvacarrion
Date: Fri, 3 Dec 2021 19:28:34 +0100
Subject: [PATCH 31/35] Add Shape layer to the API

---
 include/eddl/apis/eddl.h | 9 +++++++++
 src/apis/eddl.cpp        | 4 ++++
 2 files changed, 13 insertions(+)

diff --git a/include/eddl/apis/eddl.h b/include/eddl/apis/eddl.h
index 37fa3e7c5..284483d27 100644
--- a/include/eddl/apis/eddl.h
+++ b/include/eddl/apis/eddl.h
@@ -1104,6 +1104,15 @@ namespace eddl {
      */
     layer Bypass(layer parent, string bypass_name="", string name="");

+    /**
+     * @brief This layer returns the shape of its parent as its output
+     *
+     * @param parent Parent layer
+     * @param include_batch If True, the batch dimension is included in the output
+     * @return The shape of the parent layer
+     */
+    layer Shape(layer parent, bool include_batch=true, string name="");
+
     /**
      * @brief Dimension of size one is removed at the specified position. (Batch dimension is ignored)
      *
diff --git a/src/apis/eddl.cpp b/src/apis/eddl.cpp
index 366e712e4..51d22b642 100644
--- a/src/apis/eddl.cpp
+++ b/src/apis/eddl.cpp
@@ -758,6 +758,10 @@ namespace eddl {
         return new LBypass(parent, bypass_name, name, DEV_CPU, 0);
     }

+    layer Shape(layer parent, bool include_batch, string name){
+        return new LShape(parent, include_batch, name, DEV_CPU, 0);
+    }
+
     layer Squeeze(layer parent, const int axis, string name){
         return new LSqueeze(parent, axis, name, DEV_CPU, 0);
     }

From d38d84c9819e869acb026d0c9d6e36c93e630c62 Mon Sep 17 00:00:00 2001
From: salvacarrion
Date: Fri, 3 Dec 2021 19:29:48 +0100
Subject: [PATCH 32/35] Fix docs: ConstOfTensor and Where layers

---
 docs/sphinx/source/layers/auxiliar.rst | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/docs/sphinx/source/layers/auxiliar.rst b/docs/sphinx/source/layers/auxiliar.rst
index 44cbed421..a131b0e14 100644
--- a/docs/sphinx/source/layers/auxiliar.rst
+++ b/docs/sphinx/source/layers/auxiliar.rst
@@ -4,20 +4,26 @@ Auxiliar Layers
 Constant Of Tensor
 -------------------

-.. doxygenfunction:: ConstOfTensor
+.. doxygenfunction:: ConstOfTensor(Tensor* t, string name = "");

 Example:

 .. code-block:: c++

-   t = Tensor::ones({16, 16, 16}};
-   l = ConstOfTensor(t);
+   // Example #1:
+   Tensor *t = Tensor::ones({16, 16, 16});
+   Layer *l = ConstOfTensor(t);
+
+   // Example #2:
+   layer mean = ConstOfTensor(new Tensor( {0.485, 0.456, 0.406}, {3}, DEV_CPU));
+   layer std = ConstOfTensor(new Tensor( {0.229, 0.224, 0.225}, {3}, DEV_CPU));
+

 Where
 ------------------

-.. doxygenfunction:: Where
+.. doxygenfunction:: Where(layer parent1, layer parent2, layer condition, string name = "");

 Example:

From 8a2bd727896162d70622814fae391b2960c5a073 Mon Sep 17 00:00:00 2001
From: salvacarrion
Date: Fri, 3 Dec 2021 19:30:07 +0100
Subject: [PATCH 33/35] Add Bypass layer to the documentation

---
 docs/sphinx/source/layers/auxiliar.rst | 28 ++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/docs/sphinx/source/layers/auxiliar.rst b/docs/sphinx/source/layers/auxiliar.rst
index a131b0e14..026667b7a 100644
--- a/docs/sphinx/source/layers/auxiliar.rst
+++ b/docs/sphinx/source/layers/auxiliar.rst
@@ -30,3 +30,31 @@ Example:
 .. code-block:: c++

     l = Where(parent1, parent2, condition);
+
+
+
+Bypass
+-------------------
+
+.. doxygenfunction:: Bypass(layer parent, string bypass_name="", string name="");
+
+Example:
+
+.. code-block:: c++
+
+    // Example #1:
+    Layer *l = Bypass(l, "fake_layer");
+
+
+
+Shape
+-------------------
+
+.. 
From f12289bae23ceca5a6d42f59f0d4f5229810c37c Mon Sep 17 00:00:00 2001
From: salvacarrion
Date: Fri, 3 Dec 2021 19:30:48 +0100
Subject: [PATCH 34/35] Add Tile and Broadcasting layers to the documentation + fixes on repeat

---
 docs/sphinx/source/layers/core.rst | 39 ++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/docs/sphinx/source/layers/core.rst b/docs/sphinx/source/layers/core.rst
index d7239dd1d..d8ac3ebc0 100644
--- a/docs/sphinx/source/layers/core.rst
+++ b/docs/sphinx/source/layers/core.rst
@@ -232,6 +232,41 @@ Example:
 
 .. code-block:: c++
 
-    l = Repeat(l, 3, 1);  // Repeat depth (axis=3) 3 times
-    l = Repeat(l, {3, 2, 1}, 1);  // Repeat col 1 => 3 times; col 2 => 2 times; col 3 => 1 time. (repeat=[3,2,1], axis=1)
+    // Example #1:
+    l = Repeat(l, 3, 1);  // Repeat depth (axis=3) 3 times
+    l = Repeat(l, {3, 2, 1}, 1);  // Repeat col 1 => 3 times; col 2 => 2 times; col 3 => 1 time. (repeat=[3,2,1], axis=1)
+    // Example #2:
+    l = Reshape(l,{1,28,28});
+    l = Repeat(l, 3, 0);  // l => (3, 28, 28)
+
+
+
+Tile
+-------
+
+.. doxygenfunction:: eddl::Tile(layer parent, const vector& repeats, string name="");
+
+Example:
+
+.. code-block:: c++
+
+    l = Reshape(l,{1,28,28});
+    l = Tile(l, {3, 1, 1});  // l => (3, 28, 28)
+
+
+
+Broadcasting
+-------------
+
+.. doxygenfunction:: eddl::Broadcast(layer parent1, layer parent2, string name="");
+
+Example:
+
+.. code-block:: c++
+
+    // Normalize: (X-mean) / std
+    layer mean = ConstOfTensor(new Tensor( {0.485, 0.456, 0.406}, {3}, DEV_CPU));
+    layer std = ConstOfTensor(new Tensor( {0.229, 0.224, 0.225}, {3}, DEV_CPU));
+    x = Sub(x, Broadcast(mean, x));
+    x = Div(x, Broadcast(std, x));
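Note on [PATCH 34/35]: the Repeat "Example #2" and the Tile example document two routes to the same (3, 28, 28) result from a single-channel 28x28 map. A minimal side-by-side sketch using only the calls documented above; variable names are illustrative:

.. code-block:: c++

    // Two ways to turn one channel into three identical channels:
    l = Reshape(l, {1, 28, 28});
    layer a = Repeat(l, 3, 0);     // repeat 3 times along axis 0  => (3, 28, 28)
    layer b = Tile(l, {3, 1, 1});  // tile with repeats {3, 1, 1}  => (3, 28, 28)
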
From a97131926685596f25c569c34ba148fc7b4361d9 Mon Sep 17 00:00:00 2001
From: salvacarrion
Date: Fri, 3 Dec 2021 19:35:12 +0100
Subject: [PATCH 35/35] Update markdown

---
 docs/markdown/eddl_progress.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/docs/markdown/eddl_progress.md b/docs/markdown/eddl_progress.md
index 2cc744421..49ccb615e 100644
--- a/docs/markdown/eddl_progress.md
+++ b/docs/markdown/eddl_progress.md
@@ -27,17 +27,14 @@
 | Embedding | 🟢️️ | 🟢️️ | 🟢️️ | ️🟢️️ | Turns positive integers (indexes) into dense vectors of fixed size; (also known as mapping). e.g. `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]` |
 | Transpose | 🟢️️ | 🟢️️ | 🟢️️ | ️🟢️️ | Permute the last two dimensions |
 | ConstOfTensor | 🟢️️ | 🟢️️ | 🟢️️ | 🟢️ | Repeats a tensor across the batch |
-| Equal | 🟢️️ | 🟢️️ | 🟢️️ | ️🔴️ | Return (x1 == x2) element-wise |
 | Expand | 🟢️️| 🟢️️ |🟢️️ | 🟢️ | Returns a layer with singleton dimensions expanded to a larger size |
 | Where | 🟢️️ | 🟢️️ | 🟢️️ | ️🔴️ | Return elements chosen from x or y depending on condition |
 | Resize | 🟢️️ | 🟢️️ |🟢️️ | 🟢️️ | Resize the input image to the given size. `[height, width]` |
 | Clamp / Clip | 🟢️️ | 🟢️️ |🟢️️ | 🟢️ | Clamps all elements in input into the range `[min, max]`. |
 | Repeat | 🟢️️ | 🟢️️ |🟢️️ | 🔴️️ | Repeats the elements of a tensor along the specified dimension. (Elements in an axis can be repeated independently) |
-| Tile | 🔴️️ | 🔴 |🔴️ | 🔴️️ | Repeats the elements of a tensor along the specified dimensions. |
-| Round | 🔴️️ | 🔴️️ | 🔴️️ | 🔴️️ | Round of the elements of input |
-| Ceil | 🔴️ | 🔴️️ | 🔴️ | 🔴️️ | Ceil of the elements of input |
-| Floor | 🔴️️ | 🔴 |🔴️ | 🔴️️ | Floor of the elements of input |
-| Broadcast | 🔴️️ | 🔴 |🔴️ | 🔴️️ | Produce an object that mimics broadcasting. |
+| Tile | 🟢️️ | 🟢️️ |🟢️️ | 🔴️️ | Repeats the elements of a tensor along the specified dimensions. |
+| Broadcast | 🟢️️ | 🟢️️ |🟢️️ | 🔴️️ | Produce an object that mimics broadcasting. |
+| Shape | 🟢️️ | 🟢️️ |🟢️️️ | 🔴️️ | Returns the shape of its parent as its output |
 
 
 ## Activations
@@ -185,7 +182,10 @@ Apply data transformations with random parametrization.
 | Pow | 🟢️️ | 🟢️️ |🟢️️ | 🟢 | |
 | Sqrt | 🟢️️| 🟢️️ |🟢️️ | 🟢️️ | |
 | Sub | 🟢️️ | 🟢️️ |🟢️️ | 🟢️️ | |
-
+| Round | 🔴️️ | 🔴️️ | 🔴️️ | 🔴️️ | Round of the elements of input |
+| Ceil | 🔴️ | 🔴️️ | 🔴️ | 🔴️️ | Ceil of the elements of input |
+| Floor | 🔴️️ | 🔴 |🔴️ | 🔴️️ | Floor of the elements of input |
+| Equal | 🟢️️ | 🟢️️ | 🟢️️ | ️🔴️ | Return (x1 == x2) element-wise |
 
 ## Reduction layers
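Note on [PATCH 31/35] and [PATCH 35/35]: the new Shape layer emits its parent's shape as its output, with include_batch controlling whether the batch dimension is part of the emitted values. A minimal usage sketch built only from calls that appear in this series; the surrounding network and shapes are illustrative:

.. code-block:: c++

    // Shape turns the parent's shape into the layer output.
    layer in = Input({784});
    layer l  = Reshape(in, {1, 28, 28});
    layer s1 = Shape(l, true);   // emitted shape values include the batch dimension
    layer s2 = Shape(l, false);  // emitted shape values: {1, 28, 28}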