
feat/layer: add Sequential layer
Introduce the Sequential layer, which serves as a replacement for
the Network struct.
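
For orientation, here is a minimal usage sketch of the API introduced below. The backend construction and the contained layer config are placeholders, not part of this commit; only SequentialConfig::default, add_input, add_layer and Sequential::from_config are taken from the diff.

use std::rc::Rc;

// Assumption: `some_backend` is any value implementing `IBackend + LayerOps<f32>`
// (e.g. a Collenchyma backend); how it is built is outside the scope of this commit.
let backend = Rc::new(some_backend);

// Describe the container: one named input tensor plus a stack of layers.
let mut cfg = SequentialConfig::default();
cfg.add_input("data", &[32, 784]);
// `linear_cfg` stands for any `LayerConfig` describing a contained layer.
cfg.add_layer(linear_cfg);

// Build the container. Layers are connected in the order they were added,
// and the outputs of the last layer become the outputs of the container.
let sequential = Sequential::from_config(backend.clone(), &cfg);

Internally the container auto-names the intermediate tensors (SEQUENTIAL_0, SEQUENTIAL_1, ...) unless consecutive layers have already been connected explicitly in the config.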
hobofan committed Feb 29, 2016
1 parent 3478586 commit aaacc1e
Showing 10 changed files with 1,260 additions and 2,235 deletions.
830 changes: 417 additions & 413 deletions benches/network_benches.rs


633 changes: 118 additions & 515 deletions examples/benchmarks.rs


438 changes: 269 additions & 169 deletions src/layer.rs


2 changes: 2 additions & 0 deletions src/layers/common/mod.rs
@@ -14,12 +14,14 @@ pub use self::convolution::{Convolution, ConvolutionConfig};
pub use self::linear::{Linear, LinearConfig};
pub use self::log_softmax::LogSoftmax;
pub use self::pooling::{Pooling, PoolingConfig, PoolingMode};
pub use self::sequential::{Sequential, SequentialConfig};
pub use self::softmax::Softmax;

pub mod convolution;
pub mod linear;
pub mod log_softmax;
pub mod pooling;
pub mod sequential;
pub mod softmax;

/// Provides common utilities for Layers that utilize a filter with stride and padding.
342 changes: 342 additions & 0 deletions src/layers/common/sequential.rs
@@ -0,0 +1,342 @@
//! A container layer that runs operations sequentially on the contained layers.
use std::cell::RefCell;
use std::collections::{HashMap, HashSet};
use std::rc::Rc;
use std::sync::{Arc, RwLock};
use co::{IBackend, SharedTensor};
use layer::*;
use util::{ArcLock, LayerOps};

#[derive(Debug)]
/// Sequential Layer
pub struct Sequential<B: IBackend + LayerOps<f32>> {
layers: Vec<RefCell<Layer<B>>>,

input_tensor_names: Vec<String>,
input_data_tensors: Vec<ArcLock<SharedTensor<f32>>>,
input_gradient_tensors: Vec<ArcLock<SharedTensor<f32>>>,

output_data_tensors: Vec<ArcLock<SharedTensor<f32>>>,
output_gradient_tensors: Vec<ArcLock<SharedTensor<f32>>>,

registry: HashMap<String, (ArcLock<SharedTensor<f32>>, ArcLock<SharedTensor<f32>>)>,
}

impl<B: IBackend + LayerOps<f32> + 'static> Sequential<B> {
/// Create an empty Sequential container layer.
pub fn empty() -> Sequential<B> {
Sequential {
layers: vec![],

input_tensor_names: vec![],
input_data_tensors: vec![],
input_gradient_tensors: vec![],

output_data_tensors: vec![],
output_gradient_tensors: vec![],

registry: HashMap::new(),
}
}

/// Create a Sequential layer from a SequentialConfig.
pub fn from_config(backend: Rc<B>, config: &SequentialConfig) -> Sequential<B> {
let mut layer = Self::empty();

layer.init_layers(backend, &config.clone());

layer
}

/// Initializes a sequential container.
///
/// Sets up the structure of the sequential container. It reads the supplied [SequentialConfig][1],
/// connects the input and output blobs of each layer, and determines whether backpropagation has
/// to be executed for each tensor and layer.
///
/// [1]: ./struct.SequentialConfig.html
pub fn init_layers(&mut self, backend: Rc<B>, in_config: &SequentialConfig) {
let mut config = in_config.clone();
let mut registry = HashMap::<String, (ArcLock<SharedTensor<f32>>, ArcLock<SharedTensor<f32>>)>::new();
let weight_registry = &mut HashMap::<String, (ArcLock<SharedTensor<f32>>, ArcLock<SharedTensor<f32>>, Option<f32>, Option<f32>)>::new();

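// Create a data/gradient tensor pair for every external input declared in the config.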
for (input_name, input_shape) in config.inputs {
self.init_input_blob(backend.clone(), &input_name, &input_shape, &mut registry);
}

// add input names to first layer so they correctly connect
if let Some(first_layer) = config.layers.first_mut() {
for container_input in &self.input_tensor_names {
first_layer.add_input(&container_input);
}
}
// connect each layer to the next one
for (i, _) in config.layers.clone().iter().enumerate() {
match i == (config.layers.len() - 1) {
false => {
// layers have already been manually connected
if config.layers[i].outputs.get(0).is_some() && config.layers[i + 1].inputs.get(0).is_some() &&
config.layers[i].outputs.get(0) == config.layers[i + 1].inputs.get(0) {
continue;
}
// TODO: make use of in-place
config.layers[i].add_output(&format!("SEQUENTIAL_{}", i));
config.layers[i + 1].add_input(&format!("SEQUENTIAL_{}", i));
},
// last layer
true => {
config.layers[i].add_output(&format!("SEQUENTIAL_OUTPUT_{}", i));
},
}
}

for layer_config in &config.layers {
self.init_layer(backend.clone(), &layer_config, &mut registry, weight_registry);
}

// Go through the net backwards to determine which blobs contribute to the
// loss. We can skip backward computation for blobs that don't contribute
// to the loss.
// Also checks if all bottom blobs don't need backward computation (possible
// because of the skip_propagate_down config), in which case we can skip backward
// computation for the entire layer.
let blobs_under_loss = &mut HashSet::<String>::new();
let blobs_skip_backp = &mut HashSet::<String>::new();
for layer in self.layers.iter_mut().rev() {
layer.borrow_mut().init_backprop(blobs_under_loss, blobs_skip_backp);
}

if config.force_backward {
for layer in &mut self.layers {
layer.borrow_mut().init_force_backward();
}
}

// Outputs of the last layer are considered the outputs of the container
if let Some(last_layer) = self.layers.last() {
for data_tensor in &last_layer.borrow().output_blobs_data {
self.output_data_tensors.push(data_tensor.clone());
}
for gradient_tensor in &last_layer.borrow().output_blobs_gradient {
self.output_gradient_tensors.push(gradient_tensor.clone());
}
}

self.registry = registry;

info!("Sequential container initialization done.");
}

/// Initialize an input tensor for the Sequential container.
///
/// Appends an input blob to the container, so the first [Layer][1] can
/// [connect][2] to it.
///
/// Used during initialization of the Sequential container.
/// [1]: ../layer/struct.Layer.html
/// [2]: ../layer/struct.Layer.html#method.connect
fn init_input_blob(&mut self,
backend: Rc<B>,
tensor_name: &str,
input_shape: &[usize],
registry: &mut HashMap<String, (ArcLock<SharedTensor<f32>>, ArcLock<SharedTensor<f32>>)> ) {

if registry.contains_key(tensor_name) {
// If we are not doing in-place computation but see two layers trying
// to produce the same tensor, raise an error.
error!("Output tensor {} produced by multiple sources.", tensor_name);
return
} else {
info!("Input {} -> {}", self.input_data_tensors.len(), tensor_name);

let ibackend: Rc<IBackend<F=B::F>> = backend;
let data_tensor: ArcLock<SharedTensor<f32>> = Arc::new(RwLock::new(SharedTensor::new(ibackend.device(), &input_shape).unwrap()));
let gradient_tensor: ArcLock<SharedTensor<f32>> = Arc::new(RwLock::new(SharedTensor::new(ibackend.device(), &input_shape).unwrap()));

self.input_data_tensors.push(data_tensor.clone());
self.input_gradient_tensors.push(gradient_tensor.clone());
self.input_tensor_names.push(tensor_name.to_owned());
registry.insert(tensor_name.to_owned(), (data_tensor, gradient_tensor));
}
}

/// Initializes a single layer of the Sequential container.
///
/// Appends input and output tensors to the [Layer][3]. Apart from explicitly named
/// output tensors, it will also append anonymous output tensors that are required by the specific
/// [Layer implementations][4]. It also sets up the backpropagation flags.
///
/// [3]: ../layer/struct.Layer.html
/// [4]: ../layers/index.html
fn init_layer(&mut self,
backend: Rc<B>,
layer_config: &LayerConfig,
registry: &mut HashMap<String, (ArcLock<SharedTensor<f32>>, ArcLock<SharedTensor<f32>>)>,
weight_registry: &mut HashMap<String, (ArcLock<SharedTensor<f32>>, ArcLock<SharedTensor<f32>>, Option<f32>, Option<f32>)>) {
// Setup layer.
if let Err(e) = layer_config.validate() {
error!("{}", e);
}

info!("Creating Layer {}", &layer_config.name);
let mut layer = Layer::from_config(backend, &layer_config);

// Figure out this layer's input and output
layer.connect(registry, weight_registry);

self.layers.push(RefCell::new(layer));
}
}

impl<B: IBackend + LayerOps<f32> + 'static> ILayer<B> for Sequential<B> {
fn is_container(&self) -> bool {
true
}

fn inputs_data(&self) -> Option<Vec<ArcLock<SharedTensor<f32>>>> {
Some(self.input_data_tensors.clone())
}

fn inputs_gradients(&self) -> Option<Vec<ArcLock<SharedTensor<f32>>>> {
Some(self.input_gradient_tensors.clone())
}

fn outputs_data(&self) -> Option<Vec<ArcLock<SharedTensor<f32>>>> {
Some(self.output_data_tensors.clone())
}

fn outputs_gradients(&self) -> Option<Vec<ArcLock<SharedTensor<f32>>>> {
Some(self.output_gradient_tensors.clone())
}

fn learnable_weights(&self) -> Option<Vec<ArcLock<SharedTensor<f32>>>> {
let weights = self.layers.iter().flat_map(|layer| layer.borrow().learnable_weights_data()).collect();
Some(weights)
}

fn learnable_weights_gradients(&self) -> Option<Vec<ArcLock<SharedTensor<f32>>>> {
let gradients = self.layers.iter().flat_map(|layer| layer.borrow().learnable_weights_gradients()).collect();
Some(gradients)
}

fn forward(&self,
backend: &B,
input_data: &[ArcLock<SharedTensor<f32>>],
weights_data: &[ArcLock<SharedTensor<f32>>],
output_data: &mut [ArcLock<SharedTensor<f32>>]) {
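// Hand the container's inputs to the first layer, then run each contained
// layer's forward pass in order; the intermediate tensors between layers
// were already wired up during `connect`.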
if let Some(first_layer) = self.layers.first() {
for (i, input) in input_data.iter().enumerate() {
first_layer.borrow_mut().input_blobs_data[i] = input.clone();
}
}
for layer in &self.layers {
layer.borrow_mut().forward(&[]);
}
}

fn backward_input(&self,
backend: &B,
weights_data: &[ArcLock<SharedTensor<f32>>],
output_data: &[ArcLock<SharedTensor<f32>>],
output_gradients: &[ArcLock<SharedTensor<f32>>],
input_data: &[ArcLock<SharedTensor<f32>>],
input_gradients: &mut [ArcLock<SharedTensor<f32>>]) {
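// Seed the last layer's output gradients with the gradients arriving from
// above, then run the backward pass through the layers in reverse order.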
if let Some(last_layer) = self.layers.last() {
for (i, output_gradient) in output_gradients.iter().enumerate() {
last_layer.borrow_mut().output_blobs_gradient[i] = output_gradient.clone();
}
}
for layer in self.layers.iter().rev() {
layer.borrow_mut().backward_input(&[]);
}
}

fn backward_parameters(&self,
backend: &B,
output_data: &[ArcLock<SharedTensor<f32>>],
output_gradients: &[ArcLock<SharedTensor<f32>>],
input_data: &[ArcLock<SharedTensor<f32>>],
weights_gradients: &mut [ArcLock<SharedTensor<f32>>]) {
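// Every contained layer computes the gradients for its own learnable weights.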
for layer in &self.layers {
layer.borrow_mut().backward_parameters();
}
}
}

impl<B: IBackend + LayerOps<f32> + 'static> ComputeOutput<f32, B> for Sequential<B> {
// we are overriding `forward` and not calling `compute_output`
fn compute_output(&self,
backend: &B,
weights: &[&SharedTensor<f32>],
input_data: &[&SharedTensor<f32>],
output_data: &mut [&mut SharedTensor<f32>]) { }
}

impl<B: IBackend + LayerOps<f32> + 'static> ComputeInputGradient<f32, B> for Sequential<B> {
// we are overriding `backward_input` and not calling `compute_input_gradient`
fn compute_input_gradient(&self,
backend: &B,
weights_data: &[&SharedTensor<f32>],
output_data: &[&SharedTensor<f32>],
output_gradients: &[&SharedTensor<f32>],
input_data: &[&SharedTensor<f32>],
input_gradients: &mut [&mut SharedTensor<f32>]) { }
}

impl<B: IBackend + LayerOps<f32> + 'static> ComputeParametersGradient<f32, B> for Sequential<B> {
// we are overriding `backward_parameters` and not calling `compute_parameters_gradient`
fn compute_parameters_gradient(&self,
backend: &B,
output_data: &[&SharedTensor<f32>],
output_gradients: &[&SharedTensor<f32>],
input_data: &[&SharedTensor<f32>],
parameters_gradients: &mut [&mut SharedTensor<f32>]) { }
}

#[derive(Debug, Clone)]
#[allow(missing_copy_implementations)]
/// Specifies configuration parameters for a Sequential Layer.
pub struct SequentialConfig {
/// Defines the layers of the container via [LayerConfig][1]s.
/// [1]: ../layer/struct.LayerConfig.html
pub layers: Vec<LayerConfig>,

/// Defines the names and shapes of the input tensors.
///
/// The inputs are identified by name so they can be referenced as input tensors
/// in a [LayerConfig][layer_config].
///
/// [layer_config]: ../layer/struct.LayerConfig.html
pub inputs: Vec<(String, Vec<usize>)>,

/// Defines if the container will force every layer to do [backpropagation][1].
/// [1]: https://en.wikipedia.org/wiki/Backpropagation
///
/// If set to `false`, then the execution of backpropagation is determined automatically
/// according to the network structure and learning rates.
///
/// Default: `false`
pub force_backward: bool,
}

impl SequentialConfig {
/// Add a layer at the end of the sequential container.
pub fn add_layer(&mut self, layer: LayerConfig) {
self.layers.push(layer);
}

/// Add an input to the container.
pub fn add_input(&mut self, input_name: &str, shape: &[usize]) {
self.inputs.push((input_name.to_owned(), shape.to_owned()));
}
}

impl ::std::default::Default for SequentialConfig {
fn default() -> SequentialConfig {
SequentialConfig {
layers: vec![],
inputs: vec![],
force_backward: false,
}
}
}
1 change: 1 addition & 0 deletions src/layers/mod.rs
@@ -59,6 +59,7 @@ pub use self::common::{
Linear, LinearConfig,
LogSoftmax,
Pooling, PoolingConfig, PoolingMode,
Sequential, SequentialConfig,
Softmax,
};

13 changes: 4 additions & 9 deletions src/lib.rs
@@ -77,10 +77,6 @@
//! ## Examples
//!
//! ```
//! # extern crate leaf;
//! # use leaf::network::{NetworkConfig};
//! # fn main() {
//! # }
//! ```
//!
//! ## Development
@@ -133,11 +129,10 @@ extern crate collenchyma_blas as coblas;
extern crate collenchyma_nn as conn;
pub mod layer;
pub mod layers;
#[cfg(feature="cuda")]
pub mod solver;
#[cfg(feature="cuda")]
pub mod solvers;
pub mod network;
// #[cfg(feature="cuda")]
// pub mod solver;
// #[cfg(feature="cuda")]
// pub mod solvers;
pub mod weight;

pub mod util;
