feat/layers: add tanh layer
hobofan authored and MichaelHirn committed Apr 5, 2016
1 parent 3b25a48 commit b1d5ec9
Showing 5 changed files with 156 additions and 0 deletions.
7 changes: 7 additions & 0 deletions src/layer.rs
@@ -697,6 +697,7 @@ impl<B: IBackend + LayerOps<f32> + 'static> Layer<B> {
LayerType::Softmax => Box::new(Softmax::default()),
LayerType::ReLU => Box::new(ReLU),
LayerType::Sigmoid => Box::new(Sigmoid),
LayerType::TanH => Box::new(TanH),
LayerType::NegativeLogLikelihood(layer_config) => Box::new(NegativeLogLikelihood::from_config(&layer_config)),
LayerType::Reshape(layer_config) => Box::new(Reshape::from_config(&layer_config)),
}
@@ -1123,6 +1124,8 @@ pub enum LayerType {
ReLU,
/// Sigmoid Layer
Sigmoid,
/// TanH Layer
TanH,
// Loss layers
/// NegativeLogLikelihood Layer
NegativeLogLikelihood(NegativeLogLikelihoodConfig),
@@ -1151,6 +1154,10 @@ impl LayerType {
LayerType::Sigmoid => true,
#[cfg(feature="native")]
LayerType::Sigmoid => false,
#[cfg(all(feature="cuda", not(feature="native")))]
LayerType::TanH => true,
#[cfg(feature="native")]
LayerType::TanH => false,
LayerType::NegativeLogLikelihood(_) => false,
LayerType::Reshape(_) => true,
}
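
For context, a brief usage sketch (not part of this diff): how user code would select the new variant. LayerConfig::new and the leaf::layer path are assumptions inferred from how the existing ReLU and Sigmoid variants are constructed; the match shown above then resolves LayerType::TanH to Box::new(TanH).

extern crate leaf;

use leaf::layer::{LayerConfig, LayerType};

fn main() {
    // Assumed constructor; same call shape as for the ReLU and Sigmoid variants.
    // LayerType::TanH carries no extra configuration, so no *Config struct is needed.
    let _tanh_cfg = LayerConfig::new("tanh", LayerType::TanH);
}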
2 changes: 2 additions & 0 deletions src/layers/activation/mod.rs
@@ -31,6 +31,8 @@ macro_rules! impl_ilayer_activation {

pub use self::relu::ReLU;
pub use self::sigmoid::Sigmoid;
pub use self::tanh::TanH;

pub mod relu;
pub mod sigmoid;
pub mod tanh;
142 changes: 142 additions & 0 deletions src/layers/activation/tanh.rs
@@ -0,0 +1,142 @@
//! Applies the nonlinear TanH function.
//!
//! Non-linearity activation function: y = sinh(x) / cosh(x)
//!
//! You might consider using ReLU as an alternative.
//!
//! ReLU, compared to TanH:
//!
//! * reduces the likelihood of vanishing gradients
//! * increases the likelihood of a more beneficial sparse representation
//! * can be computed faster
//! * is therefore the most popular activation function in DNNs as of this writing (2016).
use co::{IBackend, SharedTensor};
use conn;
use layer::*;
use util::ArcLock;

#[derive(Debug, Clone)]
#[allow(missing_copy_implementations)]
/// TanH Activation Layer
pub struct TanH;

//
// Tanh + TanhPointwise
// Only on CUDA
#[cfg(all(feature="cuda", not(feature="native")))]
impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ILayer<B> for TanH {
impl_ilayer_activation!();

fn compute_in_place(&self) -> bool {
true
}

fn reshape(&mut self,
backend: ::std::rc::Rc<B>,
input_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
input_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
weights_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
weights_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
output_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
output_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>) {
if let Some(inp) = input_data.get(0) {
let read_inp = inp.read().unwrap();
let input_desc = read_inp.desc();
input_gradient[0].write().unwrap().resize(input_desc).unwrap();
output_data[0].write().unwrap().resize(input_desc).unwrap();
output_gradient[0].write().unwrap().resize(input_desc).unwrap();
}
}
}

#[cfg(all(feature="cuda", not(feature="native")))]
impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ComputeOutput<f32, B> for TanH {
fn compute_output(&self,
backend: &B,
_weights: &[&SharedTensor<f32>],
input_data: &[&SharedTensor<f32>],
output_data: &mut [&mut SharedTensor<f32>]) {
match input_data.get(0) {
Some(input) => backend.tanh_plain(input, output_data[0]).unwrap(),
None => backend.tanh_pointwise_plain(output_data[0]).unwrap(),
}
}
}

#[cfg(all(feature="cuda", not(feature="native")))]
impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ComputeInputGradient<f32, B> for TanH {
fn compute_input_gradient(&self,
backend: &B,
weights_data: &[&SharedTensor<f32>],
output_data: &[&SharedTensor<f32>],
output_gradients: &[&SharedTensor<f32>],
input_data: &[&SharedTensor<f32>],
input_gradients: &mut [&mut SharedTensor<f32>]) {
match output_data.get(0) {
Some(_) => backend.tanh_grad_plain(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
None => backend.tanh_pointwise_grad_plain(input_data[0], input_gradients[0]).unwrap(),
}
}
}

#[cfg(all(feature="cuda", not(feature="native")))]
impl<B: IBackend + conn::Tanh<f32> + conn::TanhPointwise<f32>> ComputeParametersGradient<f32, B> for TanH {}

//
// Tanh without TanhPointwise
// Only on native
//
#[cfg(feature="native")]
impl<B: IBackend + conn::Tanh<f32>> ILayer<B> for TanH {
impl_ilayer_activation!();

fn reshape(&mut self,
backend: ::std::rc::Rc<B>,
input_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
input_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
weights_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
weights_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>,
output_data: &mut Vec<ArcLock<SharedTensor<f32>>>,
output_gradient: &mut Vec<ArcLock<SharedTensor<f32>>>) {
if let Some(inp) = input_data.get(0) {
let read_inp = inp.read().unwrap();
let input_desc = read_inp.desc();
input_gradient[0].write().unwrap().resize(input_desc).unwrap();
output_data[0].write().unwrap().resize(input_desc).unwrap();
output_gradient[0].write().unwrap().resize(input_desc).unwrap();
}
}
}

#[cfg(feature="native")]
impl<B: IBackend + conn::Tanh<f32>> ComputeOutput<f32, B> for TanH {
fn compute_output(&self,
backend: &B,
_weights: &[&SharedTensor<f32>],
input_data: &[&SharedTensor<f32>],
output_data: &mut [&mut SharedTensor<f32>]) {
match input_data.get(0) {
Some(input) => backend.tanh_plain(input, output_data[0]).unwrap(),
None => panic!("No input provided for TanH layer."),
}
}
}

#[cfg(feature="native")]
impl<B: IBackend + conn::Tanh<f32>> ComputeInputGradient<f32, B> for TanH {
fn compute_input_gradient(&self,
backend: &B,
weights_data: &[&SharedTensor<f32>],
output_data: &[&SharedTensor<f32>],
output_gradients: &[&SharedTensor<f32>],
input_data: &[&SharedTensor<f32>],
input_gradients: &mut [&mut SharedTensor<f32>]) {
match output_data.get(0) {
Some(_) => backend.tanh_grad_plain(output_data[0], output_gradients[0], input_data[0], input_gradients[0]).unwrap(),
None => panic!("No output_data provided for TanH layer backward."),
}
}
}

#[cfg(feature="native")]
impl<B: IBackend + conn::Tanh<f32>> ComputeParametersGradient<f32, B> for TanH {}
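
The backward pass above hands output_data (not input_data) to tanh_grad_plain; that works because the tanh derivative can be written in terms of the output alone: with y = tanh(x), dy/dx = 1 - y^2. A standalone illustration of that identity in plain Rust (independent of any backend, not the backend's actual kernel):

// Identity the tanh backward pass relies on: with y = tanh(x), dy/dx = 1 - y^2,
// so only the layer output and the upstream gradient are needed.
fn tanh_input_gradient(output: f32, output_gradient: f32) -> f32 {
    (1.0 - output * output) * output_gradient
}

fn main() {
    let x = 0.5f32;
    let y = x.tanh();                         // forward: y = sinh(x)/cosh(x)
    let dx = tanh_input_gradient(y, 1.0);     // backward with upstream gradient 1.0
    println!("tanh({}) = {}, d/dx = {}", x, y, dx);
}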
1 change: 1 addition & 0 deletions src/layers/mod.rs
@@ -49,6 +49,7 @@
pub use self::activation::{
ReLU,
Sigmoid,
TanH,
};

#[cfg(all(feature="cuda", not(feature="native")))]
4 changes: 4 additions & 0 deletions src/util.rs
@@ -111,12 +111,14 @@ pub trait LayerOps<F> : conn::Convolution<F>
+ conn::Pooling<F>
+ conn::Relu<F> + conn::ReluPointwise<F>
+ conn::Sigmoid<F> + conn::SigmoidPointwise<F>
+ conn::Tanh<F> + conn::TanhPointwise<F>
+ conn::Softmax<F> + conn::LogSoftmax<F>
+ Gemm<F> {}
#[cfg(feature="native")]
/// Encapsulates all traits used in Layers.
pub trait LayerOps<F> : conn::Relu<F>
+ conn::Sigmoid<F>
+ conn::Tanh<F>
+ conn::Softmax<F> + conn::LogSoftmax<F>
+ Gemm<F> {}

@@ -125,10 +127,12 @@ impl<T: conn::Convolution<f32>
+ conn::Pooling<f32>
+ conn::Relu<f32> + conn::ReluPointwise<f32>
+ conn::Sigmoid<f32> + conn::SigmoidPointwise<f32>
+ conn::Tanh<f32> + conn::TanhPointwise<f32>
+ conn::Softmax<f32> + conn::LogSoftmax<f32>
+ Gemm<f32>> LayerOps<f32> for T {}
#[cfg(feature="native")]
impl<T: conn::Relu<f32>
+ conn::Sigmoid<f32>
+ conn::Tanh<f32>
+ conn::Softmax<f32> + conn::LogSoftmax<f32>
+ Gemm<f32>> LayerOps<f32> for T {}
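
Folding conn::Tanh (and, on CUDA, conn::TanhPointwise) into LayerOps is what lets Layer<B> in src/layer.rs build a TanH worker behind the single LayerOps<f32> bound. A hedged sketch of what that buys generic code; the helper below is hypothetical and not part of this commit:

use co::{IBackend, SharedTensor};
use util::LayerOps;

// Hypothetical helper: with the widened bound, generic layer code can call the
// tanh routines without naming conn::Tanh explicitly.
fn forward_tanh<B: IBackend + LayerOps<f32>>(backend: &B,
                                             input: &SharedTensor<f32>,
                                             output: &mut SharedTensor<f32>) {
    // tanh_plain is the same no-event variant used in tanh.rs above.
    backend.tanh_plain(input, output).unwrap();
}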
