From 08629ea8f1c38047a5d7fec24601e21ba79d704f Mon Sep 17 00:00:00 2001 From: Hobofan Date: Wed, 9 Dec 2015 13:44:46 +0100 Subject: [PATCH 1/5] feat/features: add framework feature groups --- Cargo.toml | 18 +++++++++++++----- README.md | 4 ++-- src/frameworks/mod.rs | 3 +++ src/lib.rs | 1 + 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9757839..753264a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,23 +2,31 @@ name = "collenchyma-nn" description = "Collenchyma plugin for full Neural Network algorithm support" version = "0.0.1" -authors = ["MichaelHirn "] +authors = ["MichaelHirn ", + "Maximilian Goisser "] repository = "https://github.com/autumnai/collenchyma-nn" homepage = "https://github.com/autumnai/collenchyma-nn" documentation = "https://autumnai.github.io/collenchyma-nn" readme = "README.md" -keywords = ["neural-network", "collenchyma", "computation", "hfc", "plugin"] +keywords = ["neural-network", "collenchyma", "computation", "hpc", "plugin"] license = "MIT" [dependencies] -collenchyma = "0.0.4" -cudnn = "0.1" +collenchyma = { version = "0.0.5", default-features = false } lazy_static = "0.1" + +cudnn = { version = "0.1", optional = true } + clippy = { version = "0.0.27", optional = true } [features] +default = ["native", "cuda", "opencl"] +native = ["collenchyma/native"] +cuda = ["collenchyma/cuda", "cudnn"] +opencl = ["collenchyma/opencl"] + +travis = ["native"] dev = [] -travis = [] lint = ["clippy"] diff --git a/README.md b/README.md index efc7781..211cc37 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,8 @@ For more information, If you're using Cargo, just add collenchyma-NN to your Cargo.toml: [dependencies] - collenchyma = "0.0.4" - collenchyma-nn = "0.1.0" + collenchyma = "0.0.5" + collenchyma-nn = "0.0.1" If you're using [Cargo Edit][cargo-edit], you can call: diff --git a/src/frameworks/mod.rs b/src/frameworks/mod.rs index 69d36b9..43dee93 100644 --- a/src/frameworks/mod.rs +++ b/src/frameworks/mod.rs @@ -1,5 +1,8 @@ //! Provides the specific Framework implementations for the Library Operations. 
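A note on the feature groups added in the [features] section above: they let a downstream crate compile collenchyma-nn against only the backends it needs. A minimal sketch of a consumer's Cargo.toml for a hypothetical CUDA-only build (the crate versions and feature names are the ones introduced in this patch; `default-features = false` is standard Cargo syntax, the downstream crate itself is illustrative):

    [dependencies]
    collenchyma    = { version = "0.0.5", default-features = false, features = ["cuda"] }
    collenchyma-nn = { version = "0.0.1", default-features = false, features = ["cuda"] }
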
+#[cfg(feature = "native")] mod native; +#[cfg(feature = "opencl")] mod opencl; +#[cfg(feature = "cuda")] mod cuda; diff --git a/src/lib.rs b/src/lib.rs index dc6917c..18d41c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -39,6 +39,7 @@ unused_import_braces, unused_qualifications)] extern crate collenchyma; +#[cfg(feature = "cuda")] extern crate cudnn; #[macro_use] extern crate lazy_static; From 8ea1a29016c364536755e2fb5d13a52352b059ab Mon Sep 17 00:00:00 2001 From: Hobofan Date: Sat, 12 Dec 2015 20:41:20 +0000 Subject: [PATCH 2/5] feat/sigmoid: add full sigmoid CUDA implementation --- Cargo.toml | 2 +- src/binary.rs | 2 +- src/frameworks/cuda.rs | 84 ++++++++----------------- src/frameworks/mod.rs | 4 +- src/frameworks/native.rs | 10 +-- src/frameworks/opencl.rs | 10 +-- src/helper.rs | 130 ++++++++++++++++++++++++++++++++------- src/lib.rs | 2 +- src/operation.rs | 6 +- src/plugin.rs | 63 +++++++++---------- 10 files changed, 185 insertions(+), 128 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index dd84f32..879f069 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ license = "MIT" [dependencies] collenchyma = { path = "/home/hobofan/collenchyma" } -cudnn = { path = "/home/hobofan/rust-cudnn/cudnn" } +cudnn = "0.1" libc = "0.2" lazy_static = "0.1" clippy = { version = "0.0.27", optional = true } diff --git a/src/binary.rs b/src/binary.rs index 813f9e6..fc42ee2 100644 --- a/src/binary.rs +++ b/src/binary.rs @@ -1,7 +1,7 @@ //! Provides the INnBinary Binary trait for Collenchyma's Framework implementation. use super::operation::*; -use collenchyma::plugin::numeric_helpers::Float; +use co::plugin::numeric_helpers::Float; /// Describes the operation binding for a NN Binary implementation. pub trait INnBinary { diff --git a/src/frameworks/cuda.rs b/src/frameworks/cuda.rs index 81dcf4a..c7e6ae1 100644 --- a/src/frameworks/cuda.rs +++ b/src/frameworks/cuda.rs @@ -3,12 +3,12 @@ use ::operation::*; use ::binary::*; use ::plugin::*; -use collenchyma::backend::Backend; -use collenchyma::device::DeviceType; -use collenchyma::memory::MemoryType; -use collenchyma::tensor::{SharedTensor, TensorDesc, ITensorDesc}; -use collenchyma::plugin::Error as PluginError; -use collenchyma::frameworks::cuda::{Function, Module, Cuda}; +use co::backend::Backend; +use co::device::DeviceType; +use co::memory::MemoryType; +use co::tensor::{SharedTensor, TensorDesc, ITensorDesc}; +use co::plugin::Error as PluginError; +use co::frameworks::cuda::{Function, Module, Cuda}; use cudnn::*; use std::mem::transmute; @@ -16,9 +16,13 @@ lazy_static! 
{ static ref SIGMOID: Function = Function::from_isize(1); } -pub trait ICudnnTensorDesc : ITensorDesc { - fn get_cudnn_desc(&self, data_type: DataType) -> Result { - match TensorDescriptor::new(&self.dims_i32(), &self.default_stride_i32(), data_type) { +pub trait ICudnnTensorDesc : ITensorDesc { + fn get_cudnn_desc(&self) -> Result; +} + +impl ICudnnTensorDesc for TensorDesc { + fn get_cudnn_desc(&self) -> Result { + match TensorDescriptor::new(&self.dims_i32(), &self.default_stride_i32(), DataType::Float) { Ok(desc) => Ok(desc), Err(err) => { println!("{:?}", err); @@ -28,7 +32,17 @@ pub trait ICudnnTensorDesc : ITensorDesc { } } -impl ICudnnTensorDesc for TensorDesc {} +impl ICudnnTensorDesc for TensorDesc { + fn get_cudnn_desc(&self) -> Result { + match TensorDescriptor::new(&self.dims_i32(), &self.default_stride_i32(), DataType::Double) { + Ok(desc) => Ok(desc), + Err(err) => { + println!("{:?}", err); + Err(PluginError::Plugin("Unable to create CuDNN TensorDescriptor.")) + } + } + } +} pub trait ICudnn { fn cudnn(&self) -> Cudnn { @@ -82,30 +96,7 @@ impl_plugin_for!(f64, Backend); impl INn for Backend { type B = Module; - fn sigmoid(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::collenchyma::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - let src_desc = try!(x.desc().get_cudnn_desc(DataType::Float)); - let src_data = try!(try!(x.get(self.device()).ok_or(PluginError::MissingMemoryForDevice("Unable to resolve memory for `x`"))) - .as_cuda().ok_or(PluginError::MissingMemoryForDevice("Unable to receive native memory for `x`."))) - .id_c(); - let dest_desc = try!(result.desc().get_cudnn_desc(DataType::Float)); - let dest_data = try!(try!(result.get_mut(self.device()).ok_or(PluginError::MissingMemoryForDevice("Unable to resolve memory for `result`"))) - .as_cuda().ok_or(PluginError::MissingMemoryForDevice("Unable to receive native memory for `result`."))) - .id_c(); - - Ok(try!(match self.binary().cudnn().sigmoid_forward( - &src_desc, unsafe { transmute::(src_data) }, - &dest_desc, unsafe { transmute::(dest_data) }, - >::default() - ) { - Ok(_) => Ok(()), - Err(err) => { - println!("{:?}", err); - Err(PluginError::Operation("Unable to execute CUDA cuDNN Activation Sigmoid Forward.")) - } - })) - } + impl_ops_sigmoid_for!(f32, Backend); fn binary(&self) -> &Self::B { self.binary() @@ -119,30 +110,7 @@ impl INn for Backend { impl INn for Backend { type B = Module; - fn sigmoid(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::collenchyma::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - let src_desc = try!(x.desc().get_cudnn_desc(DataType::Double)); - let src_data = try!(try!(x.get(self.device()).ok_or(PluginError::MissingMemoryForDevice("Unable to resolve memory for `x`"))) - .as_cuda().ok_or(PluginError::MissingMemoryForDevice("Unable to receive native memory for `x`."))) - .id_c(); - let dest_desc = try!(result.desc().get_cudnn_desc(DataType::Double)); - let dest_data = try!(try!(result.get_mut(self.device()).ok_or(PluginError::MissingMemoryForDevice("Unable to resolve memory for `result`"))) - .as_cuda().ok_or(PluginError::MissingMemoryForDevice("Unable to receive native memory for `result`."))) - .id_c(); - - Ok(try!(match self.binary().cudnn().sigmoid_forward( - &src_desc, unsafe { transmute::(src_data) }, - &dest_desc, unsafe { transmute::(dest_data) 
}, - >::default() - ) { - Ok(_) => Ok(()), - Err(err) => { - println!("{:?}", err); - Err(PluginError::Operation("Unable to execute CUDA cuDNN Activation Sigmoid Forward.")) - } - })) - } + impl_ops_sigmoid_for!(f64, Backend); fn binary(&self) -> &Self::B { self.binary() diff --git a/src/frameworks/mod.rs b/src/frameworks/mod.rs index 69d36b9..eabe9a2 100644 --- a/src/frameworks/mod.rs +++ b/src/frameworks/mod.rs @@ -1,5 +1,5 @@ //! Provides the specific Framework implementations for the Library Operations. -mod native; -mod opencl; +//mod native; +//mod opencl; mod cuda; diff --git a/src/frameworks/native.rs b/src/frameworks/native.rs index 4ed8c7a..980d26f 100644 --- a/src/frameworks/native.rs +++ b/src/frameworks/native.rs @@ -3,11 +3,11 @@ use ::operation::*; use ::binary::*; use ::plugin::*; -use collenchyma::device::DeviceType; -use collenchyma::backend::Backend; -use collenchyma::memory::MemoryType; -use collenchyma::frameworks::native::{Native, Function, Binary}; -use collenchyma::plugin::Error; +use co::device::DeviceType; +use co::backend::Backend; +use co::memory::MemoryType; +use co::frameworks::native::{Native, Function, Binary}; +use co::plugin::Error; macro_rules! impl_binary(($($t: ident), +) => ( $( diff --git a/src/frameworks/opencl.rs b/src/frameworks/opencl.rs index 1192efa..4ba580e 100644 --- a/src/frameworks/opencl.rs +++ b/src/frameworks/opencl.rs @@ -3,11 +3,11 @@ use ::operation::*; use ::binary::*; use ::plugin::*; -use collenchyma::backend::Backend; -use collenchyma::device::DeviceType; -use collenchyma::memory::MemoryType; -use collenchyma::plugin::Error; -use collenchyma::frameworks::opencl::{Kernel, Program, OpenCL}; +use co::backend::Backend; +use co::device::DeviceType; +use co::memory::MemoryType; +use co::plugin::Error; +use co::frameworks::opencl::{Kernel, Program, OpenCL}; impl INnBinary for Program { type Sigmoid = Kernel; diff --git a/src/helper.rs b/src/helper.rs index 77d65a1..108a293 100644 --- a/src/helper.rs +++ b/src/helper.rs @@ -3,30 +3,118 @@ #[macro_export] macro_rules! 
impl_ops_sigmoid_for { ($t:ident, $b:ty) => ( - fn sigmoid(&self, - x: &mut ::collenchyma::tensor::SharedTensor<$t>, - result: &mut ::collenchyma::tensor::SharedTensor<$t> - ) -> Result<(), ::collenchyma::error::Error> { + fn sigmoid( + &self, + x: &mut ::co::tensor::SharedTensor<$t>, + result: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } match result.add_device(self.device()) { _ => () } - Ok(try!( - <$b as IOperationSigmoid<$t>>::compute(&self, - try!(x.get(self.device()).ok_or(::collenchyma::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))), - try!(result.get_mut(self.device()).ok_or(::collenchyma::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))), - ) - )) + let src_desc = try!(x.desc()<::co::tensor::TensorDesc as ICudnnTensorDesc<$t>>.get_cudnn_desc()); + let src_data = try!(try!(x.get(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))) + .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `x`."))) + .id_c(); + let dest_desc = try!(result.desc().get_cudnn_desc()); + let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) + .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) + .id_c(); + + Ok(try!(match self.binary().cudnn().sigmoid_forward( + &src_desc, unsafe { ::std::mem::transmute::(src_data) }, + &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, + <::cudnn::ScalParams as ::cudnn::IScalParamsDefault<$t>>::default() + ) { + Ok(_) => Ok(()), + Err(err) => { + println!("{:?}", err); + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation Sigmoid Forward.")) + } + })) + } + + fn sigmoid_plain( + &self, + x: &::co::tensor::SharedTensor<$t>, + result: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + let src_desc = try!(x.desc().get_cudnn_desc()); + let src_data = try!(try!(x.get(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))) + .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `x`."))) + .id_c(); + let dest_desc = try!(result.desc().get_cudnn_desc()); + let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) + .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) + .id_c(); + + Ok(try!(match self.binary().cudnn().sigmoid_forward( + &src_desc, unsafe { ::std::mem::transmute::(src_data) }, + &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, + <::cudnn::ScalParams as ::cudnn::IScalParamsDefault<$t>>::default() + ) { + Ok(_) => Ok(()), + Err(err) => { + println!("{:?}", err); + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation Sigmoid Forward.")) + } + })) + } + + fn sigmoid_diff( + &self, + x: &mut ::co::tensor::SharedTensor<$t>, + result: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match result.add_device(self.device()) { _ => () } + let src_desc = try!(x.desc().get_cudnn_desc()); + let src_data = 
try!(try!(x.get(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))) + .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `x`."))) + .id_c(); + let dest_desc = try!(result.desc().get_cudnn_desc()); + let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) + .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) + .id_c(); + + Ok(try!(match self.binary().cudnn().sigmoid_forward( + &src_desc, unsafe { ::std::mem::transmute::(src_data) }, + &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, + <::cudnn::ScalParams as ::cudnn::IScalParamsDefault<$t>>::default() + ) { + Ok(_) => Ok(()), + Err(err) => { + println!("{:?}", err); + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation Sigmoid Forward.")) + } + })) } - fn sigmoid_plain(&self, - x: &mut ::collenchyma::tensor::SharedTensor<$t>, - result: &mut ::collenchyma::tensor::SharedTensor<$t> - ) -> Result<(), ::collenchyma::error::Error> { - Ok(try!( - <$b as IOperationSigmoid<$t>>::compute(&self, - try!(x.get(self.device()).ok_or(::collenchyma::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))), - try!(result.get_mut(self.device()).ok_or(::collenchyma::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))), - ) - )) + fn sigmoid_diff_plain( + &self, + x: &::co::tensor::SharedTensor<$t>, + result: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match result.add_device(self.device()) { _ => () } + let src_desc = try!(x.desc().get_cudnn_desc()); + let src_data = try!(try!(x.get(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))) + .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `x`."))) + .id_c(); + let dest_desc = try!(result.desc().get_cudnn_desc()); + let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) + .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) + .id_c(); + + Ok(try!(match self.binary().cudnn().sigmoid_forward( + &src_desc, unsafe { ::std::mem::transmute::(src_data) }, + &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, + <::cudnn::ScalParams as ::cudnn::IScalParamsDefault<$t>>::default() + ) { + Ok(_) => Ok(()), + Err(err) => { + println!("{:?}", err); + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation Sigmoid Forward.")) + } + })) } - ); + ) } diff --git a/src/lib.rs b/src/lib.rs index 21328e9..a2d3d8e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -38,7 +38,7 @@ trivial_casts, trivial_numeric_casts, unused_import_braces, unused_qualifications)] -extern crate collenchyma; +extern crate collenchyma as co; extern crate cudnn; extern crate libc; #[macro_use] diff --git a/src/operation.rs b/src/operation.rs index 562c1a1..fe5c8f4 100644 --- a/src/operation.rs +++ b/src/operation.rs @@ -1,8 +1,8 @@ //! Provides the IOperationX operation traits for Collenchyma's Framework implementation. 
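For reference, the operations generated by the impl_ops_sigmoid_for! macro above are named after the logistic sigmoid and its first derivative. A scalar sketch in plain Rust, not part of the plugin (which dispatches these to cuDNN on the GPU), just to pin down the math behind the `sigmoid` and `sigmoid_diff` method names:

    /// Logistic sigmoid: s(x) = 1 / (1 + e^(-x))
    fn sigmoid(x: f32) -> f32 {
        1.0 / (1.0 + (-x).exp())
    }

    /// First derivative of the sigmoid: s'(x) = s(x) * (1 - s(x))
    fn sigmoid_diff(x: f32) -> f32 {
        let s = sigmoid(x);
        s * (1.0 - s)
    }
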
-use collenchyma::plugin::numeric_helpers::Float; -use collenchyma::memory::MemoryType; -use collenchyma::plugin::Error; +use co::plugin::numeric_helpers::Float; +use co::memory::MemoryType; +use co::plugin::Error; /// Describes a Sigmoid Operation. pub trait IOperationSigmoid { diff --git a/src/plugin.rs b/src/plugin.rs index f1c2eb2..1833bad 100644 --- a/src/plugin.rs +++ b/src/plugin.rs @@ -2,50 +2,51 @@ use super::binary::INnBinary; use super::operation::*; -use collenchyma::plugin::numeric_helpers::Float; -use collenchyma::binary::IBinary; -use collenchyma::tensor::SharedTensor; -use collenchyma::device::DeviceType; -use collenchyma::plugin::Error as LibError; +use co::plugin::numeric_helpers::Float; +use co::binary::IBinary; +use co::tensor::SharedTensor; +use co::device::DeviceType; +use co::plugin::Error as LibError; /// Provides the functionality for a backend to support Neural Network related operations. pub trait INn { /// The Binary representation for this Plugin. type B: INnBinary + IBinary; - /// Computes the absolute sum of vector `x` with complete memory management. + /// Computes the [Sigmoid function][sigmoid] over the input Tensor `x` with complete memory management. + /// [sigmoid]: https://en.wikipedia.org/wiki/Sigmoid_function /// /// Saves the result to `result`. - /// This is a Level 1 BLAS operation. - /// - /// For a no-memory managed version see `asum_plain`. - fn sigmoid(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::collenchyma::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - Ok(try!( - self.binary().sigmoid().compute( - try!(x.get(self.device()).ok_or(LibError::MissingMemoryForDevice("Unable to resolve memory for `x`"))), - try!(result.get_mut(self.device()).ok_or(LibError::MissingMemoryForDevice("Unable to resolve memory for `result`"))), - ) - )) - } - - /// Computes the absolute sum of vector `x` without any memory management. + /// + /// For a no-memory managed version see `sigmoid_plain`. + fn sigmoid(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + + /// Computes the Sigmoid function over the input Tensor `x` without any memory management. /// /// Saves the result to `result`. - /// This is a Level 1 BLAS operation. /// /// *Attention*:
/// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `asum`. - fn sigmoid_plain(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::collenchyma::error::Error> { - Ok(try!( - self.binary().sigmoid().compute( - try!(x.get(self.device()).ok_or(LibError::MissingMemoryForDevice("Unable to resolve memory for `x`"))), - try!(result.get_mut(self.device()).ok_or(LibError::MissingMemoryForDevice("Unable to resolve memory for `result`"))), - ) - )) - } + /// For a memory managed version see `sigmoid`. + fn sigmoid_plain(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + + /// Computes the first derivative of a [Sigmoid function][sigmoid] over the input Tensor `x` with complete memory management. + /// [sigmoid]: https://en.wikipedia.org/wiki/Sigmoid_function + /// + /// Saves the result to `result`. + /// + /// For a no-memory managed version see `sigmoid_diff_plain`. + fn sigmoid_diff(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + + /// Computes the first derivative of a Sigmoid function over the input Tensor `x` without any memory management. + /// + /// Saves the result to `result`. + /// + /// *Attention*:
+ /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
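To make the managed/plain split concrete, here is a rough caller-side sketch against the CUDA backend. It only uses calls that appear in this patch (`add_device`, `sync`, `device`, and the `INn` methods); the `use` paths and the assumption that the `INn` trait defined above is in scope are illustrative, not taken from the patch:

    use co::backend::Backend;
    use co::frameworks::cuda::Cuda;
    use co::tensor::SharedTensor;

    fn run_sigmoid(backend: &Backend<Cuda>,
                   x: &mut SharedTensor<f32>,
                   result: &mut SharedTensor<f32>) -> Result<(), ::co::error::Error> {
        // `sigmoid` is the memory-managed variant: it adds the backend's device
        // to `x`/`result` and syncs `x` before launching the cuDNN kernel.
        try!(backend.sigmoid(x, result));

        // `sigmoid_plain` skips that bookkeeping, so the caller has to place and
        // sync the data on the device beforehand.
        match x.add_device(backend.device()) { _ => try!(x.sync(backend.device())) }
        match result.add_device(backend.device()) { _ => () }
        try!(backend.sigmoid_plain(x, result));
        Ok(())
    }
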
+ /// For a memory managed version see `sigmoid_diff`. + fn sigmoid_diff_plain(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + /// Returns the binary representation fn binary(&self) -> &Self::B; From 29e05c5932ea5205c4c925e62d7fec0b5991438f Mon Sep 17 00:00:00 2001 From: Hobofan Date: Sun, 13 Dec 2015 15:40:34 +0100 Subject: [PATCH 3/5] refactor/cudnn_descriptor: implement ICudnnTensorDesc on SharedTensor --- src/frameworks/cuda.rs | 13 ++++++------- src/helper.rs | 26 ++++++++++++-------------- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/frameworks/cuda.rs b/src/frameworks/cuda.rs index c7e6ae1..78b232c 100644 --- a/src/frameworks/cuda.rs +++ b/src/frameworks/cuda.rs @@ -6,23 +6,22 @@ use ::plugin::*; use co::backend::Backend; use co::device::DeviceType; use co::memory::MemoryType; -use co::tensor::{SharedTensor, TensorDesc, ITensorDesc}; +use co::tensor::{SharedTensor, ITensorDesc}; use co::plugin::Error as PluginError; use co::frameworks::cuda::{Function, Module, Cuda}; use cudnn::*; -use std::mem::transmute; lazy_static! { static ref SIGMOID: Function = Function::from_isize(1); } -pub trait ICudnnTensorDesc : ITensorDesc { +pub trait ICudnnTensorDesc { fn get_cudnn_desc(&self) -> Result; } -impl ICudnnTensorDesc for TensorDesc { +impl ICudnnTensorDesc for SharedTensor { fn get_cudnn_desc(&self) -> Result { - match TensorDescriptor::new(&self.dims_i32(), &self.default_stride_i32(), DataType::Float) { + match TensorDescriptor::new(&self.desc().dims_i32().clone(), &self.desc().default_stride_i32().clone(), DataType::Float) { Ok(desc) => Ok(desc), Err(err) => { println!("{:?}", err); @@ -32,9 +31,9 @@ impl ICudnnTensorDesc for TensorDesc { } } -impl ICudnnTensorDesc for TensorDesc { +impl ICudnnTensorDesc for SharedTensor { fn get_cudnn_desc(&self) -> Result { - match TensorDescriptor::new(&self.dims_i32(), &self.default_stride_i32(), DataType::Double) { + match TensorDescriptor::new(&self.desc().dims_i32().clone(), &self.desc().default_stride_i32().clone(), DataType::Double) { Ok(desc) => Ok(desc), Err(err) => { println!("{:?}", err); diff --git a/src/helper.rs b/src/helper.rs index 108a293..67fafc9 100644 --- a/src/helper.rs +++ b/src/helper.rs @@ -10,11 +10,11 @@ macro_rules! impl_ops_sigmoid_for { ) -> Result<(), ::co::error::Error> { match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } match result.add_device(self.device()) { _ => () } - let src_desc = try!(x.desc()<::co::tensor::TensorDesc as ICudnnTensorDesc<$t>>.get_cudnn_desc()); + let src_desc = try!(x.get_cudnn_desc()); let src_data = try!(try!(x.get(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))) .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `x`."))) .id_c(); - let dest_desc = try!(result.desc().get_cudnn_desc()); + let dest_desc = try!(result.get_cudnn_desc()); let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) .id_c(); @@ -22,7 +22,7 @@ macro_rules! 
impl_ops_sigmoid_for { Ok(try!(match self.binary().cudnn().sigmoid_forward( &src_desc, unsafe { ::std::mem::transmute::(src_data) }, &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, - <::cudnn::ScalParams as ::cudnn::IScalParamsDefault<$t>>::default() + ::cudnn::ScalParams::<$t>::default() ) { Ok(_) => Ok(()), Err(err) => { @@ -37,11 +37,11 @@ macro_rules! impl_ops_sigmoid_for { x: &::co::tensor::SharedTensor<$t>, result: &mut ::co::tensor::SharedTensor<$t> ) -> Result<(), ::co::error::Error> { - let src_desc = try!(x.desc().get_cudnn_desc()); + let src_desc = try!(x.get_cudnn_desc()); let src_data = try!(try!(x.get(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))) .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `x`."))) .id_c(); - let dest_desc = try!(result.desc().get_cudnn_desc()); + let dest_desc = try!(result.get_cudnn_desc()); let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) .id_c(); @@ -49,7 +49,7 @@ macro_rules! impl_ops_sigmoid_for { Ok(try!(match self.binary().cudnn().sigmoid_forward( &src_desc, unsafe { ::std::mem::transmute::(src_data) }, &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, - <::cudnn::ScalParams as ::cudnn::IScalParamsDefault<$t>>::default() + ::cudnn::ScalParams::<$t>::default() ) { Ok(_) => Ok(()), Err(err) => { @@ -66,11 +66,11 @@ macro_rules! impl_ops_sigmoid_for { ) -> Result<(), ::co::error::Error> { match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } match result.add_device(self.device()) { _ => () } - let src_desc = try!(x.desc().get_cudnn_desc()); + let src_desc = try!(x.get_cudnn_desc()); let src_data = try!(try!(x.get(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))) .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `x`."))) .id_c(); - let dest_desc = try!(result.desc().get_cudnn_desc()); + let dest_desc = try!(result.get_cudnn_desc()); let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) .id_c(); @@ -78,7 +78,7 @@ macro_rules! impl_ops_sigmoid_for { Ok(try!(match self.binary().cudnn().sigmoid_forward( &src_desc, unsafe { ::std::mem::transmute::(src_data) }, &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, - <::cudnn::ScalParams as ::cudnn::IScalParamsDefault<$t>>::default() + ::cudnn::ScalParams::<$t>::default() ) { Ok(_) => Ok(()), Err(err) => { @@ -93,13 +93,11 @@ macro_rules! 
impl_ops_sigmoid_for { x: &::co::tensor::SharedTensor<$t>, result: &mut ::co::tensor::SharedTensor<$t> ) -> Result<(), ::co::error::Error> { - match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } - match result.add_device(self.device()) { _ => () } - let src_desc = try!(x.desc().get_cudnn_desc()); + let src_desc = try!(x.get_cudnn_desc()); let src_data = try!(try!(x.get(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))) .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `x`."))) .id_c(); - let dest_desc = try!(result.desc().get_cudnn_desc()); + let dest_desc = try!(result.get_cudnn_desc()); let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) .id_c(); @@ -107,7 +105,7 @@ macro_rules! impl_ops_sigmoid_for { Ok(try!(match self.binary().cudnn().sigmoid_forward( &src_desc, unsafe { ::std::mem::transmute::(src_data) }, &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, - <::cudnn::ScalParams as ::cudnn::IScalParamsDefault<$t>>::default() + ::cudnn::ScalParams::<$t>::default() ) { Ok(_) => Ok(()), Err(err) => { From 43654dca7cb92826ffecd4f0cd251fb7071d11c5 Mon Sep 17 00:00:00 2001 From: Hobofan Date: Mon, 14 Dec 2015 13:36:19 +0100 Subject: [PATCH 4/5] fix/scale_params: fix ScalParams default to work on stable --- src/helper.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/helper.rs b/src/helper.rs index 67fafc9..0a7cf8b 100644 --- a/src/helper.rs +++ b/src/helper.rs @@ -18,11 +18,12 @@ macro_rules! impl_ops_sigmoid_for { let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) .id_c(); + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); Ok(try!(match self.binary().cudnn().sigmoid_forward( &src_desc, unsafe { ::std::mem::transmute::(src_data) }, &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, - ::cudnn::ScalParams::<$t>::default() + scal_params ) { Ok(_) => Ok(()), Err(err) => { @@ -45,11 +46,12 @@ macro_rules! impl_ops_sigmoid_for { let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) .id_c(); + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); Ok(try!(match self.binary().cudnn().sigmoid_forward( &src_desc, unsafe { ::std::mem::transmute::(src_data) }, &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, - ::cudnn::ScalParams::<$t>::default() + scal_params ) { Ok(_) => Ok(()), Err(err) => { @@ -74,11 +76,12 @@ macro_rules! 
impl_ops_sigmoid_for { let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) .id_c(); + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); Ok(try!(match self.binary().cudnn().sigmoid_forward( &src_desc, unsafe { ::std::mem::transmute::(src_data) }, &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, - ::cudnn::ScalParams::<$t>::default() + scal_params ) { Ok(_) => Ok(()), Err(err) => { @@ -101,11 +104,12 @@ macro_rules! impl_ops_sigmoid_for { let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) .id_c(); + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); Ok(try!(match self.binary().cudnn().sigmoid_forward( &src_desc, unsafe { ::std::mem::transmute::(src_data) }, &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, - ::cudnn::ScalParams::<$t>::default() + scal_params ) { Ok(_) => Ok(()), Err(err) => { From 3311bb43d78c850db8322c9ea8c1a5f2ca189cd1 Mon Sep 17 00:00:00 2001 From: MichaelHirn Date: Mon, 14 Dec 2015 18:51:55 +0000 Subject: [PATCH 5/5] feat/activation: add most popular NN activation functions --- .cargo/config | 1 - .cargo/linear-map/Cargo.toml | 17 - .cargo/linear-map/LICENSE-APACHE | 201 -------- .cargo/linear-map/LICENSE-MIT | 25 - .cargo/linear-map/README.md | 5 - .cargo/linear-map/deploy-docs.sh | 20 - .cargo/linear-map/src/lib.rs | 804 ------------------------------- Cargo.toml | 3 +- src/frameworks/cuda.rs | 5 + src/helper.rs | 326 +++++++++++-- src/plugin.rs | 81 +++- tests/cuda_nn_specs.rs | 85 ---- tests/relu_specs.rs | 215 +++++++++ tests/sigmoid_specs.rs | 215 +++++++++ tests/tanh_specs.rs | 215 +++++++++ 15 files changed, 1005 insertions(+), 1213 deletions(-) delete mode 100644 .cargo/config delete mode 100644 .cargo/linear-map/Cargo.toml delete mode 100644 .cargo/linear-map/LICENSE-APACHE delete mode 100644 .cargo/linear-map/LICENSE-MIT delete mode 100644 .cargo/linear-map/README.md delete mode 100755 .cargo/linear-map/deploy-docs.sh delete mode 100644 .cargo/linear-map/src/lib.rs delete mode 100644 tests/cuda_nn_specs.rs create mode 100644 tests/relu_specs.rs create mode 100644 tests/sigmoid_specs.rs create mode 100644 tests/tanh_specs.rs diff --git a/.cargo/config b/.cargo/config deleted file mode 100644 index 50be109..0000000 --- a/.cargo/config +++ /dev/null @@ -1 +0,0 @@ -paths = [".cargo/linear-map"] diff --git a/.cargo/linear-map/Cargo.toml b/.cargo/linear-map/Cargo.toml deleted file mode 100644 index 31c7814..0000000 --- a/.cargo/linear-map/Cargo.toml +++ /dev/null @@ -1,17 +0,0 @@ -[package] - -name = "linear-map" -version = "0.0.3" -license = "MIT/Apache-2.0" -description = "A map backed by a vector" -authors = [ - "Tobias Bucher ", -] - -repository = "https://github.com/contain-rs/linear-map" -documentation = "https://contain-rs.github.io/linear-map/linear_map" -keywords = ["data-structures"] -readme = "README.md" - -[features] -nightly = [] diff --git a/.cargo/linear-map/LICENSE-APACHE b/.cargo/linear-map/LICENSE-APACHE deleted file mode 100644 index 11069ed..0000000 --- a/.cargo/linear-map/LICENSE-APACHE +++ /dev/null @@ -1,201 +0,0 @@ - Apache 
License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - -TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - -1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - -2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - -3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - -4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - -5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - -6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - -7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - -8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - -9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - -END OF TERMS AND CONDITIONS - -APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - -Copyright [yyyy] [name of copyright owner] - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. diff --git a/.cargo/linear-map/LICENSE-MIT b/.cargo/linear-map/LICENSE-MIT deleted file mode 100644 index e69282e..0000000 --- a/.cargo/linear-map/LICENSE-MIT +++ /dev/null @@ -1,25 +0,0 @@ -Copyright (c) 2015 The Rust Project Developers - -Permission is hereby granted, free of charge, to any -person obtaining a copy of this software and associated -documentation files (the "Software"), to deal in the -Software without restriction, including without -limitation the rights to use, copy, modify, merge, -publish, distribute, sublicense, and/or sell copies of -the Software, and to permit persons to whom the Software -is furnished to do so, subject to the following -conditions: - -The above copyright notice and this permission notice -shall be included in all copies or substantial portions -of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF -ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED -TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A -PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT -SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR -IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. diff --git a/.cargo/linear-map/README.md b/.cargo/linear-map/README.md deleted file mode 100644 index 4c1683b..0000000 --- a/.cargo/linear-map/README.md +++ /dev/null @@ -1,5 +0,0 @@ -**Temporary override; see [contain-rs/linear-map#8](https://github.com/contain-rs/linear-map/pull/8)** - -A map backed by a vector - -Documentation is available at https://contain-rs.github.io/linear-map/linear_map. diff --git a/.cargo/linear-map/deploy-docs.sh b/.cargo/linear-map/deploy-docs.sh deleted file mode 100755 index c8f25ee..0000000 --- a/.cargo/linear-map/deploy-docs.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -set -o errexit -o nounset - -rev=$(git rev-parse --short HEAD) - -cd target/doc - -git init -git config user.email 'FlashCat@users.noreply.github.com' -git config user.name 'FlashCat' -git remote add upstream "https://${GH_TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git" -git fetch upstream gh-pages -git reset upstream/gh-pages - -touch . - -git add -A . -git commit -m "rebuild pages at ${rev}" -git push -q upstream HEAD:gh-pages diff --git a/.cargo/linear-map/src/lib.rs b/.cargo/linear-map/src/lib.rs deleted file mode 100644 index d82e98f..0000000 --- a/.cargo/linear-map/src/lib.rs +++ /dev/null @@ -1,804 +0,0 @@ -//! A module providing a map implementation `LinearMap` backed by a vector. - -#![warn(missing_docs)] -#![cfg_attr(all(test, feature = "nightly"), feature(test))] - -use std::borrow::Borrow; -use std::cmp::Ordering; -use std::fmt::{self, Debug}; -use std::hash::{self, Hash}; -use std::iter::{self, Map}; -use std::mem; -use std::ops; -use std::slice; - -use self::Entry::{Occupied, Vacant}; - -// TODO: Unzip the vectors? 
-// Consideration: When unzipped, the compiler will not be able to understand -// that both of the `Vec`s have the same length, thus stuff like `iter` and so -// on should probably be implemented in unsafe code. - -/// A very simple map implementation backed by a vector. -/// -/// Use it like any map, as long as the number of elements that it stores is -/// very small. -/// -/// # Example (like std's HashMap) -/// -/// ``` -/// use linear_map::LinearMap; -/// -/// // type inference lets us omit an explicit type signature (which -/// // would be `LinearMap<&str, &str>` in this example). -/// let mut book_reviews = LinearMap::new(); -/// -/// // review some books. -/// book_reviews.insert("Adventures of Huckleberry Finn", "My favorite book."); -/// book_reviews.insert("Grimms' Fairy Tales", "Masterpiece."); -/// book_reviews.insert("Pride and Prejudice", "Very enjoyable."); -/// book_reviews.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot."); -/// -/// // check for a specific one. -/// if !book_reviews.contains_key("Les Misérables") { -/// println!("We've got {} reviews, but Les Misérables ain't one.", -/// book_reviews.len()); -/// } -/// -/// // oops, this review has a lot of spelling mistakes, let's delete it. -/// book_reviews.remove("The Adventures of Sherlock Holmes"); -/// -/// // look up the values associated with some keys. -/// let to_find = ["Pride and Prejudice", "Alice's Adventure in Wonderland"]; -/// for book in to_find.iter() { -/// match book_reviews.get(book) { -/// Some(review) => println!("{}: {}", *book, *review), -/// None => println!("{} is unreviewed.", *book) -/// } -/// } -/// -/// // iterate over everything. -/// for (book, review) in book_reviews.iter() { -/// println!("{}: \"{}\"", *book, *review); -/// } -/// ``` -#[derive(Clone)] -pub struct LinearMap { - storage: Vec<(K,V)>, -} - -impl LinearMap { - /// Creates an empty map. This method does not allocate. - pub fn new() -> LinearMap { - LinearMap { - storage: Vec::new(), - } - } - - /// Creates an empty map with the given initial capacity. - pub fn with_capacity(capacity: usize) -> LinearMap { - LinearMap { - storage: Vec::with_capacity(capacity), - } - } - - /// Returns the number of elements the map can hold without reallocating. - pub fn capacity(&self) -> usize { - self.storage.capacity() - } - - /// Reserves capacity for at least `additional` more to be inserted in the - /// map. The collection may reserve more space to avoid frequent - /// reallocations. - /// - /// # Panics - /// - /// Panics if the new allocation size overflows `usize`. - pub fn reserve(&mut self, additional: usize) { - self.storage.reserve(additional); - } - - /// Reserves the minimum capacity for exactly `additional` more elemnnts to - /// be inserted in the map. - /// - /// Note that the allocator may give the collection more space than it - /// requests. Therefore capacity cannot be relied upon to be precisely - /// minimal. Prefer `reserve` if future insertions are expected. - /// - /// # Panics - /// - /// Panics if the new capacity overflows `usize`. - pub fn reserve_exact(&mut self, additional: usize) { - self.storage.reserve_exact(additional); - } - - /// Shrinks the capacity of the map as much as possible. - /// - /// It will drop down as close as possible to the current length but the - /// allocator may still inform the map that there is more space than - /// necessary. Therefore capacity cannot be relid upon to be minimal. 
- pub fn shrink_to_fit(&mut self) { - self.storage.shrink_to_fit(); - } - - /// Returns the number of elements in the map. - pub fn len(&self) -> usize { - self.storage.len() - } - - /// Returns true if the map contains no elements. - pub fn is_empty(&self) -> bool { - self.storage.is_empty() - } - - /// Clears the map, removing all elements. Keeps the allocated memory for - /// reuse. - pub fn clear(&mut self) { - self.storage.clear(); - } - - /// An iterator visiting all key-value pairs in arbitrary order. Iterator - /// element type is `(&'a K, &'a V)`. - pub fn iter<'a>(&'a self) -> Iter<'a, K, V> { - fn ref_(&(ref v1, ref v2): &(A, B)) -> (&A, &B) { (v1, v2) } - Iter { iter: self.storage.iter().map(ref_:: as fn(&'a (K, V)) -> (&'a K, &'a V)) } - } - - /// An iterator visiting all key-value pairs in arbitrary order with - /// mutable references to the values. Iterator element type is `(&'a K, &'a - /// mut V)`. - pub fn iter_mut<'a>(&'a mut self) -> IterMut<'a, K, V> { - fn ref_(&mut (ref v1, ref mut v2): &mut (A, B)) -> (&A, &mut B) { (v1, v2) } - IterMut { iter: self.storage.iter_mut().map(ref_:: as fn(&'a mut (K, V)) -> (&'a K, &'a mut V)) } - } - - /// An iterator visiting all keys in arbitrary order. Iterator element type - /// is `&'a K`. - pub fn keys<'a>(&'a self) -> Keys<'a, K, V> { - fn first((v, _): (A, B)) -> A { v } - Keys { iter: self.iter().map(first::<&'a K, &'a V> as fn((&'a K, &'a V)) -> &'a K) } - } - - /// An iterator visiting all values in arbitrary order. Iterator element - /// type is `&'a V`. - pub fn values<'a>(&'a self) -> Values<'a, K, V> { - fn second((_, v): (A, B)) -> B { v } - Values { iter: self.iter().map(second::<&'a K, &'a V> as fn((&'a K, &'a V)) -> &'a V) } - } - - /// Returns a reference to the value corresponding to the key. - pub fn get(&self, key: &Q) -> Option<&V> where K: Borrow, Q: Eq { - for (k, v) in self.iter() { - if key == k.borrow() { - return Some(v); - } - } - None - } - - /// Returns a mutable reference to the value corresponding to the key. - pub fn get_mut(&mut self, key: &Q) -> Option<&mut V> where K: Borrow, Q: Eq { - for (k, v) in self.iter_mut() { - if key == k.borrow() { - return Some(v); - } - } - None - } - - /// Returns true if the map contains a value to the specified key. - pub fn contains_key(&self, key: &Q) -> bool where K: Borrow, Q: Eq { - self.get(key).is_some() - } - - /// Inserts a key-value pair into the map. If the key already had a value - /// present in the map, it is returned. Otherwise, `None` is returned. - pub fn insert(&mut self, key: K, value: V) -> Option { - for kv in self.storage.iter_mut() { - let found; - { - let &mut (ref k, _) = kv; - found = key == *k; - } - if found { - let (_, v) = mem::replace(kv, (key, value)); - return Some(v); - } - } - self.storage.push((key, value)); - None - } - - /// Removes a key-value pair from the map. If the key had a value present - /// in the map, it is returned. Otherwise, `None` is returned. - pub fn remove(&mut self, key: &Q) -> Option where K: Borrow, Q: Eq { - for i in 0..self.storage.len() { - let found; - { - let (ref k, _) = self.storage[i]; - found = key == k.borrow(); - } - if found { - let (_, v) = self.storage.swap_remove(i); - return Some(v); - } - } - None - } - - /// Gets the given key's corresponding entry in the map for in-place manipulation. 
- pub fn entry(&mut self, key: K) -> Entry { - match self.storage.iter().position(|&(ref k, _)| key == *k) { - None => Vacant(VacantEntry { - map: self, - key: key - }), - Some(index) => Occupied(OccupiedEntry { - map: self, - index: index - }) - } - } -} - -impl Debug for LinearMap where K: Eq + Debug, V: Debug { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_map().entries(self.iter()).finish() - } -} - -impl Default for LinearMap where K: Eq { - fn default() -> Self { LinearMap::new() } -} - -impl Extend<(K, V)> for LinearMap where K: Eq { - fn extend>(&mut self, key_values: I) { - for (key, value) in key_values { self.insert(key, value); } - } -} - -impl iter::FromIterator<(K, V)> for LinearMap where K: Eq { - fn from_iter>(key_values: I) -> Self { - let mut map = Self::new(); - map.extend(key_values); - map - } -} - -impl Hash for LinearMap where K: Eq + Hash, V: Hash { - fn hash(&self, h: &mut H) { - for e in self { e.hash(h); } - } -} - -impl<'a, K, V, Q: ?Sized> ops::Index<&'a Q> for LinearMap where K: Eq + Borrow, Q: Eq { - type Output = V; - fn index(&self, key: &'a Q) -> &V { self.get(key).expect("key not found") } -} - -impl PartialEq for LinearMap where K: Eq, V: PartialEq { - fn eq(&self, other: &Self) -> bool { self.iter().eq(other.iter()) } -} - -impl Eq for LinearMap where K: Eq, V: Eq {} - -impl PartialOrd for LinearMap where K: Eq + PartialOrd, V: PartialOrd { - fn partial_cmp(&self, other: &Self) -> Option { - self.iter().partial_cmp(other.iter()) - } -} - -impl Ord for LinearMap where K: Ord, V: Ord { - fn cmp(&self, other: &Self) -> Ordering { self.iter().cmp(other.iter()) } -} - -/// A view into a single occupied location in a LinearMap. -pub struct OccupiedEntry<'a, K: 'a, V: 'a> { - map: &'a mut LinearMap, - index: usize -} - -/// A view into a single empty location in a LinearMap. -pub struct VacantEntry<'a, K: 'a, V: 'a> { - map: &'a mut LinearMap, - key: K -} - -/// A view into a single location in a map, which may be vacant or occupied. -pub enum Entry<'a, K: 'a, V: 'a> { - /// An occupied Entry. - Occupied(OccupiedEntry<'a, K, V>), - - /// A vacant Entry. - Vacant(VacantEntry<'a, K, V>) -} - -impl<'a, K, V> Entry<'a, K, V> { - /// Ensures a value is in the entry by inserting the default if empty, and returns - /// a mutable reference to the value in the entry. - pub fn or_insert(self, default: V) -> &'a mut V { - match self { - Occupied(entry) => entry.into_mut(), - Vacant(entry) => entry.insert(default) - } - } - - /// Ensures a value is in the entry by inserting the result of the default function if empty, - /// and returns a mutable reference to the value in the entry. - pub fn or_insert_with V>(self, default: F) -> &'a mut V { - match self { - Occupied(entry) => entry.into_mut(), - Vacant(entry) => entry.insert(default()) - } - } -} - -impl<'a, K, V> OccupiedEntry<'a, K, V> { - /// Gets a reference to the value in the entry. - pub fn get(&self) -> &V { - &self.map.storage[self.index].1 - } - - /// Gets a mutable reference to the value in the entry. 
- pub fn get_mut(&mut self) -> &mut V { - &mut self.map.storage[self.index].1 - } - - /// Converts the OccupiedEntry into a mutable reference to the value in the entry - /// with a lifetime bound to the map itself - pub fn into_mut(self) -> &'a mut V { - &mut self.map.storage[self.index].1 - } - - /// Sets the value of the entry, and returns the entry's old value - pub fn insert(&mut self, mut value: V) -> V { - let old_value = self.get_mut(); - mem::swap(&mut value, old_value); - value - } - - /// Takes the value out of the entry, and returns it - pub fn remove(self) -> V { - self.map.storage.swap_remove(self.index).1 - } -} - -impl<'a, K, V> VacantEntry<'a, K, V> { - /// Sets the value of the entry with the VacantEntry's key, - /// and returns a mutable reference to it - pub fn insert(self, value: V) -> &'a mut V { - self.map.storage.push((self.key, value)); - &mut self.map.storage.last_mut().unwrap().1 - } -} - -/// A consuming iterator over a map. -pub struct IntoIter { - iter: ::std::vec::IntoIter<(K, V)>, -} - -impl Iterator for IntoIter { - type Item = (K, V); - fn next(&mut self) -> Option { self.iter.next() } - fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } -} - -impl DoubleEndedIterator for IntoIter { - fn next_back(&mut self) -> Option { self.iter.next_back() } -} - -impl ExactSizeIterator for IntoIter { - fn len(&self) -> usize { self.iter.len() } -} - -/// The iterator returned by `LinearMap::iter`. -pub struct Iter<'a, K:'a, V:'a> { - iter: Map, fn(&'a (K, V)) -> (&'a K, &'a V)>, -} - -/// The iterator returned by `LinearMap::iter_mut`. -pub struct IterMut<'a, K:'a, V:'a> { - iter: Map, fn(&'a mut (K, V)) -> (&'a K, &'a mut V)>, -} - -/// The iterator returned by `LinearMap::keys`. -pub struct Keys<'a, K:'a, V:'a> { - iter: Map, fn((&'a K, &'a V)) -> &'a K>, -} - -/// The iterator returned by `LinearMap::values`. 
-pub struct Values<'a, K:'a, V:'a> { - iter: Map, fn((&'a K, &'a V)) -> &'a V>, -} - -impl<'a, K, V> Iterator for Iter<'a, K, V> { - type Item = (&'a K, &'a V); - fn next(&mut self) -> Option<(&'a K, &'a V)> { self.iter.next() } - fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } -} - -impl<'a, K, V> Iterator for IterMut<'a, K, V> { - type Item = (&'a K, &'a mut V); - fn next(&mut self) -> Option<(&'a K, &'a mut V)> { self.iter.next() } - fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } -} - -impl<'a, K, V> Iterator for Keys<'a, K, V> { - type Item = &'a K; - fn next(&mut self) -> Option<&'a K> { self.iter.next() } - fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } -} - -impl<'a, K, V> Iterator for Values<'a, K, V> { - type Item = &'a V; - fn next(&mut self) -> Option<&'a V> { self.iter.next() } - fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } -} - -impl<'a, K, V> Clone for Iter<'a, K, V> { - fn clone(&self) -> Iter<'a, K, V> { Iter { iter: self.iter.clone() } } -} - -impl<'a, K, V> Clone for Keys<'a, K, V> { - fn clone(&self) -> Keys<'a, K, V> { Keys { iter: self.iter.clone() } } -} - -impl<'a, K, V> Clone for Values<'a, K, V> { - fn clone(&self) -> Values<'a, K, V> { Values { iter: self.iter.clone() } } -} - -impl<'a, K, V> DoubleEndedIterator for Iter<'a, K, V> { - fn next_back(&mut self) -> Option<(&'a K, &'a V)> { self.iter.next_back() } -} - -impl<'a, K, V> DoubleEndedIterator for IterMut<'a, K, V> { - fn next_back(&mut self) -> Option<(&'a K, &'a mut V)> { self.iter.next_back() } -} - -impl<'a, K, V> DoubleEndedIterator for Keys<'a, K, V> { - fn next_back(&mut self) -> Option<&'a K> { self.iter.next_back() } -} - -impl<'a, K, V> DoubleEndedIterator for Values<'a, K, V> { - fn next_back(&mut self) -> Option<&'a V> { self.iter.next_back() } -} - -impl<'a, K, V> ExactSizeIterator for Iter <'a, K, V> { } -impl<'a, K, V> ExactSizeIterator for IterMut<'a, K, V> { } -impl<'a, K, V> ExactSizeIterator for Keys <'a, K, V> { } -impl<'a, K, V> ExactSizeIterator for Values <'a, K, V> { } - -impl IntoIterator for LinearMap where K: Eq { - type Item = (K, V); - type IntoIter = IntoIter; - fn into_iter(self) -> IntoIter { IntoIter { iter: self.storage.into_iter() } } -} - -impl<'a, K, V> IntoIterator for &'a LinearMap where K: Eq { - type Item = (&'a K, &'a V); - type IntoIter = Iter<'a, K, V>; - fn into_iter(self) -> Iter<'a, K, V> { self.iter() } -} - -impl<'a, K, V> IntoIterator for &'a mut LinearMap where K: Eq { - type Item = (&'a K, &'a mut V); - type IntoIter = IterMut<'a, K, V>; - fn into_iter(self) -> IterMut<'a, K, V> { self.iter_mut() } -} - -#[cfg(test)] -mod test { - use super::LinearMap; - use super::Entry::{Occupied, Vacant}; - - const TEST_CAPACITY: usize = 10; - - #[test] - fn test_new() { - let map: LinearMap = LinearMap::new(); - assert_eq!(map.capacity(), 0); - assert_eq!(map.len(), 0); - assert!(map.is_empty()); - } - - #[test] - fn test_with_capacity() { - let map: LinearMap = LinearMap::with_capacity(TEST_CAPACITY); - assert!(map.capacity() >= TEST_CAPACITY); - } - - #[test] - fn test_capacity() { - let mut map = LinearMap::new(); - map.insert(1, 2); - assert!(map.capacity() >= 1); - map.remove(&1); - assert!(map.capacity() >= 1); - map.reserve(TEST_CAPACITY); - let capacity = map.capacity(); - assert!(capacity >= TEST_CAPACITY); - for i in 0..TEST_CAPACITY as i32 { - assert!(map.insert(i, i).is_none()); - } - assert_eq!(capacity, map.capacity()); - } - - #[test] - fn test_reserve() { - let mut map = 
LinearMap::new(); - map.reserve(TEST_CAPACITY); - assert!(map.capacity() >= TEST_CAPACITY); - for i in 0..TEST_CAPACITY as i32 { - assert!(map.insert(i, i).is_none()); - } - map.reserve(TEST_CAPACITY); - assert!(map.capacity() >= 2 * TEST_CAPACITY); - - let mut map = LinearMap::new(); - map.reserve(TEST_CAPACITY); - assert!(map.capacity() >= TEST_CAPACITY); - for i in 0..TEST_CAPACITY as i32 { - assert!(map.insert(i, i).is_none()); - } - map.reserve(TEST_CAPACITY); - assert!(map.capacity() >= 2 * TEST_CAPACITY); - } - - #[test] - fn test_shrink_to_fit() { - let mut map = LinearMap::new(); - map.shrink_to_fit(); - assert_eq!(map.capacity(), 0); - map.reserve(TEST_CAPACITY); - map.shrink_to_fit(); - assert_eq!(map.capacity(), 0); - for i in 0..TEST_CAPACITY as i32 { - assert!(map.insert(i, i).is_none()); - } - map.shrink_to_fit(); - assert_eq!(map.len(), TEST_CAPACITY); - assert!(map.capacity() >= TEST_CAPACITY); - } - - #[test] - fn test_len_and_is_empty() { - let mut map = LinearMap::new(); - assert_eq!(map.len(), 0); - assert!(map.is_empty()); - map.insert(100, 100); - assert_eq!(map.len(), 1); - assert!(!map.is_empty()); - for i in 0..TEST_CAPACITY as i32 { - assert!(map.insert(i, i).is_none()); - } - assert_eq!(map.len(), 1 + TEST_CAPACITY); - assert!(!map.is_empty()); - assert!(map.remove(&100).is_some()); - assert_eq!(map.len(), TEST_CAPACITY); - assert!(!map.is_empty()); - } - - #[test] - fn test_clear() { - let mut map = LinearMap::new(); - map.clear(); - assert_eq!(map.len(), 0); - for i in 0..TEST_CAPACITY as i32 { - assert!(map.insert(i, i).is_none()); - } - map.clear(); - assert_eq!(map.len(), 0); - assert!(map.capacity() > 0); - } - - #[test] - fn test_iterators() { - const ONE: i32 = 0b0001; - const TWO: i32 = 0b0010; - const THREE: i32 = 0b0100; - const FOUR: i32 = 0b1000; - const ALL: i32 = 0b1111; - let mut map = LinearMap::new(); - assert!(map.insert(ONE, TWO).is_none()); - assert!(map.insert(TWO, THREE).is_none()); - assert!(map.insert(THREE, FOUR).is_none()); - assert!(map.insert(FOUR, ONE).is_none()); - - { - let mut result_k = 0; - let mut result_v = 0; - for (&k, &v) in map.iter() { - result_k ^= k; - result_v ^= v; - assert_eq!(((k << 1) & ALL) | ((k >> 3) & ALL), v); - } - assert_eq!(result_k, ALL); - assert_eq!(result_v, ALL); - } - { - let mut result_k = 0; - let mut result_v = 0; - for (&k, &mut v) in map.iter_mut() { - result_k ^= k; - result_v ^= v; - assert_eq!(((k << 1) & ALL) | ((k >> 3) & ALL), v); - } - assert_eq!(result_k, ALL); - assert_eq!(result_v, ALL); - } - { - let mut result = 0; - for &k in map.keys() { - result ^= k; - } - assert_eq!(result, ALL); - } - { - let mut result = 0; - for &v in map.values() { - result ^= v; - } - assert_eq!(result, ALL); - } - } - - #[test] - fn test_insert_remove_get() { - let mut map = LinearMap::new(); - assert!(map.insert(100, 101).is_none()); - assert!(map.contains_key(&100)); - assert_eq!(map.get(&100), Some(&101)); - assert_eq!(map.get_mut(&100), Some(&mut 101)); - for i in 0..TEST_CAPACITY as i32 { - assert!(map.insert(i, i).is_none()); - } - assert_eq!(map.insert(100, 102), Some(101)); - assert_eq!(map.remove(&100), Some(102)); - assert_eq!(map.remove(&100), None); - assert_eq!(map.remove(&1000), None); - } - - #[test] - fn test_entry() { - let xs = [(1, 10), (2, 20), (3, 30), (4, 40), (5, 50), (6, 60)]; - - let mut map = LinearMap::new(); - - for &(k, v) in &xs { - map.insert(k, v); - } - - // Existing key (insert) - match map.entry(1) { - Vacant(_) => unreachable!(), - Occupied(mut view) => { - 
assert_eq!(view.get(), &10); - assert_eq!(view.insert(100), 10); - } - } - assert_eq!(map.get(&1).unwrap(), &100); - assert_eq!(map.len(), 6); - - - // Existing key (update) - match map.entry(2) { - Vacant(_) => unreachable!(), - Occupied(mut view) => { - let v = view.get_mut(); - let new_v = (*v) * 10; - *v = new_v; - } - } - assert_eq!(map.get(&2).unwrap(), &200); - assert_eq!(map.len(), 6); - - // Existing key (take) - match map.entry(3) { - Vacant(_) => unreachable!(), - Occupied(view) => { - assert_eq!(view.remove(), 30); - } - } - assert_eq!(map.get(&3), None); - assert_eq!(map.len(), 5); - - - // Inexistent key (insert) - match map.entry(10) { - Occupied(_) => unreachable!(), - Vacant(view) => { - assert_eq!(*view.insert(1000), 1000); - } - } - assert_eq!(map.get(&10).unwrap(), &1000); - assert_eq!(map.len(), 6); - } -} - -#[cfg(all(test, feature = "nightly"))] -mod bench { - use super::LinearMap; - - extern crate test; - - const SMALL: u32 = 10; - const MEDIUM: u32 = 100; - const BIG: u32 = 1000; - - fn insert(b: &mut test::Bencher, num: u32) { - b.iter(|| { - let mut map = LinearMap::new(); - for i in 0..num { - map.insert(i, i); - } - }) - } - - fn remove_insert(b: &mut test::Bencher, num: u32) { - b.iter(|| { - let mut map = LinearMap::new(); - for i in 0..num { - map.insert(i, i); - } - for i in 0..num { - map.remove(&i); - } - }) - } - - fn remove_rev_insert(b: &mut test::Bencher, num: u32) { - b.iter(|| { - let mut map = LinearMap::new(); - for i in 0..num { - map.insert(i, i); - } - for i in 0..num { - map.remove(&(num - i - 1)); - } - }) - } - - fn get_middle(b: &mut test::Bencher, num: u32) { - let mut map = LinearMap::new(); - for i in 0..num { - map.insert(i, i); - } - let middle = num / 2; - b.iter(|| { - test::black_box(map.get(&middle)); - test::black_box(map.get_mut(&middle)); - }) - } - - fn get_none(b: &mut test::Bencher, num: u32) { - let mut map = LinearMap::new(); - for i in 0..num { - map.insert(i, i); - } - let none = num + 1; - b.iter(|| { - test::black_box(map.get(&none)); - test::black_box(map.get_mut(&none)); - }) - } - - #[bench] fn bench_insert_small (b: &mut test::Bencher) { insert(b, SMALL); } - #[bench] fn bench_insert_medium(b: &mut test::Bencher) { insert(b, MEDIUM); } - #[bench] fn bench_insert_big (b: &mut test::Bencher) { insert(b, BIG); } - - #[bench] fn bench_remove_insert_small (b: &mut test::Bencher) { remove_insert(b, SMALL); } - #[bench] fn bench_remove_insert_medium(b: &mut test::Bencher) { remove_insert(b, MEDIUM); } - #[bench] fn bench_remove_insert_big (b: &mut test::Bencher) { remove_insert(b, BIG); } - - #[bench] fn bench_remove_rev_insert_small (b: &mut test::Bencher) { remove_rev_insert(b, SMALL); } - #[bench] fn bench_remove_rev_insert_medium(b: &mut test::Bencher) { remove_rev_insert(b, MEDIUM); } - #[bench] fn bench_remove_rev_insert_big (b: &mut test::Bencher) { remove_rev_insert(b, BIG); } - - #[bench] fn bench_get_middle_small (b: &mut test::Bencher) { get_middle(b, SMALL); } - #[bench] fn bench_get_middle_medium(b: &mut test::Bencher) { get_middle(b, MEDIUM); } - #[bench] fn bench_get_middle_big (b: &mut test::Bencher) { get_middle(b, BIG); } - - #[bench] fn bench_get_none_small (b: &mut test::Bencher) { get_none(b, SMALL); } - #[bench] fn bench_get_none_medium(b: &mut test::Bencher) { get_none(b, MEDIUM); } - #[bench] fn bench_get_none_big (b: &mut test::Bencher) { get_none(b, BIG); } -} diff --git a/Cargo.toml b/Cargo.toml index 879f069..752264f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,8 @@ license = "MIT" 
[dependencies] collenchyma = { path = "/home/hobofan/collenchyma" } -cudnn = "0.1" +# cudnn = "0.1.3" +cudnn = { git = "https://github.com/autumnai/rust-cudnn.git", rev = "6436d44" } libc = "0.2" lazy_static = "0.1" clippy = { version = "0.0.27", optional = true } diff --git a/src/frameworks/cuda.rs b/src/frameworks/cuda.rs index 78b232c..e46c9d6 100644 --- a/src/frameworks/cuda.rs +++ b/src/frameworks/cuda.rs @@ -13,6 +13,7 @@ use cudnn::*; lazy_static! { static ref SIGMOID: Function = Function::from_isize(1); + static ref CUDNN: Result = Cudnn::new(); } pub trait ICudnnTensorDesc { @@ -96,6 +97,8 @@ impl INn for Backend { type B = Module; impl_ops_sigmoid_for!(f32, Backend); + impl_ops_relu_for!(f32, Backend); + impl_ops_tanh_for!(f32, Backend); fn binary(&self) -> &Self::B { self.binary() @@ -110,6 +113,8 @@ impl INn for Backend { type B = Module; impl_ops_sigmoid_for!(f64, Backend); + impl_ops_relu_for!(f64, Backend); + impl_ops_tanh_for!(f64, Backend); fn binary(&self) -> &Self::B { self.binary() diff --git a/src/helper.rs b/src/helper.rs index 0a7cf8b..f9c1f80 100644 --- a/src/helper.rs +++ b/src/helper.rs @@ -1,5 +1,27 @@ //! Provides macros for convenient implementation of NN operations. +/// Returns cuDNN ready memory pointer from a SharedTensor. +pub unsafe fn receive_memory_ptr(x: &::co::tensor::SharedTensor, device: &::co::device::DeviceType) -> Result<*const ::libc::c_void, ::co::plugin::Error> { + Ok(::std::mem::transmute::( + try!( + try!( + x.get(device).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory.")) + ).as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive CUDA memory.")) + ).id_c() + )) +} + +/// Returns mutable cuDNN ready memory pointer from a SharedTensor. +pub unsafe fn receive_memory_ptr_mut(x: &::co::tensor::SharedTensor, device: &::co::device::DeviceType) -> Result<*mut ::libc::c_void, ::co::plugin::Error> { + Ok(::std::mem::transmute::( + try!( + try!( + x.get(device).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory.")) + ).as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive CUDA memory.")) + ).id_c() + )) +} + #[macro_export] macro_rules! impl_ops_sigmoid_for { ($t:ident, $b:ty) => ( @@ -10,19 +32,13 @@ macro_rules! 
impl_ops_sigmoid_for { ) -> Result<(), ::co::error::Error> { match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } match result.add_device(self.device()) { _ => () } - let src_desc = try!(x.get_cudnn_desc()); - let src_data = try!(try!(x.get(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))) - .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `x`."))) - .id_c(); - let dest_desc = try!(result.get_cudnn_desc()); - let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) - .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) - .id_c(); let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); Ok(try!(match self.binary().cudnn().sigmoid_forward( - &src_desc, unsafe { ::std::mem::transmute::(src_data) }, - &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, + &try!(x.get_cudnn_desc()), // src_desc + try!(unsafe { ::helper::receive_memory_ptr(x, self.device()) }), //src_data + &try!(result.get_cudnn_desc()), // dest_desc + try!(unsafe { ::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data scal_params ) { Ok(_) => Ok(()), @@ -38,19 +54,13 @@ macro_rules! impl_ops_sigmoid_for { x: &::co::tensor::SharedTensor<$t>, result: &mut ::co::tensor::SharedTensor<$t> ) -> Result<(), ::co::error::Error> { - let src_desc = try!(x.get_cudnn_desc()); - let src_data = try!(try!(x.get(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))) - .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `x`."))) - .id_c(); - let dest_desc = try!(result.get_cudnn_desc()); - let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) - .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) - .id_c(); let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); Ok(try!(match self.binary().cudnn().sigmoid_forward( - &src_desc, unsafe { ::std::mem::transmute::(src_data) }, - &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, + &try!(x.get_cudnn_desc()), // src_desc + try!(unsafe { ::helper::receive_memory_ptr(x, self.device()) }), //src_data + &try!(result.get_cudnn_desc()), // dest_desc + try!(unsafe { ::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data scal_params ) { Ok(_) => Ok(()), @@ -61,60 +71,284 @@ macro_rules! 
impl_ops_sigmoid_for { })) } - fn sigmoid_diff( + fn sigmoid_grad( + &self, + x: &mut ::co::tensor::SharedTensor<$t>, + x_diff: &mut ::co::tensor::SharedTensor<$t>, + result: &mut ::co::tensor::SharedTensor<$t>, + result_diff: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match result.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match result_diff.add_device(self.device()) { _ => () } + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); + + Ok(try!(match self.binary().cudnn().sigmoid_backward( + &try!(x.get_cudnn_desc()), // src_desc + try!(unsafe { ::helper::receive_memory_ptr(x, self.device()) }), //src_data + &try!(x_diff.get_cudnn_desc()), // src_diff_desc + try!(unsafe { ::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data + &try!(result.get_cudnn_desc()), // dest_desc + try!(unsafe { ::helper::receive_memory_ptr(result, self.device()) }), // dest_data + &try!(result_diff.get_cudnn_desc()), // dest_diff_desc + try!(unsafe { ::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data + scal_params + ) { + Ok(_) => Ok(()), + Err(err) => { + println!("{:?}", err); + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation Sigmoid Backward.")) + } + })) + } + + fn sigmoid_grad_plain( + &self, + x: &::co::tensor::SharedTensor<$t>, + x_diff: &::co::tensor::SharedTensor<$t>, + result: &::co::tensor::SharedTensor<$t>, + result_diff: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); + + Ok(try!(match self.binary().cudnn().sigmoid_backward( + &try!(x.get_cudnn_desc()), // src_desc + try!(unsafe { ::helper::receive_memory_ptr(x, self.device()) }), //src_data + &try!(x_diff.get_cudnn_desc()), // src_diff_desc + try!(unsafe { ::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data + &try!(result.get_cudnn_desc()), // dest_desc + try!(unsafe { ::helper::receive_memory_ptr(result, self.device()) }), // dest_data + &try!(result_diff.get_cudnn_desc()), // dest_diff_desc + try!(unsafe { ::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data + scal_params + ) { + Ok(_) => Ok(()), + Err(err) => { + println!("{:?}", err); + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation Sigmoid Backward.")) + } + })) + } + ) +} + +#[macro_export] +macro_rules! 
impl_ops_relu_for { + ($t:ident, $b:ty) => ( + fn relu( &self, x: &mut ::co::tensor::SharedTensor<$t>, result: &mut ::co::tensor::SharedTensor<$t> ) -> Result<(), ::co::error::Error> { match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } match result.add_device(self.device()) { _ => () } - let src_desc = try!(x.get_cudnn_desc()); - let src_data = try!(try!(x.get(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))) - .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `x`."))) - .id_c(); - let dest_desc = try!(result.get_cudnn_desc()); - let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) - .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) - .id_c(); let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - Ok(try!(match self.binary().cudnn().sigmoid_forward( - &src_desc, unsafe { ::std::mem::transmute::(src_data) }, - &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, + Ok(try!(match self.binary().cudnn().relu_forward( + &try!(x.get_cudnn_desc()), // src_desc + try!(unsafe { ::helper::receive_memory_ptr(x, self.device()) }), //src_data + &try!(result.get_cudnn_desc()), // dest_desc + try!(unsafe { ::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data scal_params ) { Ok(_) => Ok(()), Err(err) => { println!("{:?}", err); - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation Sigmoid Forward.")) + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation relu Forward.")) } })) } - fn sigmoid_diff_plain( + fn relu_plain( &self, x: &::co::tensor::SharedTensor<$t>, result: &mut ::co::tensor::SharedTensor<$t> ) -> Result<(), ::co::error::Error> { - let src_desc = try!(x.get_cudnn_desc()); - let src_data = try!(try!(x.get(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `x`"))) - .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `x`."))) - .id_c(); - let dest_desc = try!(result.get_cudnn_desc()); - let dest_data = try!(try!(result.get_mut(self.device()).ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to resolve memory for `result`"))) - .as_cuda().ok_or(::co::plugin::Error::MissingMemoryForDevice("Unable to receive native memory for `result`."))) - .id_c(); let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); - Ok(try!(match self.binary().cudnn().sigmoid_forward( - &src_desc, unsafe { ::std::mem::transmute::(src_data) }, - &dest_desc, unsafe { ::std::mem::transmute::(dest_data) }, + Ok(try!(match self.binary().cudnn().relu_forward( + &try!(x.get_cudnn_desc()), // src_desc + try!(unsafe { ::helper::receive_memory_ptr(x, self.device()) }), //src_data + &try!(result.get_cudnn_desc()), // dest_desc + try!(unsafe { ::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data scal_params ) { Ok(_) => Ok(()), Err(err) => { println!("{:?}", err); - Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation Sigmoid Forward.")) + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation relu Forward.")) + } + })) + } + + fn relu_grad( + &self, + x: &mut ::co::tensor::SharedTensor<$t>, + x_diff: &mut ::co::tensor::SharedTensor<$t>, + result: &mut 
::co::tensor::SharedTensor<$t>, + result_diff: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match result.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match result_diff.add_device(self.device()) { _ => () } + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); + + Ok(try!(match self.binary().cudnn().relu_backward( + &try!(x.get_cudnn_desc()), // src_desc + try!(unsafe { ::helper::receive_memory_ptr(x, self.device()) }), //src_data + &try!(x_diff.get_cudnn_desc()), // src_diff_desc + try!(unsafe { ::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data + &try!(result.get_cudnn_desc()), // dest_desc + try!(unsafe { ::helper::receive_memory_ptr(result, self.device()) }), // dest_data + &try!(result_diff.get_cudnn_desc()), // dest_diff_desc + try!(unsafe { ::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data + scal_params + ) { + Ok(_) => Ok(()), + Err(err) => { + println!("{:?}", err); + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation relu Backward.")) + } + })) + } + + fn relu_grad_plain( + &self, + x: &::co::tensor::SharedTensor<$t>, + x_diff: &::co::tensor::SharedTensor<$t>, + result: &::co::tensor::SharedTensor<$t>, + result_diff: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); + + Ok(try!(match self.binary().cudnn().relu_backward( + &try!(x.get_cudnn_desc()), // src_desc + try!(unsafe { ::helper::receive_memory_ptr(x, self.device()) }), //src_data + &try!(x_diff.get_cudnn_desc()), // src_diff_desc + try!(unsafe { ::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data + &try!(result.get_cudnn_desc()), // dest_desc + try!(unsafe { ::helper::receive_memory_ptr(result, self.device()) }), // dest_data + &try!(result_diff.get_cudnn_desc()), // dest_diff_desc + try!(unsafe { ::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data + scal_params + ) { + Ok(_) => Ok(()), + Err(err) => { + println!("{:?}", err); + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation relu Backward.")) + } + })) + } + ) +} + +#[macro_export] +macro_rules! 
impl_ops_tanh_for { + ($t:ident, $b:ty) => ( + fn tanh( + &self, + x: &mut ::co::tensor::SharedTensor<$t>, + result: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match result.add_device(self.device()) { _ => () } + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); + + Ok(try!(match self.binary().cudnn().tanh_forward( + &try!(x.get_cudnn_desc()), // src_desc + try!(unsafe { ::helper::receive_memory_ptr(x, self.device()) }), //src_data + &try!(result.get_cudnn_desc()), // dest_desc + try!(unsafe { ::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data + scal_params + ) { + Ok(_) => Ok(()), + Err(err) => { + println!("{:?}", err); + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation tanh Forward.")) + } + })) + } + + fn tanh_plain( + &self, + x: &::co::tensor::SharedTensor<$t>, + result: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); + + Ok(try!(match self.binary().cudnn().tanh_forward( + &try!(x.get_cudnn_desc()), // src_desc + try!(unsafe { ::helper::receive_memory_ptr(x, self.device()) }), //src_data + &try!(result.get_cudnn_desc()), // dest_desc + try!(unsafe { ::helper::receive_memory_ptr_mut(result, self.device()) }), // dest_data + scal_params + ) { + Ok(_) => Ok(()), + Err(err) => { + println!("{:?}", err); + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation tanh Forward.")) + } + })) + } + + fn tanh_grad( + &self, + x: &mut ::co::tensor::SharedTensor<$t>, + x_diff: &mut ::co::tensor::SharedTensor<$t>, + result: &mut ::co::tensor::SharedTensor<$t>, + result_diff: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + match x.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match x_diff.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match result.add_device(self.device()) { _ => try!(x.sync(self.device())) } + match result_diff.add_device(self.device()) { _ => () } + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); + + Ok(try!(match self.binary().cudnn().tanh_backward( + &try!(x.get_cudnn_desc()), // src_desc + try!(unsafe { ::helper::receive_memory_ptr(x, self.device()) }), //src_data + &try!(x_diff.get_cudnn_desc()), // src_diff_desc + try!(unsafe { ::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data + &try!(result.get_cudnn_desc()), // dest_desc + try!(unsafe { ::helper::receive_memory_ptr(result, self.device()) }), // dest_data + &try!(result_diff.get_cudnn_desc()), // dest_diff_desc + try!(unsafe { ::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data + scal_params + ) { + Ok(_) => Ok(()), + Err(err) => { + println!("{:?}", err); + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation tanh Backward.")) + } + })) + } + + fn tanh_grad_plain( + &self, + x: &::co::tensor::SharedTensor<$t>, + x_diff: &::co::tensor::SharedTensor<$t>, + result: &::co::tensor::SharedTensor<$t>, + result_diff: &mut ::co::tensor::SharedTensor<$t> + ) -> Result<(), ::co::error::Error> { + let scal_params: ::cudnn::utils::ScalParams<$t> = ::cudnn::utils::ScalParams::default(); + + Ok(try!(match self.binary().cudnn().tanh_backward( + &try!(x.get_cudnn_desc()), // src_desc + try!(unsafe { ::helper::receive_memory_ptr(x, 
self.device()) }), //src_data + &try!(x_diff.get_cudnn_desc()), // src_diff_desc + try!(unsafe { ::helper::receive_memory_ptr(x_diff, self.device()) }), //src_diff_data + &try!(result.get_cudnn_desc()), // dest_desc + try!(unsafe { ::helper::receive_memory_ptr(result, self.device()) }), // dest_data + &try!(result_diff.get_cudnn_desc()), // dest_diff_desc + try!(unsafe { ::helper::receive_memory_ptr_mut(result_diff, self.device()) }), // dest_diff_data + scal_params + ) { + Ok(_) => Ok(()), + Err(err) => { + println!("{:?}", err); + Err(::co::plugin::Error::Operation("Unable to execute CUDA cuDNN Activation tanh Backward.")) } })) } diff --git a/src/plugin.rs b/src/plugin.rs index 1833bad..b2a2d3f 100644 --- a/src/plugin.rs +++ b/src/plugin.rs @@ -1,12 +1,10 @@ //! Provides the INn Plugin trait for Collenchyma implementation. use super::binary::INnBinary; -use super::operation::*; use co::plugin::numeric_helpers::Float; use co::binary::IBinary; use co::tensor::SharedTensor; use co::device::DeviceType; -use co::plugin::Error as LibError; /// Provides the functionality for a backend to support Neural Network related operations. pub trait INn { @@ -30,23 +28,90 @@ pub trait INn { /// For a memory managed version see `sigmoid`. fn sigmoid_plain(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; - /// Computes the first derivative of a [Sigmoid function][sigmoid] over the input Tensor `x` with complete memory management. + /// Computes the gradient of a [Sigmoid function][sigmoid] over the input Tensor `x` with complete memory management. /// [sigmoid]: https://en.wikipedia.org/wiki/Sigmoid_function /// + /// Saves the result to `result_diff`. + /// + /// For a no-memory managed version see `sigmoid_grad_plain`. + fn sigmoid_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor, result: &mut SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + + /// Computes the gradient of a Sigmoid function over the input Tensor `x` without any memory management. + /// + /// Saves the result to `result_diff`. + /// + /// *Attention*:
+ /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
+ /// For a memory managed version see `sigmoid_grad`. + fn sigmoid_grad_plain(&self, x: &SharedTensor, x_diff: &SharedTensor, result: &SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + + /// Computes the [Rectified linear units][relu] over the input Tensor `x` with complete memory management. + /// [relu]: https://en.wikipedia.org/wiki/Rectifier_(neural_networks) + /// /// Saves the result to `result`. /// - /// For a no-memory managed version see `sigmoid_diff_plain`. - fn sigmoid_diff(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + /// For a no-memory managed version see `relu_plain`. + fn relu(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; - /// Computes the first derivative of a Sigmoid function over the input Tensor `x` without any memory management. + /// Computes the ReLU over the input Tensor `x` without any memory management. /// /// Saves the result to `result`. /// /// *Attention*:
/// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
- /// For a memory managed version see `sigmoid_diff`. - fn sigmoid_diff_plain(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + /// For a memory managed version see `relu`. + fn relu_plain(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + /// Computes the gradient of [ReLU][relu] over the input Tensor `x` with complete memory management. + /// [relu]: https://en.wikipedia.org/wiki/Rectifier_(neural_networks) + /// + /// Saves the result to `result_diff`. + /// + /// For a no-memory managed version see `relu_grad_plain`. + fn relu_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor, result: &mut SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + + /// Computes the gradient of ReLU over the input Tensor `x` without any memory management. + /// + /// Saves the result to `result_diff`. + /// + /// *Attention*:
+ /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
+ /// For a memory managed version see `relu_grad`. + fn relu_grad_plain(&self, x: &SharedTensor, x_diff: &SharedTensor, result: &SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + + /// Computes the [hyperbolic Tangent][tanh] over the input Tensor `x` with complete memory management. + /// [tanh]: https://en.wikipedia.org/wiki/Hyperbolic_function + /// + /// Saves the result to `result`. + /// + /// For a no-memory managed version see `tanh_plain`. + fn tanh(&self, x: &mut SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + + /// Computes the tanh over the input Tensor `x` without any memory management. + /// + /// Saves the result to `result`. + /// + /// *Attention*:
+ /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
+ /// For a memory managed version see `tanh`. + fn tanh_plain(&self, x: &SharedTensor, result: &mut SharedTensor) -> Result<(), ::co::error::Error>; + + /// Computes the gradient of [tanh][tanh] over the input Tensor `x` with complete memory management. + /// [tanh]: https://en.wikipedia.org/wiki/Hyperbolic_function + /// + /// Saves the result to `result_diff`. + /// + /// For a no-memory managed version see `tanh_grad_plain`. + fn tanh_grad(&self, x: &mut SharedTensor, x_diff: &mut SharedTensor, result: &mut SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; + + /// Computes the gradient of tanh over the input Tensor `x` without any memory management. + /// + /// Saves the result to `result_diff`. + /// + /// *Attention*:
+ /// For a correct computation result, you need to manage the memory allocation and synchronization yourself.
+ /// For a memory managed version see `tanh_grad`. + fn tanh_grad_plain(&self, x: &SharedTensor, x_diff: &SharedTensor, result: &SharedTensor, result_diff: &mut SharedTensor) -> Result<(), ::co::error::Error>; /// Returns the binary representation fn binary(&self) -> &Self::B; diff --git a/tests/cuda_nn_specs.rs b/tests/cuda_nn_specs.rs deleted file mode 100644 index 53ac2ba..0000000 --- a/tests/cuda_nn_specs.rs +++ /dev/null @@ -1,85 +0,0 @@ -extern crate collenchyma_nn as co_nn; -extern crate collenchyma as co; - -#[cfg(test)] -mod blas_spec { - - use co::backend::{Backend, BackendConfig}; - use co::framework::IFramework; - use co::frameworks::{Cuda, Native}; - use co_nn::plugin::*; - use co::memory::MemoryType; - use co::tensor::SharedTensor; - use co::plugin::numeric_helpers::{cast, Float}; - - fn get_native_backend() -> Backend { - let framework = Native::new(); - let hardwares = framework.hardwares(); - let backend_config = BackendConfig::new(framework, hardwares); - Backend::new(backend_config).unwrap() - } - - fn get_cuda_backend() -> Backend { - let framework = Cuda::new(); - let hardwares = framework.hardwares(); - let backend_config = BackendConfig::new(framework, hardwares); - Backend::new(backend_config).unwrap() - } - - fn write_to_memory(mem: &mut MemoryType, data: &[T]) { - if let &mut MemoryType::Native(ref mut mem) = mem { - let mut mem_buffer = mem.as_mut_slice::(); - for (index, datum) in data.iter().enumerate() { - mem_buffer[index] = *datum; - } - } - } - - fn get_sigmoid_memory(backend: &Backend, native: &Backend) -> (SharedTensor, SharedTensor){ - let val = cast::(1f64).unwrap(); - let mut x = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); - x.add_device(native.device()).unwrap(); - x.sync(native.device()); - write_to_memory(x.get_mut(native.device()).unwrap(), &[val, val, val]); - x.sync(backend.device()); - - let mut result = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); - result.add_device(native.device()).unwrap(); - - (x, result) - } - - #[test] - fn it_computes_correct_sigmoid_on_cuda_for_f32() { - let backend = get_cuda_backend(); - let native = get_native_backend(); - let (mut x, mut result) = get_sigmoid_memory::(&backend, &native); - - match backend.sigmoid(&mut x, &mut result) { - Ok(_) => { - result.sync(native.device()).unwrap(); - if let Some(mem) = result.get(native.device()).unwrap().as_native() { - assert_eq!(&[0.7310585786300049f32, 0.7310585786300049f32, 0.7310585786300049f32], mem.as_slice::()); - } - }, - Err(err) => { println!("{:?}", err); assert!(false) } - } - } - - #[test] - fn it_computes_correct_sigmoid_on_cuda_for_f64() { - let backend = get_cuda_backend(); - let native = get_native_backend(); - let (mut x, mut result) = get_sigmoid_memory::(&backend, &native); - - match backend.sigmoid(&mut x, &mut result) { - Ok(_) => { - result.sync(native.device()).unwrap(); - if let Some(mem) = result.get(native.device()).unwrap().as_native() { - assert_eq!(&[0.7310585786300049f64, 0.7310585786300049f64, 0.7310585786300049f64], mem.as_slice::()); - } - }, - Err(err) => { println!("{:?}", err); assert!(false) } - } - } -} diff --git a/tests/relu_specs.rs b/tests/relu_specs.rs new file mode 100644 index 0000000..721e4bc --- /dev/null +++ b/tests/relu_specs.rs @@ -0,0 +1,215 @@ +extern crate collenchyma_nn as co_nn; +extern crate collenchyma as co; + +#[cfg(test)] +mod nn_spec { + + use co::backend::{Backend, BackendConfig}; + use co::framework::IFramework; + use co::frameworks::{Cuda, Native}; + use co_nn::plugin::*; + use 
co::memory::MemoryType; + use co::tensor::SharedTensor; + use co::plugin::numeric_helpers::{cast, Float}; + + fn get_native_backend() -> Backend { + let framework = Native::new(); + let hardwares = framework.hardwares(); + let backend_config = BackendConfig::new(framework, hardwares); + Backend::new(backend_config).unwrap() + } + + fn get_cuda_backend() -> Backend { + let framework = Cuda::new(); + let hardwares = framework.hardwares(); + let backend_config = BackendConfig::new(framework, hardwares); + Backend::new(backend_config).unwrap() + } + + fn write_to_memory(mem: &mut MemoryType, data: &[T]) { + if let &mut MemoryType::Native(ref mut mem) = mem { + let mut mem_buffer = mem.as_mut_slice::(); + for (index, datum) in data.iter().enumerate() { + mem_buffer[index] = *datum; + } + } + } + + fn get_activation_memory(backend: &Backend, native: &Backend) -> (SharedTensor, SharedTensor){ + let val = cast::(1f64).unwrap(); + let val2 = cast::(2f64).unwrap(); + let mut x = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); + x.add_device(native.device()).unwrap(); + x.sync(native.device()).unwrap(); + write_to_memory(x.get_mut(native.device()).unwrap(), &[val, val, val2]); + x.sync(backend.device()).unwrap(); + + let mut result = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); + result.add_device(native.device()).unwrap(); + + (x, result) + } + + fn get_activation_grad_memory(backend: &Backend, native: &Backend) -> (SharedTensor, SharedTensor, SharedTensor, SharedTensor){ + let val = cast::(1f64).unwrap(); + let val2 = cast::(2f64).unwrap(); + let mut x = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); + x.add_device(native.device()).unwrap(); + x.sync(native.device()).unwrap(); + write_to_memory(x.get_mut(native.device()).unwrap(), &[val, val, val2]); + x.sync(backend.device()).unwrap(); + + let mut x_diff = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); + x_diff.add_device(native.device()).unwrap(); + x_diff.sync(native.device()).unwrap(); + write_to_memory(x_diff.get_mut(native.device()).unwrap(), &[val, val, val2]); + x_diff.sync(backend.device()).unwrap(); + + let mut result = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); + result.add_device(native.device()).unwrap(); + result.sync(native.device()).unwrap(); + write_to_memory(result.get_mut(native.device()).unwrap(), &[val, val, val2]); + result.sync(backend.device()).unwrap(); + + let mut result_diff = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); + result_diff.add_device(native.device()).unwrap(); + + (x, x_diff, result, result_diff) + } + + #[test] + fn it_computes_correct_relu_on_cuda_for_f32() { + let backend = get_cuda_backend(); + let native = get_native_backend(); + let (mut x, mut result) = get_activation_memory::(&backend, &native); + + match backend.relu(&mut x, &mut result) { + Ok(_) => { + result.sync(native.device()).unwrap(); + if let Some(mem) = result.get(native.device()).unwrap().as_native() { + assert_eq!(&[1f32, 1f32, 2f32], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn it_computes_correct_relu_on_cuda_for_f64() { + let backend = get_cuda_backend(); + let native = get_native_backend(); + let (mut x, mut result) = get_activation_memory::(&backend, &native); + + match backend.relu(&mut x, &mut result) { + Ok(_) => { + result.sync(native.device()).unwrap(); + if let Some(mem) = result.get(native.device()).unwrap().as_native() { + assert_eq!(&[1f64, 1f64, 2f64], mem.as_slice::()); + } + 
}, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn it_computes_correct_relu_on_cuda_for_f32_plain() { + let backend = get_cuda_backend(); + let native = get_native_backend(); + let (mut x, mut result) = get_activation_memory::(&backend, &native); + + match backend.relu_plain(&mut x, &mut result) { + Ok(_) => { + result.sync(native.device()).unwrap(); + if let Some(mem) = result.get(native.device()).unwrap().as_native() { + assert_eq!(&[1f32, 1f32, 2f32], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn it_computes_correct_relu_on_cuda_for_f64_plain() { + let backend = get_cuda_backend(); + let native = get_native_backend(); + let (mut x, mut result) = get_activation_memory::(&backend, &native); + + match backend.relu_plain(&mut x, &mut result) { + Ok(_) => { + result.sync(native.device()).unwrap(); + if let Some(mem) = result.get(native.device()).unwrap().as_native() { + assert_eq!(&[1f64, 1f64, 2f64], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn it_computes_correct_relu_grad_on_cuda_for_f32() { + let backend = get_cuda_backend(); + let native = get_native_backend(); + let (mut x, mut x_diff, mut result, mut result_diff) = get_activation_grad_memory::(&backend, &native); + + match backend.relu_grad(&mut x, &mut x_diff, &mut result, &mut result_diff) { + Ok(_) => { + result_diff.sync(native.device()).unwrap(); + if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() { + assert_eq!(&[1f32, 1f32, 2f32], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn it_computes_correct_relu_grad_on_cuda_for_f64() { + let backend = get_cuda_backend(); + let native = get_native_backend(); + let (mut x, mut x_diff, mut result, mut result_diff) = get_activation_grad_memory::(&backend, &native); + + match backend.relu_grad(&mut x, &mut x_diff, &mut result, &mut result_diff) { + Ok(_) => { + result_diff.sync(native.device()).unwrap(); + if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() { + assert_eq!(&[1f64, 1f64, 2f64], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn it_computes_correct_relu_grad_on_cuda_for_f32_plain() { + let backend = get_cuda_backend(); + let native = get_native_backend(); + let (mut x, mut x_diff, mut result, mut result_diff) = get_activation_grad_memory::(&backend, &native); + + match backend.relu_grad_plain(&mut x, &mut x_diff, &mut result, &mut result_diff) { + Ok(_) => { + result_diff.sync(native.device()).unwrap(); + if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() { + assert_eq!(&[1f32, 1f32, 2f32], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn it_computes_correct_relu_grad_on_cuda_for_f64_plain() { + let backend = get_cuda_backend(); + let native = get_native_backend(); + let (mut x, mut x_diff, mut result, mut result_diff) = get_activation_grad_memory::(&backend, &native); + + match backend.relu_grad_plain(&mut x, &mut x_diff, &mut result, &mut result_diff) { + Ok(_) => { + result_diff.sync(native.device()).unwrap(); + if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() { + assert_eq!(&[1f64, 1f64, 2f64], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } +} diff --git a/tests/sigmoid_specs.rs b/tests/sigmoid_specs.rs new 
file mode 100644 index 0000000..4b6c918 --- /dev/null +++ b/tests/sigmoid_specs.rs @@ -0,0 +1,215 @@ +extern crate collenchyma_nn as co_nn; +extern crate collenchyma as co; + +#[cfg(test)] +mod nn_spec { + + use co::backend::{Backend, BackendConfig}; + use co::framework::IFramework; + use co::frameworks::{Cuda, Native}; + use co_nn::plugin::*; + use co::memory::MemoryType; + use co::tensor::SharedTensor; + use co::plugin::numeric_helpers::{cast, Float}; + + fn get_native_backend() -> Backend { + let framework = Native::new(); + let hardwares = framework.hardwares(); + let backend_config = BackendConfig::new(framework, hardwares); + Backend::new(backend_config).unwrap() + } + + fn get_cuda_backend() -> Backend { + let framework = Cuda::new(); + let hardwares = framework.hardwares(); + let backend_config = BackendConfig::new(framework, hardwares); + Backend::new(backend_config).unwrap() + } + + fn write_to_memory(mem: &mut MemoryType, data: &[T]) { + if let &mut MemoryType::Native(ref mut mem) = mem { + let mut mem_buffer = mem.as_mut_slice::(); + for (index, datum) in data.iter().enumerate() { + mem_buffer[index] = *datum; + } + } + } + + fn get_activation_memory(backend: &Backend, native: &Backend) -> (SharedTensor, SharedTensor){ + let val = cast::(1f64).unwrap(); + let val2 = cast::(2f64).unwrap(); + let mut x = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); + x.add_device(native.device()).unwrap(); + x.sync(native.device()).unwrap(); + write_to_memory(x.get_mut(native.device()).unwrap(), &[val, val, val2]); + x.sync(backend.device()).unwrap(); + + let mut result = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); + result.add_device(native.device()).unwrap(); + + (x, result) + } + + fn get_activation_grad_memory(backend: &Backend, native: &Backend) -> (SharedTensor, SharedTensor, SharedTensor, SharedTensor){ + let val = cast::(1f64).unwrap(); + let val2 = cast::(2f64).unwrap(); + let mut x = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); + x.add_device(native.device()).unwrap(); + x.sync(native.device()).unwrap(); + write_to_memory(x.get_mut(native.device()).unwrap(), &[val, val, val2]); + x.sync(backend.device()).unwrap(); + + let mut x_diff = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); + x_diff.add_device(native.device()).unwrap(); + x_diff.sync(native.device()).unwrap(); + write_to_memory(x_diff.get_mut(native.device()).unwrap(), &[val, val, val2]); + x_diff.sync(backend.device()).unwrap(); + + let mut result = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); + result.add_device(native.device()).unwrap(); + result.sync(native.device()).unwrap(); + write_to_memory(result.get_mut(native.device()).unwrap(), &[val, val, val2]); + result.sync(backend.device()).unwrap(); + + let mut result_diff = SharedTensor::::new(backend.device(), &(1, 1, 3)).unwrap(); + result_diff.add_device(native.device()).unwrap(); + + (x, x_diff, result, result_diff) + } + + #[test] + fn it_computes_correct_sigmoid_on_cuda_for_f32() { + let backend = get_cuda_backend(); + let native = get_native_backend(); + let (mut x, mut result) = get_activation_memory::(&backend, &native); + + match backend.sigmoid(&mut x, &mut result) { + Ok(_) => { + result.sync(native.device()).unwrap(); + if let Some(mem) = result.get(native.device()).unwrap().as_native() { + assert_eq!(&[0.7310585786f32, 0.7310586f32, 0.880797f32], mem.as_slice::()); + } + }, + Err(err) => { println!("{:?}", err); assert!(false) } + } + } + + #[test] + fn 
it_computes_correct_sigmoid_on_cuda_for_f64() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut result) = get_activation_memory::<f64>(&backend, &native);
+
+        match backend.sigmoid(&mut x, &mut result) {
+            Ok(_) => {
+                result.sync(native.device()).unwrap();
+                if let Some(mem) = result.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0.7310585786300049f64, 0.7310585786300049f64, 0.8807970779778823f64], mem.as_slice::<f64>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_sigmoid_on_cuda_for_f32_plain() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut result) = get_activation_memory::<f32>(&backend, &native);
+
+        match backend.sigmoid_plain(&mut x, &mut result) {
+            Ok(_) => {
+                result.sync(native.device()).unwrap();
+                if let Some(mem) = result.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0.7310585786f32, 0.7310586f32, 0.880797f32], mem.as_slice::<f32>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_sigmoid_on_cuda_for_f64_plain() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut result) = get_activation_memory::<f64>(&backend, &native);
+
+        match backend.sigmoid_plain(&mut x, &mut result) {
+            Ok(_) => {
+                result.sync(native.device()).unwrap();
+                if let Some(mem) = result.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0.7310585786300049f64, 0.7310585786300049f64, 0.8807970779778823f64], mem.as_slice::<f64>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_sigmoid_grad_on_cuda_for_f32() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut x_diff, mut result, mut result_diff) = get_activation_grad_memory::<f32>(&backend, &native);
+
+        match backend.sigmoid_grad(&mut x, &mut x_diff, &mut result, &mut result_diff) {
+            Ok(_) => {
+                result_diff.sync(native.device()).unwrap();
+                if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0f32, 0f32, -4f32], mem.as_slice::<f32>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_sigmoid_grad_on_cuda_for_f64() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut x_diff, mut result, mut result_diff) = get_activation_grad_memory::<f64>(&backend, &native);
+
+        match backend.sigmoid_grad(&mut x, &mut x_diff, &mut result, &mut result_diff) {
+            Ok(_) => {
+                result_diff.sync(native.device()).unwrap();
+                if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0f64, 0f64, -4f64], mem.as_slice::<f64>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_sigmoid_grad_on_cuda_for_f32_plain() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut x_diff, mut result, mut result_diff) = get_activation_grad_memory::<f32>(&backend, &native);
+
+        match backend.sigmoid_grad_plain(&mut x, &mut x_diff, &mut result, &mut result_diff) {
+            Ok(_) => {
+                result_diff.sync(native.device()).unwrap();
+                if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0f32, 0f32, -4f32], mem.as_slice::<f32>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_sigmoid_grad_on_cuda_for_f64_plain() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut x_diff, mut result, mut result_diff) = get_activation_grad_memory::<f64>(&backend, &native);
+
+        match backend.sigmoid_grad_plain(&mut x, &mut x_diff, &mut result, &mut result_diff) {
+            Ok(_) => {
+                result_diff.sync(native.device()).unwrap();
+                if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0f64, 0f64, -4f64], mem.as_slice::<f64>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+}
diff --git a/tests/tanh_specs.rs b/tests/tanh_specs.rs
new file mode 100644
index 0000000..15e0a89
--- /dev/null
+++ b/tests/tanh_specs.rs
@@ -0,0 +1,215 @@
+extern crate collenchyma_nn as co_nn;
+extern crate collenchyma as co;
+
+#[cfg(test)]
+mod nn_spec {
+
+    use co::backend::{Backend, BackendConfig};
+    use co::framework::IFramework;
+    use co::frameworks::{Cuda, Native};
+    use co_nn::plugin::*;
+    use co::memory::MemoryType;
+    use co::tensor::SharedTensor;
+    use co::plugin::numeric_helpers::{cast, Float};
+
+    fn get_native_backend() -> Backend<Native> {
+        let framework = Native::new();
+        let hardwares = framework.hardwares();
+        let backend_config = BackendConfig::new(framework, hardwares);
+        Backend::new(backend_config).unwrap()
+    }
+
+    fn get_cuda_backend() -> Backend<Cuda> {
+        let framework = Cuda::new();
+        let hardwares = framework.hardwares();
+        let backend_config = BackendConfig::new(framework, hardwares);
+        Backend::new(backend_config).unwrap()
+    }
+
+    fn write_to_memory<T: Copy>(mem: &mut MemoryType, data: &[T]) {
+        if let &mut MemoryType::Native(ref mut mem) = mem {
+            let mut mem_buffer = mem.as_mut_slice::<T>();
+            for (index, datum) in data.iter().enumerate() {
+                mem_buffer[index] = *datum;
+            }
+        }
+    }
+
+    fn get_activation_memory<T: Float>(backend: &Backend<Cuda>, native: &Backend<Native>) -> (SharedTensor<T>, SharedTensor<T>) {
+        let val = cast::<f64, T>(1f64).unwrap();
+        let val2 = cast::<f64, T>(2f64).unwrap();
+        let mut x = SharedTensor::<T>::new(backend.device(), &(1, 1, 3)).unwrap();
+        x.add_device(native.device()).unwrap();
+        x.sync(native.device()).unwrap();
+        write_to_memory(x.get_mut(native.device()).unwrap(), &[val, val, val2]);
+        x.sync(backend.device()).unwrap();
+
+        let mut result = SharedTensor::<T>::new(backend.device(), &(1, 1, 3)).unwrap();
+        result.add_device(native.device()).unwrap();
+
+        (x, result)
+    }
+
+    fn get_activation_grad_memory<T: Float>(backend: &Backend<Cuda>, native: &Backend<Native>) -> (SharedTensor<T>, SharedTensor<T>, SharedTensor<T>, SharedTensor<T>) {
+        let val = cast::<f64, T>(1f64).unwrap();
+        let val2 = cast::<f64, T>(2f64).unwrap();
+        let mut x = SharedTensor::<T>::new(backend.device(), &(1, 1, 3)).unwrap();
+        x.add_device(native.device()).unwrap();
+        x.sync(native.device()).unwrap();
+        write_to_memory(x.get_mut(native.device()).unwrap(), &[val, val, val2]);
+        x.sync(backend.device()).unwrap();
+
+        let mut x_diff = SharedTensor::<T>::new(backend.device(), &(1, 1, 3)).unwrap();
+        x_diff.add_device(native.device()).unwrap();
+        x_diff.sync(native.device()).unwrap();
+        write_to_memory(x_diff.get_mut(native.device()).unwrap(), &[val, val, val2]);
+        x_diff.sync(backend.device()).unwrap();
+
+        let mut result = SharedTensor::<T>::new(backend.device(), &(1, 1, 3)).unwrap();
+        result.add_device(native.device()).unwrap();
+        result.sync(native.device()).unwrap();
+        write_to_memory(result.get_mut(native.device()).unwrap(), &[val, val, val2]);
+        result.sync(backend.device()).unwrap();
+
+        let mut result_diff = SharedTensor::<T>::new(backend.device(), &(1, 1, 3)).unwrap();
+        result_diff.add_device(native.device()).unwrap();
+
+        (x, x_diff, result, result_diff)
+    }
+
+    #[test]
+    fn it_computes_correct_tanh_on_cuda_for_f32() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut result) = get_activation_memory::<f32>(&backend, &native);
+
+        match backend.tanh(&mut x, &mut result) {
+            Ok(_) => {
+                result.sync(native.device()).unwrap();
+                if let Some(mem) = result.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0.7615942f32, 0.7615942f32, 0.9640276f32], mem.as_slice::<f32>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_tanh_on_cuda_for_f64() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut result) = get_activation_memory::<f64>(&backend, &native);
+
+        match backend.tanh(&mut x, &mut result) {
+            Ok(_) => {
+                result.sync(native.device()).unwrap();
+                if let Some(mem) = result.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0.7615941559557649f64, 0.7615941559557649f64, 0.9640275800758169f64], mem.as_slice::<f64>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_tanh_on_cuda_for_f32_plain() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut result) = get_activation_memory::<f32>(&backend, &native);
+
+        match backend.tanh_plain(&mut x, &mut result) {
+            Ok(_) => {
+                result.sync(native.device()).unwrap();
+                if let Some(mem) = result.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0.7615942f32, 0.7615942f32, 0.9640276f32], mem.as_slice::<f32>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_tanh_on_cuda_for_f64_plain() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut result) = get_activation_memory::<f64>(&backend, &native);
+
+        match backend.tanh_plain(&mut x, &mut result) {
+            Ok(_) => {
+                result.sync(native.device()).unwrap();
+                if let Some(mem) = result.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0.7615941559557649f64, 0.7615941559557649f64, 0.9640275800758169f64], mem.as_slice::<f64>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_tanh_grad_on_cuda_for_f32() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut x_diff, mut result, mut result_diff) = get_activation_grad_memory::<f32>(&backend, &native);
+
+        match backend.tanh_grad(&mut x, &mut x_diff, &mut result, &mut result_diff) {
+            Ok(_) => {
+                result_diff.sync(native.device()).unwrap();
+                if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0f32, 0f32, -6f32], mem.as_slice::<f32>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_tanh_grad_on_cuda_for_f64() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut x_diff, mut result, mut result_diff) = get_activation_grad_memory::<f64>(&backend, &native);
+
+        match backend.tanh_grad(&mut x, &mut x_diff, &mut result, &mut result_diff) {
+            Ok(_) => {
+                result_diff.sync(native.device()).unwrap();
+                if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0f64, 0f64, -6f64], mem.as_slice::<f64>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_tanh_grad_on_cuda_for_f32_plain() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut x_diff, mut result, mut result_diff) = get_activation_grad_memory::<f32>(&backend, &native);
+
+        match backend.tanh_grad_plain(&mut x, &mut x_diff, &mut result, &mut result_diff) {
+            Ok(_) => {
+                result_diff.sync(native.device()).unwrap();
+                if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0f32, 0f32, -6f32], mem.as_slice::<f32>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+
+    #[test]
+    fn it_computes_correct_tanh_grad_on_cuda_for_f64_plain() {
+        let backend = get_cuda_backend();
+        let native = get_native_backend();
+        let (mut x, mut x_diff, mut result, mut result_diff) = get_activation_grad_memory::<f64>(&backend, &native);
+
+        match backend.tanh_grad_plain(&mut x, &mut x_diff, &mut result, &mut result_diff) {
+            Ok(_) => {
+                result_diff.sync(native.device()).unwrap();
+                if let Some(mem) = result_diff.get(native.device()).unwrap().as_native() {
+                    assert_eq!(&[0f64, 0f64, -6f64], mem.as_slice::<f64>());
+                }
+            },
+            Err(err) => { println!("{:?}", err); assert!(false) }
+        }
+    }
+}
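Note (not part of the patch): the gradient fixtures above appear to follow from cuDNN's activation backward formulas, dx = dy * y * (1 - y) for sigmoid and dx = dy * (1 - y^2) for tanh, evaluated on the test inputs y = dy = [1, 1, 2]. A minimal standalone Rust sketch under that assumption, reproducing the expected [0, 0, -4] and [0, 0, -6] values:

// Illustrative sketch only; assumes cuDNN's documented activation backward
// formulas and the fixture values used in the tests above.
fn main() {
    let y = [1.0f64, 1.0, 2.0];   // activation output fed into the backward pass
    let dy = [1.0f64, 1.0, 2.0];  // incoming gradient

    // sigmoid backward: dx = dy * y * (1 - y)
    let sigmoid_dx: Vec<f64> = y.iter().zip(&dy).map(|(y, dy)| dy * y * (1.0 - y)).collect();
    // tanh backward: dx = dy * (1 - y^2)
    let tanh_dx: Vec<f64> = y.iter().zip(&dy).map(|(y, dy)| dy * (1.0 - y * y)).collect();

    assert_eq!(sigmoid_dx, vec![0.0, 0.0, -4.0]); // matches the [0, 0, -4] fixture
    assert_eq!(tanh_dx, vec![0.0, 0.0, -6.0]);    // matches the [0, 0, -6] fixture
    println!("sigmoid dx = {:?}, tanh dx = {:?}", sigmoid_dx, tanh_dx);
}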