Skip to content

Commit

Permalink
feat/features: add framework feature groups
Browse files Browse the repository at this point in the history
  • Loading branch information
hobofan committed Dec 9, 2015
1 parent 346a6d5 commit 74c86c4
Show file tree
Hide file tree
Showing 10 changed files with 410 additions and 355 deletions.
8 changes: 4 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@ before_script:
export PATH=$HOME/.local/bin:$PATH
script:
- |
travis-cargo build &&
travis-cargo test &&
travis-cargo bench &&
travis-cargo doc
travis-cargo build -- --no-default-features &&
travis-cargo test -- --no-default-features &&
travis-cargo bench -- --no-default-features &&
travis-cargo doc -- --no-default-features
addons:
apt:
packages:
Expand Down
18 changes: 13 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,34 @@
name = "collenchyma-blas"
description = "Collenchyma library for full BLAS support"
version = "0.1.0"
authors = ["MichaelHirn <[email protected]>"]
authors = ["MichaelHirn <[email protected]>",
"Maximilian Goisser <[email protected]>"]

repository = "https://github.com/autumnai/collenchyma-blas"
homepage = "https://github.com/autumnai/collenchyma-blas"
documentation = "https://autumnai.github.io/collenchyma-blas"
readme = "README.md"

keywords = ["blas", "collenchyma", "computation", "hpc"]
keywords = ["blas", "collenchyma", "computation", "hpc", "plugin"]
license = "MIT"

[dependencies]
collenchyma = "0.0.4"
rblas = "0.0.11"
collenchyma = { version = "0.0.5", default-features = false }

rblas = { version = "0.0.11", optional = true }

clippy = { version = "0.0.27", optional = true }

[dev-dependencies]

rand = "0.3"

[features]
default = ["native", "cuda", "opencl"]
native = ["collenchyma/native", "rblas"]
cuda = ["collenchyma/cuda"]
opencl = ["collenchyma/opencl"]

travis = ["native"]
dev = []
travis = []
lint = ["clippy"]
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ For more information,
If you're using Cargo, just add collenchyma-blas to your Cargo.toml:

[dependencies]
collenchyma = "X"
collenchyma = "0.0.5"
collenchyma-blas = "0.1.0"

If you're using [Cargo Edit][cargo-edit], you can call:
Expand Down
31 changes: 31 additions & 0 deletions benches/rblas_overhead.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,37 @@ fn bench_1000_dot_100_collenchyma_profile(
});
}

#[bench]
fn bench_1000_dot_100_collenchyma_plain(b: &mut Bencher) {
    // Draw two random 100-element f32 vectors to serve as dot-product operands.
    let mut rng = thread_rng();
    let host_a: Vec<f32> = rng.gen_iter::<f32>().take(100).collect();
    let host_b: Vec<f32> = rng.gen_iter::<f32>().take(100).collect();

    let backend = backend();
    // Allocate shared tensors on the backend's device: two rank-1 inputs and a
    // rank-0 output that receives the scalar result.
    let shared_a = &mut SharedMemory::<f32, _>::new(backend.device(), TensorR1::new([100])).unwrap();
    let shared_b = &mut SharedMemory::<f32, _>::new(backend.device(), TensorR1::new([100])).unwrap();
    let shared_res = &mut SharedMemory::<f32, _>::new(backend.device(), TensorR0::new()).unwrap();
    // Copy the host data into the native-side buffers of the shared tensors.
    shared_a.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&host_a);
    shared_b.get_mut(backend.device()).unwrap().as_mut_native().unwrap().as_mut_slice().clone_from_slice(&host_b);
    // One warm-up call so any one-time setup cost stays out of the measured loop.
    let _ = backend.dot(shared_a, shared_b, shared_res);
    bench_1000_dot_100_collenchyma_plain_profile(b, &backend, shared_a, shared_b, shared_res);
}

#[inline(never)]
fn bench_1000_dot_100_collenchyma_plain_profile(
    b: &mut Bencher,
    backend: &Backend<Native>,
    shared_a: &mut SharedMemory<f32, TensorR1>,
    shared_b: &mut SharedMemory<f32, TensorR1>,
    shared_res: &mut SharedMemory<f32, TensorR0>
) {
    // Measured body: 1000 `dot_plain` calls per bench iteration; the result is
    // deliberately discarded since only call overhead is of interest here.
    // `#[inline(never)]` keeps this frame visible as its own symbol in profiles.
    b.iter(|| {
        (0..1000).for_each(|_| {
            let _ = backend.dot_plain(shared_a, shared_b, shared_res);
        });
    });
}

#[bench]
fn bench_100_dot_1000_rblas(b: &mut Bencher) {
let mut rng = thread_rng();
Expand Down
204 changes: 102 additions & 102 deletions src/frameworks/cuda.rs
Original file line number Diff line number Diff line change
@@ -1,103 +1,103 @@
//! Provides BLAS for a CUDA backend.
use ::operation::*;
use ::binary::*;
use ::library::*;
use collenchyma::backend::Backend;
use collenchyma::device::DeviceType;
use collenchyma::memory::MemoryType;
use collenchyma::plugin::Error;
use collenchyma::frameworks::cuda::{Function, Module, Cuda};

/// Binds the BLAS operations to the CUDA `Module` binary.
///
/// Only `axpy`, `dot` and `scale` are wired to CUDA functions so far; the
/// remaining operations are stubs that panic via `unimplemented!()`.
impl IBlasBinary<f32> for Module {
type Asum = Function;
type Axpy = Function;
type Copy = Function;
type Dot = Function;
type Nrm2 = Function;

type Scale = Function;
type Swap = Function;

fn asum(&self) -> Self::Asum {
// Not yet bound to a CUDA function — calling this panics.
unimplemented!()
}

fn axpy(&self) -> Self::Axpy {
// Hands out the module's pre-loaded axpy function.
self.blas_axpy
}

fn copy(&self) -> Self::Copy {
// Not yet bound to a CUDA function — calling this panics.
unimplemented!()
}

fn dot(&self) -> Self::Dot {
// Hands out the module's pre-loaded dot function.
self.blas_dot
}

fn nrm2(&self) -> Self::Nrm2 {
// Not yet bound to a CUDA function — calling this panics.
unimplemented!()
}

fn scale(&self) -> Self::Scale {
// Hands out the module's pre-loaded scale function.
self.blas_scale
}

fn swap(&self) -> Self::Swap {
// Not yet bound to a CUDA function — calling this panics.
unimplemented!()
}
}

// All CUDA-side compute implementations below are placeholders: each satisfies
// its operation trait so the plugin type-checks, but invoking any of them
// panics via `unimplemented!()` until the actual kernels are dispatched here.

impl IOperationAsum<f32> for Function {
fn compute(&self, x: &MemoryType, result: &mut MemoryType) -> Result<(), Error> {
unimplemented!()
}
}

impl IOperationAxpy<f32> for Function {
fn compute(&self, a: &MemoryType, x: &MemoryType, y: &mut MemoryType) -> Result<(), Error> {
unimplemented!()
}
}

impl IOperationCopy<f32> for Function {
fn compute(&self, x: &MemoryType, y: &mut MemoryType) -> Result<(), Error> {
unimplemented!()
}
}

impl IOperationDot<f32> for Function {
fn compute(&self, x: &MemoryType, y: &MemoryType, result: &mut MemoryType) -> Result<(), Error> {
unimplemented!()
}
}

impl IOperationNrm2<f32> for Function {
fn compute(&self, x: &MemoryType, result: &mut MemoryType) -> Result<(), Error> {
unimplemented!()
}
}

impl IOperationScale<f32> for Function {
fn compute(&self, a: &MemoryType, x: &mut MemoryType) -> Result<(), Error> {
unimplemented!()
}
}

impl IOperationSwap<f32> for Function {
fn compute(&self, x: &mut MemoryType, y: &mut MemoryType) -> Result<(), Error> {
unimplemented!()
}
}

/// Exposes the BLAS plugin on the CUDA backend.
///
/// `binary()` and `device()` appear self-recursive but resolve to the
/// backend's inherent accessors of the same name (inherent methods take
/// precedence over trait methods) — presumably delegation by design;
/// NOTE(review): confirm the inherent methods exist on `Backend`.
impl IBlas<f32> for Backend<Cuda> {
type B = Module;

fn binary(&self) -> &Self::B {
self.binary()
}

fn device(&self) -> &DeviceType {
self.device()
}
}
// #![allow(unused_variables)]
// use ::operation::*;
// use ::binary::*;
// use ::library::*;
// use collenchyma::backend::Backend;
// use collenchyma::device::DeviceType;
// use collenchyma::memory::MemoryType;
// use collenchyma::plugin::Error;
// use collenchyma::frameworks::cuda::{Function, Module, Cuda};
//
// impl IBlasBinary<f32> for Module {
// type Asum = Function;
// type Axpy = Function;
// type Copy = Function;
// type Dot = Function;
// type Nrm2 = Function;
//
// type Scale = Function;
// type Swap = Function;
//
// fn asum(&self) -> Self::Asum {
// unimplemented!()
// }
//
// fn axpy(&self) -> Self::Axpy {
// self.blas_axpy
// }
//
// fn copy(&self) -> Self::Copy {
// unimplemented!()
// }
//
// fn dot(&self) -> Self::Dot {
// self.blas_dot
// }
//
// fn nrm2(&self) -> Self::Nrm2 {
// unimplemented!()
// }
//
// fn scale(&self) -> Self::Scale {
// self.blas_scale
// }
//
// fn swap(&self) -> Self::Swap {
// unimplemented!()
// }
// }
//
// impl IOperationAsum<f32> for Function {
// fn compute(&self, x: &MemoryType, result: &mut MemoryType) -> Result<(), Error> {
// unimplemented!()
// }
// }
//
// impl IOperationAxpy<f32> for Function {
// fn compute(&self, a: &MemoryType, x: &MemoryType, y: &mut MemoryType) -> Result<(), Error> {
// unimplemented!()
// }
// }
//
// impl IOperationCopy<f32> for Function {
// fn compute(&self, x: &MemoryType, y: &mut MemoryType) -> Result<(), Error> {
// unimplemented!()
// }
// }
//
// impl IOperationDot<f32> for Function {
// fn compute(&self, x: &MemoryType, y: &MemoryType, result: &mut MemoryType) -> Result<(), Error> {
// unimplemented!()
// }
// }
//
// impl IOperationNrm2<f32> for Function {
// fn compute(&self, x: &MemoryType, result: &mut MemoryType) -> Result<(), Error> {
// unimplemented!()
// }
// }
//
// impl IOperationScale<f32> for Function {
// fn compute(&self, a: &MemoryType, x: &mut MemoryType) -> Result<(), Error> {
// unimplemented!()
// }
// }
//
// impl IOperationSwap<f32> for Function {
// fn compute(&self, x: &mut MemoryType, y: &mut MemoryType) -> Result<(), Error> {
// unimplemented!()
// }
// }
//
// impl IBlas<f32> for Backend<Cuda> {
// type B = Module;
//
// fn binary(&self) -> &Self::B {
// self.binary()
// }
//
// fn device(&self) -> &DeviceType {
// self.device()
// }
// }
5 changes: 4 additions & 1 deletion src/frameworks/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
//! Provides the specific Framework implementations for the Library Operations.
#[cfg(feature = "native")]
mod native;
mod opencl;
#[cfg(feature = "cuda")]
mod cuda;
#[cfg(feature = "opencl")]
mod opencl;
2 changes: 1 addition & 1 deletion src/frameworks/native.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use collenchyma::backend::Backend;
use collenchyma::memory::MemoryType;
use collenchyma::frameworks::native::{Native, Function, Binary};
use collenchyma::plugin::Error;
use blas::{Asum, Axpy, Copy, Dot, Nrm2, Scal, Swap};
use rblas::{Asum, Axpy, Copy, Dot, Nrm2, Scal, Swap};

macro_rules! impl_binary(($($t: ident), +) => (
$(
Expand Down
Loading

0 comments on commit 74c86c4

Please sign in to comment.