Skip to content

Commit

Permalink
Merge pull request #151 from guacamoleo/master
Browse files Browse the repository at this point in the history
promoting develop to master
  • Loading branch information
Timmy committed Oct 16, 2015
2 parents 9731ea2 + 0482e1c commit 8b5f7a0
Show file tree
Hide file tree
Showing 183 changed files with 32,985 additions and 1,192 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,6 @@

# flags.txt file
*flags.txt

# vim temp files
.*.swp
168 changes: 124 additions & 44 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,54 +1,134 @@
# Ubuntu name decoder ring; https://en.wikipedia.org/wiki/List_of_Ubuntu_releases
# Ubuntu 12.04 LTS (Precise Pangolin) <== Travis CI VM image
# Ubuntu 12.10 (Quantal Quetzal)
# Ubuntu 13.04 (Raring Ringtail)
# Ubuntu 13.10 (Saucy Salamander)
# Ubuntu 14.04 LTS (Trusty Tahr)
# Ubuntu 14.10 (Utopic Unicorn)
# Ubuntu 15.04 (Vivid Vervet)
# Ubuntu 15.10 (Wily Werewolf)
# Ubuntu 16.04 LTS (Xenial Xerus)

# language: instructs travis what compilers && environment to set up in build matrix
language: cpp

# sudo: false instructs travis to build our project in a docker VM (faster)
# Can not yet install fglrx packages with 'false'
sudo: required # false

# os: expands the build matrix to include multiple os's
# disable linux, as we get sporadic failures on building boost, needs investigation
os:
- linux
- osx

# compiler: expands the build matrix to include multiple compilers (per os)
compiler:
- gcc
- clang

addons:
# apt: is disabled on osx builds
# apt: needed by docker framework to install project dependencies without
# sudo. Apt uses published Ubuntu PPAs from https://launchpad.net/
# https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json
apt:
sources:
# ubuntu-toolchain-r-test contains newer versions of gcc to install
# - ubuntu-toolchain-r-test
# llvm-toolchain-precise-3.6 contains newer versions of clang to install
# - llvm-toolchain-precise-3.6
# kubuntu-backports contains newer versions of cmake to install
- kubuntu-backports
# boost-latest contains boost v1.55
- boost-latest
packages:
# g++-4.8 is minimum version considered to be the first good c++11 gnu compiler
# - g++-4.8
# - clang-3.6
# We require v2.8.12 minimum
- cmake
# I'm finding problems between pre-compiled versions of boost ublas, with gtest
# stl_algobase.h: error: no matching function for call to swap()
- libboost-program-options1.55-dev
# - libboost-serialization1.55-dev
# - libboost-filesystem1.55-dev
# - libboost-system1.55-dev
# - libboost-regex1.55-dev
# The package opencl-headers on 'precise' only installs v1.1 cl headers; uncomment for 'trusty' or greater
# - opencl-headers
# Uncomment one of the following when fglrx modules are added to the apt whitelist
# - fglrx
# - fglrx=2:8.960-0ubuntu1
# - fglrx=2:13.350.1-0ubuntu0.0.1

# env: specifies additional global variables to define per row in build matrix
env:
global:
- CLBLAS_ROOT=${TRAVIS_BUILD_DIR}/bin/make/release

# The following filters our build matrix; we are interested in linux-gcc & osx-clang
matrix:
exclude:
- os: linux
compiler: clang
- os: osx
compiler: gcc

before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq fglrx libboost-program-options-dev
# Uncomment below to help verify the installs above work
# - ls -la /usr/lib/libboost*
# - ls -la /usr/include/boost
# Remove the following linux clause when fglrx can be installed with sudo: false
- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
sudo apt-get update -qq &&
sudo apt-get install -qq fglrx=2:13.350.1-0ubuntu0.0.1;
fi
- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
export OPENCL_ROOT="${TRAVIS_BUILD_DIR}/opencl-headers";
fi
- if [ ${TRAVIS_OS_NAME} == "osx" ]; then
brew update;
brew outdated boost || brew upgrade boost;
brew outdated cmake || brew upgrade cmake;
fi
# - if [ ${CXX} = "g++" ]; then export CXX="g++-4.8" CC="gcc-4.8"; fi
- cmake --version;
- ${CC} --version;
- ${CXX} --version;

install:
# 'Precise' only distributes v1.1 opencl headers; download 1.2 headers from khronos website
# Remove when the travis VM upgrades to 'trusty' or beyond
- if [ ${TRAVIS_OS_NAME} == "linux" ]; then
mkdir -p ${OPENCL_ROOT}/include/CL;
pushd ${OPENCL_ROOT}/include/CL;
wget -w 1 -r -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/;
popd;
fi
# osx image does not contain cl.hpp file; download from Khronos
# - if [ ${TRAVIS_OS_NAME} == "osx" ]; then
# pushd /System/Library/Frameworks/OpenCL.framework/Versions/A/Headers/;
# sudo wget -w 1 -np -nd -nv -A h,hpp https://www.khronos.org/registry/cl/api/1.2/cl.hpp;
# popd;
# fi

# Use before_script: to run configure steps
before_script:
- cd ${TRAVIS_BUILD_DIR}
# download OpenCL 1.2 header files since Travis CI only provides 1.1
- mkdir -p OpenCLInclude/CL
- cd OpenCLInclude/CL
#- wget -r --no-parent -nH --cut-dirs=4 --reject="index.html*" https://www.khronos.org/registry/cl/api/1.2/
- wget https://www.khronos.org/registry/cl/api/1.2/cl.h
- wget https://www.khronos.org/registry/cl/api/1.2/cl.hpp
- wget https://www.khronos.org/registry/cl/api/1.2/cl_d3d10.h
- wget https://www.khronos.org/registry/cl/api/1.2/cl_d3d11.h
- wget https://www.khronos.org/registry/cl/api/1.2/cl_dx9_media_sharing.h
- wget https://www.khronos.org/registry/cl/api/1.2/cl_egl.h
- wget https://www.khronos.org/registry/cl/api/1.2/cl_ext.h
- wget https://www.khronos.org/registry/cl/api/1.2/cl_gl.h
- wget https://www.khronos.org/registry/cl/api/1.2/cl_gl_ext.h
- wget https://www.khronos.org/registry/cl/api/1.2/cl_platform.h
- wget https://www.khronos.org/registry/cl/api/1.2/opencl.h
- ls
- pwd
- cd ../..
- mkdir -p bin/clBLAS
- cd bin/clBLAS
- cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOPENCL_INCLUDE_DIRS:PATH=$PWD/../../OpenCLInclude -DCMAKE_INSTALL_PREFIX:PATH=$PWD/package ../../src

script:
- make install
# - ls -Rla package
# Run a simple test to validate that the build works; CPU device in a VM
- cd package/bin
- export LD_LIBRARY_PATH=${TRAVIS_BUILD_DIR}/bin/clBLAS/package/lib64:${LD_LIBRARY_PATH}
- ./clBLAS-client --cpu

after_success:
- cd ${TRAVIS_BUILD_DIR}/bin/clBLAS
- mkdir -p ${CLBLAS_ROOT}
- pushd ${CLBLAS_ROOT}
- cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOCL_VERSION=2.0 -DOPENCL_ROOT=${OPENCL_ROOT} ${TRAVIS_BUILD_DIR}/src

# use script: to execute build steps
script:
- make package

notifications:
email:
- [email protected]
on_success: change
on_failure: always

deploy:
provider: releases
prerelease: true
draft: true
skip_cleanup: true
api_key:
secure: MBkxtcfSk+4UvGRO+WRhmS86vIVzAs0LIF2sAtr/S+Ed+OdUAuhZypUsDXGWtK3mL55v9c8BZXefFfHfJqElcNmyHKwCptbCR/JiM8YBtjoy2/RW1NcJUZp+QuRlk23xPADj7QkPjv7dfrQUMitkLUXAD+uTmMe2l8gmlbhMrQqPBKhb+31FNv6Lmo6oa6GjbiGi7qjsrJc7uQjhppLam+M7BZbBALGbIqMIrb2BMDMMhBoDbb4zSKrSg3+krd3kKiCClJlK7xjIlyFXZ527ETQ+PMtIeQb0eJ3aQwa4caBRCm5BDzt8GnJ48S88EkynbQioCEE87ebcyOM7M+wfslW/Fm1Y86X5odIljkOmTNKoDvgLxc9vUCBtMyVHNIgZcToPdsrMsGxcHV+JtU3yVQVm6dnA5P/zG5bA+aBjsd7p7BdOE4fdhvZV5XRAk/wmiyWalF7hKJxHIiWAKknL+tpPDDUF+fHmDDsdf7yRDJBegNcKfw4+m19MIvLn9fbiNVCtwCAL1T4yWkIEpi4MRMDPtftmkZPbi6UwluOJUTeCeHe4en99Yu2haemNPqXs6rR0LlXGk31GQwzlrNfb+94F5tT2a4Ka4PsruA2NMW/IYCYEE5Gu7PihVDR031Fn9cdCU9kefUgyB07rJD6q/W+ljsU0osyg7VxyfMg8rkw=
file: ${CLBLAS_ROOT}/clBLAS-build/*.tar.gz
file_glob: true
on:
all_branches: true
tags: true
57 changes: 25 additions & 32 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
## Build Status
| Build branch | master | develop |
|-----|-----|-----|
| GCC/Clang x64 | [![Build Status](https://travis-ci.org/clMathLibraries/clBLAS.svg?branch=master)](https://travis-ci.org/clMathLibraries/clBLAS/branches) | [![Build Status](https://travis-ci.org/clMathLibraries/clBLAS.svg?branch=develop)](https://travis-ci.org/clMathLibraries/clBLAS/branches) |
| Visual Studio x64 | [![Build status](https://ci.appveyor.com/api/projects/status/v384bi6e8xv8nxjm/branch/master?svg=true)](https://ci.appveyor.com/project/kknox/clblas-5ph9i/branch/master)|[![Build status](https://ci.appveyor.com/api/projects/status/v384bi6e8xv8nxjm/branch/develop?svg=true)](https://ci.appveyor.com/project/kknox/clblas-5ph9i/branch/develop) |

clBLAS
=====
[![Build Status](https://travis-ci.org/clMathLibraries/clBLAS.png)](https://travis-ci.org/clMathLibraries/clBLAS)


This repository houses the code for the OpenCL™ BLAS portion of clMath.
The complete set of BLAS level 1, 2 & 3 routines is implemented. Please
see Netlib BLAS for the list of supported routines. In addition to GPU
Expand All @@ -20,30 +23,20 @@ library does generate and enqueue optimized OpenCL kernels, relieving
the user from the task of writing, optimizing and maintaining kernel
code themselves.

## clBLAS update notes 04/2015
- A subset of GEMM and TRSM can be off-line compiled for Hawaii, Bonaire and Tahiti device at compile-time. This feature
eliminates the overhead of calling clBuildProgram() at run-time.
- Off-line compilation can be done with the OpenCL 1.1, OpenCL 1.2 and OpenCL 2.0 runtimes. However, for better
performance OpenCL 2.0 is recommended. The library user can set "OCL_VERSION" in CMake to select the OpenCL version
the library is built for. It is the library user's responsibility to ensure compatible hardware and driver.
- Added flags_public.txt file that contains OpenCL compiler flags used by off-line compilation. The flags_public.txt
will only be loaded when OCL_VERSION is 2.0.
- User can off-line compile one or more supported device by selecting
OCL_OFFLINE_BUILD_BONAIRE_KERNEL
OCL_OFFLINE_BUILD_HAWII_KERNEL
OCL_OFFLINE_BUILD_TAHITI_KERNEL.
However, compiling for more than one device at a time might result in running out of heap memory. Thus, compiling for
one device at a time is recommended.
- User may also supply specific OpenCL compiler path with OCL_COMPILER_DIR or the library will load default OpenCL compiler.
- The minimum driver requirement for off-line compilation is 14.502.

## clBLAS update notes 09/2015

- Introducing [AutoGemm](http://github.com/clMathLibraries/clBLAS/wiki/AutoGemm)
- clBLAS's Gemm implementation has been comprehensively overhauled to use AutoGemm. AutoGemm is a suite of python scripts which generate optimized kernels and kernel selection logic, for all precisions, transposes, tile sizes and so on.
- CMake is configured to use AutoGemm for clBLAS so the build and usage experience of Gemm remains unchanged (only performance and maintainability have been improved). Kernel sources are generated at build time (not runtime) and can be configured within CMake to be pre-compiled at build time.
- clBLAS users with unique Gemm requirements can customize AutoGemm to their needs (such as non-default tile sizes for very small or very skinny matrices); see [AutoGemm](http://github.com/clMathLibraries/clBLAS/wiki/AutoGemm) documentation for details.


## clBLAS library user documentation

[Library and API documentation][] for developers is available online as
a GitHub Pages website

### Google Groups
## Google Groups

Two mailing lists have been created for the clMath projects:

Expand Down Expand Up @@ -108,10 +101,10 @@ The simple example below shows how to use clBLAS to compute an OpenCL accelerate
static const cl_float beta = 20;

static cl_float C[M*N] = {
11, 12, 13,
21, 22, 23,
31, 32, 33,
41, 42, 43,
11, 12, 13,
21, 22, 23,
31, 32, 33,
41, 42, 43,
};
static const size_t ldc = N; /* i.e. ldc = N */

Expand Down Expand Up @@ -155,13 +148,13 @@ The simple example below shows how to use clBLAS to compute an OpenCL accelerate
err = clEnqueueWriteBuffer( queue, bufC, CL_TRUE, 0,
M * N * sizeof( *C ), C, 0, NULL, NULL );

/* Call clBLAS extended function. Perform gemm for the lower right sub-matrices */
err = clblasSgemm( clblasRowMajor, clblasNoTrans, clblasNoTrans,
M, N, K,
alpha, bufA, 0, lda,
bufB, 0, ldb, beta,
bufC, 0, ldc,
1, &queue, 0, NULL, &event );
/* Call clBLAS extended function. Perform gemm for the lower right sub-matrices */
err = clblasSgemm( clblasRowMajor, clblasNoTrans, clblasNoTrans,
M, N, K,
alpha, bufA, 0, lda,
bufB, 0, ldb, beta,
bufC, 0, ldc,
1, &queue, 0, NULL, &event );

/* Wait for calculations to be finished. */
err = clWaitForEvents( 1, &event );
Expand Down
105 changes: 105 additions & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Appveyor OS list
# Windows Server 2012 R2 (x64) <== Appveyor default image
# Visual Studio 2015

# os: expands the build matrix to include multiple os's
os:
- Windows Server 2012

# platform: expands the build matrix to include multiple target platforms (per os)
platform:
- x64

configuration:
- Release

# Only clone the top level commit; don't bother with history
shallow_clone: true

# environment: specifies additional global variables to define per row in build matrix
environment:
global:
CLBLAS_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\nmake\\release"
OPENCL_ROOT: "%APPVEYOR_BUILD_FOLDER%\\bin\\opencl"
# BOOST_ROOT: "C:/Libraries/boost" # boost 1.56, 32-bit only
BOOST_ROOT: "C:\\Libraries\\boost_1_58_0"
OPENCL_REGISTRY: "https://www.khronos.org/registry/cl"

init:
- echo init step
- cmake --version
- C:\"Program Files (x86)"\"Microsoft Visual Studio 12.0"\VC\vcvarsall.bat %PLATFORM%
# Uncomment the following to display Remote Desktop connection details
# - ps: iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))

# We need to create an opencl import library that clblas can link against
# Vendor-based OpenCL packages are hard to use because of download size, registration requirements
# and poorly supported unattended installs
install:
- echo install step
- ps: mkdir $env:OPENCL_ROOT
- ps: pushd $env:OPENCL_ROOT
- ps: $opencl_registry = $env:OPENCL_REGISTRY
# This downloads the source to the example/demo icd library
- ps: wget $opencl_registry/specs/opencl-icd-1.2.11.0.tgz -OutFile opencl-icd-1.2.11.0.tgz
- ps: 7z x opencl-icd-1.2.11.0.tgz
- ps: 7z x opencl-icd-1.2.11.0.tar
- ps: mv .\icd\* .
# This downloads all the opencl header files
# The cmake build files expect a directory called inc
- ps: mkdir inc/CL
- ps: wget $opencl_registry/api/1.2/ | select -ExpandProperty links | where {$_.href -like "*.h*"} | select -ExpandProperty outerText | foreach{ wget $opencl_registry/api/1.2/$_ -OutFile inc/CL/$_ }
# - ps: dir; if( $lastexitcode -eq 0 ){ dir include/CL } else { Write-Output boom }
# Create the static import lib in a directory called lib, so findopencl() will find it
- ps: mkdir lib
- ps: pushd lib
- cmake -G "NMake Makefiles" ..
- nmake
- ps: popd
# Rename the inc directory to include, so FindOpencl() will find it
- ps: ren inc include
- ps: popd
- ps: popd

# before_build is used to run configure steps
before_build:
- echo before_build step
# Boost 1.58 is not installed in typical fashion, help FindBoost() find binary libs with BOOST_LIBRARYDIR
- ps: $env:BOOST_LIBRARYDIR = "$env:BOOST_ROOT/lib64-msvc-12.0"
- ps: mkdir $env:CLBLAS_ROOT
- ps: pushd $env:CLBLAS_ROOT
- cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=%CONFIGURATION% -DBUILD_TEST=OFF -DBUILD_CLIENT=ON -DOCL_VERSION=2.0 -DOPENCL_ROOT=%OPENCL_ROOT% %APPVEYOR_BUILD_FOLDER%/src

# build_script invokes the compiler
build_script:
- echo build_script step
- nmake package

after_build:
- echo after_build step
- ps: ls $env:CLBLAS_ROOT
- ps: mv $env:CLBLAS_ROOT\*.zip $env:APPVEYOR_BUILD_FOLDER

# Appveyor will save a copy of the package in its own storage
artifacts:
- path: '*.zip'
name: binary_zip
type: zip

# on_finish always executes regardless of passed or failed builds
on_finish:
- echo on_finish step

# Appveyor will push the artifacts it has saved to GitHub 'releases' tab
deploy:
provider: GitHub
auth_token:
secure: dRXIWJKpU7h2RsHX7RqmyYCtCw+Q9O3X5MArloY6p34GZC1w7bp+jQYTZqbdO7bw
artifact: binary_zip
draft: true
prerelease: true
on:
appveyor_repo_tag: true

# Uncomment the following to pause the VM and wait for RDP connection to debug
# - ps: $blockRdp = $true; iex ((new-object net.webclient).DownloadString('https://raw.githubusercontent.com/appveyor/ci/master/scripts/enable-rdp.ps1'))
Loading

0 comments on commit 8b5f7a0

Please sign in to comment.