Skip to content

Commit

Permalink
[SYCL] Add Windows support for device_info
Browse files Browse the repository at this point in the history
Fixed a few warnings caused by inconsistent usage of the class and struct keywords.
Added a macro to expose global symbols and generate sycl.lib on Windows.

Signed-off-by: Vyacheslav N Klochkov <[email protected]>
  • Loading branch information
v-klochkov authored and bader committed May 21, 2019
1 parent ed668e0 commit 3b0defe
Show file tree
Hide file tree
Showing 13 changed files with 232 additions and 115 deletions.
2 changes: 2 additions & 0 deletions sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ project(sycl-solution)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

if(MSVC)
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
Expand Down Expand Up @@ -135,6 +136,7 @@ add_library("${SYCLLibrary}" SHARED
"${sourceRootPath}/detail/program_manager/program_manager.cpp"
"${sourceRootPath}/detail/queue_impl.cpp"
"${sourceRootPath}/detail/os_util.cpp"
"${sourceRootPath}/detail/platform_util.cpp"
"${sourceRootPath}/detail/sampler_impl.cpp"
"${sourceRootPath}/detail/scheduler/commands.cpp"
"${sourceRootPath}/detail/scheduler/commands2.cpp"
Expand Down
2 changes: 1 addition & 1 deletion sycl/include/CL/sycl/detail/buffer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class accessor;
template <typename T, int dimensions, typename AllocatorT> class buffer;
class handler;
class queue;
template <int dimentions> class id;
template <int dimentions> struct id;
template <int dimentions> class range;
using buffer_allocator = aligned_allocator<char, /*alignment*/ 64>;
namespace detail {
Expand Down
4 changes: 2 additions & 2 deletions sycl/include/CL/sycl/detail/helpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ namespace cl {
namespace sycl {
class context;
class event;
template <int dimensions, bool with_offset> class item;
template <int dimensions, bool with_offset> struct item;
template <int dimensions> class group;
template <int dimensions> class range;
template <int dimensions> class id;
template <int dimensions> struct id;
template <int dimensions> class nd_item;
namespace detail {
class context_impl;
Expand Down
5 changes: 5 additions & 0 deletions sycl/include/CL/sycl/detail/os_util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

#pragma once

#include <stdlib.h>

#ifdef _WIN32
#define SYCL_RT_OS_WINDOWS
// Windows platform
Expand Down Expand Up @@ -48,6 +50,9 @@ class OSUtil {
/// Module handle for the executable module - it is assumed there is always
/// single one at most.
static const OSModuleHandle ExeModuleHandle;

/// Returns the amount of RAM available for the operating system.
static size_t getOSMemSize();
};

} // namespace detail
Expand Down
40 changes: 40 additions & 0 deletions sycl/include/CL/sycl/detail/platform_util.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//===-- platform_util.hpp - platform utilities ----------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#pragma once

#include <cstdint>

namespace cl {
namespace sycl {
namespace detail {

/// Static helpers that query properties of the host platform.
/// device_info.cpp calls them to answer the host-device
/// get_device_info_host<> queries (native vector widths, clock frequency and
/// global memory cache parameters).
struct PlatformUtil {
/// Selects the element type whose native vector width is requested from
/// getNativeVectorWidth().
/// NOTE(review): the explicit values 0..6 follow the same order (char, short,
/// int, long, float, double, half) as the table indices device_info.cpp used
/// before this helper existed; presumably the implementation indexes
/// per-type tables with them - confirm in platform_util.cpp before
/// renumbering.
enum class TypeIndex : unsigned int {
Char = 0,
Short = 1,
Int = 2,
Long = 3,
Float = 4,
Double = 5,
Half = 6
};

/// Returns the maximum vector width counted in elements of the given type.
static uint32_t getNativeVectorWidth(TypeIndex Index);

/// Returns the value reported for info::device::max_clock_frequency on the
/// host device.
/// NOTE(review): units are not visible from this header - presumably MHz, as
/// for OpenCL's CL_DEVICE_MAX_CLOCK_FREQUENCY; confirm in the implementation.
static uint32_t getMaxClockFrequency();

/// Returns the value reported for info::device::global_mem_cache_line_size
/// on the host device.
/// NOTE(review): presumably expressed in bytes - confirm in the
/// implementation.
static uint32_t getMemCacheLineSize();

/// Returns the value reported for info::device::global_mem_cache_size on the
/// host device.
/// NOTE(review): presumably expressed in bytes - confirm in the
/// implementation.
static uint64_t getMemCacheSize();
};

} // namespace detail
} // namespace sycl
} // namespace cl
2 changes: 1 addition & 1 deletion sycl/include/CL/sycl/group.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
namespace cl {
namespace sycl {
namespace detail {
class Builder;
struct Builder;
} // namespace detail

template <int dimensions = 1> class group {
Expand Down
3 changes: 2 additions & 1 deletion sycl/include/CL/sycl/id.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,9 @@ namespace cl {
namespace sycl {
template <int dimensions> class range;
template <int dimensions = 1> struct id : public detail::array<dimensions> {
public:
private:
using base = detail::array<dimensions>;
public:
id() = default;

/* The following constructor is only available in the id struct
Expand Down
9 changes: 5 additions & 4 deletions sycl/include/CL/sycl/item.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
namespace cl {
namespace sycl {
namespace detail {
class Builder;
struct Builder;
}
template <int dimensions> struct id;
template <int dimensions> struct range;
template <int dimensions> class range;
template <int dimensions = 1, bool with_offset = true> struct item {

item() = delete;
Expand Down Expand Up @@ -86,8 +86,9 @@ template <int dimensions = 1, bool with_offset = true> struct item {

protected:
// For call constructor inside conversion operator
friend class item<dimensions, false>;
friend class detail::Builder;
friend struct item<dimensions, false>;
friend struct item<dimensions, true>;
friend struct detail::Builder;

template <size_t W = with_offset>
item(typename std::enable_if<(W == true), const range<dimensions>>::type &R,
Expand Down
2 changes: 1 addition & 1 deletion sycl/include/CL/sycl/nd_item.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
namespace cl {
namespace sycl {
namespace detail {
class Builder;
struct Builder;
}
template <int dimensions = 1> struct nd_item {

Expand Down
2 changes: 1 addition & 1 deletion sycl/include/CL/sycl/range.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ namespace sycl {
template <int dimensions> struct id;
template <int dimensions = 1>
class range : public detail::array<dimensions> {
public:
using base = detail::array<dimensions>;
public:
/* The following constructor is only available in the range class
specialization where: dimensions==1 */
template <int N = dimensions>
Expand Down
103 changes: 13 additions & 90 deletions sycl/source/detail/device_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
//===----------------------------------------------------------------------===//

#include <CL/sycl/detail/device_info.hpp>
#include <CL/sycl/detail/os_util.hpp>
#include <CL/sycl/detail/platform_util.hpp>
#include <CL/sycl/device.hpp>
#include <chrono>
#include <sys/sysinfo.h>
#include <thread>

#ifdef __GNUG__
Expand All @@ -21,22 +22,6 @@ namespace cl {
namespace sycl {
namespace detail {

// Used by methods that duplicate OpenCL behaviour in order to get CPU info
// TODO add Windows support
// TODO add support for x86-64 ABI selected using ifdef.
//
// Executes the x86 CPUID instruction with `type` loaded into EAX and stores
// the resulting register values into the caller's array:
//   cpuid_info[0] = EAX, cpuid_info[1] = EBX,
//   cpuid_info[2] = ECX, cpuid_info[3] = EDX.
// `cpuid_info` must therefore have room for at least four elements.
//
// NOTE(review): only EAX is given an input value; ECX is not initialised
// before CPUID runs, so leaves that select a sub-leaf through ECX would see
// whatever the register happens to hold.
static void cpuid(unsigned int cpuid_info[], unsigned int type) {
unsigned int eax, ebx, ecx, edx;
// EBX is copied into EDI before CPUID and swapped back afterwards, so the
// EBX result leaves the asm through EDI (the "=D" output) while EBX itself
// holds its previous value again once the statement finishes. Presumably
// this is done because EBX may be reserved by the compiler (e.g. for PIC on
// 32-bit x86) - confirm before simplifying.
__asm__ __volatile__("mov %%ebx, %%edi\n\r"
"cpuid\n\r"
"xchg %%edi, %%ebx\n\r"
: "=a"(eax), "=D"(ebx), "=c"(ecx), "=d"(edx)
: "a"(type));
// Hand the four result registers back in EAX, EBX, ECX, EDX order.
cpuid_info[0] = eax;
cpuid_info[1] = ebx;
cpuid_info[2] = ecx;
cpuid_info[3] = edx;
}

vector_class<info::fp_config> read_fp_bitfield(cl_device_fp_config bits) {
vector_class<info::fp_config> result;
if (bits & CL_FP_DENORM)
Expand Down Expand Up @@ -156,109 +141,51 @@ cl_uint get_device_info_host<info::device::preferred_vector_width_half>() {
return 0;
}

// Native vector widths, in elements, for each supported x86 SIMD level.
// Every table is indexed by element type in the order:
// char, short, int, long, float, double, half.

// SSE4.2: 16 byte (XMM) registers.
static const cl_uint NATIVE_VECTOR_WIDTH_SSE42[] = {16, 8, 4, 2, 4, 2, 0};
// AVX: 32 byte (YMM) registers, but only for floats and doubles.
static const cl_uint NATIVE_VECTOR_WIDTH_AVX[] = {16, 8, 4, 2, 8, 4, 0};
// AVX2: 32 byte (YMM) registers for every element type.
static const cl_uint NATIVE_VECTOR_WIDTH_AVX2[] = {32, 16, 8, 4, 8, 4, 0};
// AVX512: 64 byte (ZMM) registers.
static const cl_uint NATIVE_VECTOR_WIDTH_AVX512[] = {64, 32, 16, 8, 16, 8, 0};

// Returns the entry at position idx from the table that belongs to the widest
// SIMD extension the running CPU reports. The checks run from narrowest to
// widest so that a later, wider match overrides an earlier one; SSE4.2 is the
// fallback when no AVX variant is reported. idx is not range-checked.
cl_uint get_native_vector_width(size_t idx) {
  const cl_uint *Widths = NATIVE_VECTOR_WIDTH_SSE42;

  if (__builtin_cpu_supports("avx"))
    Widths = NATIVE_VECTOR_WIDTH_AVX;
  if (__builtin_cpu_supports("avx2"))
    Widths = NATIVE_VECTOR_WIDTH_AVX2;
#if (__GNUG__ && GCC_VERSION > 40900)
  // The "avx512f" feature name is only queried on sufficiently new GCC.
  if (__builtin_cpu_supports("avx512f"))
    Widths = NATIVE_VECTOR_WIDTH_AVX512;
#endif

  return Widths[idx];
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_char>() {
return get_native_vector_width(0);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Char);
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_short>() {
return get_native_vector_width(1);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Short);
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_int>() {
return get_native_vector_width(2);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Int);
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_long>() {
return get_native_vector_width(3);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Long);
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_float>() {
return get_native_vector_width(4);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Float);
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_double>() {
return get_native_vector_width(5);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Double);
}

template <>
cl_uint get_device_info_host<info::device::native_vector_width_half>() {
return get_native_vector_width(6);
return PlatformUtil::getNativeVectorWidth(PlatformUtil::TypeIndex::Half);
}

template <> cl_uint get_device_info_host<info::device::max_clock_frequency>() {
throw runtime_error(
"max_clock_frequency parameter is not supported for host device");
unsigned int cpuInfo[4] = {0 - 1u};
string_class buff(sizeof(cpuInfo) * 3 + 1, 0);
size_t offset = 0;

for (unsigned int i = 0x80000002; i <= 0x80000004; i++) {
cpuid(cpuInfo, i);
std::copy(reinterpret_cast<char *>(cpuInfo),
reinterpret_cast<char *>(cpuInfo) + sizeof(cpuInfo),
buff.begin() + offset);
offset += sizeof(cpuInfo);
}
std::size_t found = buff.rfind("Hz");
// Bail out if frequency is not found in CPUID string
if (found == std::string::npos)
return 0;

buff = buff.substr(0, found);

cl_uint freq = 0;
switch (buff[buff.size() - 1]) {
case 'M':
freq = 1;
break;
case 'G':
freq = 1000;
break;
}
buff = buff.substr(buff.rfind(' '), buff.length());
freq *= std::stod(buff);
return freq;
return PlatformUtil::getMaxClockFrequency();
}

// Host address width in bits: the size of a data pointer in bytes multiplied
// by eight bits per byte.
template <> cl_uint get_device_info_host<info::device::address_bits>() {
  constexpr size_t BitsPerByte = 8;
  return sizeof(void *) * BitsPerByte;
}

template <> cl_ulong get_device_info_host<info::device::global_mem_size>() {
struct sysinfo meminfo;
sysinfo(&meminfo);
return meminfo.totalram * meminfo.mem_unit;
return static_cast<cl_ulong>(OSUtil::getOSMemSize());
}

template <> cl_ulong get_device_info_host<info::device::max_mem_alloc_size>() {
Expand Down Expand Up @@ -362,16 +289,12 @@ get_device_info_host<info::device::global_mem_cache_type>() {

template <>
cl_uint get_device_info_host<info::device::global_mem_cache_line_size>() {
unsigned int viCPUInfo[4] = {(unsigned int)-1};
cpuid(viCPUInfo, 0x80000006);
return viCPUInfo[2] & 0xff;
return PlatformUtil::getMemCacheLineSize();
}

template <>
cl_ulong get_device_info_host<info::device::global_mem_cache_size>() {
unsigned int viCPUInfo[4] = {(unsigned int)-1};
cpuid(viCPUInfo, 0x80000006);
return ((viCPUInfo[2] >> 16) & 0xffff) * 1024;
return PlatformUtil::getMemCacheSize();
}

template <>
Expand Down
Loading

0 comments on commit 3b0defe

Please sign in to comment.