Skip to content

Commit

Permalink
xe: ukernel: Delay microkernel check until micro_sdpa creation
Browse files Browse the repository at this point in the history
Checks for the compatibility to build/run microkernel codes were performed at
program initialization time. This commit delays the checks to a later point so
that they are only performed once the micro_sdpa kernel is generated. The reason
for this is that the vISA compiler generates a kernel.errors.txt if there is an
error and this caused the CI to fail because the build directory is dirty
afterwards.
  • Loading branch information
umar456 authored and karturov committed Nov 18, 2024
1 parent e72f65d commit 48f6bd9
Show file tree
Hide file tree
Showing 9 changed files with 107 additions and 44 deletions.
2 changes: 0 additions & 2 deletions src/gpu/intel/compute/device_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,6 @@ status_t device_info_t::init_serialized_device_info(
serialized_device_info_.write(&mayiuse_systolic_);
serialized_device_info_.write(&mayiuse_ngen_kernels_);
serialized_device_info_.write(&mayiuse_system_memory_allocators_);
serialized_device_info_.write(&mayiuse_microkernels_);
serialized_device_info_.write(&mayiuse_non_uniform_work_groups_);

const size_t name_size = name_.size();
Expand Down Expand Up @@ -332,7 +331,6 @@ status_t device_info_t::init_from_cache_blob(
DESERIALIZE(mayiuse_systolic_, bool);
DESERIALIZE(mayiuse_ngen_kernels_, bool);
DESERIALIZE(mayiuse_system_memory_allocators_, bool);
DESERIALIZE(mayiuse_microkernels_, bool);
DESERIALIZE(mayiuse_non_uniform_work_groups_, bool);
#undef DESERIALIZE

Expand Down
4 changes: 0 additions & 4 deletions src/gpu/intel/compute/device_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,6 @@ struct device_info_t {

bool mayiuse_ngen_kernels() const { return mayiuse_ngen_kernels_; }

/// Returns true if the OpenCL compiler supports microkernels.
bool mayiuse_microkernels() const { return mayiuse_microkernels_; }

bool mayiuse_systolic() const { return mayiuse_systolic_; }

bool mayiuse_non_uniform_work_groups() const {
Expand Down Expand Up @@ -281,7 +278,6 @@ struct device_info_t {
bool mayiuse_systolic_ = false;
bool mayiuse_ngen_kernels_ = false;
bool mayiuse_system_memory_allocators_ = false;
bool mayiuse_microkernels_ = false;

std::string name_;
xpu::runtime_version_t runtime_version_;
Expand Down
4 changes: 2 additions & 2 deletions src/gpu/intel/ocl/micro_sdpa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ status_t micro_sdpa_t::pd_t::init_microkernels(impl::engine_t *engine) {
arch_ = dev_info->gpu_arch();
auto *d = desc();

VCONDCHECK(primitive, create, check, sdpa,
(dev_info->mayiuse_microkernels()), status::unimplemented,
VCONDCHECK(primitive, create, check, sdpa, mayiuse_microkernels(engine),
status::unimplemented,
"Microkernels not supported by the OpenCL driver.");

/* Retrieve pre-tuned kernel configuration */
Expand Down
2 changes: 1 addition & 1 deletion src/gpu/intel/ocl/ocl_gpu_device_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ status_t ocl_gpu_device_info_t::init_arch(impl::engine_t *engine) {

init_gpu_hw_info(engine, device, context, ip_version_, gpu_arch_,
gpu_product_family_, stepping_id_, native_extensions_,
mayiuse_systolic_, mayiuse_ngen_kernels_, mayiuse_microkernels_);
mayiuse_systolic_, mayiuse_ngen_kernels_);

err = clReleaseContext(context);
OCL_CHECK(err);
Expand Down
32 changes: 1 addition & 31 deletions src/gpu/intel/ocl/ocl_gpu_hw_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,34 +55,10 @@ xpu::runtime_version_t get_driver_version(cl_device_id device) {
return runtime_version;
}

/// Tries to build a kernel with assembly instructions to check to see if the
/// OpenCL compiler supports microkernels.
bool try_building_with_microkernels(cl_context context, cl_device_id device) {
const char *kernel_code = R""""(
kernel void igc_check() {
__asm__ volatile(
".decl AA0 v_type=G type=ud num_elts=1\n"
".decl AA1 v_type=G type=ud num_elts=1\n"
".implicit_PSEUDO_INPUT AA0 offset=256 size=4\n"
".implicit_PSEUDO_INPUT AA1 offset=256 size=4\n"
"mov (M1_NM,1) AA0(0,0)<1> AA1(0,0)<0;1,0>\n"
);
}
)"""";
cl_int err;
/// Not using existing build infrastructure to avoid error messages in the CI logs
xpu::ocl::wrapper_t<cl_program> program(
clCreateProgramWithSource(context, 1, &kernel_code, nullptr, &err));
if (err != CL_SUCCESS) return false;
err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
return err == CL_SUCCESS;
}

void init_gpu_hw_info(impl::engine_t *engine, cl_device_id device,
cl_context context, uint32_t &ip_version, compute::gpu_arch_t &gpu_arch,
int &gpu_product_family, int &stepping_id, uint64_t &native_extensions,
bool &mayiuse_systolic, bool &mayiuse_ngen_kernels,
bool &mayiuse_microkernels) {
bool &mayiuse_systolic, bool &mayiuse_ngen_kernels) {
using namespace ngen;
HW hw = HW::Unknown;
Product product = {ProductFamily::Unknown, 0};
Expand All @@ -107,12 +83,6 @@ void init_gpu_hw_info(impl::engine_t *engine, cl_device_id device,
= jit::gpu_supports_binary_format(&mayiuse_ngen_kernels, engine);
if (status != status::success) mayiuse_ngen_kernels = false;

mayiuse_microkernels = get_driver_version(device)
>= xpu::runtime_version_t(24, 22, 29735);
if (!mayiuse_microkernels) {
mayiuse_microkernels = try_building_with_microkernels(context, device);
}

ip_version = 0;
if (clGetDeviceInfo(device, CL_DEVICE_IP_VERSION_INTEL, sizeof(ip_version),
&ip_version, nullptr)
Expand Down
3 changes: 1 addition & 2 deletions src/gpu/intel/ocl/ocl_gpu_hw_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ xpu::runtime_version_t get_driver_version(cl_device_id device);
void init_gpu_hw_info(impl::engine_t *engine, cl_device_id device,
cl_context context, uint32_t &ip_version, compute::gpu_arch_t &gpu_arch,
int &gpu_product_family, int &stepping_id, uint64_t &native_extensions,
bool &mayiuse_systolic, bool &mayiuse_ngen_kernels,
bool &mayiuse_microkernels);
bool &mayiuse_systolic, bool &mayiuse_ngen_kernels);

} // namespace ocl
} // namespace intel
Expand Down
99 changes: 99 additions & 0 deletions src/gpu/intel/ocl/ocl_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,19 @@
#include <algorithm>
#include <cstring>
#include <iostream>
#include <mutex>
#include <CL/cl_ext.h>

#include "gpu/intel/ocl/ocl_gpu_engine.hpp"
#include "gpu/intel/ocl/ocl_gpu_hw_info.hpp"
#include "gpu/intel/ocl/ocl_gpu_kernel.hpp"
#include "gpu/intel/ocl/ocl_utils.hpp"
#include "xpu/ocl/utils.hpp"

#if __has_include(<sycl/sycl.hpp>)
#include "gpu/intel/sycl/engine.hpp"
#endif

#ifndef CL_KERNEL_BINARY_PROGRAM_INTEL
#define CL_KERNEL_BINARY_PROGRAM_INTEL 0x407D
#endif
Expand Down Expand Up @@ -76,6 +82,99 @@ namespace gpu {
namespace intel {
namespace ocl {

/// Tries to build a kernel with assembly instructions to check to see if the
/// OpenCL compiler supports microkernels.
bool try_building_with_microkernels(cl_context context, cl_device_id device) {
const char *kernel_code = R""""(
kernel void igc_check() {
__asm__ volatile(
".decl AA0 v_type=G type=ud num_elts=1\n"
".decl AA1 v_type=G type=ud num_elts=1\n"
".implicit_PSEUDO_INPUT AA0 offset=256 size=4\n"
".implicit_PSEUDO_INPUT AA1 offset=256 size=4\n"
"mov (M1_NM,1) AA0(0,0)<1> AA1(0,0)<0;1,0>\n"
);
}
)"""";
cl_int err;
/// Not using existing build infrastructure to avoid error messages in the CI logs
xpu::ocl::wrapper_t<cl_program> program(
clCreateProgramWithSource(context, 1, &kernel_code, nullptr, &err));
if (err != CL_SUCCESS) return false;
err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr);
return err == CL_SUCCESS;
}

int get_sycl_ocl_device_and_context(
xpu::ocl::wrapper_t<cl_context> &ocl_context,
xpu::ocl::wrapper_t<cl_device_id> &ocl_device,
const impl::engine_t *engine) {
#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL
auto *sycl_engine = utils::downcast<const sycl::engine_t *>(engine);
auto &device = sycl_engine->device();

auto be = xpu::sycl::get_backend(device);
if (be == xpu::sycl::backend_t::opencl) {
cl_int err = CL_SUCCESS;
auto ocl_dev = xpu::sycl::compat::get_native<cl_device_id>(device);
ocl_device = xpu::ocl::make_wrapper(ocl_dev, true);

ocl_context = xpu::ocl::make_wrapper(
clCreateContext(nullptr, 1, &ocl_dev, nullptr, nullptr, &err),
true);
if (err) return -1;
} else if (be == xpu::sycl::backend_t::level0) {
std::unique_ptr<gpu::intel::ocl::ocl_gpu_engine_t, engine_deleter_t>
ocl_engine;
auto err
= gpu::intel::sycl::create_ocl_engine(&ocl_engine, sycl_engine);
if (err != status::success) return -1;
ocl_device = xpu::ocl::make_wrapper(ocl_engine->device(), true);
ocl_context = xpu::ocl::make_wrapper(ocl_engine->context(), true);
}
#endif
return 0;
}

bool mayiuse_microkernels(const impl::engine_t *engine) {
auto mayiuse_mk = [](const impl::engine_t *engine) {
xpu::ocl::wrapper_t<cl_device_id> ocl_device;
xpu::ocl::wrapper_t<cl_context> ocl_context;

switch (engine->runtime_kind()) {
case runtime_kind::sycl: {
auto err = get_sycl_ocl_device_and_context(
ocl_context, ocl_device, engine);
if (err) return false;
} break;
case runtime_kind::ocl: {
const ocl_gpu_engine_t *eng
= utils::downcast<const ocl_gpu_engine_t *>(engine);
ocl_device = xpu::ocl::make_wrapper(eng->device(), true);
ocl_context = xpu::ocl::make_wrapper(eng->context(), true);
} break;
default: return false;
}

bool mayiuse_microkernels = get_driver_version(ocl_device)
>= xpu::runtime_version_t(24, 22, 29735);
if (!mayiuse_microkernels) {
mayiuse_microkernels
= try_building_with_microkernels(ocl_context, ocl_device);
}
return mayiuse_microkernels;
};

static std::map<engine_id_t, bool> engine_microkernel_map {
{engine->engine_id(), mayiuse_mk(engine)}};

static std::mutex map_mutex;
std::lock_guard<std::mutex> map_lock(map_mutex);
auto it = engine_microkernel_map.find(engine->engine_id());
if (it != std::end(engine_microkernel_map)) { return it->second; }
return engine_microkernel_map[engine->engine_id()] = mayiuse_mk(engine);
}

status_t get_ocl_kernel_arg_type(compute::scalar_type_t *type,
cl_kernel ocl_kernel, cl_uint idx, bool allow_undef) {
char s_type[16];
Expand Down
2 changes: 2 additions & 0 deletions src/gpu/intel/ocl/ocl_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ namespace ocl {

enum { OCL_BUFFER_ALIGNMENT = 128 };

bool mayiuse_microkernels(const impl::engine_t *engine);

status_t get_ocl_kernel_arg_type(compute::scalar_type_t *type,
cl_kernel ocl_kernel, int idx, bool allow_undef = false);

Expand Down
3 changes: 1 addition & 2 deletions src/gpu/intel/sycl/device_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ status_t device_info_t::init_arch(impl::engine_t *engine) {
gpu::intel::ocl::init_gpu_hw_info(engine, ocl_dev_wrapper,
ocl_ctx_wrapper, ip_version_, gpu_arch_, gpu_product_family_,
stepping_id_, native_extensions_, mayiuse_systolic_,
mayiuse_ngen_kernels_, mayiuse_microkernels_);
mayiuse_ngen_kernels_);
} else if (be == xpu::sycl::backend_t::level0) {
// TODO: add support for L0 binary ngen check
// XXX: query from ocl_engine for now
Expand All @@ -70,7 +70,6 @@ status_t device_info_t::init_arch(impl::engine_t *engine) {
stepping_id_ = dev_info->stepping_id();
mayiuse_systolic_ = dev_info->mayiuse_systolic();
mayiuse_ngen_kernels_ = dev_info->mayiuse_ngen_kernels();
mayiuse_microkernels_ = dev_info->mayiuse_microkernels();
} else {
assert(!"not_expected");
}
Expand Down

0 comments on commit 48f6bd9

Please sign in to comment.