From 48f6bd93fba1bf01a678c29d9be28b111b595a57 Mon Sep 17 00:00:00 2001 From: Umar Arshad Date: Thu, 14 Nov 2024 13:48:33 -0800 Subject: [PATCH] xe: ukernel: Delay microkernel check until micro_sdpa creation Checks for the compatibility to build/run microkernel codes were performed at program initialization time. This commit delays the checks to a later point so that they are only performed once the micro_sdpa kernel is generated. The reason for this is that the vISA compiler generates a kernel.errors.txt if there is an error and this caused the CI to fail because the build directory is dirty afterwards. --- src/gpu/intel/compute/device_info.cpp | 2 - src/gpu/intel/compute/device_info.hpp | 4 - src/gpu/intel/ocl/micro_sdpa.cpp | 4 +- src/gpu/intel/ocl/ocl_gpu_device_info.cpp | 2 +- src/gpu/intel/ocl/ocl_gpu_hw_info.cpp | 32 +------- src/gpu/intel/ocl/ocl_gpu_hw_info.hpp | 3 +- src/gpu/intel/ocl/ocl_utils.cpp | 99 +++++++++++++++++++++++ src/gpu/intel/ocl/ocl_utils.hpp | 2 + src/gpu/intel/sycl/device_info.cpp | 3 +- 9 files changed, 107 insertions(+), 44 deletions(-) diff --git a/src/gpu/intel/compute/device_info.cpp b/src/gpu/intel/compute/device_info.cpp index 63aeeb7be4b..606934c185d 100644 --- a/src/gpu/intel/compute/device_info.cpp +++ b/src/gpu/intel/compute/device_info.cpp @@ -290,7 +290,6 @@ status_t device_info_t::init_serialized_device_info( serialized_device_info_.write(&mayiuse_systolic_); serialized_device_info_.write(&mayiuse_ngen_kernels_); serialized_device_info_.write(&mayiuse_system_memory_allocators_); - serialized_device_info_.write(&mayiuse_microkernels_); serialized_device_info_.write(&mayiuse_non_uniform_work_groups_); const size_t name_size = name_.size(); @@ -332,7 +331,6 @@ status_t device_info_t::init_from_cache_blob( DESERIALIZE(mayiuse_systolic_, bool); DESERIALIZE(mayiuse_ngen_kernels_, bool); DESERIALIZE(mayiuse_system_memory_allocators_, bool); - DESERIALIZE(mayiuse_microkernels_, bool); DESERIALIZE(mayiuse_non_uniform_work_groups_, bool); #undef DESERIALIZE diff --git a/src/gpu/intel/compute/device_info.hpp b/src/gpu/intel/compute/device_info.hpp index fa83697ced4..4bf8210220e 100644 --- a/src/gpu/intel/compute/device_info.hpp +++ b/src/gpu/intel/compute/device_info.hpp @@ -231,9 +231,6 @@ struct device_info_t { bool mayiuse_ngen_kernels() const { return mayiuse_ngen_kernels_; } - /// Returns true if the OpenCL compiler supports microkernels. - bool mayiuse_microkernels() const { return mayiuse_microkernels_; } - bool mayiuse_systolic() const { return mayiuse_systolic_; } bool mayiuse_non_uniform_work_groups() const { @@ -281,7 +278,6 @@ struct device_info_t { bool mayiuse_systolic_ = false; bool mayiuse_ngen_kernels_ = false; bool mayiuse_system_memory_allocators_ = false; - bool mayiuse_microkernels_ = false; std::string name_; xpu::runtime_version_t runtime_version_; diff --git a/src/gpu/intel/ocl/micro_sdpa.cpp b/src/gpu/intel/ocl/micro_sdpa.cpp index 20b4a7309a6..410e43a2773 100644 --- a/src/gpu/intel/ocl/micro_sdpa.cpp +++ b/src/gpu/intel/ocl/micro_sdpa.cpp @@ -153,8 +153,8 @@ status_t micro_sdpa_t::pd_t::init_microkernels(impl::engine_t *engine) { arch_ = dev_info->gpu_arch(); auto *d = desc(); - VCONDCHECK(primitive, create, check, sdpa, - (dev_info->mayiuse_microkernels()), status::unimplemented, + VCONDCHECK(primitive, create, check, sdpa, mayiuse_microkernels(engine), + status::unimplemented, "Microkernels not supported by the OpenCL driver."); /* Retrieve pre-tuned kernel configuration */ diff --git a/src/gpu/intel/ocl/ocl_gpu_device_info.cpp b/src/gpu/intel/ocl/ocl_gpu_device_info.cpp index ca0be02bc73..ab73415fdd4 100644 --- a/src/gpu/intel/ocl/ocl_gpu_device_info.cpp +++ b/src/gpu/intel/ocl/ocl_gpu_device_info.cpp @@ -44,7 +44,7 @@ status_t ocl_gpu_device_info_t::init_arch(impl::engine_t *engine) { init_gpu_hw_info(engine, device, context, ip_version_, gpu_arch_, gpu_product_family_, stepping_id_, native_extensions_, - mayiuse_systolic_, mayiuse_ngen_kernels_, mayiuse_microkernels_); + mayiuse_systolic_, mayiuse_ngen_kernels_); err = clReleaseContext(context); OCL_CHECK(err); diff --git a/src/gpu/intel/ocl/ocl_gpu_hw_info.cpp b/src/gpu/intel/ocl/ocl_gpu_hw_info.cpp index 6b27a30ddd9..202deb77d50 100644 --- a/src/gpu/intel/ocl/ocl_gpu_hw_info.cpp +++ b/src/gpu/intel/ocl/ocl_gpu_hw_info.cpp @@ -55,34 +55,10 @@ xpu::runtime_version_t get_driver_version(cl_device_id device) { return runtime_version; } -/// Tries to build a kernel with assembly instructions to check to see if the -/// OpenCL compiler supports microkernels. -bool try_building_with_microkernels(cl_context context, cl_device_id device) { - const char *kernel_code = R""""( - kernel void igc_check() { - __asm__ volatile( - ".decl AA0 v_type=G type=ud num_elts=1\n" - ".decl AA1 v_type=G type=ud num_elts=1\n" - ".implicit_PSEUDO_INPUT AA0 offset=256 size=4\n" - ".implicit_PSEUDO_INPUT AA1 offset=256 size=4\n" - "mov (M1_NM,1) AA0(0,0)<1> AA1(0,0)<0;1,0>\n" - ); - } - )""""; - cl_int err; - /// Not using existing build infrastructure to avoid error messages in the CI logs - xpu::ocl::wrapper_t program( - clCreateProgramWithSource(context, 1, &kernel_code, nullptr, &err)); - if (err != CL_SUCCESS) return false; - err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr); - return err == CL_SUCCESS; -} - void init_gpu_hw_info(impl::engine_t *engine, cl_device_id device, cl_context context, uint32_t &ip_version, compute::gpu_arch_t &gpu_arch, int &gpu_product_family, int &stepping_id, uint64_t &native_extensions, - bool &mayiuse_systolic, bool &mayiuse_ngen_kernels, - bool &mayiuse_microkernels) { + bool &mayiuse_systolic, bool &mayiuse_ngen_kernels) { using namespace ngen; HW hw = HW::Unknown; Product product = {ProductFamily::Unknown, 0}; @@ -107,12 +83,6 @@ void init_gpu_hw_info(impl::engine_t *engine, cl_device_id device, = jit::gpu_supports_binary_format(&mayiuse_ngen_kernels, engine); if (status != status::success) mayiuse_ngen_kernels = false; - mayiuse_microkernels = get_driver_version(device) - >= xpu::runtime_version_t(24, 22, 29735); - if (!mayiuse_microkernels) { - mayiuse_microkernels = try_building_with_microkernels(context, device); - } - ip_version = 0; if (clGetDeviceInfo(device, CL_DEVICE_IP_VERSION_INTEL, sizeof(ip_version), &ip_version, nullptr) diff --git a/src/gpu/intel/ocl/ocl_gpu_hw_info.hpp b/src/gpu/intel/ocl/ocl_gpu_hw_info.hpp index 3184116fb11..ec16482ca87 100644 --- a/src/gpu/intel/ocl/ocl_gpu_hw_info.hpp +++ b/src/gpu/intel/ocl/ocl_gpu_hw_info.hpp @@ -33,8 +33,7 @@ xpu::runtime_version_t get_driver_version(cl_device_id device); void init_gpu_hw_info(impl::engine_t *engine, cl_device_id device, cl_context context, uint32_t &ip_version, compute::gpu_arch_t &gpu_arch, int &gpu_product_family, int &stepping_id, uint64_t &native_extensions, - bool &mayiuse_systolic, bool &mayiuse_ngen_kernels, - bool &mayiuse_microkernels); + bool &mayiuse_systolic, bool &mayiuse_ngen_kernels); } // namespace ocl } // namespace intel diff --git a/src/gpu/intel/ocl/ocl_utils.cpp b/src/gpu/intel/ocl/ocl_utils.cpp index 1d7cc09e36e..178b4126dd6 100644 --- a/src/gpu/intel/ocl/ocl_utils.cpp +++ b/src/gpu/intel/ocl/ocl_utils.cpp @@ -17,13 +17,19 @@ #include #include #include +#include #include #include "gpu/intel/ocl/ocl_gpu_engine.hpp" +#include "gpu/intel/ocl/ocl_gpu_hw_info.hpp" #include "gpu/intel/ocl/ocl_gpu_kernel.hpp" #include "gpu/intel/ocl/ocl_utils.hpp" #include "xpu/ocl/utils.hpp" +#if __has_include() +#include "gpu/intel/sycl/engine.hpp" +#endif + #ifndef CL_KERNEL_BINARY_PROGRAM_INTEL #define CL_KERNEL_BINARY_PROGRAM_INTEL 0x407D #endif @@ -76,6 +82,99 @@ namespace gpu { namespace intel { namespace ocl { +/// Tries to build a kernel with assembly instructions to check to see if the +/// OpenCL compiler supports microkernels. +bool try_building_with_microkernels(cl_context context, cl_device_id device) { + const char *kernel_code = R""""( + kernel void igc_check() { + __asm__ volatile( + ".decl AA0 v_type=G type=ud num_elts=1\n" + ".decl AA1 v_type=G type=ud num_elts=1\n" + ".implicit_PSEUDO_INPUT AA0 offset=256 size=4\n" + ".implicit_PSEUDO_INPUT AA1 offset=256 size=4\n" + "mov (M1_NM,1) AA0(0,0)<1> AA1(0,0)<0;1,0>\n" + ); + } + )""""; + cl_int err; + /// Not using existing build infrastructure to avoid error messages in the CI logs + xpu::ocl::wrapper_t program( + clCreateProgramWithSource(context, 1, &kernel_code, nullptr, &err)); + if (err != CL_SUCCESS) return false; + err = clBuildProgram(program, 1, &device, nullptr, nullptr, nullptr); + return err == CL_SUCCESS; +} + +int get_sycl_ocl_device_and_context( + xpu::ocl::wrapper_t &ocl_context, + xpu::ocl::wrapper_t &ocl_device, + const impl::engine_t *engine) { +#if DNNL_GPU_RUNTIME == DNNL_RUNTIME_SYCL + auto *sycl_engine = utils::downcast(engine); + auto &device = sycl_engine->device(); + + auto be = xpu::sycl::get_backend(device); + if (be == xpu::sycl::backend_t::opencl) { + cl_int err = CL_SUCCESS; + auto ocl_dev = xpu::sycl::compat::get_native(device); + ocl_device = xpu::ocl::make_wrapper(ocl_dev, true); + + ocl_context = xpu::ocl::make_wrapper( + clCreateContext(nullptr, 1, &ocl_dev, nullptr, nullptr, &err), + true); + if (err) return -1; + } else if (be == xpu::sycl::backend_t::level0) { + std::unique_ptr + ocl_engine; + auto err + = gpu::intel::sycl::create_ocl_engine(&ocl_engine, sycl_engine); + if (err != status::success) return -1; + ocl_device = xpu::ocl::make_wrapper(ocl_engine->device(), true); + ocl_context = xpu::ocl::make_wrapper(ocl_engine->context(), true); + } +#endif + return 0; +} + +bool mayiuse_microkernels(const impl::engine_t *engine) { + auto mayiuse_mk = [](const impl::engine_t *engine) { + xpu::ocl::wrapper_t ocl_device; + xpu::ocl::wrapper_t ocl_context; + + switch (engine->runtime_kind()) { + case runtime_kind::sycl: { + auto err = get_sycl_ocl_device_and_context( + ocl_context, ocl_device, engine); + if (err) return false; + } break; + case runtime_kind::ocl: { + const ocl_gpu_engine_t *eng + = utils::downcast(engine); + ocl_device = xpu::ocl::make_wrapper(eng->device(), true); + ocl_context = xpu::ocl::make_wrapper(eng->context(), true); + } break; + default: return false; + } + + bool mayiuse_microkernels = get_driver_version(ocl_device) + >= xpu::runtime_version_t(24, 22, 29735); + if (!mayiuse_microkernels) { + mayiuse_microkernels + = try_building_with_microkernels(ocl_context, ocl_device); + } + return mayiuse_microkernels; + }; + + static std::map engine_microkernel_map { + {engine->engine_id(), mayiuse_mk(engine)}}; + + static std::mutex map_mutex; + std::lock_guard map_lock(map_mutex); + auto it = engine_microkernel_map.find(engine->engine_id()); + if (it != std::end(engine_microkernel_map)) { return it->second; } + return engine_microkernel_map[engine->engine_id()] = mayiuse_mk(engine); +} + status_t get_ocl_kernel_arg_type(compute::scalar_type_t *type, cl_kernel ocl_kernel, cl_uint idx, bool allow_undef) { char s_type[16]; diff --git a/src/gpu/intel/ocl/ocl_utils.hpp b/src/gpu/intel/ocl/ocl_utils.hpp index 482f60c7438..8302838da06 100644 --- a/src/gpu/intel/ocl/ocl_utils.hpp +++ b/src/gpu/intel/ocl/ocl_utils.hpp @@ -40,6 +40,8 @@ namespace ocl { enum { OCL_BUFFER_ALIGNMENT = 128 }; +bool mayiuse_microkernels(const impl::engine_t *engine); + status_t get_ocl_kernel_arg_type(compute::scalar_type_t *type, cl_kernel ocl_kernel, int idx, bool allow_undef = false); diff --git a/src/gpu/intel/sycl/device_info.cpp b/src/gpu/intel/sycl/device_info.cpp index e7b88bfbe51..993ab641320 100644 --- a/src/gpu/intel/sycl/device_info.cpp +++ b/src/gpu/intel/sycl/device_info.cpp @@ -55,7 +55,7 @@ status_t device_info_t::init_arch(impl::engine_t *engine) { gpu::intel::ocl::init_gpu_hw_info(engine, ocl_dev_wrapper, ocl_ctx_wrapper, ip_version_, gpu_arch_, gpu_product_family_, stepping_id_, native_extensions_, mayiuse_systolic_, - mayiuse_ngen_kernels_, mayiuse_microkernels_); + mayiuse_ngen_kernels_); } else if (be == xpu::sycl::backend_t::level0) { // TODO: add support for L0 binary ngen check // XXX: query from ocl_engine for now @@ -70,7 +70,6 @@ status_t device_info_t::init_arch(impl::engine_t *engine) { stepping_id_ = dev_info->stepping_id(); mayiuse_systolic_ = dev_info->mayiuse_systolic(); mayiuse_ngen_kernels_ = dev_info->mayiuse_ngen_kernels(); - mayiuse_microkernels_ = dev_info->mayiuse_microkernels(); } else { assert(!"not_expected"); }