From 58481d606c19f4e46c1cd7dbfd4aba819ae024d3 Mon Sep 17 00:00:00 2001
From: "Chereshnev, Eugene"
Date: Tue, 1 Nov 2022 13:13:07 -0700
Subject: [PATCH] gpu: compute: remove lazy initialization for mayiuse_ngen_kernels()

- Most models rely on nGEN kernels so overhead on this check cannot be
  avoided
- Lazy initialization prevents "full" engine serialization. If the check
  hasn't been done yet - the serialized object is incomplete
  (mayiuse_ngen_kernels() result is not serialized).

This commit removes lazy initialization.

The binary kernels status is now printed as part of the verbose engine
header. The device info pointer may be null there, so it is checked
before mayiuse_ngen_kernels() is queried.
---
 src/gpu/compute/compute_engine.hpp  |  4 ++--
 src/gpu/compute/device_info.cpp     | 24 ------------------------
 src/gpu/compute/device_info.hpp     |  6 ++----
 src/gpu/ocl/ocl_gpu_device_info.cpp |  3 ++-
 src/gpu/ocl/ocl_gpu_hw_info.cpp     | 11 +++++++++--
 src/gpu/ocl/ocl_gpu_hw_info.hpp     |  8 +++++---
 src/gpu/ocl/verbose.hpp             | 10 +++++++---
 src/sycl/sycl_device_info.cpp       |  5 +++--
 src/sycl/verbose.hpp                |  8 +++++---
 9 files changed, 35 insertions(+), 44 deletions(-)

diff --git a/src/gpu/compute/compute_engine.hpp b/src/gpu/compute/compute_engine.hpp
index b77dc2d2165..152af230204 100644
--- a/src/gpu/compute/compute_engine.hpp
+++ b/src/gpu/compute/compute_engine.hpp
@@ -125,8 +125,8 @@ class compute_engine_t : public engine_t {
     bool is_xe_hpc() const {
         return device_info_->gpu_arch() == gpu_arch_t::xe_hpc;
     }
-    bool mayiuse_ngen_kernels() {
-        return device_info_->mayiuse_ngen_kernels(this);
+    bool mayiuse_ngen_kernels() const {
+        return device_info_->mayiuse_ngen_kernels();
     }
     bool mayiuse_non_uniform_work_groups() const {
         return device_info_->mayiuse_non_uniform_work_groups();
diff --git a/src/gpu/compute/device_info.cpp b/src/gpu/compute/device_info.cpp
index 7236cea1dfc..dab3c5df27b 100644
--- a/src/gpu/compute/device_info.cpp
+++ b/src/gpu/compute/device_info.cpp
@@ -20,9 +20,6 @@
 
 #include "gpu/compute/device_info.hpp"
 
-#include "common/verbose.hpp"
-#include "gpu/jit/binary_format.hpp"
-
 #ifdef DNNL_WITH_SYCL
 #include "sycl/sycl_engine_base.hpp"
 #endif
@@ -59,25 +56,6 @@ uint64_t get_future_extensions(compute::gpu_arch_t gpu_arch) {
     return extensions;
 }
 
-bool device_info_t::mayiuse_ngen_kernels(engine_t *engine) {
-    static std::mutex m;
-    std::lock_guard guard(m);
-
-    if (checked_ngen_kernels_) return mayiuse_ngen_kernels_;
-
-    auto status
-            = jit::gpu_supports_binary_format(&mayiuse_ngen_kernels_, engine);
-    if (status != status::success) mayiuse_ngen_kernels_ = false;
-
-    if (get_verbose())
-        printf("onednn_verbose,info,gpu,binary_kernels:%s\n",
-                mayiuse_ngen_kernels_ ? "enabled" : "disabled");
-
-    checked_ngen_kernels_ = true;
-
-    return mayiuse_ngen_kernels_;
-}
-
 bool device_info_t::mayiuse_sub_group(int size) const {
     switch (gpu_arch()) {
         case gpu_arch_t::xe_hpc: return utils::one_of(size, 16, 32);
@@ -221,7 +199,6 @@ status_t device_info_t::init_serialized_device_info(
     serialized_device_info_.write(&llc_cache_size_);
     serialized_device_info_.write(&extensions_);
     serialized_device_info_.write(&mayiuse_ngen_kernels_);
-    serialized_device_info_.write(&checked_ngen_kernels_);
     serialized_device_info_.write(&mayiuse_non_uniform_work_groups_);
 
     const size_t name_size = name_.size();
@@ -257,7 +234,6 @@ status_t device_info_t::init_from_cache_blob(
     DESERIALIZE(llc_cache_size_, size_t);
     DESERIALIZE(extensions_, uint64_t);
     DESERIALIZE(mayiuse_ngen_kernels_, bool);
-    DESERIALIZE(checked_ngen_kernels_, bool);
     DESERIALIZE(mayiuse_non_uniform_work_groups_, bool);
 #undef DESERIALIZE
 
diff --git a/src/gpu/compute/device_info.hpp b/src/gpu/compute/device_info.hpp
index 56ce64c55e6..b5f912bbd0c 100644
--- a/src/gpu/compute/device_info.hpp
+++ b/src/gpu/compute/device_info.hpp
@@ -239,7 +239,7 @@ struct device_info_t {
     }
 
     const std::string &name() const { return name_; }
-    bool mayiuse_ngen_kernels(engine_t *engine);
+    bool mayiuse_ngen_kernels() const { return mayiuse_ngen_kernels_; }
 
     bool mayiuse_non_uniform_work_groups() const {
         return mayiuse_non_uniform_work_groups_;
@@ -272,6 +272,7 @@ struct device_info_t {
 
     compute::gpu_arch_t gpu_arch_ = compute::gpu_arch_t::unknown;
     int stepping_id_ = 0;
+    bool mayiuse_ngen_kernels_ = false;
 
     std::string name_;
     runtime_version_t runtime_version_;
@@ -295,9 +296,6 @@ struct device_info_t {
             const std::vector &cache_blob = {});
     status_t init_from_cache_blob(const std::vector &cache_blob);
 
-    bool mayiuse_ngen_kernels_ = false;
-    bool checked_ngen_kernels_ = false;
-
     bool mayiuse_non_uniform_work_groups_ = false;
 
     serialization_stream_t serialized_device_info_;
diff --git a/src/gpu/ocl/ocl_gpu_device_info.cpp b/src/gpu/ocl/ocl_gpu_device_info.cpp
index 04beb719d58..dba90db72a4 100644
--- a/src/gpu/ocl/ocl_gpu_device_info.cpp
+++ b/src/gpu/ocl/ocl_gpu_device_info.cpp
@@ -39,7 +39,8 @@ status_t ocl_gpu_device_info_t::init_arch(engine_t *engine) {
             = clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
     OCL_CHECK(err);
 
-    init_gpu_hw_info(device, context, gpu_arch_, stepping_id_);
+    init_gpu_hw_info(engine, device, context, gpu_arch_, stepping_id_,
+            mayiuse_ngen_kernels_);
 
     err = clReleaseContext(context);
     OCL_CHECK(err);
diff --git a/src/gpu/ocl/ocl_gpu_hw_info.cpp b/src/gpu/ocl/ocl_gpu_hw_info.cpp
index 75cda22957a..b45d49c6724 100644
--- a/src/gpu/ocl/ocl_gpu_hw_info.cpp
+++ b/src/gpu/ocl/ocl_gpu_hw_info.cpp
@@ -15,6 +15,8 @@
 *******************************************************************************/
 
 #include "gpu/ocl/ocl_gpu_hw_info.hpp"
+
+#include "gpu/jit/binary_format.hpp"
 #include "gpu/jit/jit_generator.hpp"
 #include "gpu/jit/ngen_type_bridge.hpp"
 
@@ -23,8 +25,9 @@ namespace impl {
 namespace gpu {
 namespace ocl {
 
-void init_gpu_hw_info(cl_device_id device, cl_context context,
-        compute::gpu_arch_t &gpu_arch, int &stepping_id) {
+void init_gpu_hw_info(engine_t *engine, cl_device_id device, cl_context context,
+        compute::gpu_arch_t &gpu_arch, int &stepping_id,
+        bool &mayiuse_ngen_kernels) {
     using namespace ngen;
 
     HW hw = HW::Unknown;
@@ -32,6 +35,10 @@ void init_gpu_hw_info(cl_device_id device, cl_context context,
             context, device, hw, stepping_id);
 
     gpu_arch = jit::convert_ngen_arch_to_dnnl(hw);
+
+    auto status
+            = jit::gpu_supports_binary_format(&mayiuse_ngen_kernels, engine);
+    if (status != status::success) mayiuse_ngen_kernels = false;
 }
 
 } // namespace ocl
diff --git a/src/gpu/ocl/ocl_gpu_hw_info.hpp b/src/gpu/ocl/ocl_gpu_hw_info.hpp
index 9f42443a342..be1bd25d242 100644
--- a/src/gpu/ocl/ocl_gpu_hw_info.hpp
+++ b/src/gpu/ocl/ocl_gpu_hw_info.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2020-2021 Intel Corporation
+* Copyright 2020-2022 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@
 
 #include
 
+#include "common/c_types_map.hpp"
 #include "gpu/compute/device_info.hpp"
 
 namespace dnnl {
@@ -26,8 +27,9 @@ namespace impl {
 namespace gpu {
 namespace ocl {
 
-void init_gpu_hw_info(cl_device_id device, cl_context context,
-        compute::gpu_arch_t &gpu_arch, int &stepping_id);
+void init_gpu_hw_info(engine_t *engine, cl_device_id device, cl_context context,
+        compute::gpu_arch_t &gpu_arch, int &stepping_id,
+        bool &mayiuse_ngen_kernels);
 
 } // namespace ocl
 } // namespace gpu
diff --git a/src/gpu/ocl/verbose.hpp b/src/gpu/ocl/verbose.hpp
index e6f02650abf..257be97cdd3 100644
--- a/src/gpu/ocl/verbose.hpp
+++ b/src/gpu/ocl/verbose.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2021 Intel Corporation
+* Copyright 2019-2022 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -39,8 +39,12 @@ void print_verbose_header() {
 
         auto s_name = dev_info ? dev_info->name() : "unknown";
         auto s_ver = dev_info ? dev_info->runtime_version().str() : "unknown";
-        printf("onednn_verbose,info,gpu,engine,%d,name:%s,driver_version:%s\n",
-                (int)i, s_name.c_str(), s_ver.c_str());
+        // dev_info may be null (see the fallbacks above), so guard the query.
+        const bool has_ngen = dev_info && dev_info->mayiuse_ngen_kernels();
+        printf("onednn_verbose,info,gpu,engine,%d,name:%s,driver_version:%s,"
+               "binary_kernels:%s\n",
+                (int)i, s_name.c_str(), s_ver.c_str(),
+                has_ngen ? "enabled" : "disabled");
     }
 }
 
diff --git a/src/sycl/sycl_device_info.cpp b/src/sycl/sycl_device_info.cpp
index e2b9f9d6096..44ff2155a38 100644
--- a/src/sycl/sycl_device_info.cpp
+++ b/src/sycl/sycl_device_info.cpp
@@ -50,8 +50,8 @@ status_t sycl_device_info_t::init_arch(engine_t *engine) {
                 clCreateContext(nullptr, 1, &ocl_dev, nullptr, nullptr, &err));
         OCL_CHECK(err);
 
-        gpu::ocl::init_gpu_hw_info(
-                ocl_dev_wrapper, ocl_ctx_wrapper, gpu_arch_, stepping_id_);
+        gpu::ocl::init_gpu_hw_info(engine, ocl_dev_wrapper, ocl_ctx_wrapper,
+                gpu_arch_, stepping_id_, mayiuse_ngen_kernels_);
     } else if (be == backend_t::level0) {
         // TODO: add support for L0 binary ngen check
         // XXX: query from ocl_engine for now
@@ -68,6 +68,7 @@ status_t sycl_device_info_t::init_arch(engine_t *engine) {
         auto *dev_info = compute_engine->device_info();
         gpu_arch_ = dev_info->gpu_arch();
         stepping_id_ = dev_info->stepping_id();
+        mayiuse_ngen_kernels_ = dev_info->mayiuse_ngen_kernels();
     } else {
         assert(!"not_expected");
     }
diff --git a/src/sycl/verbose.hpp b/src/sycl/verbose.hpp
index cb5ddae702a..0ae4eb503bc 100644
--- a/src/sycl/verbose.hpp
+++ b/src/sycl/verbose.hpp
@@ -1,5 +1,5 @@
 /*******************************************************************************
-* Copyright 2019-2021 Intel Corporation
+* Copyright 2019-2022 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -41,9 +41,11 @@ void print_verbose_header(engine_kind_t kind) {
         auto s_ver = dev_info ? dev_info->runtime_version().str() : "unknown";
+        // dev_info may be null (see the fallback above), so guard the query.
+        const bool has_ngen = dev_info && dev_info->mayiuse_ngen_kernels();
 
         printf("onednn_verbose,info,%s,engine,%d,backend:%s,name:%s,driver_"
-               "version:%s\n",
+               "version:%s,binary_kernels:%s\n",
                 s_engine_kind, (int)i, s_backend.c_str(), s_name.c_str(),
-                s_ver.c_str());
+                s_ver.c_str(), has_ngen ? "enabled" : "disabled");
     }
 }
 