Skip to content

Commit

Permalink
gpu: compute: remove lazy initialization for mayiuse_ngen_kernels()
Browse files Browse the repository at this point in the history
- Most models rely on nGEN kernels, so the overhead of this check cannot
  be avoided anyway.
- Lazy initialization prevents "full" engine serialization: if the check
  has not been performed yet, the serialized object is incomplete (the
  mayiuse_ngen_kernels() result is not serialized). This commit removes
  the lazy initialization.
  • Loading branch information
echeresh committed Nov 2, 2022
1 parent 7144393 commit 58481d6
Show file tree
Hide file tree
Showing 9 changed files with 32 additions and 44 deletions.
4 changes: 2 additions & 2 deletions src/gpu/compute/compute_engine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ class compute_engine_t : public engine_t {
// Reports whether the architecture recorded in the device info is Xe-HPC.
bool is_xe_hpc() const {
    const auto arch = device_info_->gpu_arch();
    return arch == gpu_arch_t::xe_hpc;
}
bool mayiuse_ngen_kernels() {
return device_info_->mayiuse_ngen_kernels(this);
bool mayiuse_ngen_kernels() const {
return device_info_->mayiuse_ngen_kernels();
}
bool mayiuse_non_uniform_work_groups() const {
return device_info_->mayiuse_non_uniform_work_groups();
Expand Down
24 changes: 0 additions & 24 deletions src/gpu/compute/device_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@

#include "gpu/compute/device_info.hpp"

#include "common/verbose.hpp"
#include "gpu/jit/binary_format.hpp"

#ifdef DNNL_WITH_SYCL
#include "sycl/sycl_engine_base.hpp"
#endif
Expand Down Expand Up @@ -59,25 +56,6 @@ uint64_t get_future_extensions(compute::gpu_arch_t gpu_arch) {
return extensions;
}

// Lazily answers whether nGEN (binary) kernels may be used with `engine`.
//
// The first call queries jit::gpu_supports_binary_format() and caches the
// answer in mayiuse_ngen_kernels_; checked_ngen_kernels_ marks the cache as
// valid so later calls return the stored value without re-querying. A
// non-success status from the query is treated as "not supported".
// The whole body runs under a function-local static mutex.
bool device_info_t::mayiuse_ngen_kernels(engine_t *engine) {
    static std::mutex ngen_check_mutex;
    std::lock_guard<std::mutex> lock(ngen_check_mutex);

    if (!checked_ngen_kernels_) {
        const auto st = jit::gpu_supports_binary_format(
                &mayiuse_ngen_kernels_, engine);
        // A failed query must not leave a stale "supported" answer behind.
        if (st != status::success) mayiuse_ngen_kernels_ = false;

        if (get_verbose()) {
            printf("onednn_verbose,info,gpu,binary_kernels:%s\n",
                    mayiuse_ngen_kernels_ ? "enabled" : "disabled");
        }

        checked_ngen_kernels_ = true;
    }

    return mayiuse_ngen_kernels_;
}

bool device_info_t::mayiuse_sub_group(int size) const {
switch (gpu_arch()) {
case gpu_arch_t::xe_hpc: return utils::one_of(size, 16, 32);
Expand Down Expand Up @@ -221,7 +199,6 @@ status_t device_info_t::init_serialized_device_info(
serialized_device_info_.write(&llc_cache_size_);
serialized_device_info_.write(&extensions_);
serialized_device_info_.write(&mayiuse_ngen_kernels_);
serialized_device_info_.write(&checked_ngen_kernels_);
serialized_device_info_.write(&mayiuse_non_uniform_work_groups_);

const size_t name_size = name_.size();
Expand Down Expand Up @@ -257,7 +234,6 @@ status_t device_info_t::init_from_cache_blob(
DESERIALIZE(llc_cache_size_, size_t);
DESERIALIZE(extensions_, uint64_t);
DESERIALIZE(mayiuse_ngen_kernels_, bool);
DESERIALIZE(checked_ngen_kernels_, bool);
DESERIALIZE(mayiuse_non_uniform_work_groups_, bool);
#undef DESERIALIZE

Expand Down
6 changes: 2 additions & 4 deletions src/gpu/compute/device_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ struct device_info_t {
}
const std::string &name() const { return name_; }

bool mayiuse_ngen_kernels(engine_t *engine);
bool mayiuse_ngen_kernels() const { return mayiuse_ngen_kernels_; }

bool mayiuse_non_uniform_work_groups() const {
return mayiuse_non_uniform_work_groups_;
Expand Down Expand Up @@ -272,6 +272,7 @@ struct device_info_t {

compute::gpu_arch_t gpu_arch_ = compute::gpu_arch_t::unknown;
int stepping_id_ = 0;
bool mayiuse_ngen_kernels_ = false;

std::string name_;
runtime_version_t runtime_version_;
Expand All @@ -295,9 +296,6 @@ struct device_info_t {
const std::vector<uint8_t> &cache_blob = {});
status_t init_from_cache_blob(const std::vector<uint8_t> &cache_blob);

bool mayiuse_ngen_kernels_ = false;
bool checked_ngen_kernels_ = false;

bool mayiuse_non_uniform_work_groups_ = false;

serialization_stream_t serialized_device_info_;
Expand Down
3 changes: 2 additions & 1 deletion src/gpu/ocl/ocl_gpu_device_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ status_t ocl_gpu_device_info_t::init_arch(engine_t *engine) {
= clCreateContext(nullptr, 1, &device, nullptr, nullptr, &err);
OCL_CHECK(err);

init_gpu_hw_info(device, context, gpu_arch_, stepping_id_);
init_gpu_hw_info(engine, device, context, gpu_arch_, stepping_id_,
mayiuse_ngen_kernels_);

err = clReleaseContext(context);
OCL_CHECK(err);
Expand Down
11 changes: 9 additions & 2 deletions src/gpu/ocl/ocl_gpu_hw_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
*******************************************************************************/

#include "gpu/ocl/ocl_gpu_hw_info.hpp"

#include "gpu/jit/binary_format.hpp"
#include "gpu/jit/jit_generator.hpp"
#include "gpu/jit/ngen_type_bridge.hpp"

Expand All @@ -23,15 +25,20 @@ namespace impl {
namespace gpu {
namespace ocl {

void init_gpu_hw_info(cl_device_id device, cl_context context,
compute::gpu_arch_t &gpu_arch, int &stepping_id) {
void init_gpu_hw_info(engine_t *engine, cl_device_id device, cl_context context,
compute::gpu_arch_t &gpu_arch, int &stepping_id,
bool &mayiuse_ngen_kernels) {
using namespace ngen;

HW hw = HW::Unknown;
jit::jit_generator<HW::Unknown>::detectHWInfo(
context, device, hw, stepping_id);

gpu_arch = jit::convert_ngen_arch_to_dnnl(hw);

auto status
= jit::gpu_supports_binary_format(&mayiuse_ngen_kernels, engine);
if (status != status::success) mayiuse_ngen_kernels = false;
}

} // namespace ocl
Expand Down
8 changes: 5 additions & 3 deletions src/gpu/ocl/ocl_gpu_hw_info.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2020-2021 Intel Corporation
* Copyright 2020-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -19,15 +19,17 @@

#include <CL/cl.h>

#include "common/c_types_map.hpp"
#include "gpu/compute/device_info.hpp"

namespace dnnl {
namespace impl {
namespace gpu {
namespace ocl {

void init_gpu_hw_info(cl_device_id device, cl_context context,
compute::gpu_arch_t &gpu_arch, int &stepping_id);
void init_gpu_hw_info(engine_t *engine, cl_device_id device, cl_context context,
compute::gpu_arch_t &gpu_arch, int &stepping_id,
bool &mayiuse_ngen_kernels);

} // namespace ocl
} // namespace gpu
Expand Down
8 changes: 5 additions & 3 deletions src/gpu/ocl/verbose.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2021 Intel Corporation
* Copyright 2019-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -39,8 +39,10 @@ void print_verbose_header() {
auto s_name = dev_info ? dev_info->name() : "unknown";
auto s_ver = dev_info ? dev_info->runtime_version().str() : "unknown";

printf("onednn_verbose,info,gpu,engine,%d,name:%s,driver_version:%s\n",
(int)i, s_name.c_str(), s_ver.c_str());
// dev_info is treated as nullable just above (name and driver version fall
// back to "unknown"), so guard the nGEN query the same way instead of
// dereferencing a null pointer; a missing device info reports "disabled".
printf("onednn_verbose,info,gpu,engine,%d,name:%s,driver_version:%s,"
        "binary_kernels:%s\n",
        (int)i, s_name.c_str(), s_ver.c_str(),
        (dev_info && dev_info->mayiuse_ngen_kernels()) ? "enabled"
                                                       : "disabled");
}
}

Expand Down
5 changes: 3 additions & 2 deletions src/sycl/sycl_device_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ status_t sycl_device_info_t::init_arch(engine_t *engine) {
clCreateContext(nullptr, 1, &ocl_dev, nullptr, nullptr, &err));
OCL_CHECK(err);

gpu::ocl::init_gpu_hw_info(
ocl_dev_wrapper, ocl_ctx_wrapper, gpu_arch_, stepping_id_);
gpu::ocl::init_gpu_hw_info(engine, ocl_dev_wrapper, ocl_ctx_wrapper,
gpu_arch_, stepping_id_, mayiuse_ngen_kernels_);
} else if (be == backend_t::level0) {
// TODO: add support for L0 binary ngen check
// XXX: query from ocl_engine for now
Expand All @@ -68,6 +68,7 @@ status_t sycl_device_info_t::init_arch(engine_t *engine) {
auto *dev_info = compute_engine->device_info();
gpu_arch_ = dev_info->gpu_arch();
stepping_id_ = dev_info->stepping_id();
mayiuse_ngen_kernels_ = dev_info->mayiuse_ngen_kernels();
} else {
assert(!"not_expected");
}
Expand Down
7 changes: 4 additions & 3 deletions src/sycl/verbose.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright 2019-2021 Intel Corporation
* Copyright 2019-2022 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -41,9 +41,10 @@ void print_verbose_header(engine_kind_t kind) {
auto s_ver = dev_info ? dev_info->runtime_version().str() : "unknown";

printf("onednn_verbose,info,%s,engine,%d,backend:%s,name:%s,driver_"
"version:%s\n",
"version:%s,binary_kernels:%s\n",
s_engine_kind, (int)i, s_backend.c_str(), s_name.c_str(),
s_ver.c_str());
s_ver.c_str(),
dev_info->mayiuse_ngen_kernels() ? "enabled" : "disabled");
}
}

Expand Down

0 comments on commit 58481d6

Please sign in to comment.