create new API to indicate/detect thread usage #18081

Closed
wants to merge 9 commits
4 changes: 3 additions & 1 deletion paddle/fluid/inference/api/analysis_config.cc
@@ -114,6 +114,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// MKLDNN related.
CP_MEMBER(use_mkldnn_);
CP_MEMBER(mkldnn_enabled_op_types_);
CP_MEMBER(mkldnn_disable_cache_);
// Quantization related.
CP_MEMBER(use_mkldnn_quantizer_);
CP_MEMBER(mkldnn_quantizer_config_);
@@ -150,9 +151,10 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
Update();
}

void AnalysisConfig::EnableMKLDNN() {
void AnalysisConfig::EnableMKLDNN(int mkldnn_disable_cache) {
#ifdef PADDLE_WITH_MKLDNN
use_mkldnn_ = true;
mkldnn_disable_cache_ = mkldnn_disable_cache;
#else
LOG(ERROR) << "Please compile with MKLDNN first to use MKLDNN";
use_mkldnn_ = false;
28 changes: 28 additions & 0 deletions paddle/fluid/inference/api/analysis_predictor.cc
@@ -197,6 +197,15 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
VLOG(3) << "AnalysisPredictor::Run get_cur_thread_id="
<< paddle::platform::get_cur_thread_id()
<< ", mkldnn_disable_cache_=" << config_.mkldnn_disable_cache_
<< "\n";
if (paddle::platform::get_cur_thread_id() == 0)
paddle::platform::set_cur_thread_id(config_.mkldnn_disable_cache_);
#endif
VLOG(3) << "Predictor::predict";
inference::Timer timer;
timer.tic();
@@ -238,6 +247,11 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1);
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
// Reset the id so the disable flag does not carry over when the thread is reused from a pool
if (config_.mkldnn_disable_cache_ > 0) paddle::platform::set_cur_thread_id(0);
#endif

return true;
}
@@ -595,6 +609,15 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(

bool AnalysisPredictor::ZeroCopyRun() {
paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
VLOG(3) << "AnalysisPredictor::ZeroCopyRun get_cur_thread_id="
<< paddle::platform::get_cur_thread_id()
<< ", mkldnn_disable_cache_=" << config_.mkldnn_disable_cache_
<< "\n";
if (paddle::platform::get_cur_thread_id() == 0)
paddle::platform::set_cur_thread_id(config_.mkldnn_disable_cache_);
#endif
executor_->Run();
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(sub_scope_);
@@ -603,6 +626,11 @@ bool AnalysisPredictor::ZeroCopyRun() {
// recover the cpu_math_library_num_threads to 1, in order to avoid thread
// conflict when integrating it into deployment service.
paddle::platform::SetNumThreads(1);
#ifdef PADDLE_WITH_MKLDNN
// TODO(intel): will refactor this code later
// Reset the id so the disable flag does not carry over when the thread is reused from a pool
if (config_.mkldnn_disable_cache_ > 0) paddle::platform::set_cur_thread_id(0);
#endif

return true;
}
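The guard added to Run() and ZeroCopyRun() is symmetric: if the per-thread id is still 0 (caching enabled), it is overwritten with mkldnn_disable_cache_ before execution, and reset to 0 afterwards so a thread reused from a pool does not keep the flag. Below is a minimal standalone sketch of that pattern; the thread-local variable and Config struct are stand-ins for paddle::platform::get_cur_thread_id/set_cur_thread_id and AnalysisConfig, not Paddle code.

```cpp
#include <iostream>

// Stand-in for paddle::platform: a thread-local id that doubles as a
// "disable per-thread MKL-DNN cache" flag (0 means caching is enabled).
namespace platform {
thread_local int cur_thread_id = 0;
int get_cur_thread_id() { return cur_thread_id; }
void set_cur_thread_id(int id) { cur_thread_id = id; }
}  // namespace platform

struct Config { int mkldnn_disable_cache = 0; };  // stand-in for AnalysisConfig

void RunOnce(const Config& cfg) {
  // Entry: mark the thread if cache disabling was requested.
  if (platform::get_cur_thread_id() == 0)
    platform::set_cur_thread_id(cfg.mkldnn_disable_cache);

  std::cout << "running with tid flag = " << platform::get_cur_thread_id() << "\n";

  // Exit: reset so a pooled thread starts the next request clean.
  if (cfg.mkldnn_disable_cache > 0) platform::set_cur_thread_id(0);
}

int main() {
  RunOnce(Config{1});  // cache disabled for this run
  RunOnce(Config{0});  // cache enabled (default behaviour)
}
```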
4 changes: 3 additions & 1 deletion paddle/fluid/inference/api/paddle_analysis_config.h
@@ -177,8 +177,9 @@ struct AnalysisConfig {
bool ngraph_enabled() const { return use_ngraph_; }

/** Turn on MKLDNN.
*@param mkldnn_disable_cache whether to disable the MKL-DNN cache; a non-zero value disables it
*/
void EnableMKLDNN();
void EnableMKLDNN(int mkldnn_disable_cache = 0);
/** A boolean state telling whether to use the MKLDNN.
*/
bool mkldnn_enabled() const { return use_mkldnn_; }
@@ -287,6 +288,7 @@ struct AnalysisConfig {
bool use_ngraph_{false};
bool use_mkldnn_{false};
std::unordered_set<std::string> mkldnn_enabled_op_types_;
int mkldnn_disable_cache_{0};

bool model_from_memory_{false};

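With the extended signature, a caller opts out of the per-thread primitive cache by passing a non-zero value, while the existing zero-argument call keeps the old behaviour. A hedged usage sketch, assuming the usual AnalysisConfig setters (SetModel, DisableGpu, SetCpuMathLibraryNumThreads) and a placeholder model path:

```cpp
#include "paddle/fluid/inference/api/paddle_analysis_config.h"

// Sketch only: shows the new EnableMKLDNN(int) parameter; the model path
// is a placeholder and the surrounding setup follows the common CPU recipe.
paddle::AnalysisConfig MakeCpuConfig(bool threads_come_from_pool) {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/model");  // placeholder
  config.DisableGpu();
  config.SetCpuMathLibraryNumThreads(1);
  // 0 (default): keep the per-thread MKL-DNN cache.
  // >0: disable it, e.g. when predictor threads are drawn from a pool.
  config.EnableMKLDNN(threads_come_from_pool ? 1 : 0);
  return config;
}
```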
7 changes: 7 additions & 0 deletions paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc
@@ -81,6 +81,13 @@ std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt));
platform::MKLDNNHandler::AppendKey(&key,
std::to_string(multi_input[0]->format()));
if (platform::get_cur_thread_id() == 0) {
auto tid = std::this_thread::get_id();
std::stringstream ss;
ss << tid;
platform::MKLDNNHandler::AppendKey(&key, "-t:");
platform::MKLDNNHandler::AppendKey(&key, ss.str());
}
return key;
}

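When caching is active (current thread id is 0), CreateKey appends a "-t:&lt;thread id&gt;" suffix so concurrent threads do not collide on the same blob; when the cache is disabled the suffix is skipped, since SetBlob returns early anyway. A self-contained sketch of that key construction, with plain string concatenation in place of MKLDNNHandler::AppendKey:

```cpp
#include <sstream>
#include <string>
#include <thread>

// Sketch only: append the current thread id to a cache key when per-thread
// caching is in use (flag == 0), mirroring the operator-kernel CreateKey code.
std::string CreateKeySketch(const std::string& base, int cur_thread_flag) {
  std::string key = base;
  if (cur_thread_flag == 0) {          // caching enabled
    std::stringstream ss;
    ss << std::this_thread::get_id();  // std::thread::id is streamable
    key += "-t:" + ss.str();
  }
  return key;
}
```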
9 changes: 6 additions & 3 deletions paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
@@ -221,6 +221,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
user_weights_memory_p, pipeline, is_test);

std::shared_ptr<mkldnn::memory> dst_memory_p;
std::shared_ptr<mkldnn::memory> user_residual_memory_p;

if (fuse_residual_conn) {
auto residual_param = ctx.Input<Tensor>("ResidualData");
@@ -243,7 +244,7 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

auto user_residual_md = platform::MKLDNNMemDesc(
residual_data_tz, residual_data_type, residual_param->format());
auto user_residual_memory_p = handler.AcquireResidualDataMemory(
user_residual_memory_p = handler.AcquireResidualDataMemory(
user_residual_md, to_void_cast<T>(residual_param_data));

dst_memory_p = handler.AcquireDstMemoryFromResidualDataMemory(
@@ -263,14 +264,16 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

// create convolution op primitive
std::shared_ptr<mkldnn::convolution_forward> conv_p;
std::shared_ptr<mkldnn::memory> user_bias_memory_p;
std::shared_ptr<mkldnn::memory> bias_memory_p;
if (bias) {
const T* bias_data = bias->data<T>();
auto user_bias_md = platform::MKLDNNMemDesc(
{bias_tz}, platform::MKLDNNGetDataType<T>(), memory::format::x);
auto user_bias_memory_p =
user_bias_memory_p =
handler.AcquireBiasMemory(user_bias_md, to_void_cast<T>(bias_data));

auto bias_memory_p =
bias_memory_p =
handler.AcquireBiasMemoryFromPrimitive(user_bias_memory_p, pipeline);
conv_p = handler.AcquireConvolution(src_memory_p, weights_memory_p,
bias_memory_p, dst_memory_p);
15 changes: 12 additions & 3 deletions paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc
@@ -48,6 +48,14 @@ std::string CreateKey(const paddle::framework::ExecutionContext& ctx,
platform::MKLDNNHandler::AppendKey(&key, std::to_string(dt));
platform::MKLDNNHandler::AppendKey(&key, std::to_string(fmt));
platform::MKLDNNHandler::AppendKey(&key, suffix);

if (platform::get_cur_thread_id() == 0) {
auto tid = std::this_thread::get_id();
std::stringstream ss;
ss << tid;
platform::MKLDNNHandler::AppendKey(&key, "-t:");
platform::MKLDNNHandler::AppendKey(&key, ss.str());
}
return key;
}

@@ -130,6 +138,7 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {

auto pool_p =
std::static_pointer_cast<pooling_forward>(dev_ctx.GetBlob(key_pool_p));
std::shared_ptr<mkldnn::memory> src_memory, dst_memory;
if (pool_p == nullptr) {
const std::vector<int>& padding_left_top(paddings);
std::vector<int> padding_right_bottom(paddings);
@@ -158,9 +167,9 @@ class PoolMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
// save pool_pd into global device context to be referred in backward path
if (!is_test) dev_ctx.SetBlob(key_pool_pd, pool_pd);

auto src_memory = std::make_shared<memory>(pool_pd->src_primitive_desc(),
to_void_cast<T>(input_data));
auto dst_memory =
src_memory = std::make_shared<memory>(pool_pd->src_primitive_desc(),
to_void_cast<T>(input_data));
dst_memory =
std::make_shared<memory>(pool_pd->dst_primitive_desc(), output_data);

dev_ctx.SetBlob(key_pool_src_mem_p, src_memory);
5 changes: 4 additions & 1 deletion paddle/fluid/platform/device_context.cc
@@ -413,6 +413,9 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,

int tid = platform::get_cur_thread_id();

// tid doubles as a cache flag: tid > 0 means the per-thread cache is disabled, so skip storing the blob
if (tid > 0) return;

std::lock_guard<std::mutex> lock(*p_mutex_);

// Find KeyBlob for current thread
@@ -434,7 +437,7 @@ void MKLDNNDeviceContext::SetBlob(const std::string& name,
} else {
key_it->second = data; // set data to existing blob
}

VLOG(3) << "MKLDNNDeviceContext::SetBlob " << name << "\n";
// lock will be automatically released when out of scope
return;
}
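SetBlob now treats a positive thread id as "cache disabled" and returns before touching the blob map, so primitives created on such a thread are never stored. A simplified sketch of that control flow; the map and mutex below are stand-ins for the real MKLDNNDeviceContext members:

```cpp
#include <map>
#include <memory>
#include <mutex>
#include <string>

// Sketch only: early-return when the per-thread cache is disabled.
class BlobCacheSketch {
 public:
  void SetBlob(const std::string& name, std::shared_ptr<void> data,
               int cur_thread_id) {
    if (cur_thread_id > 0) return;  // cache disabled: drop the blob
    std::lock_guard<std::mutex> lock(mutex_);
    blobs_[name] = std::move(data);  // insert or overwrite
  }

 private:
  std::mutex mutex_;
  std::map<std::string, std::shared_ptr<void>> blobs_;
};
```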
3 changes: 3 additions & 0 deletions paddle/fluid/platform/mkldnn_reuse.h
@@ -38,6 +38,9 @@ class MKLDNNHandler {
std::stringstream ss;
ss << tid;
key_ = key_common_ + "-t:" + ss.str();
if (platform::get_cur_thread_id() > 0) {
key_ = key_common_;
}
}

std::shared_ptr<mkldnn::memory> AcquireSrcMemory(
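MKLDNNHandler mirrors the same switch on the handler side: with caching enabled it scopes key_ by thread id, and with the cache disabled (positive id) it falls back to the shared key_common_, since nothing will be stored. A short sketch of that key selection, using illustrative names:

```cpp
#include <sstream>
#include <string>
#include <thread>

// Sketch only: choose between a shared key and a per-thread key.
std::string MakeHandlerKey(const std::string& key_common, int cur_thread_flag) {
  if (cur_thread_flag > 0) return key_common;  // cache disabled: shared key
  std::stringstream ss;
  ss << std::this_thread::get_id();
  return key_common + "-t:" + ss.str();        // cache enabled: per-thread key
}
```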
@@ -71,7 +71,7 @@ def on_compression_begin(self, context):
infer_config.switch_ir_optim(True)
infer_config.disable_gpu()
infer_config.set_model(self.fp32_model_path)
infer_config.enable_mkldnn()
infer_config.enable_mkldnn(0)
infer_config.set_cpu_math_library_num_threads(
self.cpu_math_library_num_threads)
