diff --git a/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp b/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp index 7936130997b6a2..d8f65b086fcbe0 100644 --- a/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp +++ b/src/inference/src/dev/threading/cpu_streams_executor_internal.cpp @@ -131,23 +131,25 @@ void reserve_cpu_by_streams_info(const std::vector> _streams_in } for (size_t i = 0; i < _cpu_mapping_table.size(); i++) { - std::string cpu_string = std::to_string(_cpu_mapping_table[i][CPU_MAP_CORE_TYPE]) + - std::to_string(_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID]) + - std::to_string(_cpu_mapping_table[i][CPU_MAP_SOCKET_ID]); - for (size_t j = 0; j < stream_conditions.size(); j++) { - if (std::find(stream_conditions[j].begin(), stream_conditions[j].end(), cpu_string) != - stream_conditions[j].end()) { - _stream_processors[stream_pos[j]].push_back(_cpu_mapping_table[i][CPU_MAP_PROCESSOR_ID]); - _cpu_mapping_table[i][CPU_MAP_USED_FLAG] = _cpu_status; - if (static_cast(_stream_processors[stream_pos[j]].size()) == - streams_table[j][THREADS_PER_STREAM]) { - stream_pos[j]++; - stream_num[j]++; - } - if (stream_num[j] >= streams_table[j][NUMBER_OF_STREAMS]) { - stream_conditions[j].clear(); + if (_cpu_mapping_table[i][CPU_MAP_USED_FLAG] == NOT_USED) { + std::string cpu_string = std::to_string(_cpu_mapping_table[i][CPU_MAP_CORE_TYPE]) + + std::to_string(_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID]) + + std::to_string(_cpu_mapping_table[i][CPU_MAP_SOCKET_ID]); + for (size_t j = 0; j < stream_conditions.size(); j++) { + if (std::find(stream_conditions[j].begin(), stream_conditions[j].end(), cpu_string) != + stream_conditions[j].end()) { + _stream_processors[stream_pos[j]].push_back(_cpu_mapping_table[i][CPU_MAP_PROCESSOR_ID]); + _cpu_mapping_table[i][CPU_MAP_USED_FLAG] = _cpu_status; + if (static_cast(_stream_processors[stream_pos[j]].size()) == + streams_table[j][THREADS_PER_STREAM]) { + stream_pos[j]++; + stream_num[j]++; + } + if (stream_num[j] >= streams_table[j][NUMBER_OF_STREAMS]) { + stream_conditions[j].clear(); + } + break; } - break; } } } diff --git a/src/inference/tests/unit/cpu_reserve_test.cpp b/src/inference/tests/unit/cpu_reserve_test.cpp index 7a5427d777bea7..e5fe6b40abdf7b 100644 --- a/src/inference/tests/unit/cpu_reserve_test.cpp +++ b/src/inference/tests/unit/cpu_reserve_test.cpp @@ -44,6 +44,12 @@ class LinuxCpuReserveTests : public ov::test::TestsCommon, test_data._cpu_status); ASSERT_EQ(test_data._stream_processors, test_processors); + for (size_t i = 0; i < test_processors.size(); i++) { + for (size_t j = 0; j < test_processors[i].size(); j++) { + ASSERT_EQ(test_data._cpu_mapping_table[test_processors[i][j]][CPU_MAP_USED_FLAG], + test_data._cpu_status); + } + } } }; @@ -869,8 +875,6 @@ LinuxCpuReserveTestCase _1socket_16cores_hyper_4streams_ecoreonly = { }, NOT_USED, }; -// streams_info_table={1, MAIN_CORE_PROC, 36}, but the number of physical cores is 18, -// in this case, threads are assigned on physical and logical cores. LinuxCpuReserveTestCase _1socket_18cores_hyper_1streams = { 36, 1, @@ -933,6 +937,38 @@ LinuxCpuReserveTestCase _1socket_18cores_hyper_2streams = { }, NOT_USED, }; +// other plugin reserved 2 threads +LinuxCpuReserveTestCase _1socket_18cores_hyper_plugin_reserve_2threads = { + 36, + 1, + {{36, 18, 0, 18, 0, 0}}, + { + {0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, + {2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, {3, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, + {4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {5, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, + {6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, + {8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, + {10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, + {12, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {13, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, + {14, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, {15, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, + {16, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {17, 0, 0, 9, HYPER_THREADING_PROC, 8, -1}, + {18, 0, 0, 0, MAIN_CORE_PROC, 0, CPU_USED}, {19, 0, 0, 1, MAIN_CORE_PROC, 1, CPU_USED}, + {20, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {21, 0, 0, 3, MAIN_CORE_PROC, 3, -1}, + {22, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {23, 0, 0, 5, MAIN_CORE_PROC, 5, -1}, + {24, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {25, 0, 0, 7, MAIN_CORE_PROC, 7, -1}, + {26, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {27, 0, 0, 9, MAIN_CORE_PROC, 9, -1}, + {28, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {29, 0, 0, 11, MAIN_CORE_PROC, 11, -1}, + {30, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, {31, 0, 0, 13, MAIN_CORE_PROC, 13, -1}, + {32, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, {33, 0, 0, 15, MAIN_CORE_PROC, 15, -1}, + {34, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, {35, 0, 0, 17, MAIN_CORE_PROC, 17, -1}, + }, + {{16, MAIN_CORE_PROC, 1, 0, 0}, {16, HYPER_THREADING_PROC, 1, 0, 0}}, + { + {20}, {21}, {22}, {23}, {24}, {25}, {26}, {27}, {28}, {29}, {30}, {31}, {32}, {33}, {34}, {35}, + {0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}, {14}, {15}, + }, + NOT_USED, +}; LinuxCpuReserveTestCase _1socket_32cores_hyper_1streams = { 32, 1, @@ -986,6 +1022,7 @@ INSTANTIATE_TEST_SUITE_P(CPUReserve, _1socket_16cores_hyper_4streams_ecoreonly, _1socket_18cores_hyper_1streams, _1socket_18cores_hyper_2streams, + _1socket_18cores_hyper_plugin_reserve_2threads, _1socket_32cores_hyper_1streams)); #endif } // namespace diff --git a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp index 3b8581a1e2e34e..fc04e35748fe6c 100644 --- a/src/plugins/intel_gpu/src/plugin/compiled_model.cpp +++ b/src/plugins/intel_gpu/src/plugin/compiled_model.cpp @@ -35,6 +35,20 @@ std::shared_ptr create_task_executor(const std::sh if (config.get_property(ov::internal::exclusive_async_requests)) { //exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior return plugin->get_executor_manager()->get_executor("GPU"); + } else if (config.get_property(ov::hint::enable_cpu_pinning)) { + auto executor_config = + ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", + 0, + 0, + ov::threading::IStreamsExecutor::ThreadBindingType::CORES, + 1, + 0, + 0, + ov::threading::IStreamsExecutor::Config::PreferredCoreType::BIG, + {{config.get_property(ov::num_streams), MAIN_CORE_PROC, 1, 0, 0}}, + true}; + auto post_config = ov::threading::IStreamsExecutor::Config::reserve_cpu_threads(executor_config); + return std::make_shared(post_config); } else { return std::make_shared( ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_property(ov::num_streams)}); @@ -250,6 +264,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const { // Configs ov::PropertyName{ov::enable_profiling.name(), PropertyMutability::RO}, + ov::PropertyName{ov::hint::enable_cpu_pinning.name(), PropertyMutability::RO}, ov::PropertyName{ov::hint::model_priority.name(), PropertyMutability::RO}, ov::PropertyName{ov::intel_gpu::hint::host_task_priority.name(), PropertyMutability::RO}, ov::PropertyName{ov::intel_gpu::hint::queue_priority.name(), PropertyMutability::RO}, diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 55f4f5e7a42065..388269ddbb424d 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -593,6 +593,7 @@ std::vector Plugin::get_supported_properties() const { ov::PropertyName{ov::num_streams.name(), PropertyMutability::RW}, ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RW}, ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW}, + ov::PropertyName{ov::hint::enable_cpu_pinning.name(), PropertyMutability::RW}, ov::PropertyName{ov::device::id.name(), PropertyMutability::RW}, }; diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp index e1375ef14ddb47..1b4719660820fd 100644 --- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp +++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp @@ -48,6 +48,7 @@ void ExecutionConfig::set_default() { std::make_tuple(ov::hint::performance_mode, ov::hint::PerformanceMode::LATENCY, PerformanceModeValidator()), std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::PERFORMANCE), std::make_tuple(ov::hint::num_requests, 0), + std::make_tuple(ov::hint::enable_cpu_pinning, false), std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM), std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM), diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp index 31c4d4884f05d1..39711f74105596 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_plugin/properties_tests.cpp @@ -42,6 +42,7 @@ const std::vector gpu_setcore_properties = { const std::vector gpu_compileModel_properties = { {ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY), ov::hint::num_requests(10), + ov::hint::enable_cpu_pinning(true), ov::enable_profiling(true)}}; INSTANTIATE_TEST_SUITE_P(smoke_gpuCompileModelBehaviorTests,