diff --git a/sycl/doc/extensions/proposed/sycl_ext_intel_queue_index.asciidoc b/sycl/doc/extensions/supported/sycl_ext_intel_queue_index.asciidoc similarity index 95% rename from sycl/doc/extensions/proposed/sycl_ext_intel_queue_index.asciidoc rename to sycl/doc/extensions/supported/sycl_ext_intel_queue_index.asciidoc index 245583276aabc..ed808e32589dc 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_intel_queue_index.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_intel_queue_index.asciidoc @@ -43,11 +43,7 @@ SYCL specification refer to that revision. == Status -This is a proposed extension specification, intended to gather community -feedback. Interfaces defined in this specification may not be implemented yet -or may be in a preliminary state. The specification itself may also change in -incompatible ways before it is finalized. *Shipping software products should -not rely on APIs defined in this specification.* +This extension is implemented and fully supported by DPC++. == Overview diff --git a/sycl/include/sycl/detail/pi.def b/sycl/include/sycl/detail/pi.def index 59dab0c4721a1..2717d82bc5607 100644 --- a/sycl/include/sycl/detail/pi.def +++ b/sycl/include/sycl/detail/pi.def @@ -43,6 +43,7 @@ _PI_API(piextContextGetNativeHandle) _PI_API(piextContextCreateWithNativeHandle) // Queue _PI_API(piQueueCreate) +_PI_API(piextQueueCreate) _PI_API(piQueueGetInfo) _PI_API(piQueueFinish) _PI_API(piQueueFlush) diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 33c41767c4c44..338546b6482de 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -65,9 +65,12 @@ // partitioning by affinity domain is disabled by default and can be temporarily // restored via SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING // environment variable. +// 12.20 Added piextQueueCreate API to be used instead of piQueueCreate, also +// added PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES for piDeviceGetInfo. +// Both are needed to support sycl_ext_intel_queue_index extension. #define _PI_H_VERSION_MAJOR 12 -#define _PI_H_VERSION_MINOR 19 +#define _PI_H_VERSION_MINOR 20 #define _PI_STRING_HELPER(a) #a #define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) @@ -295,6 +298,9 @@ typedef enum { // Return 0 if device doesn't have any memory modules. Return the minimum of // the bus width values if there are several memory modules on the device. PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH = 0x10031, + // Return 1 if the device doesn't have a notion of a "queue index". Otherwise, + // return the number of queue indices that are available for this device. + PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES = 0x10032, PI_DEVICE_INFO_ATOMIC_64 = 0x10110, PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111, PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000, @@ -587,13 +593,17 @@ constexpr pi_usm_mem_properties PI_MEM_USM_ALLOC_BUFFER_LOCATION = 0x419E; // NOTE: queue properties are implemented this way to better support bit // manipulations using pi_queue_properties = pi_bitfield; -constexpr pi_queue_properties PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE = (1 << 0); -constexpr pi_queue_properties PI_QUEUE_PROFILING_ENABLE = (1 << 1); -constexpr pi_queue_properties PI_QUEUE_ON_DEVICE = (1 << 2); -constexpr pi_queue_properties PI_QUEUE_ON_DEVICE_DEFAULT = (1 << 3); -constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS = (1 << 4); -constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_PRIORITY_LOW = (1 << 5); -constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_PRIORITY_HIGH = (1 << 6); +constexpr pi_queue_properties PI_QUEUE_FLAGS = -1; +constexpr pi_queue_properties PI_QUEUE_COMPUTE_INDEX = -2; +// clang-format off +constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE = (1 << 0); +constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE = (1 << 1); +constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE = (1 << 2); +constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT = (1 << 3); +constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS = (1 << 4); +constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW = (1 << 5); +constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH = (1 << 6); +// clang-format on using pi_result = _pi_result; using pi_platform_info = _pi_platform_info; @@ -1125,9 +1135,18 @@ __SYCL_EXPORT pi_result piextContextCreateWithNativeHandle( // // Queue // + +// TODO: Remove during next ABI break and rename piextQueueCreate to +// piQueueCreate. __SYCL_EXPORT pi_result piQueueCreate(pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue); +/// \param properties points to a zero-terminated array of extra data describing +/// desired queue properties. Format is +/// {[PROPERTY[, property-specific elements of data]*,]* 0} +__SYCL_EXPORT pi_result piextQueueCreate(pi_context context, pi_device device, + pi_queue_properties *properties, + pi_queue *queue); __SYCL_EXPORT pi_result piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, diff --git a/sycl/include/sycl/detail/property_helper.hpp b/sycl/include/sycl/detail/property_helper.hpp index a7897d182d8f6..8be70fe39ae58 100644 --- a/sycl/include/sycl/detail/property_helper.hpp +++ b/sycl/include/sycl/detail/property_helper.hpp @@ -57,7 +57,8 @@ enum PropWithDataKind { ImageContextBound = 3, BufferMemChannel = 4, AccPropBufferLocation = 5, - PropWithDataKindSize = 6, + QueueComputeIndex = 6, + PropWithDataKindSize = 7, }; // Base class for dataless properties, needed to check that the type of an diff --git a/sycl/include/sycl/feature_test.hpp.in b/sycl/include/sycl/feature_test.hpp.in index 65766ddd35d3b..5b4b3b5db9a84 100755 --- a/sycl/include/sycl/feature_test.hpp.in +++ b/sycl/include/sycl/feature_test.hpp.in @@ -64,6 +64,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) { #define SYCL_EXT_INTEL_MEM_CHANNEL_PROPERTY 1 #define SYCL_EXT_INTEL_USM_ADDRESS_SPACES 2 #define SYCL_EXT_INTEL_RUNTIME_BUFFER_LOCATION 1 +#define SYCL_EXT_INTEL_QUEUE_INDEX 1 #define SYCL_EXT_ONEAPI_BACKEND_LEVEL_ZERO 3 #define SYCL_EXT_ONEAPI_USM_DEVICE_READ_ONLY 1 #define SYCL_EXT_ONEAPI_KERNEL_PROPERTIES 1 diff --git a/sycl/include/sycl/info/ext_intel_device_traits.def b/sycl/include/sycl/info/ext_intel_device_traits.def index 4b2ff5a4c2e15..e58b27af69756 100644 --- a/sycl/include/sycl/info/ext_intel_device_traits.def +++ b/sycl/include/sycl/info/ext_intel_device_traits.def @@ -15,6 +15,7 @@ __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, uuid, detail::uuid_type, PI_DEVICE_ __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, free_memory, pi_uint64, PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY) __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_clock_rate, pi_uint32, PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE) __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_bus_width, pi_uint32, PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH) +__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_compute_queue_indices, pi_int32, PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES) #ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC #undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF diff --git a/sycl/include/sycl/properties/queue_properties.hpp b/sycl/include/sycl/properties/queue_properties.hpp index d0865d1e16f42..3bae1890c6aa8 100644 --- a/sycl/include/sycl/properties/queue_properties.hpp +++ b/sycl/include/sycl/properties/queue_properties.hpp @@ -51,6 +51,24 @@ class use_default_stream // clang-format on } // namespace property::queue +namespace ext { +namespace intel { +namespace property { +namespace queue { +class compute_index : public sycl::detail::PropertyWithData< + sycl::detail::PropWithDataKind::QueueComputeIndex> { +public: + compute_index(int idx) : idx(idx) {} + int get_index() { return idx; } + +private: + int idx; +}; +} // namespace queue +} // namespace property +} // namespace intel +} // namespace ext + // Forward declaration class queue; @@ -78,6 +96,9 @@ struct is_property_of template <> struct is_property_of : std::true_type {}; +template <> +struct is_property_of + : std::true_type {}; } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/plugins/cuda/pi_cuda.cpp b/sycl/plugins/cuda/pi_cuda.cpp index d704ded971b25..8fb2368995b62 100644 --- a/sycl/plugins/cuda/pi_cuda.cpp +++ b/sycl/plugins/cuda/pi_cuda.cpp @@ -489,7 +489,7 @@ _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue, streamToken_{stream_token}, evEnd_{nullptr}, evStart_{nullptr}, evQueued_{nullptr}, queue_{queue}, stream_{stream}, context_{context} { - bool profilingEnabled = queue_->properties_ & PI_QUEUE_PROFILING_ENABLE; + bool profilingEnabled = queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE; PI_CHECK_ERROR(cuEventCreate( &evEnd_, profilingEnabled ? CU_EVENT_DEFAULT : CU_EVENT_DISABLE_TIMING)); @@ -526,7 +526,7 @@ pi_result _pi_event::start() { pi_result result = PI_SUCCESS; try { - if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) { + if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { // NOTE: This relies on the default stream to be unused. result = PI_CHECK_ERROR(cuEventRecord(evQueued_, 0)); result = PI_CHECK_ERROR(cuEventRecord(evStart_, stream_)); @@ -633,7 +633,7 @@ pi_result _pi_event::release() { PI_CHECK_ERROR(cuEventDestroy(evEnd_)); - if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) { + if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { PI_CHECK_ERROR(cuEventDestroy(evQueued_)); PI_CHECK_ERROR(cuEventDestroy(evStart_)); } @@ -1681,14 +1681,14 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: { // The mandated minimum capability: - auto capability = - PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE | + PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; return getInfo(param_value_size, param_value, param_value_size_ret, capability); } case PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: { // The mandated minimum capability: - auto capability = PI_QUEUE_PROFILING_ENABLE; + auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE; return getInfo(param_value_size, param_value, param_value_size_ret, capability); } @@ -1945,6 +1945,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, sycl::detail::pi::assertion(value >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, value); } + case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { + return getInfo(param_value_size, param_value, param_value_size_ret, + pi_int32{1}); + } // TODO: Investigate if this information is available on CUDA. case PI_DEVICE_INFO_DEVICE_ID: @@ -2501,7 +2505,7 @@ pi_result cuda_piQueueCreate(pi_context context, pi_device device, } const bool is_out_of_order = - properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + properties & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; std::vector computeCuStreams( is_out_of_order ? _pi_queue::default_num_compute_streams : 1); @@ -2524,6 +2528,17 @@ pi_result cuda_piQueueCreate(pi_context context, pi_device device, return PI_ERROR_OUT_OF_RESOURCES; } } +pi_result cuda_piextQueueCreate(pi_context Context, pi_device Device, + pi_queue_properties *Properties, + pi_queue *Queue) { + assert(Properties); + // Expect flags mask to be passed first. + assert(Properties[0] == PI_QUEUE_FLAGS); + pi_queue_properties Flags = Properties[1]; + // Extra data isn't supported yet. + assert(Properties[2] == 0); + return cuda_piQueueCreate(Context, Device, Flags, Queue); +} pi_result cuda_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, @@ -3849,7 +3864,8 @@ pi_result cuda_piEventGetProfilingInfo(pi_event event, assert(event != nullptr); pi_queue queue = event->get_queue(); - if (queue == nullptr || !(queue->properties_ & PI_QUEUE_PROFILING_ENABLE)) { + if (queue == nullptr || + !(queue->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE)) { return PI_ERROR_PROFILING_INFO_NOT_AVAILABLE; } @@ -5473,6 +5489,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { cuda_piextContextCreateWithNativeHandle) // Queue _PI_CL(piQueueCreate, cuda_piQueueCreate) + _PI_CL(piextQueueCreate, cuda_piextQueueCreate) _PI_CL(piQueueGetInfo, cuda_piQueueGetInfo) _PI_CL(piQueueFinish, cuda_piQueueFinish) _PI_CL(piQueueFlush, cuda_piQueueFlush) diff --git a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp index fa26c99c96024..88e49410a800f 100644 --- a/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp +++ b/sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp @@ -663,7 +663,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, case PI_DEVICE_INFO_OPENCL_C_VERSION: return ReturnValue(""); case PI_DEVICE_INFO_QUEUE_PROPERTIES: - return ReturnValue(pi_queue_properties{PI_QUEUE_ON_DEVICE}); + return ReturnValue(pi_queue_properties{PI_QUEUE_FLAG_ON_DEVICE}); case PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES: { struct { size_t Arr[3]; @@ -785,6 +785,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, return ReturnValue(pi_uint32{0}); case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: return ReturnValue(size_t{1}); + case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: + return ReturnValue(pi_int32{1}); CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS) CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS) @@ -923,11 +925,21 @@ bool _pi_context::checkSurfaceArgument(pi_mem_flags Flags, void *HostPtr) { return true; } +pi_result piextQueueCreate(pi_context Context, pi_device Device, + pi_queue_properties *Properties, pi_queue *Queue) { + assert(Properties); + // Expect flags mask to be passed first. + assert(Properties[0] == PI_QUEUE_FLAGS); + pi_queue_properties Flags = Properties[1]; + // Extra data isn't supported yet. + assert(Properties[2] == 0); + return piQueueCreate(Context, Device, Flags, Queue); +} pi_result piQueueCreate(pi_context Context, pi_device Device, pi_queue_properties Properties, pi_queue *Queue) { ARG_UNUSED(Device); - if (Properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { + if (Properties & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { // TODO : Support Out-of-order Queue *Queue = nullptr; return PI_ERROR_INVALID_QUEUE_PROPERTIES; diff --git a/sycl/plugins/hip/pi_hip.cpp b/sycl/plugins/hip/pi_hip.cpp index e3f565e9bcffa..845c2a60dcb08 100644 --- a/sycl/plugins/hip/pi_hip.cpp +++ b/sycl/plugins/hip/pi_hip.cpp @@ -534,7 +534,7 @@ _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue, assert(type != PI_COMMAND_TYPE_USER); - bool profilingEnabled = queue_->properties_ & PI_QUEUE_PROFILING_ENABLE; + bool profilingEnabled = queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE; PI_CHECK_ERROR(hipEventCreateWithFlags( &evEnd_, profilingEnabled ? hipEventDefault : hipEventDisableTiming)); @@ -562,7 +562,7 @@ pi_result _pi_event::start() { pi_result result = PI_SUCCESS; try { - if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) { + if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { // NOTE: This relies on the default stream to be unused. PI_CHECK_ERROR(hipEventRecord(evQueued_, 0)); PI_CHECK_ERROR(hipEventRecord(evStart_, queue_->get())); @@ -663,7 +663,7 @@ pi_result _pi_event::release() { assert(queue_ != nullptr); PI_CHECK_ERROR(hipEventDestroy(evEnd_)); - if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) { + if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) { PI_CHECK_ERROR(hipEventDestroy(evQueued_)); PI_CHECK_ERROR(hipEventDestroy(evStart_)); } @@ -1588,14 +1588,14 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, } case PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: { // The mandated minimum capability: - auto capability = - PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE | + PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; return getInfo(param_value_size, param_value, param_value_size_ret, capability); } case PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: { // The mandated minimum capability: - auto capability = PI_QUEUE_PROFILING_ENABLE; + auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE; return getInfo(param_value_size, param_value, param_value_size_ret, capability); } @@ -1841,6 +1841,10 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name, sycl::detail::pi::assertion(value >= 0); return getInfo(param_value_size, param_value, param_value_size_ret, value); } + case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { + return getInfo(param_value_size, param_value, param_value_size_ret, + pi_int32{1}); + } // TODO: Implement. case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES: @@ -2378,7 +2382,7 @@ pi_result hip_piQueueCreate(pi_context context, pi_device device, unsigned int flags = 0; const bool is_out_of_order = - properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + properties & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; std::vector computeHipStreams( is_out_of_order ? _pi_queue::default_num_compute_streams : 1); @@ -2401,6 +2405,17 @@ pi_result hip_piQueueCreate(pi_context context, pi_device device, return PI_ERROR_OUT_OF_RESOURCES; } } +pi_result hip_piextQueueCreate(pi_context Context, pi_device Device, + pi_queue_properties *Properties, + pi_queue *Queue) { + assert(Properties); + // Expect flags mask to be passed first. + assert(Properties[0] == PI_QUEUE_FLAGS); + pi_queue_properties Flags = Properties[1]; + // Extra data isn't supported yet. + assert(Properties[2] == 0); + return hip_piQueueCreate(Context, Device, Flags, Queue); +} pi_result hip_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, size_t param_value_size, void *param_value, @@ -3674,7 +3689,8 @@ pi_result hip_piEventGetProfilingInfo(pi_event event, assert(event != nullptr); pi_queue queue = event->get_queue(); - if (queue == nullptr || !(queue->properties_ & PI_QUEUE_PROFILING_ENABLE)) { + if (queue == nullptr || + !(queue->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE)) { return PI_ERROR_PROFILING_INFO_NOT_AVAILABLE; } @@ -5201,6 +5217,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { hip_piextContextCreateWithNativeHandle) // Queue _PI_CL(piQueueCreate, hip_piQueueCreate) + _PI_CL(piextQueueCreate, hip_piextQueueCreate) _PI_CL(piQueueGetInfo, hip_piQueueGetInfo) _PI_CL(piQueueFinish, hip_piQueueFinish) _PI_CL(piQueueFlush, hip_piQueueFlush) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 20e9629377295..f0318a80b2be8 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -1002,19 +1002,20 @@ bool pi_command_list_info_t::isCopy(pi_queue Queue) const { bool _pi_queue::isInOrderQueue() const { // If out-of-order queue property is not set, then this is a in-order queue. - return ((this->Properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) == 0); + return ((this->Properties & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) == + 0); } bool _pi_queue::isDiscardEvents() const { - return ((this->Properties & PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS) != 0); + return ((this->Properties & PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS) != 0); } bool _pi_queue::isPriorityLow() const { - return ((this->Properties & PI_EXT_ONEAPI_QUEUE_PRIORITY_LOW) != 0); + return ((this->Properties & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW) != 0); } bool _pi_queue::isPriorityHigh() const { - return ((this->Properties & PI_EXT_ONEAPI_QUEUE_PRIORITY_HIGH) != 0); + return ((this->Properties & PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH) != 0); } pi_result _pi_queue::resetCommandList(pi_command_list_ptr_t CommandList, @@ -1211,7 +1212,8 @@ _pi_queue::_pi_queue(std::vector &ComputeQueues, std::vector &CopyQueues, pi_context Context, pi_device Device, bool OwnZeCommandQueue, - pi_queue_properties PiQueueProperties) + pi_queue_properties PiQueueProperties, + int ForceComputeIndex) : Context{Context}, Device{Device}, OwnZeCommandQueue{OwnZeCommandQueue}, Properties(PiQueueProperties) { @@ -1227,9 +1229,19 @@ _pi_queue::_pi_queue(std::vector &ComputeQueues, ComputeQueueGroup.ZeQueues.size(), CommandListMap.end()); } if (ComputeQueueGroupInfo.ZeIndex >= 0) { + // Sub-sub-device + + // sycl::ext::intel::property::queue::compute_index works with any + // backend/device by allowing single zero index if multiple compute CCSes + // are not supported. Sub-sub-device falls into the same bucket. + assert(ForceComputeIndex <= 0); ComputeQueueGroup.LowerIndex = ComputeQueueGroupInfo.ZeIndex; ComputeQueueGroup.UpperIndex = ComputeQueueGroupInfo.ZeIndex; ComputeQueueGroup.NextIndex = ComputeQueueGroupInfo.ZeIndex; + } else if (ForceComputeIndex >= 0) { + ComputeQueueGroup.LowerIndex = ForceComputeIndex; + ComputeQueueGroup.UpperIndex = ForceComputeIndex; + ComputeQueueGroup.NextIndex = ForceComputeIndex; } else { // Set-up to round-robin across allowed range of engines. uint32_t FilterLowerIndex = getRangeOfAllowedComputeEngines().first; @@ -3034,8 +3046,9 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, // TODO: To find out correct value return ReturnValue(""); case PI_DEVICE_INFO_QUEUE_PROPERTIES: - return ReturnValue(pi_queue_properties{ - PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | PI_QUEUE_PROFILING_ENABLE}); + return ReturnValue( + pi_queue_properties{PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | + PI_QUEUE_FLAG_PROFILING_ENABLE}); case PI_DEVICE_INFO_EXECUTION_CAPABILITIES: return ReturnValue( pi_device_exec_capabilities{PI_DEVICE_EXEC_CAPABILITIES_NATIVE_KERNEL}); @@ -3346,6 +3359,15 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName, Device->ZeDeviceMemoryProperties->end(), Comp); return ReturnValue(pi_uint32{MinIt->maxBusWidth}); } + case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { + if (Device->QueueGroup[_pi_queue::queue_type::Compute].ZeIndex >= 0) + // Sub-sub-device represents a particular compute index already. + return ReturnValue(pi_int32{1}); + + auto ZeDeviceNumIndices = Device->QueueGroup[_pi_queue::queue_type::Compute] + .ZeProperties.numQueues; + return ReturnValue(pi_cast(ZeDeviceNumIndices)); + } case PI_DEVICE_INFO_GPU_EU_COUNT: { pi_uint32 count = Device->ZeDeviceProperties->numEUsPerSubslice * Device->ZeDeviceProperties->numSubslicesPerSlice * @@ -3709,16 +3731,33 @@ pi_result piContextRelease(pi_context Context) { } pi_result piQueueCreate(pi_context Context, pi_device Device, - pi_queue_properties Properties, pi_queue *Queue) { + pi_queue_properties Flags, pi_queue *Queue) { + pi_queue_properties Properties[] = {PI_QUEUE_FLAGS, Flags, 0}; + return piextQueueCreate(Context, Device, Properties, Queue); +} +pi_result piextQueueCreate(pi_context Context, pi_device Device, + pi_queue_properties *Properties, pi_queue *Queue) { + PI_ASSERT(Properties, PI_ERROR_INVALID_VALUE); + // Expect flags mask to be passed first. + PI_ASSERT(Properties[0] == PI_QUEUE_FLAGS, PI_ERROR_INVALID_VALUE); + pi_queue_properties Flags = Properties[1]; + + PI_ASSERT(Properties[2] == 0 || + (Properties[2] == PI_QUEUE_COMPUTE_INDEX && Properties[4] == 0), + PI_ERROR_INVALID_VALUE); + auto ForceComputeIndex = Properties[2] == PI_QUEUE_COMPUTE_INDEX + ? static_cast(Properties[3]) + : -1; // Use default/round-robin. // Check that unexpected bits are not set. - PI_ASSERT(!(Properties & ~(PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | - PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_ON_DEVICE | - PI_QUEUE_ON_DEVICE_DEFAULT | - PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS | - PI_EXT_ONEAPI_QUEUE_PRIORITY_LOW | - PI_EXT_ONEAPI_QUEUE_PRIORITY_HIGH)), - PI_ERROR_INVALID_VALUE); + PI_ASSERT( + !(Flags & ~(PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | + PI_QUEUE_FLAG_PROFILING_ENABLE | PI_QUEUE_FLAG_ON_DEVICE | + PI_QUEUE_FLAG_ON_DEVICE_DEFAULT | + PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS | + PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW | + PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH)), + PI_ERROR_INVALID_VALUE); PI_ASSERT(Context, PI_ERROR_INVALID_CONTEXT); PI_ASSERT(Queue, PI_ERROR_INVALID_QUEUE); @@ -3748,7 +3787,7 @@ pi_result piQueueCreate(pi_context Context, pi_device Device, try { *Queue = new _pi_queue(ZeComputeCommandQueues, ZeCopyCommandQueues, Context, - Device, true, Properties); + Device, true, Flags, ForceComputeIndex); } catch (const std::bad_alloc &) { return PI_ERROR_OUT_OF_HOST_MEMORY; } catch (...) { @@ -5907,7 +5946,7 @@ void _pi_context::addEventToContextCache(pi_event Event) { static pi_result EventCreate(pi_context Context, pi_queue Queue, bool HostVisible, pi_event *RetEvent) { bool ProfilingEnabled = - !Queue || (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0; + !Queue || (Queue->Properties & PI_QUEUE_FLAG_PROFILING_ENABLE) != 0; if (auto CachedEvent = Context->getEventFromContextCache(HostVisible, ProfilingEnabled)) { @@ -6050,7 +6089,7 @@ pi_result piEventGetProfilingInfo(pi_event Event, pi_profiling_info ParamName, std::shared_lock EventLock(Event->Mutex); if (Event->Queue && - (Event->Queue->Properties & PI_QUEUE_PROFILING_ENABLE) == 0) { + (Event->Queue->Properties & PI_QUEUE_FLAG_PROFILING_ENABLE) == 0) { return PI_ERROR_PROFILING_INFO_NOT_AVAILABLE; } diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 63699061cacdf..4e6959d64527f 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -650,10 +650,12 @@ struct _pi_context : _pi_object { }; struct _pi_queue : _pi_object { + // ForceComputeIndex, if non-negative, indicates that the queue must be fixed + // to that particular compute CCS. _pi_queue(std::vector &ComputeQueues, std::vector &CopyQueues, pi_context Context, pi_device Device, bool OwnZeCommandQueue, - pi_queue_properties Properties = 0); + pi_queue_properties Properties = 0, int ForceComputeIndex = -1); using queue_type = _pi_device::queue_group_info_t::type; @@ -1275,7 +1277,7 @@ struct _pi_event : _pi_object { // Tells if this event is with profiling capabilities. bool isProfilingEnabled() const { return !Queue || // tentatively assume user events are profiling enabled - (Queue->Properties & PI_QUEUE_PROFILING_ENABLE) != 0; + (Queue->Properties & PI_QUEUE_FLAG_PROFILING_ENABLE) != 0; } // Keeps the command-queue and command associated with the event. diff --git a/sycl/plugins/opencl/pi_opencl.cpp b/sycl/plugins/opencl/pi_opencl.cpp index 9e3a5e8f57f36..83ac889876167 100644 --- a/sycl/plugins/opencl/pi_opencl.cpp +++ b/sycl/plugins/opencl/pi_opencl.cpp @@ -334,6 +334,11 @@ pi_result piDeviceGetInfo(pi_device device, pi_device_info paramName, out[2] = Max; return PI_SUCCESS; } + case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: { + pi_int32 result = 1; + std::memcpy(paramValue, &result, sizeof(pi_int32)); + return PI_SUCCESS; + } default: cl_int result = clGetDeviceInfo( @@ -461,6 +466,16 @@ pi_result piextDeviceCreateWithNativeHandle(pi_native_handle nativeHandle, return PI_SUCCESS; } +pi_result piextQueueCreate(pi_context Context, pi_device Device, + pi_queue_properties *Properties, pi_queue *Queue) { + assert(Properties); + // Expect flags mask to be passed first. + assert(Properties[0] == PI_QUEUE_FLAGS); + pi_queue_properties Flags = Properties[1]; + // Extra data isn't supported yet. + assert(Properties[2] == 0); + return piQueueCreate(Context, Device, Flags, Queue); +} pi_result piQueueCreate(pi_context context, pi_device device, pi_queue_properties properties, pi_queue *queue) { assert(queue && "piQueueCreate failed, queue argument is null"); @@ -474,9 +489,10 @@ pi_result piQueueCreate(pi_context context, pi_device device, // Check that unexpected bits are not set. assert(!(properties & - ~(PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | - PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_ON_DEVICE | - PI_QUEUE_ON_DEVICE_DEFAULT | PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS))); + ~(PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | + PI_QUEUE_FLAG_PROFILING_ENABLE | PI_QUEUE_FLAG_ON_DEVICE | + PI_QUEUE_FLAG_ON_DEVICE_DEFAULT | + PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS))); // Properties supported by OpenCL backend. cl_command_queue_properties SupportByOpenCL = @@ -1571,6 +1587,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) { _PI_CL(piextContextCreateWithNativeHandle, piextContextCreateWithNativeHandle) // Queue _PI_CL(piQueueCreate, piQueueCreate) + _PI_CL(piextQueueCreate, piextQueueCreate) _PI_CL(piQueueGetInfo, piQueueGetInfo) _PI_CL(piQueueFinish, clFinish) _PI_CL(piQueueFlush, clFlush) diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 748e3399ac947..85ac540a955a5 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -1592,6 +1592,14 @@ get_device_info_host() { PI_ERROR_INVALID_DEVICE); } +template <> +inline int32_t +get_device_info_host() { + throw runtime_error( + "Obtaining max compute queue indices is not supported on HOST device", + PI_ERROR_INVALID_DEVICE); +} + } // namespace detail } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index e67d7e4c8123b..e0a296a8ede3c 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -104,6 +104,18 @@ class queue_impl { "Queue cannot be constructed with both of " "discard_events and enable_profiling."); } + if (has_property()) { + int Idx = get_property() + .get_index(); + int NumIndices = + createSyclObjFromImpl(Device) + .get_info(); + if (Idx < 0 || Idx >= NumIndices) + throw sycl::exception( + make_error_code(errc::invalid), + "Queue compute index must be a non-negative number less than " + "device's number of available compute queue indices."); + } if (!Context->isDeviceValid(Device)) { if (!Context->is_host() && Context->getPlugin().getBackend() == backend::opencl) @@ -297,10 +309,10 @@ class queue_impl { RT::PiQueueProperties CreationFlags = 0; if (Order == QueueOrder::OOO) { - CreationFlags = PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE; + CreationFlags = PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE; } if (MPropList.has_property()) { - CreationFlags |= PI_QUEUE_PROFILING_ENABLE; + CreationFlags |= PI_QUEUE_FLAG_PROFILING_ENABLE; } if (MPropList.has_property< ext::oneapi::cuda::property::queue::use_default_stream>()) { @@ -310,7 +322,7 @@ class queue_impl { .has_property()) { // Pass this flag to the Level Zero plugin to be able to check it from // queue property. - CreationFlags |= PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS; + CreationFlags |= PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS; } // Track that priority settings are not ambiguous. bool PrioritySeen = false; @@ -325,7 +337,7 @@ class queue_impl { make_error_code(errc::invalid), "Queue cannot be constructed with different priorities."); } - CreationFlags |= PI_EXT_ONEAPI_QUEUE_PRIORITY_LOW; + CreationFlags |= PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW; PrioritySeen = true; } if (MPropList.has_property()) { @@ -334,7 +346,7 @@ class queue_impl { make_error_code(errc::invalid), "Queue cannot be constructed with different priorities."); } - CreationFlags |= PI_EXT_ONEAPI_QUEUE_PRIORITY_HIGH; + CreationFlags |= PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH; PrioritySeen = true; } RT::PiQueue Queue{}; @@ -343,8 +355,16 @@ class queue_impl { const detail::plugin &Plugin = getPlugin(); assert(Plugin.getBackend() == MDevice->getPlugin().getBackend()); - RT::PiResult Error = Plugin.call_nocheck( - Context, Device, CreationFlags, &Queue); + RT::PiQueueProperties Properties[] = {PI_QUEUE_FLAGS, CreationFlags, 0, 0, + 0}; + if (has_property()) { + int Idx = get_property() + .get_index(); + Properties[2] = PI_QUEUE_COMPUTE_INDEX; + Properties[3] = static_cast(Idx); + } + RT::PiResult Error = Plugin.call_nocheck( + Context, Device, Properties, &Queue); // If creating out-of-order queue failed and this property is not // supported (for example, on FPGA), it will return diff --git a/sycl/test/abi/pi_level_zero_symbol_check.dump b/sycl/test/abi/pi_level_zero_symbol_check.dump index 2cde4ca788830..e6573b638e9f2 100644 --- a/sycl/test/abi/pi_level_zero_symbol_check.dump +++ b/sycl/test/abi/pi_level_zero_symbol_check.dump @@ -58,6 +58,7 @@ piMemRelease piMemRetain piPlatformGetInfo piPlatformsGet +piPluginGetLastError piPluginInit piProgramBuild piProgramCompile @@ -69,6 +70,7 @@ piProgramLink piProgramRelease piProgramRetain piQueueCreate +piextQueueCreate piQueueFinish piQueueFlush piQueueGetInfo @@ -78,7 +80,6 @@ piSamplerCreate piSamplerGetInfo piSamplerRelease piSamplerRetain -piPluginGetLastError piTearDown piclProgramCreateWithSource piextContextCreateWithNativeHandle diff --git a/sycl/test/abi/pi_opencl_symbol_check.dump b/sycl/test/abi/pi_opencl_symbol_check.dump index 6bd8936a54fb7..3bfece8f62ed4 100644 --- a/sycl/test/abi/pi_opencl_symbol_check.dump +++ b/sycl/test/abi/pi_opencl_symbol_check.dump @@ -26,6 +26,7 @@ piProgramCreate piProgramCreateWithBinary piProgramLink piQueueCreate +piextQueueCreate piQueueGetInfo piSamplerCreate piTearDown diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 2c0c26efa8bec..4438e382c4e9a 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -4192,6 +4192,7 @@ _ZNK4sycl3_V16device8get_infoINS0_3ext5intel4info6device17memory_clock_rateEEENS _ZNK4sycl3_V16device8get_infoINS0_3ext5intel4info6device21gpu_hw_threads_per_euEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_3ext5intel4info6device23gpu_subslices_per_sliceEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_3ext5intel4info6device25gpu_eu_count_per_subsliceEEENS0_6detail19is_device_info_descIT_E11return_typeEv +_ZNK4sycl3_V16device8get_infoINS0_3ext5intel4info6device25max_compute_queue_indicesEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_3ext5intel4info6device4uuidEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_3ext5intel4info6device9device_idEEENS0_6detail19is_device_info_descIT_E11return_typeEv _ZNK4sycl3_V16device8get_infoINS0_3ext6oneapi12experimental4info6device15max_work_groupsILi1EEEEENS0_6detail19is_device_info_descIT_E11return_typeEv diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 2e5f68e24ba66..121ba4fa3795c 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -98,6 +98,7 @@ ??$get_info@Ulocal_mem_size@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_KXZ ??$get_info@Ulocal_mem_type@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA?AW4local_mem_type@info@12@XZ ??$get_info@Umax_clock_frequency@device@info@_V1@sycl@@@device@_V1@sycl@@QEBAIXZ +??$get_info@Umax_compute_queue_indices@device@info@intel@ext@_V1@sycl@@@device@_V1@sycl@@QEBAHXZ ??$get_info@Umax_compute_units@device@info@_V1@sycl@@@device@_V1@sycl@@QEBAIXZ ??$get_info@Umax_constant_args@device@info@_V1@sycl@@@device@_V1@sycl@@QEBAIXZ ??$get_info@Umax_constant_buffer_size@device@info@_V1@sycl@@@device@_V1@sycl@@QEBA_KXZ diff --git a/sycl/unittests/helpers/PiMockPlugin.hpp b/sycl/unittests/helpers/PiMockPlugin.hpp index 84c776333aa94..6663df7f852bd 100644 --- a/sycl/unittests/helpers/PiMockPlugin.hpp +++ b/sycl/unittests/helpers/PiMockPlugin.hpp @@ -322,6 +322,12 @@ inline pi_result mock_piQueueCreate(pi_context context, pi_device device, *queue = createDummyHandle(); return PI_SUCCESS; } +inline pi_result mock_piextQueueCreate(pi_context context, pi_device device, + pi_queue_properties *properties, + pi_queue *queue) { + *queue = createDummyHandle(); + return PI_SUCCESS; +} inline pi_result mock_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name, diff --git a/sycl/unittests/queue/EventClear.cpp b/sycl/unittests/queue/EventClear.cpp index 043616100518e..9dba15d63be69 100644 --- a/sycl/unittests/queue/EventClear.cpp +++ b/sycl/unittests/queue/EventClear.cpp @@ -25,12 +25,13 @@ std::unique_ptr TestContext; const int ExpectedEventThreshold = 128; -pi_result redefinedQueueCreate(pi_context context, pi_device device, - pi_queue_properties properties, - pi_queue *queue) { +pi_result redefinedQueueCreateEx(pi_context context, pi_device device, + pi_queue_properties *properties, + pi_queue *queue) { + assert(properties && properties[0] == PI_QUEUE_FLAGS); // Use in-order queues to force storing events for calling wait on them, // rather than calling piQueueFinish. - if (properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { + if (properties[1] & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { return PI_ERROR_INVALID_QUEUE_PROPERTIES; } return PI_SUCCESS; @@ -71,7 +72,8 @@ pi_result redefinedEventRelease(pi_event event) { } void preparePiMock(unittest::PiMock &Mock) { - Mock.redefineBefore(redefinedQueueCreate); + Mock.redefineBefore( + redefinedQueueCreateEx); Mock.redefineBefore(redefinedEventsWait); Mock.redefineAfter( redefinedEventGetInfoAfter); diff --git a/sycl/unittests/queue/InOrderQueue.cpp b/sycl/unittests/queue/InOrderQueue.cpp index 0279dbf4dfc12..684eae329d819 100644 --- a/sycl/unittests/queue/InOrderQueue.cpp +++ b/sycl/unittests/queue/InOrderQueue.cpp @@ -6,10 +6,13 @@ using namespace sycl; static bool InOrderFlagSeen = false; -pi_result piQueueCreateRedefineBefore(pi_context context, pi_device device, - pi_queue_properties properties, - pi_queue *queue) { - InOrderFlagSeen = !(properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE); +pi_result piextQueueCreateRedefineBefore(pi_context context, pi_device device, + pi_queue_properties *properties, + pi_queue *queue) { + EXPECT_TRUE(properties != nullptr); + EXPECT_TRUE(properties[0] == PI_QUEUE_FLAGS); + InOrderFlagSeen = + !(properties[1] & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE); return PI_SUCCESS; } @@ -17,8 +20,8 @@ TEST(InOrderQueue, CheckFlagIsPassed) { unittest::PiMock Mock; platform Plt = Mock.getPlatform(); - Mock.redefineBefore( - piQueueCreateRedefineBefore); + Mock.redefineBefore( + piextQueueCreateRedefineBefore); EXPECT_FALSE(InOrderFlagSeen); queue q1{}; diff --git a/sycl/unittests/queue/Wait.cpp b/sycl/unittests/queue/Wait.cpp index e32e2c8ad2a33..7b1c48a262ab4 100644 --- a/sycl/unittests/queue/Wait.cpp +++ b/sycl/unittests/queue/Wait.cpp @@ -26,11 +26,12 @@ struct TestCtx { }; static TestCtx TestContext; -pi_result redefinedQueueCreate(pi_context context, pi_device device, - pi_queue_properties properties, - pi_queue *queue) { +pi_result redefinedQueueCreateEx(pi_context context, pi_device device, + pi_queue_properties *properties, + pi_queue *queue) { + assert(properties && properties[0] == PI_QUEUE_FLAGS); if (!TestContext.SupportOOO && - properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) { + properties[1] & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) { return PI_ERROR_INVALID_QUEUE_PROPERTIES; } return PI_SUCCESS; @@ -77,7 +78,8 @@ pi_result redefinedEventRelease(pi_event event) { TEST(QueueWait, QueueWaitTest) { sycl::unittest::PiMock Mock; sycl::platform Plt = Mock.getPlatform(); - Mock.redefineBefore(redefinedQueueCreate); + Mock.redefineBefore( + redefinedQueueCreateEx); Mock.redefineBefore(redefinedQueueFinish); Mock.redefineBefore( redefinedUSMEnqueueMemset);