Skip to content

Commit

Permalink
[SYCL][Level Zero] Implement sycl_ext_intel_queue_index extension (#7599
Browse files Browse the repository at this point in the history
)

The feature needs to pass extra data to `piQueueCreate` which is
impossible with the current interface. As such, and because of the
current ABI freeze, a new `piQueueCreateEx` interface has been added
accepting `pi_queue_properties *Properties` (similarly to other
interfaces allowing optional/additional data) with the plan to retire
the old one at the next ABI break window.

Extension spec: #7520
  • Loading branch information
aelovikov-intel authored Dec 13, 2022
1 parent 86ba180 commit d2ec964
Show file tree
Hide file tree
Showing 23 changed files with 268 additions and 79 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,7 @@ SYCL specification refer to that revision.

== Status

This is a proposed extension specification, intended to gather community
feedback. Interfaces defined in this specification may not be implemented yet
or may be in a preliminary state. The specification itself may also change in
incompatible ways before it is finalized. *Shipping software products should
not rely on APIs defined in this specification.*
This extension is implemented and fully supported by DPC++.


== Overview
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/detail/pi.def
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ _PI_API(piextContextGetNativeHandle)
_PI_API(piextContextCreateWithNativeHandle)
// Queue
_PI_API(piQueueCreate)
_PI_API(piextQueueCreate)
_PI_API(piQueueGetInfo)
_PI_API(piQueueFinish)
_PI_API(piQueueFlush)
Expand Down
35 changes: 27 additions & 8 deletions sycl/include/sycl/detail/pi.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,12 @@
// partitioning by affinity domain is disabled by default and can be temporarily
// restored via SYCL_PI_LEVEL_ZERO_EXPOSE_CSLICE_IN_AFFINITY_PARTITIONING
// environment variable.
// 12.20 Added piextQueueCreate API to be used instead of piQueueCreate, also
// added PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES for piDeviceGetInfo.
// Both are needed to support sycl_ext_intel_queue_index extension.

#define _PI_H_VERSION_MAJOR 12
#define _PI_H_VERSION_MINOR 19
#define _PI_H_VERSION_MINOR 20

#define _PI_STRING_HELPER(a) #a
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b)
Expand Down Expand Up @@ -295,6 +298,9 @@ typedef enum {
// Return 0 if device doesn't have any memory modules. Return the minimum of
// the bus width values if there are several memory modules on the device.
PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH = 0x10031,
// Return 1 if the device doesn't have a notion of a "queue index". Otherwise,
// return the number of queue indices that are available for this device.
PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES = 0x10032,
PI_DEVICE_INFO_ATOMIC_64 = 0x10110,
PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111,
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000,
Expand Down Expand Up @@ -587,13 +593,17 @@ constexpr pi_usm_mem_properties PI_MEM_USM_ALLOC_BUFFER_LOCATION = 0x419E;
// NOTE: queue properties are implemented this way to better support bit
// manipulations
using pi_queue_properties = pi_bitfield;
constexpr pi_queue_properties PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE = (1 << 0);
constexpr pi_queue_properties PI_QUEUE_PROFILING_ENABLE = (1 << 1);
constexpr pi_queue_properties PI_QUEUE_ON_DEVICE = (1 << 2);
constexpr pi_queue_properties PI_QUEUE_ON_DEVICE_DEFAULT = (1 << 3);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_DISCARD_EVENTS = (1 << 4);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_PRIORITY_LOW = (1 << 5);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_PRIORITY_HIGH = (1 << 6);
constexpr pi_queue_properties PI_QUEUE_FLAGS = -1;
constexpr pi_queue_properties PI_QUEUE_COMPUTE_INDEX = -2;
// clang-format off
constexpr pi_queue_properties PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE = (1 << 0);
constexpr pi_queue_properties PI_QUEUE_FLAG_PROFILING_ENABLE = (1 << 1);
constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE = (1 << 2);
constexpr pi_queue_properties PI_QUEUE_FLAG_ON_DEVICE_DEFAULT = (1 << 3);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_DISCARD_EVENTS = (1 << 4);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_LOW = (1 << 5);
constexpr pi_queue_properties PI_EXT_ONEAPI_QUEUE_FLAG_PRIORITY_HIGH = (1 << 6);
// clang-format on

using pi_result = _pi_result;
using pi_platform_info = _pi_platform_info;
Expand Down Expand Up @@ -1125,9 +1135,18 @@ __SYCL_EXPORT pi_result piextContextCreateWithNativeHandle(
//
// Queue
//

// TODO: Remove during next ABI break and rename piextQueueCreate to
// piQueueCreate.
__SYCL_EXPORT pi_result piQueueCreate(pi_context context, pi_device device,
pi_queue_properties properties,
pi_queue *queue);
/// \param properties points to a zero-terminated array of extra data describing
/// desired queue properties. Format is
/// {[PROPERTY[, property-specific elements of data]*,]* 0}
__SYCL_EXPORT pi_result piextQueueCreate(pi_context context, pi_device device,
pi_queue_properties *properties,
pi_queue *queue);

__SYCL_EXPORT pi_result piQueueGetInfo(pi_queue command_queue,
pi_queue_info param_name,
Expand Down
3 changes: 2 additions & 1 deletion sycl/include/sycl/detail/property_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ enum PropWithDataKind {
ImageContextBound = 3,
BufferMemChannel = 4,
AccPropBufferLocation = 5,
PropWithDataKindSize = 6,
QueueComputeIndex = 6,
PropWithDataKindSize = 7,
};

// Base class for dataless properties, needed to check that the type of an
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/feature_test.hpp.in
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ __SYCL_INLINE_VER_NAMESPACE(_V1) {
#define SYCL_EXT_INTEL_MEM_CHANNEL_PROPERTY 1
#define SYCL_EXT_INTEL_USM_ADDRESS_SPACES 2
#define SYCL_EXT_INTEL_RUNTIME_BUFFER_LOCATION 1
#define SYCL_EXT_INTEL_QUEUE_INDEX 1
#define SYCL_EXT_ONEAPI_BACKEND_LEVEL_ZERO 3
#define SYCL_EXT_ONEAPI_USM_DEVICE_READ_ONLY 1
#define SYCL_EXT_ONEAPI_KERNEL_PROPERTIES 1
Expand Down
1 change: 1 addition & 0 deletions sycl/include/sycl/info/ext_intel_device_traits.def
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ __SYCL_PARAM_TRAITS_SPEC(ext::intel, device, uuid, detail::uuid_type, PI_DEVICE_
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, free_memory, pi_uint64, PI_EXT_INTEL_DEVICE_INFO_FREE_MEMORY)
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_clock_rate, pi_uint32, PI_EXT_INTEL_DEVICE_INFO_MEMORY_CLOCK_RATE)
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, memory_bus_width, pi_uint32, PI_EXT_INTEL_DEVICE_INFO_MEMORY_BUS_WIDTH)
__SYCL_PARAM_TRAITS_SPEC(ext::intel, device, max_compute_queue_indices, pi_int32, PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES)
#ifdef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC
#undef __SYCL_PARAM_TRAITS_TEMPLATE_SPEC_NEEDS_UNDEF
Expand Down
21 changes: 21 additions & 0 deletions sycl/include/sycl/properties/queue_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,24 @@ class use_default_stream
// clang-format on
} // namespace property::queue

namespace ext {
namespace intel {
namespace property {
namespace queue {
class compute_index : public sycl::detail::PropertyWithData<
sycl::detail::PropWithDataKind::QueueComputeIndex> {
public:
compute_index(int idx) : idx(idx) {}
int get_index() { return idx; }

private:
int idx;
};
} // namespace queue
} // namespace property
} // namespace intel
} // namespace ext

// Forward declaration
class queue;

Expand Down Expand Up @@ -78,6 +96,9 @@ struct is_property_of<property::queue::cuda::use_default_stream, queue>
template <>
struct is_property_of<ext::oneapi::cuda::property::queue::use_default_stream,
queue> : std::true_type {};
template <>
struct is_property_of<ext::intel::property::queue::compute_index, queue>
: std::true_type {};

} // __SYCL_INLINE_VER_NAMESPACE(_V1)
} // namespace sycl
33 changes: 25 additions & 8 deletions sycl/plugins/cuda/pi_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,7 @@ _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue,
streamToken_{stream_token}, evEnd_{nullptr}, evStart_{nullptr},
evQueued_{nullptr}, queue_{queue}, stream_{stream}, context_{context} {

bool profilingEnabled = queue_->properties_ & PI_QUEUE_PROFILING_ENABLE;
bool profilingEnabled = queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE;

PI_CHECK_ERROR(cuEventCreate(
&evEnd_, profilingEnabled ? CU_EVENT_DEFAULT : CU_EVENT_DISABLE_TIMING));
Expand Down Expand Up @@ -526,7 +526,7 @@ pi_result _pi_event::start() {
pi_result result = PI_SUCCESS;

try {
if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) {
// NOTE: This relies on the default stream to be unused.
result = PI_CHECK_ERROR(cuEventRecord(evQueued_, 0));
result = PI_CHECK_ERROR(cuEventRecord(evStart_, stream_));
Expand Down Expand Up @@ -633,7 +633,7 @@ pi_result _pi_event::release() {

PI_CHECK_ERROR(cuEventDestroy(evEnd_));

if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) {
PI_CHECK_ERROR(cuEventDestroy(evQueued_));
PI_CHECK_ERROR(cuEventDestroy(evStart_));
}
Expand Down Expand Up @@ -1681,14 +1681,14 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
}
case PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: {
// The mandated minimum capability:
auto capability =
PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE |
PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
return getInfo(param_value_size, param_value, param_value_size_ret,
capability);
}
case PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: {
// The mandated minimum capability:
auto capability = PI_QUEUE_PROFILING_ENABLE;
auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE;
return getInfo(param_value_size, param_value, param_value_size_ret,
capability);
}
Expand Down Expand Up @@ -1945,6 +1945,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name,
sycl::detail::pi::assertion(value >= 0);
return getInfo(param_value_size, param_value, param_value_size_ret, value);
}
case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: {
return getInfo(param_value_size, param_value, param_value_size_ret,
pi_int32{1});
}

// TODO: Investigate if this information is available on CUDA.
case PI_DEVICE_INFO_DEVICE_ID:
Expand Down Expand Up @@ -2501,7 +2505,7 @@ pi_result cuda_piQueueCreate(pi_context context, pi_device device,
}

const bool is_out_of_order =
properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
properties & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;

std::vector<CUstream> computeCuStreams(
is_out_of_order ? _pi_queue::default_num_compute_streams : 1);
Expand All @@ -2524,6 +2528,17 @@ pi_result cuda_piQueueCreate(pi_context context, pi_device device,
return PI_ERROR_OUT_OF_RESOURCES;
}
}
pi_result cuda_piextQueueCreate(pi_context Context, pi_device Device,
pi_queue_properties *Properties,
pi_queue *Queue) {
assert(Properties);
// Expect flags mask to be passed first.
assert(Properties[0] == PI_QUEUE_FLAGS);
pi_queue_properties Flags = Properties[1];
// Extra data isn't supported yet.
assert(Properties[2] == 0);
return cuda_piQueueCreate(Context, Device, Flags, Queue);
}

pi_result cuda_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name,
size_t param_value_size, void *param_value,
Expand Down Expand Up @@ -3849,7 +3864,8 @@ pi_result cuda_piEventGetProfilingInfo(pi_event event,
assert(event != nullptr);

pi_queue queue = event->get_queue();
if (queue == nullptr || !(queue->properties_ & PI_QUEUE_PROFILING_ENABLE)) {
if (queue == nullptr ||
!(queue->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE)) {
return PI_ERROR_PROFILING_INFO_NOT_AVAILABLE;
}

Expand Down Expand Up @@ -5473,6 +5489,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
cuda_piextContextCreateWithNativeHandle)
// Queue
_PI_CL(piQueueCreate, cuda_piQueueCreate)
_PI_CL(piextQueueCreate, cuda_piextQueueCreate)
_PI_CL(piQueueGetInfo, cuda_piQueueGetInfo)
_PI_CL(piQueueFinish, cuda_piQueueFinish)
_PI_CL(piQueueFlush, cuda_piQueueFlush)
Expand Down
16 changes: 14 additions & 2 deletions sycl/plugins/esimd_emulator/pi_esimd_emulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,7 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
case PI_DEVICE_INFO_OPENCL_C_VERSION:
return ReturnValue("");
case PI_DEVICE_INFO_QUEUE_PROPERTIES:
return ReturnValue(pi_queue_properties{PI_QUEUE_ON_DEVICE});
return ReturnValue(pi_queue_properties{PI_QUEUE_FLAG_ON_DEVICE});
case PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES: {
struct {
size_t Arr[3];
Expand Down Expand Up @@ -785,6 +785,8 @@ pi_result piDeviceGetInfo(pi_device Device, pi_device_info ParamName,
return ReturnValue(pi_uint32{0});
case PI_DEVICE_INFO_SUB_GROUP_SIZES_INTEL:
return ReturnValue(size_t{1});
case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES:
return ReturnValue(pi_int32{1});

CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_MAX_NUM_SUB_GROUPS)
CASE_PI_UNSUPPORTED(PI_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS)
Expand Down Expand Up @@ -923,11 +925,21 @@ bool _pi_context::checkSurfaceArgument(pi_mem_flags Flags, void *HostPtr) {
return true;
}

pi_result piextQueueCreate(pi_context Context, pi_device Device,
pi_queue_properties *Properties, pi_queue *Queue) {
assert(Properties);
// Expect flags mask to be passed first.
assert(Properties[0] == PI_QUEUE_FLAGS);
pi_queue_properties Flags = Properties[1];
// Extra data isn't supported yet.
assert(Properties[2] == 0);
return piQueueCreate(Context, Device, Flags, Queue);
}
pi_result piQueueCreate(pi_context Context, pi_device Device,
pi_queue_properties Properties, pi_queue *Queue) {
ARG_UNUSED(Device);

if (Properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
if (Properties & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE) {
// TODO : Support Out-of-order Queue
*Queue = nullptr;
return PI_ERROR_INVALID_QUEUE_PROPERTIES;
Expand Down
33 changes: 25 additions & 8 deletions sycl/plugins/hip/pi_hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,7 @@ _pi_event::_pi_event(pi_command_type type, pi_context context, pi_queue queue,

assert(type != PI_COMMAND_TYPE_USER);

bool profilingEnabled = queue_->properties_ & PI_QUEUE_PROFILING_ENABLE;
bool profilingEnabled = queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE;

PI_CHECK_ERROR(hipEventCreateWithFlags(
&evEnd_, profilingEnabled ? hipEventDefault : hipEventDisableTiming));
Expand Down Expand Up @@ -562,7 +562,7 @@ pi_result _pi_event::start() {
pi_result result = PI_SUCCESS;

try {
if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) {
// NOTE: This relies on the default stream to be unused.
PI_CHECK_ERROR(hipEventRecord(evQueued_, 0));
PI_CHECK_ERROR(hipEventRecord(evStart_, queue_->get()));
Expand Down Expand Up @@ -663,7 +663,7 @@ pi_result _pi_event::release() {
assert(queue_ != nullptr);
PI_CHECK_ERROR(hipEventDestroy(evEnd_));

if (queue_->properties_ & PI_QUEUE_PROFILING_ENABLE) {
if (queue_->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE) {
PI_CHECK_ERROR(hipEventDestroy(evQueued_));
PI_CHECK_ERROR(hipEventDestroy(evStart_));
}
Expand Down Expand Up @@ -1588,14 +1588,14 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
}
case PI_DEVICE_INFO_QUEUE_ON_DEVICE_PROPERTIES: {
// The mandated minimum capability:
auto capability =
PI_QUEUE_PROFILING_ENABLE | PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE |
PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;
return getInfo(param_value_size, param_value, param_value_size_ret,
capability);
}
case PI_DEVICE_INFO_QUEUE_ON_HOST_PROPERTIES: {
// The mandated minimum capability:
auto capability = PI_QUEUE_PROFILING_ENABLE;
auto capability = PI_QUEUE_FLAG_PROFILING_ENABLE;
return getInfo(param_value_size, param_value, param_value_size_ret,
capability);
}
Expand Down Expand Up @@ -1841,6 +1841,10 @@ pi_result hip_piDeviceGetInfo(pi_device device, pi_device_info param_name,
sycl::detail::pi::assertion(value >= 0);
return getInfo(param_value_size, param_value, param_value_size_ret, value);
}
case PI_EXT_INTEL_DEVICE_INFO_MAX_COMPUTE_QUEUE_INDICES: {
return getInfo(param_value_size, param_value, param_value_size_ret,
pi_int32{1});
}

// TODO: Implement.
case PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES:
Expand Down Expand Up @@ -2378,7 +2382,7 @@ pi_result hip_piQueueCreate(pi_context context, pi_device device,
unsigned int flags = 0;

const bool is_out_of_order =
properties & PI_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE;
properties & PI_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE;

std::vector<hipStream_t> computeHipStreams(
is_out_of_order ? _pi_queue::default_num_compute_streams : 1);
Expand All @@ -2401,6 +2405,17 @@ pi_result hip_piQueueCreate(pi_context context, pi_device device,
return PI_ERROR_OUT_OF_RESOURCES;
}
}
pi_result hip_piextQueueCreate(pi_context Context, pi_device Device,
pi_queue_properties *Properties,
pi_queue *Queue) {
assert(Properties);
// Expect flags mask to be passed first.
assert(Properties[0] == PI_QUEUE_FLAGS);
pi_queue_properties Flags = Properties[1];
// Extra data isn't supported yet.
assert(Properties[2] == 0);
return hip_piQueueCreate(Context, Device, Flags, Queue);
}

pi_result hip_piQueueGetInfo(pi_queue command_queue, pi_queue_info param_name,
size_t param_value_size, void *param_value,
Expand Down Expand Up @@ -3674,7 +3689,8 @@ pi_result hip_piEventGetProfilingInfo(pi_event event,
assert(event != nullptr);

pi_queue queue = event->get_queue();
if (queue == nullptr || !(queue->properties_ & PI_QUEUE_PROFILING_ENABLE)) {
if (queue == nullptr ||
!(queue->properties_ & PI_QUEUE_FLAG_PROFILING_ENABLE)) {
return PI_ERROR_PROFILING_INFO_NOT_AVAILABLE;
}

Expand Down Expand Up @@ -5201,6 +5217,7 @@ pi_result piPluginInit(pi_plugin *PluginInit) {
hip_piextContextCreateWithNativeHandle)
// Queue
_PI_CL(piQueueCreate, hip_piQueueCreate)
_PI_CL(piextQueueCreate, hip_piextQueueCreate)
_PI_CL(piQueueGetInfo, hip_piQueueGetInfo)
_PI_CL(piQueueFinish, hip_piQueueFinish)
_PI_CL(piQueueFlush, hip_piQueueFlush)
Expand Down
Loading

0 comments on commit d2ec964

Please sign in to comment.