diff --git a/extensions/cl_exp_tensor.asciidoc b/extensions/cl_exp_tensor.asciidoc new file mode 100644 index 00000000..619cca5c --- /dev/null +++ b/extensions/cl_exp_tensor.asciidoc @@ -0,0 +1,1083 @@
+:data-uri:
+:icons: font
+//include::../config/attribs.txt[]
+//include::{generated}/api/api-dictionary.asciidoc[]
+:source-highlighter: coderay
+
+= cl_exp_tensor
+
+This extension provides a new buffer abstraction, tensor objects, for
+managing N-dimensional data.
+
+== XXX - Not complete yet!!!
+
+== Name Strings
+
+`cl_exp_tensor`
+
+== Contact
+
+TODO
+
+== Contributors
+
+Henry Linjamäki, Intel. +
+Pekka Jääskeläinen, Intel. +
+Ben Ashbaugh, Intel.
+
+
+== Notice
+
+TODO
+
+== Status
+
+Draft spec, NOT APPROVED!!
+
+== Version
+
+Built On: {docdate} +
+Version: 0.2.0
+
+== Dependencies
+
+This extension is written against the OpenCL Specification version 3.0.14.
+
+This extension requires OpenCL 1.2 or later.
+
+== Overview
+
+The extension provides a new tensor object abstraction. Tensor objects
+are similar to image types in that they represent N-dimensional
+data of an application-chosen data type and they may be mapped to
+dedicated hardware, with the following key differences:
+
+* Higher than 3-dimensional data can be supported (limited by the
+  devices' capabilities).
+
+* Applications may choose how the data elements of the tensors are
+  laid out in the buffers using the tensor layout descriptions
+  provided in this extension.
+
+Applications may also choose the memory layouts of the tensors to be
+implementation-specified, letting the driver optimize the tensor
+data layout for better performance or lay out the data as required by
+hardware-accelerated functions (e.g. exposed via built-in kernels).
+
+The scope of this extension is to provide host APIs for creating tensor
+objects and transferring data between tensors, host memory and other memory
+objects.
+
+A separate extension implemented on top of this extension,
+cl_exp_defined_builtin_kernels, provides "defined built-in
+kernels" (DBKs) which can operate on tensors. It also provides a mechanism
+for drivers to create DBKs that are optimized for the tensor arguments
+they operate on.
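+
+For illustration, the following non-normative sketch shows the basic flow this
+extension targets: describing a tensor, creating a tensor object with an
+implementation-chosen (opaque) layout, and exporting host data into it. The
+context `ctx`, command queue `queue` and host pointer `host_data` are assumed
+to exist, and the encoding of the tensor description into the property list
+(here via a pointer cast) is an assumption of this sketch rather than a
+requirement of this extension.
+
+[source,c]
+----
+/* Describe a rank-2, single-precision tensor with a driver-chosen layout. */
+cl_tensor_desc_exp desc = {0};
+desc.rank = 2;
+desc.dtype = CL_TENSOR_DTYPE_FP32_EXP;
+desc.properties[0] = 0;                      /* no optional properties */
+desc.shape[0] = 128;
+desc.shape[1] = 256;
+desc.layout = NULL;
+desc.layout_type = CL_TENSOR_LAYOUT_OPAQUE_EXP;
+
+/* Assumed encoding: the description is passed through the property list. */
+cl_mem_properties props[] = {CL_MEM_TENSOR_EXP, (cl_mem_properties)&desc, 0};
+cl_int err;
+cl_mem tensor = clCreateBufferWithProperties(
+    ctx, props, CL_MEM_READ_WRITE, 0 /* size is ignored */, NULL, &err);
+
+/* Copy host data into the opaque-layout tensor. */
+size_t origin[2] = {0, 0};
+size_t region[2] = {128, 256};
+err = clEnqueueExportToTensorEXP(queue, tensor, CL_TRUE, origin, origin,
+                                 region, NULL, NULL, host_data,
+                                 0, NULL, NULL);
+----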
+
+== New API Functions
+
+[source,c]
+----
+cl_int clEnqueueImportFromTensorEXP(
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  cl_bool blocking_command,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  void* host_ptr,
+  cl_uint num_events_in_wait_list,
+  const cl_event* event_wait_list,
+  cl_event* event);
+
+cl_int clEnqueueExportToTensorEXP(
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  cl_bool blocking_command,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  const void* host_ptr,
+  cl_uint num_events_in_wait_list,
+  const cl_event* event_wait_list,
+  cl_event* event);
+
+cl_int clEnqueueCopyTensorEXP(
+  cl_command_queue command_queue,
+  cl_tensor src_tensor,
+  cl_tensor dst_tensor,
+  const cl_tensor_shape* src_origin,
+  const cl_tensor_shape* dst_origin,
+  const cl_tensor_shape* region,
+  cl_uint num_events_in_wait_list,
+  const cl_event* event_wait_list,
+  cl_event* event);
+
+cl_int clCommandImportFromTensorEXP(
+  cl_command_buffer_khr command_buffer,
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  void* host_ptr,
+  cl_uint num_sync_points_in_wait_list,
+  const cl_sync_point_khr* sync_point_wait_list,
+  cl_sync_point_khr* sync_point,
+  cl_mutable_command_khr* mutable_handle);
+
+cl_int clCommandExportToTensorEXP(
+  cl_command_buffer_khr command_buffer,
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  const void* host_ptr,
+  cl_uint num_sync_points_in_wait_list,
+  const cl_sync_point_khr* sync_point_wait_list,
+  cl_sync_point_khr* sync_point,
+  cl_mutable_command_khr* mutable_handle);
+----
+
+== New API Types
+
+[source,c]
+----
+typedef cl_uint cl_tensor_layout_type_exp;
+typedef cl_uint cl_tensor_dim_exp;
+typedef cl_uint cl_tensor_layout_ml_type_exp;
+typedef cl_properties cl_tensor_properties_exp;
+
+#define CL_TENSOR_DESC_MAX_RANK_EXP       20u
+#define CL_TENSOR_DESC_MAX_PROPERTIES_EXP 16u
+
+typedef struct cl_tensor_desc_exp {
+    cl_uint                   rank;
+    cl_tensor_datatype        dtype;
+    cl_tensor_properties_exp  properties[CL_TENSOR_DESC_MAX_PROPERTIES_EXP];
+    cl_tensor_shape           shape[CL_TENSOR_DESC_MAX_RANK_EXP];
+    const void*               layout;
+    cl_tensor_layout_type_exp layout_type;
+} cl_tensor_desc_exp;
+
+typedef struct cl_tensor_layout_blas_exp {
+    cl_tensor_dim_exp    leading_dims[CL_TENSOR_DESC_MAX_RANK_EXP];
+} cl_tensor_layout_blas_exp;
+
+typedef struct cl_tensor_layout_blas_pitched_exp {
+    cl_tensor_dim_exp    leading_dims[CL_TENSOR_DESC_MAX_RANK_EXP];
+    cl_tensor_pitch      leading_pitches[CL_TENSOR_DESC_MAX_RANK_EXP];
+} cl_tensor_layout_blas_pitched_exp;
+
+typedef struct cl_tensor_layout_ml_exp {
+  cl_tensor_layout_ml_type_exp ml_type;
+} cl_tensor_layout_ml_exp;
+----
+
+== New API Enums
+
+Accepted value for the _properties_ parameter to
+*clCreateBufferWithProperties* for creating a tensor object:
+
+[source,c]
+----
+CL_MEM_TENSOR_EXP               0x????
+----
+
+Accepted values for the _param_name_ parameter to *clGetDeviceInfo*:
+
+[source,c]
+----
+CL_DEVICE_MAX_TENSOR_ARGS_EXP     0x????
+CL_DEVICE_MAX_TENSOR_RANK_EXP     0x????
+CL_DEVICE_MAX_TENSOR_ELEMENTS_EXP 0x????
+CL_DEVICE_MAX_TENSOR_PITCH_EXP    0x????
+----
+
+Accepted values for *cl_tensor_datatype* type:
+
+[source,c]
+----
+CL_TENSOR_DTYPE_BOOL_EXP        0x????
+
+CL_TENSOR_DTYPE_INT4_EXP        0x????
+CL_TENSOR_DTYPE_INT8_EXP        0x????
+CL_TENSOR_DTYPE_INT16_EXP       0x????
+CL_TENSOR_DTYPE_INT32_EXP       0x????
+CL_TENSOR_DTYPE_INT64_EXP       0x????
+
+CL_TENSOR_DTYPE_UINT4_EXP       0x????
+CL_TENSOR_DTYPE_UINT8_EXP       0x????
+CL_TENSOR_DTYPE_UINT16_EXP      0x????
+CL_TENSOR_DTYPE_UINT32_EXP      0x????
+CL_TENSOR_DTYPE_UINT64_EXP      0x????
+
+CL_TENSOR_DTYPE_FP8E4M3_EXP     0x????
+CL_TENSOR_DTYPE_FP8E5M2_EXP     0x????
+CL_TENSOR_DTYPE_FP16_EXP        0x????
+CL_TENSOR_DTYPE_FP32_EXP        0x????
+CL_TENSOR_DTYPE_FP64_EXP        0x????
+
+CL_TENSOR_DTYPE_BFLOAT16_EXP    0x????
+
+CL_TENSOR_DTYPE_COMPLEX64_EXP   0x????
+CL_TENSOR_DTYPE_COMPLEX128_EXP  0x????
+----
+
+Accepted values for *cl_tensor_layout_type_exp*:
+
+[source,c]
+----
+CL_TENSOR_LAYOUT_OPAQUE_EXP       0x????
+CL_TENSOR_LAYOUT_BLAS_EXP         0x????
+CL_TENSOR_LAYOUT_BLAS_PITCHED_EXP 0x????
+CL_TENSOR_LAYOUT_ML_EXP           0x????
+----
+
+Accepted values for *cl_tensor_layout_ml_type_exp*:
+
+[source,c]
+----
+CL_TENSOR_LAYOUT_ML_C_EXP       0x????
+CL_TENSOR_LAYOUT_ML_NC_EXP      0x????
+CL_TENSOR_LAYOUT_ML_CN_EXP      0x????
+CL_TENSOR_LAYOUT_ML_HW_EXP      0x????
+CL_TENSOR_LAYOUT_ML_CHW_EXP     0x????
+CL_TENSOR_LAYOUT_ML_NCHW_EXP    0x????
+CL_TENSOR_LAYOUT_ML_NHWC_EXP    0x????
+----
+
+New error codes:
+
+[source,c]
+----
+CL_INVALID_TENSOR_RANK_EXP   0x????
+CL_INVALID_TENSOR_DTYPE_EXP  0x????
+CL_INVALID_TENSOR_SHAPE_EXP  0x????
+CL_INVALID_TENSOR_LAYOUT_EXP 0x????
+----
+
+=== Modifications to The OpenCL API Specification
+
+(Modify Section 4.2, *Querying Devices*) ::
++
+--
+(Add the following to Table 5., _List of supported param_names by *clGetDeviceInfo*_) ::
++
+--
+
+[cols="2,1,2",stripes=odd,options="header"]
+|===
+| Device Info
+| Return Type
+| Description
+
+// The following enumerators are introduced with Vulkan layering in
+// mind. The minimum values are copied from Vulkan's tensor draft
+// spec.
+
+| CL_DEVICE_MAX_TENSOR_ARGS_EXP | cl_uint | Maximum number of tensor
+  objects that can be specified as arguments to a kernel.
+
+| CL_DEVICE_MAX_TENSOR_RANK_EXP | cl_uint | Maximum tensor rank. The minimum
+  value is 4.
+
+| CL_DEVICE_MAX_TENSOR_ELEMENTS_EXP | size_t | Maximum number of tensor
+  elements in total. The minimum value is 65536.
+
+| CL_DEVICE_MAX_TENSOR_PITCH_EXP | size_t | Maximum pitch value for
+  all pitch components for the
+  <<cl-tensor-layout-blas,CL_TENSOR_LAYOUT_BLAS_PITCHED_EXP>> memory
+  layout.
+
+The minimum value is 65536.
+
+|===
+--
+--
+// End (Modify Section 4.2, *Querying Devices*)
+
+(Modify Section 5.2.1, *Creating Buffer Objects*) ::
++
+--
+(Add the following to Table 18., _Buffer creation properties_) ::
++
+--
+
+[cols="2,1,2",stripes=odd,options="header"]
+|===
+| cl_mem_properties
+| Property Value
+| Description
+
+| CL_MEM_TENSOR_EXP | cl_tensor_desc_exp a| Creates a tensor object with
+the properties set in the *cl_tensor_desc_exp* tensor description structure.
+
+The _size_ parameter of *clCreateBufferWithProperties()* is
+ignored and may be set to zero. The required storage size is
+inferred from the tensor description. The storage size can be queried
+with *clGetMemObjectInfo()*. The storage size may change at
+runtime unless constrained by the given tensor description.
+
+// The last sentence is for accommodating tensors with dynamic
+// dimension sizes and rank which are present in many ML frameworks.
+|===
+--
+
+(Add to list of error codes *clCreateBufferWithProperties()*) ::
++
+--
+
+* `CL_INVALID_TENSOR_RANK_EXP` if `CL_MEM_TENSOR_EXP` property is specified and the
+  `rank` member of the `cl_tensor_desc_exp` structure has an invalid or
+  unsupported value.
+
+* `CL_INVALID_TENSOR_DTYPE_EXP` if `CL_MEM_TENSOR_EXP` property is specified and the
+  `dtype` member of the `cl_tensor_desc_exp` structure has an invalid or
+  unsupported value.
+
+* `CL_INVALID_TENSOR_SHAPE_EXP` if `CL_MEM_TENSOR_EXP` property is
+  specified and the `shape` member of the `cl_tensor_desc_exp`
+  structure has an invalid or unsupported description.
+
+* `CL_INVALID_TENSOR_LAYOUT_EXP` if `CL_MEM_TENSOR_EXP` property is
+  specified and the `layout_type` member of the `cl_tensor_desc_exp`
+  structure has an invalid enumeration constant.
+
+* `CL_INVALID_TENSOR_LAYOUT_EXP` if `CL_MEM_TENSOR_EXP` property is
+  specified and the `layout` member of the `cl_tensor_desc_exp`
+  structure has an invalid description.
+--
+--
+// End (Modify Section 5.2.1, *Creating Buffer Objects*)
+
+(Add the following to Section 5.2.2, *Reading, Writing and Copying Buffer Objects*) ::
++
+--
+The following functions read data from a tensor object into host memory or a
+buffer object, or write data to a tensor object from host memory or a buffer
+object.
+
+[source,c]
+----
+cl_int clEnqueueImportFromTensorEXP(
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  cl_bool blocking_command,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  void* host_ptr,
+  cl_uint num_events_in_wait_list,
+  const cl_event* event_wait_list,
+  cl_event* event);
+----
+
+[source,c]
+----
+cl_int clEnqueueExportToTensorEXP(
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  cl_bool blocking_command,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  const void* host_ptr,
+  cl_uint num_events_in_wait_list,
+  const cl_event* event_wait_list,
+  cl_event* event);
+----
+
+* _command_queue_ is a valid host command-queue in which the read /
+  write command will be queued. _command_queue_ and _tensor_ must be
+  created with the same OpenCL context.
+
+* _tensor_ refers to a valid tensor object which is bound to a buffer.
+
+* _blocking_command_ indicates whether the read and write operations are
+  blocking or non-blocking (see below).
+
+* _tensor_origin_ defines the offset coordinates in _tensor_ for the start of
+  the region to read / write tensor data. The length of the array
+  must be at least the rank of the _tensor_.
+
+* _mem_origin_ defines the offset coordinates in the memory region
+  pointed to by _buffer_ or _host_ptr_, expressed in elements of _tensor_
+  data type. The length of the array must be at least the rank of the
+  _tensor_.
+
+* _region_ defines the region being read or written, expressed in
+  elements of _tensor_ data type. The length of the array must be at
+  least the rank of the _tensor_. If _region_ is NULL then _tensor_'s
+  shape will be used as the region.
+
+* _mem_pitch_ defines the length, in elements, of each dimension of the
+  memory region of _buffer_ or _host_ptr_. The length of
+  the array must be at least the rank of _tensor_ minus one. If
+  _mem_pitch_ is NULL or _mem_pitch_[i] is zero, _mem_pitch_[i] is
+  computed as _region_[i + 1].
+
+* _buffer_ and _host_ptr_ refer to a valid buffer object / host
+  allocation where data is to be read into or to be written from.
+  Exactly one of _buffer_ and _host_ptr_ must be non-NULL; the
+  non-NULL argument is used as the operand for the operation.
+
+* _event_wait_list_ and _num_events_in_wait_list_ specify events that
+  need to complete before this particular command can be executed. If
+  _event_wait_list_ is NULL, then this particular command does not
+  wait on any event to complete. If _event_wait_list_ is NULL,
+  _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not
+  NULL, the list of events pointed to by _event_wait_list_ must be
+  valid and _num_events_in_wait_list_ must be greater than 0. The
+  events specified in _event_wait_list_ act as synchronization
+  points. The context associated with events in _event_wait_list_ and
+  _command_queue_ must be the same. The memory associated with
+  _event_wait_list_ can be reused or freed after the function returns.
+
+* _event_ returns an event object that identifies this read / write
+  command and can be used to query or queue a wait for this command to
+  complete. If _event_ is NULL or the enqueue is unsuccessful, no
+  event will be created and therefore it will not be possible to query
+  the status of this command or to wait for this command to
+  complete. If _event_wait_list_ and _event_ are not NULL, _event_
+  must not refer to an element of the _event_wait_list_ array.
+
+The *clEnqueueExportToTensorEXP* function copies the contents of the buffer
+object / host allocation to the tensor's storage in an
+implementation-defined, opaque memory layout. The
+*clEnqueueImportFromTensorEXP* function copies data from the tensor's
+storage to the buffer object / host allocation.
+
+The elements of the buffer object / host allocation are mapped to tensor
+coordinates and vice versa as follows, in pseudo C code:
+
+[source,c]
+----
+tensor_element(
+  tensor,
+  tensor_origin[0] + i[0],
+  tensor_origin[1] + i[1],
+  ...,
+  tensor_origin[N-2] + i[N-2],
+  tensor_origin[N-1] + i[N-1]) ==
+((TENSOR_DATATYPE *)buffer_or_host_ptr)[
+  (mem_origin[0] + i[0]) * pitch(0) +
+  (mem_origin[1] + i[1]) * pitch(1) +
+  ... +
+  (mem_origin[N-2] + i[N-2]) * pitch(N-2) +
+  (mem_origin[N-1] + i[N-1])];
+----
+
+Where `N` is the tensor rank, `i[X]` is a tensor coordinate in the
+inclusive range `0..region[X]-1`, and `pitch` is computed as
+follows, in pseudo C code:
+
+[source,c]
+----
+size_t pitch(size_t dim) {
+  size_t pitch = 1;
+  for (size_t i = dim; i < tensor_rank - 1; i++)
+    pitch *=
+      (mem_pitch != NULL && mem_pitch[i] != 0) ? mem_pitch[i] : region[i + 1];
+  return pitch;
+}
+----
+
+For `dim` in `0..(tensor_rank()-1)`. The `tensor_element()` represents
+an abstract function that accesses a tensor element in its storage at the
+given coordinate. How the coordinates translate to tensor
+storage addresses is unspecified.
+
+*clEnqueueImportFromTensorEXP* and *clEnqueueExportToTensorEXP*
+return CL_SUCCESS if the function is executed
+successfully. Otherwise, they return one of the following errors:
+
+* CL_INVALID_COMMAND_QUEUE if _command_queue_ is not a valid host
+  command-queue.
+
+* CL_INVALID_CONTEXT if the context associated with _command_queue_
+  and _buffer_ are not the same or if the context associated with
+  _command_queue_ and events in _event_wait_list_ are not the same.
+
+* CL_INVALID_MEM_OBJECT if _buffer_ is not a valid buffer object.
+
+* CL_INVALID_VALUE if _tensor_origin_ or _mem_origin_ is NULL.
+
+* CL_INVALID_VALUE if the region being read or written specified by
+  (_mem_origin_, _region_, _mem_pitch_) is out of bounds.
+
+* CL_INVALID_VALUE if any _region_ array element is 0.
+
+* CL_INVALID_VALUE if _mem_pitch_ is not NULL and _mem_pitch_[i] is
+  not 0 and _mem_pitch_[i] is less than _region_[i].
+
+* CL_INVALID_VALUE if _buffer_ and _host_ptr_ are both NULL or both non-NULL.
+
+* CL_INVALID_EVENT_WAIT_LIST if _event_wait_list_ is NULL and
+  _num_events_in_wait_list_ > 0, or _event_wait_list_ is not NULL and
+  _num_events_in_wait_list_ is 0, or if event objects in
+  _event_wait_list_ are not valid events.
+
+* CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST if the read and write
+  operations are blocking and the execution status of any of the
+  events in _event_wait_list_ is a negative integer value.
+
+* CL_OUT_OF_RESOURCES if there is a failure to allocate resources
+  required by the OpenCL implementation on the device.
+
+* CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
+  required by the OpenCL implementation on the host.
+
+
+To copy elements from one tensor to another, use:
+
+[source,c]
+----
+cl_int clEnqueueCopyTensorEXP(
+  cl_command_queue command_queue,
+  cl_tensor src_tensor,
+  cl_tensor dst_tensor,
+  const cl_tensor_shape* src_origin,
+  const cl_tensor_shape* dst_origin,
+  const cl_tensor_shape* region,
+  cl_uint num_events_in_wait_list,
+  const cl_event* event_wait_list,
+  cl_event* event);
+----
+
+* _command_queue_ is a valid host command-queue in which the copy
+  command will be queued. _command_queue_, _src_tensor_ and _dst_tensor_
+  must be created with the same OpenCL context.
+
+* _src_tensor_ and _dst_tensor_ refer to valid buffer objects created
+  with `CL_MEM_TENSOR_EXP`. Tensor elements are copied from _src_tensor_
+  to _dst_tensor_. The ranks of _src_tensor_ and _dst_tensor_ must match.
+
+* _src_origin_ and _dst_origin_ define the origins of the copy region. The
+  length of the arrays must be at least the tensors' rank.
+
+* _region_ defines the extents of the region being copied. The length
+  of the array must be at least the tensors' rank.
+
+* _event_wait_list_ and _num_events_in_wait_list_ specify events that
+  need to complete before this particular command can be executed. If
+  _event_wait_list_ is NULL, then this particular command does not
+  wait on any event to complete. If _event_wait_list_ is NULL,
+  _num_events_in_wait_list_ must be 0. If _event_wait_list_ is not
+  NULL, the list of events pointed to by _event_wait_list_ must be
+  valid and _num_events_in_wait_list_ must be greater than 0. The
+  events specified in _event_wait_list_ act as synchronization
+  points. The context associated with events in _event_wait_list_ and
+  _command_queue_ must be the same. The memory associated with
+  _event_wait_list_ can be reused or freed after the function returns.
+
+* _event_ returns an event object that identifies this copy
+  command and can be used to query or queue a wait for this command to
+  complete. If _event_ is NULL or the enqueue is unsuccessful, no
+  event will be created and therefore it will not be possible to query
+  the status of this command or to wait for this command to
+  complete. If _event_wait_list_ and _event_ are not NULL, _event_
+  must not refer to an element of the _event_wait_list_ array.
+
+Elements are copied from the source tensor to the destination tensor
+so that after completion the following condition holds, expressed in
+pseudo C:
+
+[source,c]
+----
+// 'so' and 'do' are aliases for src_origin and dst_origin, respectively.
+tensor_element(dst_tensor, do[0] + i[0], do[1] + i[1], ..., do[N-1] + i[N-1])
+==
+tensor_element(src_tensor, so[0] + i[0], so[1] + i[1], ..., so[N-1] + i[N-1]);
+----
+
+Where `N` is the tensor rank and `i[X]` is a tensor coordinate in the
+inclusive range `0..region[X]-1`.
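+
+For example, a non-normative sketch of copying a 64x64 tile between two
+rank-2 tensors follows; `queue`, `src_tensor` and `dst_tensor` are assumed to
+be a valid command-queue and tensor objects created with `CL_MEM_TENSOR_EXP`
+as described above.
+
+[source,c]
+----
+/* Copy a 64x64 region starting at (0, 0) in src_tensor to position (16, 32)
+ * in dst_tensor. Both tensors are rank 2 and large enough for the region. */
+cl_tensor_shape src_origin[2] = {0, 0};
+cl_tensor_shape dst_origin[2] = {16, 32};
+cl_tensor_shape region[2]     = {64, 64};
+
+cl_int err = clEnqueueCopyTensorEXP(queue, src_tensor, dst_tensor,
+                                    src_origin, dst_origin, region,
+                                    0, NULL, NULL);
+----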
+
+*clEnqueueCopyTensorEXP* returns CL_SUCCESS if the function is
+executed successfully. Otherwise, it returns one of the following
+errors:
+
+* CL_INVALID_COMMAND_QUEUE if _command_queue_ is not a valid host
+  command-queue.
+
+* CL_INVALID_CONTEXT if the context associated with _command_queue_
+  and _src_tensor_ or _dst_tensor_ are not the same or if the context
+  associated with _command_queue_ and events in _event_wait_list_ are
+  not the same.
+
+* CL_INVALID_MEM_OBJECT if _src_tensor_ or _dst_tensor_ is not a
+  valid buffer object created with `CL_MEM_TENSOR_EXP`.
+
+* CL_INVALID_VALUE if _src_origin_, _dst_origin_ or _region_ is NULL.
+
+* CL_INVALID_VALUE if `region[i]` is zero for i in `[0, tensor_rank)`.
+
+* CL_INVALID_VALUE if `src_origin[i] + region[i]` or
+  `dst_origin[i] + region[i]` is greater than the corresponding tensor's
+  `shape[i]` at any dimension `i` in range `[0, tensor_rank)`.
+
+* CL_INVALID_EVENT_WAIT_LIST if _event_wait_list_ is NULL and
+  _num_events_in_wait_list_ > 0, or _event_wait_list_ is not NULL and
+  _num_events_in_wait_list_ is 0, or if event objects in
+  _event_wait_list_ are not valid events.
+
+* CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST if the read and write
+  operations are blocking and the execution status of any of the
+  events in _event_wait_list_ is a negative integer value.
+
+* CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate
+  memory for the data store associated with the memory object a tensor is
+  bound to.
+
+* CL_OUT_OF_RESOURCES if there is a failure to allocate resources
+  required by the OpenCL implementation on the device.
+
+* CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
+  required by the OpenCL implementation on the host.
+
+// TODO: add clEnqueueFillTensor?
+
+--
+// End (Add the following to Section 5.2.2, *Reading, Writing and Copying Buffer Objects*)
+
+
+(Add the following to Section 5.17.5, *Recording Commands to a Command-Buffer*) ::
++
+--
+
+If *cl_khr_command_buffer* is supported, then the following command
+buffer counterparts of the *clEnqueueImportFromTensorEXP* and
+*clEnqueueExportToTensorEXP* commands are available.
+
+[source,c]
+----
+cl_int clCommandImportFromTensorEXP(
+  cl_command_buffer_khr command_buffer,
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  void* host_ptr,
+  cl_uint num_sync_points_in_wait_list,
+  const cl_sync_point_khr* sync_point_wait_list,
+  cl_sync_point_khr* sync_point,
+  cl_mutable_command_khr* mutable_handle);
+
+cl_int clCommandExportToTensorEXP(
+  cl_command_buffer_khr command_buffer,
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  const void* host_ptr,
+  cl_uint num_sync_points_in_wait_list,
+  const cl_sync_point_khr* sync_point_wait_list,
+  cl_sync_point_khr* sync_point,
+  cl_mutable_command_khr* mutable_handle);
+----
+
+* _command_buffer_ refers to a valid command-buffer object.
+
+* For the _command_queue_, _tensor_, _tensor_origin_, _mem_origin_,
+  _region_, _mem_pitch_, _buffer_ and _host_ptr_ parameters, refer to
+  *clEnqueueImportFromTensorEXP* and *clEnqueueExportToTensorEXP*.
+
+* For the _num_sync_points_in_wait_list_, _sync_point_wait_list_,
+  _sync_point_ and _mutable_handle_ parameters, refer to
+  *clCommandCopyBufferKHR*.
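+
+As an illustration, a non-normative sketch of recording an import command
+into a command buffer follows. It assumes a command buffer `cmd_buf` created
+with *clCreateCommandBufferKHR* from *cl_khr_command_buffer*, a tensor object
+`tensor`, host storage `host_dst` large enough for the region, and `origin`
+and `region` arrays as in the earlier examples.
+
+[source,c]
+----
+/* Record an import (tensor -> host memory) into the command buffer. */
+cl_sync_point_khr import_done;
+cl_int err = clCommandImportFromTensorEXP(
+    cmd_buf,
+    NULL,            /* command_queue must be NULL */
+    tensor,
+    origin,          /* tensor_origin */
+    origin,          /* mem_origin */
+    region,
+    NULL,            /* mem_pitch: tightly packed */
+    NULL,            /* buffer: unused, host_ptr is the target */
+    host_dst,        /* host_ptr */
+    0, NULL,         /* no sync points to wait on */
+    &import_done,
+    NULL);           /* mutable_handle */
+----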
+
+*clCommandImportFromTensorEXP* and *clCommandExportToTensorEXP*
+return CL_SUCCESS if the function is executed
+successfully. Otherwise, they return one of the following errors:
+
+* CL_INVALID_COMMAND_QUEUE if _command_queue_ is not NULL.
+
+* CL_INVALID_COMMAND_BUFFER_KHR if _command_buffer_ is not a valid
+  command-buffer.
+
+* CL_INVALID_CONTEXT if the context associated with _command_queue_
+  and _command_buffer_ is not the same.
+
+* CL_INVALID_OPERATION if _command_buffer_ has been finalized.
+
+* CL_INVALID_VALUE if _mutable_handle_ is not NULL.
+
+* CL_INVALID_SYNC_POINT_WAIT_LIST_KHR if _sync_point_wait_list_ is
+  NULL and _num_sync_points_in_wait_list_ is > 0, or
+  _sync_point_wait_list_ is not NULL and _num_sync_points_in_wait_list_ is
+  0, or if synchronization-point objects in _sync_point_wait_list_ are
+  not valid synchronization-points.
+
+* CL_OUT_OF_RESOURCES if there is a failure to allocate resources
+  required by the OpenCL implementation on the device.
+
+* CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
+  required by the OpenCL implementation on the host.
+
+--
+// End (Add the following to Section 5.17.5, *Recording Commands to a Command-Buffer*)
+
+
+(Add the following to new Section 5.X.Y, *Tensor Descriptions*) ::
++
+--
+
+The following structure describes the properties of a tensor to be created
+with *clCreateBufferWithProperties()* using the `CL_MEM_TENSOR_EXP` property:
+
+[source,c]
+----
+typedef struct cl_tensor_desc_exp {
+  cl_uint                   rank;
+  cl_tensor_datatype        dtype;
+  cl_tensor_properties_exp  properties[CL_TENSOR_DESC_MAX_PROPERTIES_EXP];
+  cl_tensor_shape           shape[CL_TENSOR_DESC_MAX_RANK_EXP];
+  const void*               layout;
+  cl_tensor_layout_type_exp layout_type;
+} cl_tensor_desc_exp;
+----
+
+* _rank_ defines the tensor's rank - the number of dimensions.
+
+* _dtype_ defines the data type of the elements in the
+  tensor. Possible types are listed in the <<tensor-dtypes-table>> table.
+
+* _properties_ is an optional list of properties for the tensor object
+  and their corresponding values. The list is terminated with the
+  special property 0. If no properties are required, the first element
+  of the list must be 0. This extension does not define any optional
+  properties for tensors, but future extensions may define properties.
+
+* _shape_ defines the extents of the tensor's dimensions in number of
+  elements.
+
+* _layout_ points to an optional structure describing how tensor
+  elements are laid out in the buffer memory. The structure must be of a
+  type corresponding to the _layout_type_ as listed in the
+  <<layout-types-table>> table. The pointer is
+  ignored if the _layout_type_ is `CL_TENSOR_LAYOUT_OPAQUE_EXP`.
+
+* _layout_type_ indicates the layout structure type that _layout_
+  points to.
+
+
+[[tensor-dtypes-table]]
+.Tensor element types. The API type indicates the corresponding type for copying elements from a host allocation / buffer object to a tensor or vice versa.
+[cols="1,1,1",stripes=even]
+|===
+| *Tensor element data type* | *Description* | *API type*
+
+| CL_TENSOR_DTYPE_BOOL_EXP | Data type representing true or false. |
+cl_uchar. footnote:[zero and non-zero bytes are interpreted as false
+and true values, respectively.]
+
+| CL_TENSOR_DTYPE_INT4_EXP | 4-bit signed integer. | cl_char.
+| CL_TENSOR_DTYPE_INT8_EXP | 8-bit signed integer. | cl_char.
+| CL_TENSOR_DTYPE_INT16_EXP | 16-bit signed integer. | cl_short.
+| CL_TENSOR_DTYPE_INT32_EXP | 32-bit signed integer. | cl_int.
+| CL_TENSOR_DTYPE_INT64_EXP | 64-bit signed integer. | cl_long.
+| CL_TENSOR_DTYPE_UINT4_EXP | 4-bit unsigned integer. | cl_uchar.
+| CL_TENSOR_DTYPE_UINT8_EXP | 8-bit unsigned integer. | cl_uchar.
+| CL_TENSOR_DTYPE_UINT16_EXP | 16-bit unsigned integer. | cl_ushort.
+| CL_TENSOR_DTYPE_UINT32_EXP | 32-bit unsigned integer. | cl_uint.
+| CL_TENSOR_DTYPE_UINT64_EXP | 64-bit unsigned integer. | cl_ulong.
+
+| CL_TENSOR_DTYPE_FP8E4M3_EXP | 8-bit floating point with a sign bit,
+  4 exponent bits, 3 mantissa bits and an exponent bias of 7.
+| cl_char.
+
+| CL_TENSOR_DTYPE_FP8E5M2_EXP | 8-bit floating point with a sign bit,
+  5 exponent bits, 2 mantissa bits and an exponent bias of 15.
+| cl_char.
+
+// Reference: https://arxiv.org/pdf/2209.05433
+
+| CL_TENSOR_DTYPE_FP16_EXP | Half precision floating-point. | cl_half.
+| CL_TENSOR_DTYPE_BFLOAT16_EXP | 16-bit brain floating-point. | cl_ushort.
+| CL_TENSOR_DTYPE_FP32_EXP | Single precision floating-point. | cl_float.
+| CL_TENSOR_DTYPE_FP64_EXP | Double precision floating-point. | cl_double.
+| CL_TENSOR_DTYPE_COMPLEX64_EXP | 64-bit complex floating-point with
+  32-bit real and imaginary parts. | cl_float2.
+| CL_TENSOR_DTYPE_COMPLEX128_EXP | 128-bit complex floating-point with
+  64-bit real and imaginary parts. | cl_double2.
+|===
+
+[[layout-types-table]]
+.Optional tensor memory layout types.
+[cols="1,1,4",stripes=even]
+|===
+| *layout type* | *tensor layout type* | *Description*
+
+| CL_TENSOR_LAYOUT_OPAQUE_EXP | N/A a| The tensor does not have an
+  application-defined memory layout. The driver controls the tensor's
+  layout. To read or write elements of the tensor, the application
+  must:
+
+* use *clEnqueueExportToTensorEXP* and *clEnqueueImportFromTensorEXP* (or their
+  command buffer variants) or
+* use *clEnqueueCopyTensorEXP* to copy elements to / from another tensor
+  object with an application-defined memory layout.
+
+| CL_TENSOR_LAYOUT_BLAS_EXP
+|<<cl-tensor-layout-blas,cl_tensor_layout_blas_exp>>
+| A type that describes a packed memory layout similar to ones used in BLAS APIs.
+
+| CL_TENSOR_LAYOUT_BLAS_PITCHED_EXP
+|<<cl-tensor-layout-blas,cl_tensor_layout_blas_pitched_exp>>
+| A type that describes a pitched memory layout similar to ones used in BLAS APIs.
+
+| CL_TENSOR_LAYOUT_ML_EXP | <<tensor-layout-ml-type,cl_tensor_layout_ml_exp>> |
+
+The tensor layout is specified with an enumerator. Each enumerator
+corresponds to a predefined configuration of the
+*cl_tensor_layout_blas_exp* structure.
+
+|===
+
+--
+// End (Add the following to new Section 5.X.Y, *Tensor Descriptions*)
+
+
+[[cl-tensor-layout-blas]]
+(Add the following to new Section 5.X.Y.1, *BLAS Tensor Layout*) ::
++
+--
+The following structures describe packed and pitched BLAS-like memory
+layouts for the tensor:
+
+[source,c]
+----
+typedef struct cl_tensor_layout_blas_exp {
+  cl_tensor_dim_exp    leading_dims[CL_TENSOR_DESC_MAX_RANK_EXP];
+} cl_tensor_layout_blas_exp;
+
+typedef struct cl_tensor_layout_blas_pitched_exp {
+  cl_tensor_dim_exp    leading_dims[CL_TENSOR_DESC_MAX_RANK_EXP];
+  cl_tensor_pitch      leading_pitches[CL_TENSOR_DESC_MAX_RANK_EXP];
+} cl_tensor_layout_blas_pitched_exp;
+
+typedef struct cl_tensor_layout_ml_exp {
+  cl_tensor_layout_ml_type_exp ml_type;
+} cl_tensor_layout_ml_exp;
+----
+
+* _leading_dims_ describes the order in which the tensor's dimensions
+  are laid out in memory. `leading_dims[0]` identifies the dimension
+  whose elements are laid out first (contiguously), followed by elements
+  along the dimension identified by `leading_dims[1]`, and so on. The
+  first N entries, where N is the tensor's rank, must be unique
+  values within the range `[0, tensor_rank)`.
+
+* _leading_pitches_ describes the distances between consecutive elements
+  along the leading dimensions in _leading_dims_. The distances
+  are measured in number of elements. The first N elements must be
+  non-zero, where N is the tensor's rank minus one,
+  and the following conditions must hold:
+
+** `leading_pitches[0] >= tensor_shape[leading_dims[0]]` if the tensor
+   rank is greater than one and
+
+** `leading_pitches[i + 1] >= tensor_shape[leading_dims[i + 1]] *
+   leading_pitches[i]` for `i` in `[0, tensor_rank - 2)` if the tensor
+   rank is greater than two.
+
+// ^ This condition is meant to ensure that the tensor elements at different
+// coordinates don't alias in memory.
+
+* _ml_type_ defines the memory layout via enumerators which correspond to
+  predefined configurations of the `cl_tensor_layout_blas_exp` structure,
+  as listed in the <<tensor-layout-ml-type>> table.
+
+The memory layout descriptions map tensor coordinates to byte locations in
+the buffer's memory, relative to the buffer's base address, as in the
+following pseudo C code example:
+
+[source,c]
+----
+size_t index = tensor_coordinates[leading_dims[0]];
+for (unsigned i = 0; i < tensor_rank - 1; i++)
+  index += tensor_coordinates[leading_dims[i + 1]] * pitches[i];
+buffer_offset = index * tensor_element_size;
+----
+
+Where `pitches[i]` equals:
+
+* _leading_pitches_[i] for `cl_tensor_layout_blas_pitched_exp`.
+
+* `tensor_shape[leading_dims[i]] *
+  tensor_shape[leading_dims[i-1]] * ... *
+  tensor_shape[leading_dims[0]]` for `cl_tensor_layout_blas_exp`.
+
+
+[[tensor-layout-ml-type]]
+.ML tensor layout types and their corresponding cl_tensor_layout_blas_exp configuration.
+[cols="1,2",stripes=even]
+|===
+| *ML layout type* | *Equivalent _leading_dims_ configuration*
+
+|CL_TENSOR_LAYOUT_ML_C_EXP | `{}`
+|CL_TENSOR_LAYOUT_ML_NC_EXP | `{1}`
+|CL_TENSOR_LAYOUT_ML_CN_EXP | `{0}`
+|CL_TENSOR_LAYOUT_ML_HW_EXP | `{1}`
+|CL_TENSOR_LAYOUT_ML_CHW_EXP | `{2, 1}`
+|CL_TENSOR_LAYOUT_ML_NCHW_EXP | `{3, 2, 1}`
+|CL_TENSOR_LAYOUT_ML_NHWC_EXP | `{1, 3, 2}`
+|===
+--
+
+== Sample Codes
+
+An example usage of tensors:
+
+[source,cpp]
+----
+constexpr size_t b = 64, m = 100, n = 200, k = 50;
+
+std::vector<cl_float> in0_data = ...;
+std::vector<cl_float> in1_data = ...;
+std::vector<cl_float> out_data(b * m * n);
+
+// Create a tensor with an opaque layout.
+cl_tensor_desc_exp in0_desc;
+in0_desc.rank = 3;
+in0_desc.dtype = CL_TENSOR_DTYPE_FP32_EXP;
+in0_desc.properties[0] = 0;
+in0_desc.shape[0] = b;
+in0_desc.shape[1] = m;
+in0_desc.shape[2] = k;
+in0_desc.layout = nullptr;
+in0_desc.layout_type = CL_TENSOR_LAYOUT_OPAQUE_EXP;
+
+cl_int err;
+cl_mem in0_tensor = clCreateBufferWithProperties(
+  ctx, {CL_MEM_TENSOR_EXP, in0_desc, 0},
+  CL_MEM_READ_ONLY, 0, nullptr, &err);
+
+// Create a tensor from a host allocation using an application-defined
+// layout description for mapping elements to the tensor.
+cl_tensor_desc_exp in1_desc;
+in1_desc.rank = 3;
+in1_desc.dtype = CL_TENSOR_DTYPE_FP32_EXP;
+in1_desc.properties[0] = 0;
+in1_desc.shape[0] = b;
+in1_desc.shape[1] = k;
+in1_desc.shape[2] = n;
+
+cl_tensor_layout_blas_exp col_major;
+col_major.leading_dims[0] = 1;
+col_major.leading_dims[1] = 2;
+in1_desc.layout = &col_major;
+in1_desc.layout_type = CL_TENSOR_LAYOUT_BLAS_EXP;
+
+cl_mem in1_tensor = clCreateBufferWithProperties(
+  ctx, {CL_MEM_TENSOR_EXP, in1_desc, 0},
+  CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, 0, in1_data.data(), &err);
+
+// Create another tensor with an application-defined layout.
+cl_tensor_desc_exp out_desc;
+out_desc.rank = 3;
+out_desc.dtype = CL_TENSOR_DTYPE_FP32_EXP;
+out_desc.properties[0] = 0;
+out_desc.shape[0] = b;
+out_desc.shape[1] = m;
+out_desc.shape[2] = n;
+
+cl_tensor_layout_blas_exp row_major;
+row_major.leading_dims[0] = 2;
+row_major.leading_dims[1] = 1;
+out_desc.layout = &row_major;
+out_desc.layout_type = CL_TENSOR_LAYOUT_BLAS_EXP;
+
+cl_mem out_tensor = clCreateBufferWithProperties(
+  ctx, {CL_MEM_TENSOR_EXP, out_desc, 0},
+  CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, 0, out_data.data(), &err);
+
+// Create a kernel that operates on the tensors and is possibly
+// optimized for them via a yet-to-be-realized API extension.
+cl_kernel batched_matmul_kernel = create_batched_matmul_kernel(
+  ctx, device_span, in0_desc, in1_desc, out_desc);
+
+clSetKernelArg(batched_matmul_kernel, 0, sizeof(cl_mem), &in0_tensor);
+clSetKernelArg(batched_matmul_kernel, 1, sizeof(cl_mem), &in1_tensor);
+clSetKernelArg(batched_matmul_kernel, 2, sizeof(cl_mem), &out_tensor);
+
+// Exporting is required for transferring data into layout-opaque tensors
+// (and importing for reading data back from them).
+clEnqueueExportToTensorEXP(
+  cmd_q, in0_tensor, CL_FALSE, {0, 0, 0}, {0, 0, 0}, {b, m, k},
+  nullptr, nullptr, in0_data.data(), 0, nullptr, nullptr);
+
+clEnqueueNDRangeKernel(
+  cmd_q, batched_matmul_kernel, 3, nullptr, matmul_grid, nullptr,
+  0, nullptr, nullptr);
+
+clEnqueueMapBuffer(
+  cmd_q, out_tensor, CL_TRUE, CL_MAP_READ, 0, b * m * n * sizeof(cl_float),
+  0, nullptr, nullptr, nullptr);
+----
+
+
+== Issues and Open Questions
+
+. Should we support tensors with undefined shape and tensors
+  with unknown / symbolic dimension sizes like in ONNX?
++
+--
+// https://onnx.ai/onnx/repo-docs/ShapeInference.html
+*UNRESOLVED*
+--
+
+. Should we define OpenCL C language features for accessing tensors?
++
+--
+*RESOLVED*: OpenCL C support for tensors can be introduced later in a
+  separate extension. Built-in kernels may benefit from this
+  extension as it is.
+--
+
+. What is the use case of `cl_tensor_layout_blas_pitched_exp`?
++
+--
+*UNRESOLVED*
+--
+
+. Should image types be extended instead of adding a separate tensor type?
++
+--
+*UNRESOLVED*
+--
+
+== Version History
+
+[cols="5,10,15,40"]
+[grid="rows"]
+[options="header"]
+|====
+| Version | Date | Author | Changes
+| 0.1.0 | 2023-11-23 | Henry Linjamäki | *Initial revision*
+
+| 0.2.0 | 2024-08-14 |
+Henry Linjamäki +
+Pekka Jääskeläinen +
+Michal Babej +
+Freddie Witherden
+a|
+
+* Rework document structure to match the cl_khr_extension_template.
+
+* Added clEnqueueCopyTensorEXP.
+
+* Added an API for setting the memory layout for tensors.
+
+|====
+
+
+
+

This extension provides a new buffer abstraction, tensor objects, for +managing N-dimensional data.

+
+
+
+
+

XXX - Not complete yet!!!

+
+ +
+
+
+

Name Strings

+
+
+

cl_exp_tensor

+
+
+
+
+

Contact

+
+
+

TODO

+
+
+
+
+

Contributors

+
+
+

Henry Linjamäki, Intel.
+Pekka Jääskeläinen, Intel.
+Ben Ashbaugh, Intel.

+
+
+
+
+

Notice

+
+
+

TODO

+
+
+
+
+

Status

+
+
+

Draft spec, NOT APPROVED!!

+
+
+
+
+

Version

+
+
+

Built On: 2024-08-15
+Version: 0.2.0

+
+
+
+
+

Dependencies

+
+
+

This extension is written against the OpenCL Specification version 3.0.14.

+
+
+

This extension requires OpenCL 1.2 or later.

+
+
+
+
+

Overview

+
+
+

The extension provides a new tensor object abstraction. Tensor objects +are similar to image types in regard that they represent N-dimensional +data of an application chosen data type and they may be mapped to +dedicated hardware, with the following key differences:

+
+
+
    +
  • +

    Higher than 3-dimensional data can be supported (limited by +devices' capabilities).

    +
  • +
  • +

    Applications may choose how the data elements of the tensors are +laid out in the buffers using the tensor layout descriptions +provided in this extension.

    +
  • +
+
+
+

Applications may also choose the memory layouts of the tensors to be +implementation-specified, letting the driver to optimize the tensor +data layout for better performance or to lay out the data as required by +hardware accelerated functions (e.g. exposed via builtin kernels).

+
+
+

The scope of this extension is to provide host APIs for creating tensor +objects and transfer data between tensors, host and other memory +objects.

+
+
+

A separate extension implemented on top of this extension, +cl_exp_defined_builtin_kernels provides "defined built-in +kernels" (DKBs) which can operate on tensors. It also provides mechanism +for drivers to create DBKs that are optimized for the tensor arguments +they operate on.

+
+
+
+
+

New API Functions

+
+
+
+
cl_int clEnqueueImportFromTensorEXP(
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  cl_bool blocking_command,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  void* host_ptr,
+  cl_uint num_events_in_wait_list,
+  const cl_event* event_wait_list,
+  cl_event* event);
+
+cl_int clEnqueueExportToTensorEXP(
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  cl_bool blocking_command,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  const void* host_ptr,
+  cl_uint num_events_in_wait_list,
+  const cl_event* event_wait_list,
+  cl_event* event);
+
+cl_int clEnqueueCopyTensorEXP(
+  cl_command_queue command_queue,
+  cl_tensor src_tensor,
+  cl_tensor dst_tensor,
+  const cl_tensor_shape* src_origin,
+  const cl_tensor_shape* dst_origin,
+  const cl_tensor_shape* region,
+  cl_uint num_events_in_wait_list,
+  const cl_event* event_wait_list,
+  cl_event* event);
+
+cl_int clCommandImportFromTensorEXP(
+  cl_command_buffer_khr command_buffer,
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  void* host_ptr,
+  cl_uint num_sync_points_in_wait_list,
+  const cl_sync_point_khr* sync_point_wait_list,
+  cl_sync_point_khr* sync_point,
+  cl_mutable_command_khr* mutable_handle);
+
+cl_int clCommandExportToTensorEXP(
+  cl_command_buffer_khr command_buffer,
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  const void* host_ptr,
+  cl_uint num_sync_points_in_wait_list,
+  const cl_sync_point_khr* sync_point_wait_list,
+  cl_sync_point_khr* sync_point,
+  cl_mutable_command_khr* mutable_handle);
+
+
+
+
+
+

New API Types

+
+
+
+
typedef cl_uint cl_tensor_layout_type_exp;
+typedef cl_uint cl_tensor_dim_exp;
+typedef cl_uint cl_tensor_layout_ml_type_exp;
+typedef cl_properties cl_tensor_properties_exp;
+
+#define CL_TENSOR_DESC_MAX_RANK_EXP       20u
+#define CL_TENSOR_DESC_MAX_PROPERTIES_EXP 16u
+
+typedef struct cl_tensor_desc_exp {
+    cl_uint               rank;
+    cl_tensor_datatype    dtype;
+    cl_tensor_properties_exp  properties[CL_TENSOR_DESC_MAX_PROPERTIES_EXP]
+    cl_tensor_shape       shape[CL_TENSOR_DESC_MAX_RANK_EXP];
+    const void*           layout;
+    cl_tensor_layout_type_exp layout_type;
+} cl_tensor_desc_exp;
+
+typedef struct cl_tensor_layout_blas_exp {
+    cl_tensor_dim_exp    leading_dims[CL_TENSOR_DESC_MAX_RANK_EXP];
+} cl_tensor_layout_blas_exp;
+
+typedef struct cl_tensor_layout_blas_pitched_exp {
+    cl_tensor_dim_exp    leading_dims[CL_TENSOR_DESC_MAX_RANK_EXP];
+    cl_tensor_stride     leading_strides[CL_TENSOR_DESC_MAX_RANK_EXP];
+} cl_tensor_layout_blas_pitched__exp;
+
+typedef struct cl_tensor_layout_ml_exp {
+  cl_tensor_layout_ml_type_exp ml_type;
+} cl_tensor_layout_ml_exp;
+
+
+
+
+
+

New API Enums

+
+
+

Accepted value for the properties parameter to +clCreateBufferWithProperties for creating a tensor object:

+
+
+
+
CL_MEM_TENSOR_EXP               0x????
+
+
+
+

Accepted values for the param_name parameter to clGetDeviceInfo:

+
+
+
+
CL_DEVICE_MAX_TENSOR_ARGS_EXP     0x????
+CL_DEVICE_MAX_TENSOR_RANK_EXP     0x????
+CL_DEVICE_MAX_TENSOR_ELEMENTS_EXP 0x????
+CL_DEVICE_MAX_TENSOR_STRIDE_EXP   0x????
+
+
+
+

Accepted values for cl_tensor_datatype type:

+
+
+
+
CL_TENSOR_DTYPE_BOOL_EXP        0x????
+
+CL_TENSOR_DTYPE_INT4_EXP        0x????
+CL_TENSOR_DTYPE_INT8_EXP        0x????
+CL_TENSOR_DTYPE_INT16_EXP       0x????
+CL_TENSOR_DTYPE_INT32_EXP       0x????
+CL_TENSOR_DTYPE_INT64_EXP       0x????
+
+CL_TENSOR_DTYPE_UINT4_EXP       0x????
+CL_TENSOR_DTYPE_UINT8_EXP       0x????
+CL_TENSOR_DTYPE_UINT16_EXP      0x????
+CL_TENSOR_DTYPE_UINT32_EXP      0x????
+CL_TENSOR_DTYPE_UINT64_EXP      0x????
+
+CL_TENSOR_DTYPE_FP8E4M3_EXP     0x????
+CL_TENSOR_DTYPE_FP8E5M2_EXP     0x????
+CL_TENSOR_DTYPE_FP16_EXP        0x????
+CL_TENSOR_DTYPE_FP32_EXP        0x????
+CL_TENSOR_DTYPE_FP64_EXP        0x????
+
+CL_TENSOR_DTYPE_BFLOAT16_EXP    0x????
+
+CL_TENSOR_DTYPE_COMPLEX64_EXP   0x????
+CL_TENSOR_DTYPE_COMPLEX128_EXP  0x????
+
+
+
+

Accepted values for cl_tensor_layout_type_exp:

+
+
+
+
CL_TENSOR_LAYOUT_OPAQUE_EXP       0x????
+CL_TENSOR_LAYOUT_BLAS_EXP         0x????
+CL_TENSOR_LAYOUT_BLAS_PITCHED_EXP 0x????
+CL_TENSOR_LAYOUT_ML_EXP           0x????
+
+
+
+

Accepted values for cl_tensor_layout_ml_type_exp:

+
+
+
+
CL_TENSOR_LAYOUT_ML_C_EXP       0x????
+CL_TENSOR_LAYOUT_ML_NC_EXP      0x????
+CL_TENSOR_LAYOUT_ML_CN_EXP      0x????
+CL_TENSOR_LAYOUT_ML_HW_EXP      0x????
+CL_TENSOR_LAYOUT_ML_CHW_EXP     0x????
+CL_TENSOR_LAYOUT_ML_NCHW_EXP    0x????
+CL_TENSOR_LAYOUT_ML_NHWC_EXP    0x????
+
+
+
+

New error codes:

+
+
+
+
CL_INVALID_TENSOR_RANK_EXP   0x????
+CL_INVALID_TENSOR_DTYPE_EXP  0x????
+CL_INVALID_TENSOR_SHAPE_EXP  0x????
+CL_INVALID_TENSOR_LAYOUT_EXP 0x????
+
+
+
+

Modifications to The OpenCL API Specification

+
+
+
(Modify Section 4.2, Querying Devices)
+
+
+
+
+
+
(Add the following to Table 5., List of supported _param_names by clGetDeviceInfo)
+
+
+
+
+
+
+
+ +++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Device InfoReturn TypeDescription

CL_DEVICE_MAX_TENSOR_ARGS_EXP

cl_uint

Max number of tensor objects + arguments specified as arguments to.

CL_DEVICE_MAX_TENSOR_RANK_EXP

cl_uint

Max tensor rank. The minimum + value is 4.

CL_DEVICE_MAX_TENSOR_ELEMENTS_EXP

size_t

Maximum number of tensor + elements in total. The minimum value is 65536.

CL_DEVICE_MAX_TENSOR_PITCH_EXP

size_t

Maximum pitch value for + all pitch components for + CL_TENSOR_LAYOUT_BLAS_PITCHED_EXP memory + layout.

+

The minimum value is 65536.

+
+
+ +
+
+
+
+
(Modify Section 5.2.1, Creating Buffer Objects)
+
+
+
+
+
+
(Add the following to Table 18., Buffer creation properties)
+
+
+
+
+
+
+
+ +++++ + + + + + + + + + + + + + + +
cl_mem_propertiesProperty ValueDescription

CL_MEM_TENSOR_EXP

cl_tensor_desc_exp

+

Creates a tensor object with +properties set in cl_tensor_desc_exp tensor description structure.

+
+
+

The size parameter of the clCreateBufferWithProperties() is +ignored and may be set to zero. The required storage space needed is +inferred from the tensor description. The storage size of the queried +with clGetMemObjectInfo(). The storage size may change during +the runtime unless constrained by the given tensor description.

+
+
+
+
+
+
(Add to list of error codes clCreateBufferWithProperties())
+
+
+
+
+
+
    +
  • +

    CL_INVALID_VALUE if CL_MEM_TENSOR_EXP property is specified and the +rank member of the cl_tensor_desc_exp structure has invalid or +unsupported value.

    +
  • +
  • +

    CL_INVALID_TENSOR_SHAPE_EXP if CL_MEM_TENSOR_EXP property is +specified and the shape member of the cl_tensor_desc_exp +structure has invalid or unsupported description.

    +
  • +
  • +

    CL_INVALID_TENSOR_LAYOUT_TYPE_EXP if CL_MEM_TENSOR_EXP property is +specified and the layout_type member of the cl_tensor_desc_exp +structure has an invalid enumeration constant.

    +
  • +
  • +

    CL_INVALID_TENSOR_LAYOUT_EXP if CL_MEM_TENSOR_EXP property is +specified and the layout member of the cl_tensor_desc_exp has an +invalid description.

    +
  • +
+
+
+
+ +
+
+
+
+
(Add the following to Section 5.2.2, Reading, Writing and Copying Buffer Objects)
+
+
+
+
+

The following functions are for reading from a tensor to host memory / +buffer object or to write to a tensor object from host memory / buffer +object.

+
+
+
+
cl_int clEnqueueImportFromTensorEXP(
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  cl_bool blocking_command,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  void* host_ptr,
+  cl_uint num_events_in_wait_list,
+  const cl_event* event_wait_list,
+  cl_event* event);
+
+
+
+
+
cl_int clEnqueueExportToTensorEXP(
+  cl_command_queue command_queue,
+  cl_tensor tensor,
+  cl_bool blocking_command,
+  const size_t* tensor_origin,
+  const size_t* mem_origin,
+  const size_t* region,
+  const size_t* mem_pitch,
+  cl_mem buffer,
+  const void* host_ptr,
+  cl_uint num_events_in_wait_list,
+  const cl_event* event_wait_list,
+  cl_event* event);
+
+
+
+
    +
  • +

    command_queue is a valid host command-queue in which the read / +write command will be queued. command_queue and tensor must be +created with the same OpenCL context.

    +
  • +
  • +

    tensor refers to a valid tensor object which is bound to a buffer.

    +
  • +
  • +

    blocking_command indicate if the read and write operations are +blocking or non-blocking (see below).

    +
  • +
  • +

    tensor_origin defines the offset coordinates in tensor for start of +the regions to read / write tensor data. The length of the array +must be at least rank the the tensor.

    +
  • +
  • +

    mem_origin defines the offset coordinates in the memory region +pointed by buffer or host_ptr expressed in elements of tensor +data type. The length of the array must be at least rank the the +tensor.

    +
  • +
  • +

    region defines the region being read or written expressed in in +elements of tensor data type. The length of the array must be at +least rank the the tensor. If region is NULL then tensor's +shape will be used as the region.

    +
  • +
  • +

    mem_pitch defines the length of each dimension in elements to be +used for the memory region of buffer or host_ptr. The length of +the array must be at least the rank of tensor minus one. if +mem_pitch is NULL or mem_pitch[i] is zero, mem_pitch[i] is +computed as region[i + 1].

    +
  • +
  • +

    buffer and host_ptr refer to a valid buffer object / host +allocation where data is to be read into or to be written from. +Either the buffer or host_ptr can be non-NULL in which case the +non-NULL argument is used as the operand for the operation.

    +
  • +
  • +

    event_wait_list and num_events_in_wait_list specify events that +need to complete before this particular command can be executed. If +event_wait_list is NULL, then this particular command does not +wait on any event to complete. If event_wait_list is NULL, +num_events_in_wait_list must be 0. If event_wait_list is not +NULL, the list of events pointed to by event_wait_list must be +valid and num_events_in_wait_list must be greater than 0. The +events specified in event_wait_list act as synchronization +points. The context associated with events in event_wait_list and +command_queue must be the same. The memory associated with +event_wait_list can be reused or freed after the function returns.

    +
  • +
  • +

    event returns an event object that identifies this read / write +command and can be used to query or queue a wait for this command to +complete. If event is NULL or the enqueue is unsuccessful, no +event will be created and therefore it will not be possible to query +the status of this command or to wait for this command to +complete. If event_wait_list and event are not NULL, event +must not refer to an element of the event_wait_list array.

    +
  • +
+
+
+

The clEnqueueExportToTensorEXP function copies contents of the buffer +object / host allocation to tensor’s storage in +implementation-defined, opaque memory layout. The +clEnqueueImportFromTensorEXP function copies data from tensor’s +storage to buffer object / host allocation.

+
+
+

The elements of buffer object / host allocation are mapped to tensor +coordinates and vice versa as follows in pseudo C code:

+
+
+
+
tensor_element(
+  tensor,
+  tensor_origin[0] + i[0],
+  tensor_origin[1] + i[1],
+  ...,
+  tensor_origin[N-2] + i[N-2],
+  tensor_origin[N-2] + i[N-1]) ==
+((TENSOR_DATATYPE *)buffer_or_host_ptr)[
+  (mem_origin[0] + i[0]) * pitch(0) +
+  (mem_origin[1] + i[1]) * pitch(1) +
+  ... +
+  (mem_origin[N-2] + i[N-2]) * pitch(N-2) +
+  (mem_origin[N-1] + i[N-1])];
+
+
+
+

Where the N is tensor rank, the i[X] is a tensor coordinate with +inclusive range of 0..<region[X]-1> and the pitch is computed as +follows in pseudo C code:

+
+
+
+
size_t pitch(size_t dim) {
+  size_t pitch = 1;
+  for (size_t i = dim; i < tensor_rank - 1; i++)
+    pitch *=
+      (mem_pitch != NULL || mem_pitch[i] == 0) ? mem_pitch[i] : region[i + 1];
+  return pitch;
+}
+
+
+
+

For dim in 0..(tensor_rank()-1). The tensor_element() represents +an abstract function that accesses a tensor element in its storage at +given coordinate. The method how the coordinates translate to tensor +storage addresses is unspecified.

+
+
+

clEnqueueImportFromTensorEXP and clEnqueueExportToTensorEXP +returns CL_SUCCESS if the function is executed +successfully. Otherwise, it returns one of the following errors:

+
+
+
    +
  • +

    CL_INVALID_COMMAND_QUEUE if command_queue is not a valid host +command-queue.

    +
  • +
  • +

    CL_INVALID_CONTEXT if the context associated with command_queue +and buffer are not the same or if the context associated with +command_queue and events in event_wait_list are not the same.

    +
  • +
  • +

    CL_INVALID_MEM_OBJECT if buffer is not a valid buffer object.

    +
  • +
  • +

    CL_INVALID_VALUE if tensor_origin or mem_origin is NULL.

    +
  • +
  • +

    CL_INVALID_VALUE if the region being read or written specified by +(mem_origin, region, mem_pitch) is out of bounds.

    +
  • +
  • +

    CL_INVALID_VALUE if any region array element is 0.

    +
  • +
  • +

    CL_INVALID_VALUE if mem_pitch is not NULL and mem_pitch[i] is +not 0 and mem_pitch[i] is less than region[i].

    +
  • +
  • +

    CL_INVALID_VALUE if buffer and host_ptr both are NULL or non-NULL.

    +
  • +
  • +

    CL_INVALID_EVENT_WAIT_LIST if event_wait_list is NULL and +num_events_in_wait_list > 0, or event_wait_list is not NULL and +num_events_in_wait_list is 0, or if event objects in +event_wait_list are not valid events.

    +
  • +
  • +

    CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST if the read and write +operations are blocking and the execution status of any of the +events in event_wait_list is a negative integer value.

    +
  • +
  • +

    CL_OUT_OF_RESOURCES if there is a failure to allocate resources +required by the OpenCL implementation on the device.

    +
  • +
  • +

    CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources +required by the OpenCL implementation on the host.

    +
  • +
+
+
+

To copy elements from one tensor to another use:

+
+
+
+
cl_int clEnqueueCopyTensorEXP(
+  cl_command_queue command_queue,
+  cl_tensor src_tensor,
+  cl_tensor dst_tensor,
+  const cl_tensor_shape* src_origin,
+  const cl_tensor_shape* dst_origin,
+  const cl_tensor_shape* region,
+  cl_uint num_events_in_wait_list,
+  const cl_event* event_wait_list,
+  cl_event* event);
+
+
+
+
    +
  • +

    command_queue is a valid host command-queue in which the read / +write command will be queued. command_queue and tensor must be +created with the same OpenCL context.

    +
  • +
  • +

    src_tensor and dst_tensor refer to valid buffer objects created +with CL_MEM_TENSOR_EXP. Tensor elements are copied from src_tensor +to dst_tensor. Rank of the src_tensor and dst_tensor must match.

    +
  • +
  • +

    src_origin and dst_origin define origins of the copy region. The +length of the arrays must be at least tensors' rank.

    +
  • +
  • +

    region defines extends of the slice being being copied. The length +of the arrays must be at least tensors' rank.

    +
  • +
  • +

    event_wait_list and num_events_in_wait_list specify events that +need to complete before this particular command can be executed. If +event_wait_list is NULL, then this particular command does not +wait on any event to complete. If event_wait_list is NULL, +num_events_in_wait_list must be 0. If event_wait_list is not +NULL, the list of events pointed to by event_wait_list must be +valid and num_events_in_wait_list must be greater than 0. The +events specified in event_wait_list act as synchronization +points. The context associated with events in event_wait_list and +command_queue must be the same. The memory associated with +event_wait_list can be reused or freed after the function returns.

    +
  • +
  • +

    event returns an event object that identifies this read / write +command and can be used to query or queue a wait for this command to +complete. If event is NULL or the enqueue is unsuccessful, no +event will be created and therefore it will not be possible to query +the status of this command or to wait for this command to +complete. If event_wait_list and event are not NULL, event +must not refer to an element of the event_wait_list array.

    +
  • +
+
+
+

Elements are copied from the source tensor to the destination tensor +so that after the completion following condition holds expressed in +pseudo C:

+
+
+
+
// 'so' and 'do' are aliases for src_origin and dst_origin, respectively.
+tensor_element(dst_tensor, do[0] + i[0], do[1] + i[1], ..., do[N-1] + i[N-1])
+==
+tensor_element(src_tensor, so[0] + i[0], so[1] + i[1], ..., so[N-1] + i[N-1]);
+
+
+
+

Where the N is tensor rank, the i[X] is a tensor coordinate with +inclusive range of 0..<region[X]-1>.

+
+
+

*clEnqueueCopyTensorEXP* returns CL_SUCCESS if the function is
executed successfully. Otherwise, it returns one of the following
errors:

* CL_INVALID_COMMAND_QUEUE if _command_queue_ is not a valid host
  command-queue.

* CL_INVALID_CONTEXT if the context associated with _command_queue_
  and the tensors is not the same, or if the context associated with
  _command_queue_ and events in _event_wait_list_ is not the same.

* CL_INVALID_MEM_OBJECT if _src_tensor_ or _dst_tensor_ is not a
  valid buffer object created with CL_MEM_TENSOR_EXP.

* CL_INVALID_VALUE if _src_origin_, _dst_origin_ or _region_ is NULL.

* CL_INVALID_VALUE if region[i] is zero for any i in [0, tensor_rank).

* CL_INVALID_VALUE if origin[i] + region[i] > tensor_shape[i] at any
  dimension i in range [0, tensor_rank) for either tensor.

* CL_INVALID_EVENT_WAIT_LIST if _event_wait_list_ is NULL and
  _num_events_in_wait_list_ > 0, or _event_wait_list_ is not NULL and
  _num_events_in_wait_list_ is 0, or if event objects in
  _event_wait_list_ are not valid events.

* CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST if the execution
  status of any of the events in _event_wait_list_ is a negative
  integer value.

* CL_MEM_OBJECT_ALLOCATION_FAILURE if there is a failure to allocate
  memory for the data store associated with the memory object a
  tensor is bound to.

* CL_OUT_OF_RESOURCES if there is a failure to allocate resources
  required by the OpenCL implementation on the device.

* CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
  required by the OpenCL implementation on the host.
(Add the following to Section 5.17.5, Recording Commands to a Command-Buffer)

If cl_khr_command_buffer is supported, then the following command-buffer
counterparts of the *clEnqueueImportFromTensorEXP* and
*clEnqueueExportToTensorEXP* commands are available.

[source,c]
----
cl_int clCommandImportFromTensorEXP(
  cl_command_buffer_khr command_buffer,
  cl_command_queue command_queue,
  cl_tensor tensor,
  const size_t* tensor_origin,
  const size_t* mem_origin,
  const size_t* region,
  const size_t* mem_pitch,
  cl_mem buffer,
  void* host_ptr,
  cl_uint num_sync_points_in_wait_list,
  const cl_sync_point_khr* sync_point_wait_list,
  cl_sync_point_khr* sync_point,
  cl_mutable_command_khr* mutable_handle);

cl_int clCommandExportToTensorEXP(
  cl_command_buffer_khr command_buffer,
  cl_command_queue command_queue,
  cl_tensor tensor,
  const size_t* tensor_origin,
  const size_t* mem_origin,
  const size_t* region,
  const size_t* mem_pitch,
  cl_mem buffer,
  const void* host_ptr,
  cl_uint num_sync_points_in_wait_list,
  const cl_sync_point_khr* sync_point_wait_list,
  cl_sync_point_khr* sync_point,
  cl_mutable_command_khr* mutable_handle);
----
* _command_buffer_ refers to a valid command-buffer object.

* For the _command_queue_, _tensor_, _tensor_origin_, _mem_origin_,
  _region_, _mem_pitch_, _buffer_ and _host_ptr_ parameters, refer to
  *clEnqueueImportFromTensorEXP*.

* For the _num_sync_points_in_wait_list_, _sync_point_wait_list_,
  _sync_point_ and _mutable_handle_ parameters, refer to
  *clCommandCopyBufferKHR*.

*clCommandImportFromTensorEXP* and *clCommandExportToTensorEXP*
return CL_SUCCESS if the function is executed successfully.
Otherwise, they return one of the following errors:

* CL_INVALID_COMMAND_QUEUE if _command_queue_ is not NULL.

* CL_INVALID_COMMAND_BUFFER_KHR if _command_buffer_ is not a valid
  command-buffer.

* CL_INVALID_CONTEXT if the context associated with _command_queue_
  and _command_buffer_ is not the same.

* CL_INVALID_OPERATION if _command_buffer_ has been finalized.

* CL_INVALID_VALUE if _mutable_handle_ is not NULL.

* CL_INVALID_SYNC_POINT_WAIT_LIST_KHR if _sync_point_wait_list_ is
  NULL and _num_sync_points_in_wait_list_ is > 0, or
  _sync_point_wait_list_ is not NULL and
  _num_sync_points_in_wait_list_ is 0, or if synchronization-point
  objects in _sync_point_wait_list_ are not valid
  synchronization-points.

* CL_OUT_OF_RESOURCES if there is a failure to allocate resources
  required by the OpenCL implementation on the device.

* CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources
  required by the OpenCL implementation on the host.
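For illustration, a minimal sketch of recording an import command into
a command buffer might look as follows. The names cmd_buf, my_tensor,
host_data, rows and cols are hypothetical and assumed to have been set
up earlier, and the sketch assumes host_ptr may be used as the
destination when _buffer_ is NULL:

[source,c]
----
// Record a command that reads a full rank-2 tensor (shape {rows, cols})
// into a tightly packed host allocation.
size_t tensor_origin[2] = {0, 0};
size_t mem_origin[2]    = {0, 0};
size_t region[2]        = {rows, cols};

cl_sync_point_khr import_done;
cl_int err = clCommandImportFromTensorEXP(
    cmd_buf,
    NULL,                 // command_queue must be NULL
    my_tensor,
    tensor_origin, mem_origin, region,
    NULL,                 // mem_pitch: host data is tightly packed
    NULL,                 // buffer: not used, host_ptr is the destination
    host_data,
    0, NULL, &import_done,
    NULL);                // mutable_handle
----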
(Add the following to new Section 5.X.Y, Tensor Descriptions)

The following structure describes the properties of a tensor to be created
with *clCreateBufferWithProperties* using the CL_MEM_TENSOR_EXP property:

[source,c]
----
typedef struct cl_tensor_desc_exp {
    cl_uint                   rank;
    cl_tensor_datatype        dtype;
    cl_tensor_properties_exp  properties[CL_TENSOR_DESC_MAX_PROPERTIES_EXP];
    cl_tensor_shape           shape[CL_TENSOR_DESC_MAX_RANK_EXP];
    const void*               layout;
    cl_tensor_layout_type_exp layout_type;
} cl_tensor_desc_exp;
----
* _rank_ defines the tensor's rank, i.e. the number of dimensions.

* _dtype_ defines the data type of the elements in the tensor.
  Possible types are listed in the tensor element type table (Table 1).

* _properties_ is an optional list of properties for the tensor object
  and their corresponding values. The list is terminated with the
  special property 0. If no properties are required, _properties_ may
  be NULL. This extension does not define any optional properties for
  tensors, but future extensions may define properties.

* _shape_ defines the extents of the tensor's dimensions in number of
  elements.

* _layout_ points to an optional structure describing how tensor
  elements are laid out in the buffer memory. The structure must be of
  the type corresponding to _layout_type_ as listed in the tensor
  layout type table (Table 2). The pointer is ignored if _layout_type_
  is CL_TENSOR_LAYOUT_OPAQUE_EXP.

* _layout_type_ indicates the type of the layout structure _layout_
  points to.
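As a brief illustration, a minimal sketch of describing a rank-2 tensor
of 32-bit floats with a driver-chosen (opaque) layout:

[source,c]
----
cl_tensor_desc_exp desc;
desc.rank = 2;
desc.dtype = CL_TENSOR_DTYPE_FP32_EXP;          // element type, see Table 1
desc.properties[0] = 0;                         // no optional properties
desc.shape[0] = 128;                            // extent of dimension 0
desc.shape[1] = 256;                            // extent of dimension 1
desc.layout = NULL;                             // no application-defined layout;
desc.layout_type = CL_TENSOR_LAYOUT_OPAQUE_EXP; // the driver chooses one
----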
.Tensor element types. The API type indicates the corresponding type for copying elements between a host allocation / buffer object and a tensor.
[cols="2,3,2"]
|===
| Tensor element data type | Description | API type

| CL_TENSOR_DTYPE_BOOL
| Data type representing true or false.
| cl_uchar footnote:[Zero and non-zero bytes are interpreted as false and true values, respectively.]

| CL_TENSOR_DTYPE_INT4_EXP
| 4-bit signed integer.
| cl_char

| CL_TENSOR_DTYPE_INT8_EXP
| 8-bit signed integer.
| cl_char

| CL_TENSOR_DTYPE_INT16_EXP
| 16-bit signed integer.
| cl_short

| CL_TENSOR_DTYPE_INT32_EXP
| 32-bit signed integer.
| cl_int

| CL_TENSOR_DTYPE_INT64_EXP
| 64-bit signed integer.
| cl_long

| CL_TENSOR_DTYPE_UINT8_EXP
| 8-bit unsigned integer.
| cl_uchar

| CL_TENSOR_DTYPE_UINT16_EXP
| 16-bit unsigned integer.
| cl_ushort

| CL_TENSOR_DTYPE_UINT32_EXP
| 32-bit unsigned integer.
| cl_uint

| CL_TENSOR_DTYPE_UINT64_EXP
| 64-bit unsigned integer.
| cl_ulong

| CL_TENSOR_DTYPE_FP8E4M3_EXP
| 8-bit floating point with a sign bit, 4 exponent bits, 3 mantissa bits and an exponent bias of 7.
| cl_char

| CL_TENSOR_DTYPE_FP8E5M2_EXP
| 8-bit floating point with a sign bit, 5 exponent bits, 2 mantissa bits and an exponent bias of 15.
| cl_char

| CL_TENSOR_DTYPE_FP16_EXP
| Half precision floating-point.
| cl_half

| CL_TENSOR_DTYPE_BFLOAT16_EXP
| 16-bit brain floating-point.
| cl_ushort

| CL_TENSOR_DTYPE_FP32_EXP
| Single precision floating-point.
| cl_float

| CL_TENSOR_DTYPE_FP64_EXP
| Double precision floating-point.
| cl_double

| CL_TENSOR_DTYPE_COMPLEX64_EXP
| 64-bit complex floating-point with 32-bit real and imaginary parts.
| cl_float2

| CL_TENSOR_DTYPE_COMPLEX128_EXP
| 128-bit complex floating-point with 64-bit real and imaginary parts.
| cl_double2
|===
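For example, a host transfer to or from a CL_TENSOR_DTYPE_BFLOAT16_EXP
tensor uses cl_ushort elements holding raw bfloat16 bit patterns. A
minimal sketch, assuming cmd_q and the rank-1 tensor bf16_tensor were
created earlier and that passing a NULL _buffer_ selects _host_ptr_ as
the data source:

[source,c]
----
// Stage 'count' bfloat16 values in the corresponding API type (cl_ushort)
// and write them into the tensor.
size_t count = 1024;
cl_ushort* staging = (cl_ushort*)malloc(count * sizeof(cl_ushort));
/* ... fill 'staging' with bfloat16 bit patterns ... */

size_t origin[1] = {0};
size_t region[1] = {count};
clEnqueueExportToTensorEXP(cmd_q, bf16_tensor, CL_TRUE,
                           origin,   /* tensor_origin */
                           origin,   /* mem_origin */
                           region,
                           NULL,     /* mem_pitch */
                           NULL,     /* buffer */
                           staging, 0, NULL, NULL);
free(staging);
----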

.Optional tensor memory layout types.
[cols="2,2,3"]
|===
| Layout type | Layout structure type | Description

| CL_TENSOR_LAYOUT_OPAQUE_EXP
| N/A
a| The tensor does not have an application-defined memory layout. The
driver controls the tensor's layout. To read or write elements of the
tensor, the application must:

* use *clEnqueueExportToTensorEXP* and *clEnqueueImportFromTensorEXP*
  (or their command-buffer variants), or

* use *clEnqueueCopyTensorEXP* to copy elements to / from another
  tensor object with an application-defined memory layout.

| CL_TENSOR_LAYOUT_BLAS_EXP
| cl_tensor_layout_blas_exp
| A type that describes a packed memory layout similar to ones used in
BLAS APIs.

| CL_TENSOR_LAYOUT_BLAS_PITCHED_EXP
| cl_tensor_layout_blas_pitched_exp
| A type that describes a pitched memory layout similar to ones used in
BLAS APIs.

| CL_TENSOR_LAYOUT_ML_EXP
| cl_tensor_layout_ml_exp
| The tensor layout is specified with an enumerator. Each enumerator
corresponds to a predefined configuration of the
cl_tensor_layout_blas_exp structure.
|===
(Add the following to new Section 5.X.Y.1, BLAS Tensor Layout)

The following structures describe packed and pitched BLAS-like memory
layouts for a tensor:

[source,c]
----
typedef struct cl_tensor_layout_blas_exp {
    cl_tensor_dim_exp    leading_dims[CL_TENSOR_DESC_MAX_RANK_EXP];
} cl_tensor_layout_blas_exp;

typedef struct cl_tensor_layout_blas_pitched_exp {
    cl_tensor_dim_exp    leading_dims[CL_TENSOR_DESC_MAX_RANK_EXP];
    cl_tensor_pitch      leading_pitches[CL_TENSOR_DESC_MAX_RANK_EXP];
} cl_tensor_layout_blas_pitched_exp;

typedef struct cl_tensor_layout_ml_exp {
    cl_tensor_layout_ml_type_exp ml_type;
} cl_tensor_layout_ml_exp;
----
* _leading_dims_ describes the order in which the tensor dimensions
  are laid out in memory. leading_dims[0] names the dimension whose
  elements are laid out first (consecutively), followed by elements
  along the dimension named by leading_dims[1], and so on. The first N
  elements, where N is the tensor's rank, must be set to unique values
  in the range [0, tensor_rank).

* _leading_pitches_ describes the distance, in number of elements,
  from one element to the next along the leading dimensions in
  _leading_dims_. The first N elements, where N is the tensor's rank
  minus one, must be non-zero, and the following conditions must hold:
** leading_pitches[0] >= tensor_shape[leading_dims[0]] if the tensor
   rank is greater than one, and
** leading_pitches[i + 1] >= tensor_shape[leading_dims[i]] *
   leading_pitches[i] for i in [0, tensor_rank - 1) if the tensor rank
   is greater than two.

* _ml_type_ selects the memory layout via an enumerator. Each
  enumerator corresponds to a predefined configuration of the
  cl_tensor_layout_blas_exp structure, as listed in the ML tensor
  layout type table (Table 3).

The memory layout descriptions map tensor coordinates to byte locations
in the buffer's memory, relative to the buffer's base address, as in
the following pseudo-C example:

[source,c]
----
size_t index = tensor_coordinates[leading_dims[0]];
for (unsigned i = 0; i < tensor_rank - 1; i++)
  index += tensor_coordinates[leading_dims[i + 1]] * pitches[i];
buffer_offset = index * tensor_element_size;
----

where pitches[i] equals:

* leading_pitches[i] for cl_tensor_layout_blas_pitched_exp.

* tensor_shape[leading_dims[i]] * tensor_shape[leading_dims[i-1]] *
  ... * tensor_shape[leading_dims[0]] for cl_tensor_layout_blas_exp.
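As a concrete, non-normative illustration of the mapping, consider a
rank-2 tensor of shape {M, N} (M rows, N columns) stored row-major with
padded rows:

[source,c]
----
// Hypothetical rank-2 example: elements along dimension 1 (columns) are
// laid out consecutively, followed by dimension 0 (rows).
cl_tensor_layout_blas_pitched_exp layout;
layout.leading_dims[0] = 1;      // columns first
layout.leading_dims[1] = 0;      // then rows
layout.leading_pitches[0] = 256; // row pitch in elements, must be >= N

// Element (row, col) then maps to the byte offset
//   (col + row * 256) * tensor_element_size.
// With the packed cl_tensor_layout_blas_exp layout, the pitch would
// instead be tensor_shape[leading_dims[0]] == N.
----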
.ML tensor layout types and their corresponding cl_tensor_layout_blas_exp configuration.
[cols="2,1"]
|===
| ML layout type | Equivalent leading_dims configuration

| CL_TENSOR_LAYOUT_ML_C_EXP
| {}

| CL_TENSOR_LAYOUT_ML_NC_EXP
| {1}

| CL_TENSOR_LAYOUT_ML_CN_EXP
| {0}

| CL_TENSOR_LAYOUT_ML_HW_EXP
| {1}

| CL_TENSOR_LAYOUT_ML_CHW_EXP
| {2, 1}

| CL_TENSOR_LAYOUT_ML_NCHW_EXP
| {3, 2, 1}

| CL_TENSOR_LAYOUT_ML_NHWC_EXP
| {1, 3, 2}
|===

== Sample Codes


An example usage of tensors:

[source,cpp]
----
constexpr size_t b = 64, m = 100, n = 200, k = 50;

std::vector<float> in0_data = ...;
std::vector<float> in1_data = ...;
std::vector<float> out_data(b * m * n);

cl_int err;

// Create a tensor with an opaque layout.
cl_tensor_desc_exp in0_desc;
in0_desc.rank = 3;
in0_desc.dtype = CL_TENSOR_DTYPE_FP32_EXP;
in0_desc.properties[0] = 0;
in0_desc.shape[0] = b;
in0_desc.shape[1] = m;
in0_desc.shape[2] = k;
in0_desc.layout = nullptr;
in0_desc.layout_type = CL_TENSOR_LAYOUT_OPAQUE_EXP;

// NOTE: this sketch assumes the tensor descriptor is passed as a
// pointer-valued property following CL_MEM_TENSOR_EXP.
cl_mem_properties in0_props[] = {
  CL_MEM_TENSOR_EXP, (cl_mem_properties)&in0_desc, 0};
cl_mem in0_tensor = clCreateBufferWithProperties(
  ctx, in0_props, CL_MEM_READ_ONLY, 0, nullptr, &err);

// Create a tensor from a host allocation using an application-defined
// layout description for mapping elements to the tensor.
cl_tensor_desc_exp in1_desc;
in1_desc.rank = 3;
in1_desc.dtype = CL_TENSOR_DTYPE_FP32_EXP;
in1_desc.properties[0] = 0;
in1_desc.shape[0] = b;
in1_desc.shape[1] = k;
in1_desc.shape[2] = n;

cl_tensor_layout_blas_exp col_major;
col_major.leading_dims[0] = 1;
col_major.leading_dims[1] = 2;
in1_desc.layout = &col_major;
in1_desc.layout_type = CL_TENSOR_LAYOUT_BLAS_EXP;

cl_mem_properties in1_props[] = {
  CL_MEM_TENSOR_EXP, (cl_mem_properties)&in1_desc, 0};
cl_mem in1_tensor = clCreateBufferWithProperties(
  ctx, in1_props, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, 0,
  in1_data.data(), &err);

// Create another tensor with an application-defined layout.
cl_tensor_desc_exp out_desc;
out_desc.rank = 3;
out_desc.dtype = CL_TENSOR_DTYPE_FP32_EXP;
out_desc.properties[0] = 0;
out_desc.shape[0] = b;
out_desc.shape[1] = m;
out_desc.shape[2] = n;

cl_tensor_layout_blas_exp row_major;
row_major.leading_dims[0] = 2;
row_major.leading_dims[1] = 1;
out_desc.layout = &row_major;
out_desc.layout_type = CL_TENSOR_LAYOUT_BLAS_EXP;

cl_mem_properties out_props[] = {
  CL_MEM_TENSOR_EXP, (cl_mem_properties)&out_desc, 0};
cl_mem out_tensor = clCreateBufferWithProperties(
  ctx, out_props, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, 0,
  out_data.data(), &err);

// Create a kernel that operates on the tensors and is possibly
// optimized for them via a yet-to-be-realized API extension
// (e.g. cl_exp_defined_builtin_kernels).
cl_kernel batched_matmul_kernel = create_batched_matmul_kernel(
  ctx, device_span, in0_desc, in1_desc, out_desc);

clSetKernelArg(batched_matmul_kernel, 0, sizeof(cl_mem), &in0_tensor);
clSetKernelArg(batched_matmul_kernel, 1, sizeof(cl_mem), &in1_tensor);
clSetKernelArg(batched_matmul_kernel, 2, sizeof(cl_mem), &out_tensor);

// Required command for transferring data into the layout-opaque tensor.
size_t zero_origin[3] = {0, 0, 0};
size_t in0_region[3] = {b, m, k};
clEnqueueExportToTensorEXP(
  cmd_q, in0_tensor, CL_FALSE, zero_origin, zero_origin, in0_region,
  nullptr, nullptr, in0_data.data(), 0, nullptr, nullptr);

size_t matmul_grid[3] = {b, m, n};
clEnqueueNDRangeKernel(
  cmd_q, batched_matmul_kernel, 3, nullptr, matmul_grid, nullptr,
  0, nullptr, nullptr);

// The output tensor uses a host pointer and an application-defined
// layout, so its contents can be accessed directly by mapping it.
void* mapped = clEnqueueMapBuffer(
  cmd_q, out_tensor, CL_TRUE, CL_MAP_READ, 0, b * m * n * sizeof(float),
  0, nullptr, nullptr, &err);
----

== Issues and Open Questions

. Should we support tensors with undefined shape and tensors with
  unknown / symbolic dimension sizes like in ONNX?
+
UNRESOLVED

. Should we define OpenCL C language features for accessing tensors?
+
RESOLVED: OpenCL C support for tensors can be introduced later in a
separate extension. Built-in kernels may benefit from this extension
as it is.

. What is the use case of cl_tensor_layout_blas_pitched_exp?
+
UNRESOLVED

. Should image types be extended instead of adding a separate tensor type?
+
UNRESOLVED

== Version History

[cols="1,2,3,6"]
|===
| Version | Date | Author | Changes

| 0.1.0
| 2023-11-23
| Henry Linjamäki
| Initial revision.

| 0.2.0
| 2024-08-14
| Henry Linjamäki +
Pekka Jääskeläinen +
Michal Babej +
Freddie Witherden
a| * Reworked the document structure to match the cl_khr_extension_template.
* Added clEnqueueCopyTensorEXP.
* Added an API for setting the memory layout for tensors.
|===