Skip to content

Commit

Permalink
Bump IREE and MLIR-AIR (#523)
Browse files Browse the repository at this point in the history
There are major changes in the IREE runtime:

iree-org/iree@dcc8a0d

iree-org/iree@f8f2996

iree-org/iree@9ffe473

I'm able to build the runtime locally with the changes in this commit.
@nirvedhmeshram Could you review this PR and double-check the above IREE
commits to see if anything is missing?
  • Loading branch information
yzhang93 authored Jul 10, 2024
1 parent 0739400 commit 9f809c6
Show file tree
Hide file tree
Showing 6 changed files with 46 additions and 56 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ void AMDAIEPropagateDataLayoutPass::runOnOperation() {
RewritePatternSet patterns(context);

linalg::populateDataLayoutPropagationPatterns(
patterns, [](Operation *op) { return true; });
patterns, [](OpOperand *opOperand) { return true; });
patterns.add<RemoveOutsDependency>(context);

if (failed(applyPatternsAndFoldGreedily(getOperation(), std::move(patterns))))
Expand Down
79 changes: 35 additions & 44 deletions runtime/src/iree-amd-aie/driver/xrt/direct_command_buffer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ iree_hal_xrt_direct_command_buffer_cast(iree_hal_command_buffer_t* base_value) {
}

iree_status_t iree_hal_xrt_direct_command_buffer_create(
iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
iree_hal_allocator_t* device_allocator, iree_hal_command_buffer_mode_t mode,
iree_hal_command_category_t command_categories,
iree_host_size_t binding_capacity, iree_arena_block_pool_t* block_pool,
iree_allocator_t host_allocator,
iree_hal_command_buffer_t** out_command_buffer) {
IREE_ASSERT_ARGUMENT(device);
IREE_ASSERT_ARGUMENT(device_allocator);
IREE_ASSERT_ARGUMENT(out_command_buffer);
*out_command_buffer = NULL;
if (binding_capacity > 0) {
Expand All @@ -61,13 +61,17 @@ iree_status_t iree_hal_xrt_direct_command_buffer_create(

iree_hal_xrt_direct_command_buffer_t* command_buffer = NULL;
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_allocator_malloc(host_allocator, sizeof(*command_buffer),
(void**)&command_buffer));
z0,
iree_allocator_malloc(host_allocator,
sizeof(*command_buffer) +
iree_hal_command_buffer_validation_state_size(
mode, binding_capacity),
(void**)&command_buffer));
IREE_TRACE_ZONE_END(z0);
iree_hal_command_buffer_initialize(
device, mode, command_categories, IREE_HAL_QUEUE_AFFINITY_ANY,
binding_capacity, &iree_hal_xrt_direct_command_buffer_vtable,
&command_buffer->base);
device_allocator, mode, command_categories, IREE_HAL_QUEUE_AFFINITY_ANY,
binding_capacity, (uint8_t*)command_buffer + sizeof(*command_buffer),
&iree_hal_xrt_direct_command_buffer_vtable, &command_buffer->base);
command_buffer->host_allocator = host_allocator;
iree_arena_initialize(block_pool, &command_buffer->arena);
iree_status_t status =
Expand Down Expand Up @@ -182,70 +186,69 @@ static iree_status_t iree_hal_xrt_direct_command_buffer_wait_events(
}

static iree_status_t iree_hal_xrt_direct_command_buffer_discard_buffer(
iree_hal_command_buffer_t* base_command_buffer, iree_hal_buffer_t* buffer) {
iree_hal_command_buffer_t* base_command_buffer,
iree_hal_buffer_ref_t buffer) {
// It is okay to do nothing here.
return iree_ok_status();
}

static iree_status_t iree_hal_xrt_direct_command_buffer_fill_buffer(
iree_hal_command_buffer_t* base_command_buffer,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length, const void* pattern,
iree_hal_buffer_ref_t target_ref, const void* pattern,
iree_host_size_t pattern_length) {
return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
"fill buffer not yet supported");
}

static iree_status_t iree_hal_xrt_direct_command_buffer_update_buffer(
iree_hal_command_buffer_t* base_command_buffer, const void* source_buffer,
iree_host_size_t source_offset, iree_hal_buffer_t* target_buffer,
iree_device_size_t target_offset, iree_device_size_t length) {
iree_host_size_t source_offset, iree_hal_buffer_ref_t target_ref) {
IREE_TRACE_ZONE_BEGIN(z0);
const uint8_t* src = (const uint8_t*)source_buffer + source_offset;

// No need to Allocate scratch space (in an arena) as the memcpy
// used below is expected to be synchronized.
xrt::bo target_device_buffer = iree_hal_xrt_buffer_handle(
iree_hal_buffer_allocated_buffer(target_buffer));
iree_hal_buffer_allocated_buffer(target_ref.buffer));
void* target_device_buffer_ptr = target_device_buffer.map();
uint8_t* dst = (uint8_t*)target_device_buffer_ptr +
iree_hal_buffer_byte_offset(target_buffer) + target_offset;
memcpy(dst, src, length);
iree_hal_buffer_byte_offset(target_ref.buffer) +
target_ref.offset;
memcpy(dst, src, target_ref.length);

IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}

static iree_status_t iree_hal_xrt_direct_command_buffer_copy_buffer(
iree_hal_command_buffer_t* base_command_buffer,
iree_hal_buffer_t* source_buffer, iree_device_size_t source_offset,
iree_hal_buffer_t* target_buffer, iree_device_size_t target_offset,
iree_device_size_t length) {
iree_hal_buffer_ref_t source_ref, iree_hal_buffer_ref_t target_ref) {
IREE_TRACE_ZONE_BEGIN(z0);

xrt::bo* target_device_buffer = iree_hal_xrt_buffer_handle(
iree_hal_buffer_allocated_buffer(target_buffer));
iree_hal_buffer_allocated_buffer(target_ref.buffer));
void* target_device_buffer_ptr = target_device_buffer->map();
target_offset += iree_hal_buffer_byte_offset(target_buffer);
iree_device_size_t target_offset =
iree_hal_buffer_byte_offset(target_ref.buffer) + target_ref.offset;

xrt::bo* source_device_buffer = iree_hal_xrt_buffer_handle(
iree_hal_buffer_allocated_buffer(source_buffer));
iree_hal_buffer_allocated_buffer(source_ref.buffer));
void* source_device_buffer_ptr = source_device_buffer->map();
source_offset += iree_hal_buffer_byte_offset(source_buffer);
iree_device_size_t source_offset =
iree_hal_buffer_byte_offset(source_ref.buffer) + source_ref.offset;

uint8_t* dst = (uint8_t*)target_device_buffer_ptr + target_offset;
uint8_t* src = (uint8_t*)source_device_buffer_ptr + source_offset;
memcpy(dst, src, length);
memcpy(dst, src, target_ref.length);

IREE_TRACE_ZONE_END(z0);
return iree_ok_status();
}

static iree_status_t iree_hal_xrt_direct_command_buffer_collective(
iree_hal_command_buffer_t* base_command_buffer, iree_hal_channel_t* channel,
iree_hal_collective_op_t op, uint32_t param,
iree_hal_buffer_binding_t send_binding,
iree_hal_buffer_binding_t recv_binding, iree_device_size_t element_count) {
iree_hal_collective_op_t op, uint32_t param, iree_hal_buffer_ref_t send_ref,
iree_hal_buffer_ref_t recv_ref, iree_device_size_t element_count) {
return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
"collectives not yet supported");
}
Expand All @@ -261,8 +264,7 @@ static iree_status_t iree_hal_xrt_direct_command_buffer_push_constants(
static iree_status_t iree_hal_xrt_direct_command_buffer_push_descriptor_set(
iree_hal_command_buffer_t* base_command_buffer,
iree_hal_pipeline_layout_t* pipeline_layout, uint32_t set,
iree_host_size_t binding_count,
const iree_hal_descriptor_set_binding_t* bindings) {
iree_host_size_t binding_count, const iree_hal_buffer_ref_t* bindings) {
if (binding_count > IREE_HAL_XRT_MAX_DESCRIPTOR_SET_BINDING_COUNT) {
return iree_make_status(
IREE_STATUS_RESOURCE_EXHAUSTED,
Expand All @@ -281,7 +283,7 @@ static iree_status_t iree_hal_xrt_direct_command_buffer_push_descriptor_set(
iree_device_size_t* current_lengths =
command_buffer->descriptor_sets[set].lengths;
for (iree_host_size_t i = 0; i < binding_count; i++) {
const iree_hal_descriptor_set_binding_t* binding = &bindings[i];
const iree_hal_buffer_ref_t* binding = &bindings[i];
if (!binding->buffer) {
IREE_TRACE_ZONE_END(z0);
return iree_make_status(
Expand All @@ -292,11 +294,11 @@ static iree_status_t iree_hal_xrt_direct_command_buffer_push_descriptor_set(
z0, iree_hal_resource_set_insert(command_buffer->resource_set, 1,
&binding->buffer));
std::unique_ptr<xrt::bo> sub_buffer;
current_bindings[binding->binding] = iree_hal_xrt_buffer_handle(
current_bindings[binding->ordinal] = iree_hal_xrt_buffer_handle(
iree_hal_buffer_allocated_buffer(binding->buffer));
current_offsets[binding->binding] =
current_offsets[binding->ordinal] =
iree_hal_buffer_byte_offset(binding->buffer) + binding->offset;
current_lengths[binding->binding] = binding->length;
current_lengths[binding->ordinal] = binding->length;
}

IREE_TRACE_ZONE_END(z0);
Expand Down Expand Up @@ -372,20 +374,11 @@ static iree_status_t iree_hal_xrt_direct_command_buffer_dispatch(
static iree_status_t iree_hal_xrt_direct_command_buffer_dispatch_indirect(
iree_hal_command_buffer_t* base_command_buffer,
iree_hal_executable_t* executable, int32_t entry_point,
iree_hal_buffer_t* workgroups_buffer,
iree_device_size_t workgroups_offset) {
iree_hal_buffer_ref_t workgroups_ref) {
return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
"need xrt implementation of dispatch indirect");
}

static iree_status_t iree_hal_xrt_direct_command_buffer_execute_commands(
iree_hal_command_buffer_t* base_command_buffer,
iree_hal_command_buffer_t* base_commands,
iree_hal_buffer_binding_table_t binding_table) {
return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
"indirect command buffers not yet implemented");
}

namespace {
const iree_hal_command_buffer_vtable_t
iree_hal_xrt_direct_command_buffer_vtable = {
Expand Down Expand Up @@ -414,7 +407,5 @@ const iree_hal_command_buffer_vtable_t
/*.dispatch = */ iree_hal_xrt_direct_command_buffer_dispatch,
/*.dispatch_indirect = */
iree_hal_xrt_direct_command_buffer_dispatch_indirect,
/*.execute_commands = */
iree_hal_xrt_direct_command_buffer_execute_commands,
};
} // namespace
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ extern "C" {
// |out_command_buffer| must be released by the caller (see
// iree_hal_command_buffer_release).
iree_status_t iree_hal_xrt_direct_command_buffer_create(
iree_hal_device_t* device, iree_hal_command_buffer_mode_t mode,
iree_hal_allocator_t* device_allocator, iree_hal_command_buffer_mode_t mode,
iree_hal_command_category_t command_categories,
iree_host_size_t binding_capacity, iree_arena_block_pool_t* block_pool,
iree_allocator_t host_allocator,
Expand Down
15 changes: 7 additions & 8 deletions runtime/src/iree-amd-aie/driver/xrt/xrt_device.cc
Original file line number Diff line number Diff line change
Expand Up @@ -187,16 +187,13 @@ static iree_status_t iree_hal_xrt_device_create_command_buffer(
iree_hal_queue_affinity_t queue_affinity, iree_host_size_t binding_capacity,
iree_hal_command_buffer_t** out_command_buffer) {
iree_hal_xrt_device_t* device = iree_hal_xrt_device_cast(base_device);
if (iree_any_bit_set(mode, IREE_HAL_COMMAND_BUFFER_MODE_NESTED))
return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
"nested command buffer not yet supported");
if (!iree_all_bits_set(mode, IREE_HAL_COMMAND_BUFFER_MODE_ONE_SHOT))
return iree_make_status(IREE_STATUS_UNIMPLEMENTED,
"unimplmented multi-shot command buffer");
return iree_hal_deferred_command_buffer_create(
base_device, mode, command_categories, binding_capacity,
&device->block_pool, iree_hal_device_host_allocator(base_device),
out_command_buffer);
iree_hal_device_allocator(base_device), mode, command_categories,
binding_capacity, &device->block_pool,
iree_hal_device_host_allocator(base_device), out_command_buffer);
}

static iree_status_t iree_hal_xrt_device_create_descriptor_set_layout(
Expand Down Expand Up @@ -340,7 +337,8 @@ static iree_status_t iree_hal_xrt_device_queue_execute(
const iree_hal_semaphore_list_t wait_semaphore_list,
const iree_hal_semaphore_list_t signal_semaphore_list,
iree_host_size_t command_buffer_count,
iree_hal_command_buffer_t* const* command_buffers) {
iree_hal_command_buffer_t* const* command_buffers,
iree_hal_buffer_binding_table_t const* binding_tables) {
IREE_TRACE_ZONE_BEGIN(z0);
iree_hal_xrt_device_t* device = iree_hal_xrt_device_cast(base_device);
for (iree_host_size_t i = 0; i < command_buffer_count; i++) {
Expand All @@ -351,7 +349,8 @@ static iree_status_t iree_hal_xrt_device_queue_execute(
IREE_HAL_COMMAND_BUFFER_MODE_UNVALIDATED;
IREE_RETURN_AND_END_ZONE_IF_ERROR(
z0, iree_hal_xrt_direct_command_buffer_create(
base_device, mode, IREE_HAL_COMMAND_CATEGORY_ANY,
iree_hal_device_allocator(base_device), mode,
IREE_HAL_COMMAND_CATEGORY_ANY,
/*binding_capacity=*/0, &device->block_pool,
device->host_allocator, &xrt_command_buffer));
IREE_RETURN_AND_END_ZONE_IF_ERROR(
Expand Down
2 changes: 1 addition & 1 deletion sync_deps.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
### Update with: shark-workspace pin

PINNED_VERSIONS = {
"iree": "4294a5b0ebaec6dcca483bf16f5918108b09ea0a",
"iree": "dcc8a0d1cce384baf10dd9dd5870f148e02db288",
}

ORIGINS = {
Expand Down
2 changes: 1 addition & 1 deletion third_party/mlir-air
Submodule mlir-air updated 54 files
+68 −116 mlir/lib/Conversion/AIRLoweringPass.cpp
+5 −9 mlir/lib/Conversion/AIRRtToNpuPass.cpp
+12 −9 mlir/lib/Transform/AIRDependencyScheduleOpt.cpp
+8 −8 mlir/lib/Transform/AIRLinalgCodegen.cpp
+115 −106 mlir/lib/Util/Util.cpp
+5 −10 mlir/test/Conversion/AIRLowering/air_launch.mlir
+10 −0 mlir/test/Transform/AIRDependencyScheduleOpt/specialize-channel-wrap-and-stride.mlir
+6 −2 programming_examples/README.md
+9 −0 programming_examples/channel_examples/README.md
+12 −0 programming_examples/channel_examples/channel_size/Makefile
+124 −0 programming_examples/channel_examples/channel_size/channel_size.py
+23 −9 programming_examples/channel_examples/channel_size/run.py
+8 −0 programming_examples/channel_examples/channel_size/run_makefile.lit
+2 −0 programming_examples/channel_examples/herd_to_herd/Makefile
+3 −15 programming_examples/channel_examples/herd_to_herd/herd_to_herd.py
+71 −16 programming_examples/channel_examples/herd_to_herd/run.py
+1 −1 programming_examples/channel_examples/herd_to_herd/run_makefile.lit
+9 −0 programming_examples/data_transfer_transpose/README.md
+15 −0 programming_examples/data_transfer_transpose/channel/Makefile
+49 −0 programming_examples/data_transfer_transpose/channel/run.py
+8 −0 programming_examples/data_transfer_transpose/channel/run_makefile.lit
+72 −0 programming_examples/data_transfer_transpose/channel/transpose.py
+75 −0 programming_examples/data_transfer_transpose/common.py
+15 −0 programming_examples/data_transfer_transpose/dma/Makefile
+49 −0 programming_examples/data_transfer_transpose/dma/run.py
+8 −0 programming_examples/data_transfer_transpose/dma/run_makefile.lit
+79 −0 programming_examples/data_transfer_transpose/dma/transpose.py
+6 −3 programming_examples/matrix_scalar_add/common.py
+2 −0 programming_examples/matrix_scalar_add/multi_core_channel/Makefile
+72 −117 programming_examples/matrix_scalar_add/multi_core_channel/multi_core_channel.py
+1 −1 programming_examples/matrix_scalar_add/multi_core_channel/run.py
+1 −2 programming_examples/matrix_scalar_add/multi_core_channel/run_makefile.lit
+2 −0 programming_examples/matrix_scalar_add/multi_core_dma/Makefile
+4 −18 programming_examples/matrix_scalar_add/multi_core_dma/multi_core_dma.py
+1 −1 programming_examples/matrix_scalar_add/multi_core_dma/run.py
+1 −1 programming_examples/matrix_scalar_add/multi_core_dma/run_makefile.lit
+2 −0 programming_examples/matrix_scalar_add/multi_launch_channel/Makefile
+1 −1 programming_examples/matrix_scalar_add/multi_launch_channel/run.py
+1 −1 programming_examples/matrix_scalar_add/multi_launch_channel/run_makefile.lit
+2 −0 programming_examples/matrix_scalar_add/single_core_channel/Makefile
+1 −1 programming_examples/matrix_scalar_add/single_core_channel/run.py
+1 −1 programming_examples/matrix_scalar_add/single_core_channel/run_makefile.lit
+4 −4 programming_examples/matrix_scalar_add/single_core_channel/single_core_channel.py
+2 −0 programming_examples/matrix_scalar_add/single_core_dma/Makefile
+1 −1 programming_examples/matrix_scalar_add/single_core_dma/run.py
+1 −1 programming_examples/matrix_scalar_add/single_core_dma/run_makefile.lit
+1 −1 programming_examples/matrix_scalar_add/single_core_dma/single_core_dma.py
+2 −5 programming_examples/shim_dma_2d/CMakeLists.txt
+2 −7 programming_examples/shim_dma_2d/Makefile
+1 −1 programming_examples/shim_dma_2d/run_makefile.lit
+2 −0 programming_examples/shim_dma_2d/test.cpp
+0 −1 python/CMakeLists.txt
+1 −1 utils/clone-llvm.sh
+1 −1 utils/clone-mlir-aie.sh

0 comments on commit 9f809c6

Please sign in to comment.