Add examples of matmul lowering for a special accelerator
yzhang93 committed Sep 22, 2023
1 parent dacd37a commit f4047ec
Showing 3 changed files with 152 additions and 0 deletions.
65 changes: 65 additions & 0 deletions transform_dialect/examples/accel/matmul_codegen_spec.mlir
@@ -0,0 +1,65 @@
// This script shows a basic example of lowering a matmul through IREE for a special accelerator.
//
// ```
// export IREE_DIR=${HOME}/github/iree; \
// export IREE_SAMPLES_DIR=${HOME}/github/iree-samples; \
// ${IREE_DIR}/build/tools/iree-opt \
// ${IREE_SAMPLES_DIR}/transform_dialect/examples/accel/matmul_source.mlir \
// --iree-hal-target-backends=llvm-cpu \
// --iree-abi-transformation-pipeline \
// --iree-flow-transformation-pipeline \
// --iree-stream-transformation-pipeline \
// --iree-hal-configuration-pipeline | \
// ${IREE_DIR}/build/tools/iree-opt \
// --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target)))' \
// --iree-codegen-llvmcpu-use-transform-dialect=${IREE_SAMPLES_DIR}/transform_dialect/examples/accel/matmul_codegen_spec.mlir
// ```

module attributes { transform.with_named_sequence } {
transform.named_sequence @cleanup(%variant_op: !transform.any_op {transform.readonly}) {
%func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %func {
transform.apply_patterns.linalg.tiling_canonicalization
transform.apply_patterns.iree.fold_fill_into_pad
transform.apply_patterns.scf.for_loop_canonicalization
transform.apply_patterns.canonicalization
} : !transform.any_op
transform.iree.apply_licm %func : !transform.any_op
transform.iree.apply_cse %func : !transform.any_op
transform.yield
}

transform.sequence failures(propagate) {
^bb1(%variant_op: !transform.any_op):
%matmul = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!transform.any_op) -> !transform.any_op

// First level tile to forall with tile_sizes [15, 20].
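// For the 300x100 result in matmul_source.mlir this yields an even
// 20x5 grid of workgroups (300/15 = 20, 100/20 = 5), so no padding is
// needed.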
%forall, %tiled_matmul =
transform.structured.tile_to_forall_op %matmul tile_sizes [15, 20]
( mapping = [#gpu.block<x>, #gpu.block<y>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.iree.populate_workgroup_count_region_using_num_threads_slice %forall
: (!transform.any_op) -> ()

// Tile reduction dimension.
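// With K = 200 and a tile size of 10, this produces an scf.for loop
// with 20 even iterations.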
%tiled_reduction, %loop =
transform.structured.tile %tiled_matmul [0, 0, 10]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)

// Second level tile to forall with tile_sizes [5, 10].
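// Each 15x20 workgroup tile splits evenly into a 3x2 grid of 5x10
// thread tiles.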
%forall_1, %tiled_matmul_1 =
transform.structured.tile_to_forall_op %tiled_reduction tile_sizes [5, 10]
( mapping = [#gpu.thread<x>, #gpu.thread<y>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

// Clean up.
transform.include @cleanup failures(propagate) (%variant_op) : (!transform.any_op) -> ()
transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> ()

// Bufferize and drop HAL descriptor from memref ops.
%variant_op_3 = transform.iree.bufferize %variant_op : (!transform.any_op) -> !transform.any_op

// Post-bufferization: map foralls to workgroups and GPU threads.
%memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
transform.iree.forall_to_workgroup %memref_func : (!transform.any_op) -> ()
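// workgroup_dims [3, 2, 1] matches the 3x2 thread grid implied by the
// second-level tiling (15/5 = 3 along x, 20/10 = 2 along y).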
transform.iree.map_nested_forall_to_gpu_threads %memref_func workgroup_dims = [3, 2, 1] subgroup_size = 8 : (!transform.any_op) -> ()
}
}
81 changes: 81 additions & 0 deletions transform_dialect/examples/accel/matmul_codegen_spec_pad.mlir
@@ -0,0 +1,81 @@
// This script shows an example of lowering a matmul with padding through IREE for a special accelerator.
//
// ```
// export IREE_DIR=${HOME}/github/iree; \
// export IREE_SAMPLES_DIR=${HOME}/github/iree-samples; \
// ${IREE_DIR}/build/tools/iree-opt \
// ${IREE_SAMPLES_DIR}/transform_dialect/examples/accel/matmul_source.mlir \
// --iree-hal-target-backends=llvm-cpu \
// --iree-abi-transformation-pipeline \
// --iree-flow-transformation-pipeline \
// --iree-stream-transformation-pipeline \
// --iree-hal-configuration-pipeline | \
// ${IREE_DIR}/build/tools/iree-opt \
// --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target)))' \
// --iree-codegen-llvmcpu-use-transform-dialect=${IREE_SAMPLES_DIR}/transform_dialect/examples/accel/matmul_codegen_spec_pad.mlir
// ```

module attributes { transform.with_named_sequence } {
transform.named_sequence @cleanup(%variant_op: !transform.any_op {transform.readonly}) {
%func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op
transform.apply_patterns to %func {
transform.apply_patterns.linalg.tiling_canonicalization
transform.apply_patterns.iree.fold_fill_into_pad
transform.apply_patterns.scf.for_loop_canonicalization
transform.apply_patterns.canonicalization
} : !transform.any_op
transform.iree.apply_licm %func : !transform.any_op
transform.iree.apply_cse %func : !transform.any_op
transform.yield
}

transform.sequence failures(propagate) {
^bb1(%variant_op: !transform.any_op):
%matmul = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!transform.any_op) -> !transform.any_op

// First level tile to forall with tile_sizes [32, 16].
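// Unlike the basic spec, 300 and 100 are not divisible by 32 and 16,
// so the boundary tiles are partial; the pad steps below pad them back
// to full tile shapes.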
%forall, %tiled_matmul =
transform.structured.tile_to_forall_op %matmul tile_sizes [32, 16]
( mapping = [#gpu.block<x>, #gpu.block<y>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op)
transform.iree.populate_workgroup_count_region_using_num_threads_slice %forall
: (!transform.any_op) -> ()

// Tile reduction dimension.
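// With K = 200 and a tile size of 16, the last of the 13 reduction
// iterations is partial. Zero padding is safe for matmul: zero
// products do not change the accumulated sum.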
%tiled_reduction, %loop =
transform.structured.tile %tiled_matmul [0, 0, 16]
: (!transform.any_op) -> (!transform.any_op, !transform.any_op)

// Pad the reduction-tiled matmul with zeros.
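// pack_paddings=[1, 1, 0] keeps the pads of the LHS and RHS (operands
// 0 and 1) from folding away; copy_back_op="none" skips copying the
// padded result back into the original tensor.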
%padded, %pad = transform.structured.pad %tiled_reduction {
padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
padding_dimensions=[0, 1, 2],
pack_paddings=[1, 1, 0],
copy_back_op="none"
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

// Second level tile to forall with tile_sizes [5, 10].
%forall_1, %tiled_matmul_1 =
transform.structured.tile_to_forall_op %padded tile_sizes [5, 10]
( mapping = [#gpu.thread<x>, #gpu.thread<y>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

// Pad the thread-tiled matmul with zeros.
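// 32 and 16 are not divisible by 5 and 10, so thread-level boundary
// tiles are partial; here pack_paddings=[0, 0, 1] keeps only the pad
// of the output (operand 2).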
%padded_1, %pad_1 = transform.structured.pad %tiled_matmul_1 {
padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32],
padding_dimensions=[0, 1, 2],
pack_paddings=[0, 0, 1],
copy_back_op="none"
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op)

// Clean up.
transform.include @cleanup failures(propagate) (%variant_op) : (!transform.any_op) -> ()
transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> ()

// Bufferize and drop HAL descriptor from memref ops.
%variant_op_3 = transform.iree.bufferize %variant_op : (!transform.any_op) -> !transform.any_op

// Post-bufferization: map foralls to workgroups and GPU threads.
%memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op
transform.iree.forall_to_workgroup %memref_func : (!transform.any_op) -> ()
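// workgroup_dims [7, 2, 1] covers a 32x16 tile with 5x10 thread tiles:
// ceil(32/5) = 7 along x and ceil(16/10) = 2 along y.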
transform.iree.map_nested_forall_to_gpu_threads %memref_func workgroup_dims = [7, 2, 1] subgroup_size = 8 : (!transform.any_op) -> ()
}
}
6 changes: 6 additions & 0 deletions transform_dialect/examples/accel/matmul_source.mlir
@@ -0,0 +1,6 @@
func.func @matmul_example(%lhs: tensor<300x200xf32>, %rhs: tensor<200x100xf32>, %init : tensor<300x100xf32>) -> tensor<300x100xf32>
{
%res = linalg.matmul ins(%lhs, %rhs: tensor<300x200xf32>, tensor<200x100xf32>)
outs(%init: tensor<300x100xf32>) -> tensor<300x100xf32>
return %res : tensor<300x100xf32>
}
