-
Notifications
You must be signed in to change notification settings - Fork 46
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add examples of transform dialect for a special accelerator (#203)
* Add examples of matmul lowering for a special accelerator * Minor modifications to `matmul_codegen_spec_pad.mlir`. - Make "y" the outermost distribution dimension, and "x" the innermost - Make tile sizes for local divide the tile sizes for shared - Add a pass to hoist the static allocations to the "example script header". --------- Co-authored-by: MaheshRavishankar <mahesh@nod-labs.com>
- Loading branch information
1 parent
dacd37a
commit bdd6265
Showing
3 changed files
with
153 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
// This script shows a basic example of lowering a matmul through IREE for a special accelerator. | ||
// | ||
// ``` | ||
// export IREE_DIR=${HOME}/github/iree; \ | ||
// export IREE_SAMPLES_DIR=${HOME}/github/iree-samples; \ | ||
// ${IREE_DIR}/build/tools/iree-opt \ | ||
// ${IREE_SAMPLES_DIR}/transform_dialect/examples/accel/matmul_source.mlir \ | ||
// --iree-hal-target-backends=llvm-cpu \ | ||
// --iree-abi-transformation-pipeline \ | ||
// --iree-flow-transformation-pipeline \ | ||
// --iree-stream-transformation-pipeline \ | ||
// --iree-hal-configuration-pipeline | \ | ||
// ${IREE_DIR}/build/tools/iree-opt \ | ||
// --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target)))' \ | ||
// --iree-codegen-llvmcpu-use-transform-dialect=${IREE_SAMPLES_DIR}/transform_dialect/examples/accel/matmul_codegen_spec.mlir | ||
// ``` | ||
|
||
// Transform-dialect codegen spec for the matmul example: a reusable @cleanup named sequence | ||
// plus the top-level transform.sequence that tiles, cleans up, bufferizes and maps the matmul. | ||
module attributes { transform.with_named_sequence } { | ||
// @cleanup: matches every func.func nested under %variant_op, applies the pattern sets listed | ||
// in the apply_patterns region, then runs apply_licm and apply_cse on the matched functions. | ||
// %variant_op is annotated transform.readonly. | ||
transform.named_sequence @cleanup(%variant_op: !transform.any_op {transform.readonly}) { | ||
%func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op | ||
transform.apply_patterns to %func { | ||
transform.apply_patterns.linalg.tiling_canonicalization | ||
transform.apply_patterns.iree.fold_fill_into_pad | ||
transform.apply_patterns.scf.for_loop_canonicalization | ||
transform.apply_patterns.canonicalization | ||
} : !transform.any_op | ||
transform.iree.apply_licm %func : !transform.any_op | ||
transform.iree.apply_cse %func : !transform.any_op | ||
transform.yield | ||
} | ||
|
||
// Top-level script. Steps, in order: match the linalg.matmul, tile it to an outer forall, | ||
// tile the third (reduction) dimension, tile again to an inner forall, run @cleanup, | ||
// bufferize, and finally map the foralls to workgroups and GPU threads. | ||
transform.sequence failures(propagate) { | ||
^bb1(%variant_op: !transform.any_op): | ||
%matmul = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!transform.any_op) -> !transform.any_op | ||
|
||
// First level tile to forall with tile_sizes [15, 20]. | ||
// The two tiled dimensions are mapped to #gpu.block<x> and #gpu.block<y>, in that order. | ||
%forall, %tiled_matmul = | ||
transform.structured.tile_to_forall_op %matmul tile_sizes [15, 20] | ||
( mapping = [#gpu.block<x>, #gpu.block<y>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
transform.iree.populate_workgroup_count_region_using_num_threads_slice %forall | ||
: (!transform.any_op) -> () | ||
|
||
// Tile reduction dimension. | ||
// Sizes [0, 0, 10]: only the third dimension gets a non-zero tile size (10). | ||
%tiled_reduction, %loop = | ||
transform.structured.tile %tiled_matmul [0, 0, 10] | ||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
|
||
// Second level tile to forall with tile_sizes [5, 10]. | ||
// The two tiled dimensions are mapped to #gpu.thread<x> and #gpu.thread<y>, in that order. | ||
%forall_1, %tiled_matmul_1 = | ||
transform.structured.tile_to_forall_op %tiled_reduction tile_sizes [5, 10] | ||
( mapping = [#gpu.thread<x>, #gpu.thread<y>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
|
||
// Clean up. | ||
transform.include @cleanup failures(propagate) (%variant_op) : (!transform.any_op) -> () | ||
transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> () | ||
|
||
// Bufferize and drop HAL descriptor from memref ops. | ||
// Later steps use the new handle %variant_op_3 returned by bufferize. | ||
%variant_op_3 = transform.iree.bufferize %variant_op : (!transform.any_op) -> !transform.any_op | ||
|
||
// Post-bufferization mapping workgroup. | ||
// NOTE(review): workgroup_dims = [3, 2, 1] presumably comes from 15/5 = 3 (thread<x>) and | ||
// 20/10 = 2 (thread<y>) inner tiles per outer tile - confirm if the tile sizes change. | ||
%memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op | ||
transform.iree.forall_to_workgroup %memref_func : (!transform.any_op) -> () | ||
transform.iree.map_nested_forall_to_gpu_threads %memref_func workgroup_dims = [3, 2, 1] subgroup_size = 8 : (!transform.any_op) -> () | ||
} | ||
} |
82 changes: 82 additions & 0 deletions
82
transform_dialect/examples/accel/matmul_codegen_spec_pad.mlir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
// This script shows an example of lowering a matmul through IREE for a special accelerator. | ||
// | ||
// ``` | ||
// export IREE_DIR=${HOME}/github/iree; \ | ||
// export IREE_SAMPLES_DIR=${HOME}/github/iree-samples; \ | ||
// ${IREE_DIR}/build/tools/iree-opt \ | ||
// ${IREE_SAMPLES_DIR}/transform_dialect/examples/accel/matmul_source.mlir \ | ||
// --iree-hal-target-backends=llvm-cpu \ | ||
// --iree-abi-transformation-pipeline \ | ||
// --iree-flow-transformation-pipeline \ | ||
// --iree-stream-transformation-pipeline \ | ||
// --iree-hal-configuration-pipeline | \ | ||
// ${IREE_DIR}/build/tools/iree-opt \ | ||
// --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target, builtin.module(func.func(iree-hoist-statically-bound-allocations)))))' \ | ||
// --iree-codegen-llvmcpu-use-transform-dialect=${IREE_SAMPLES_DIR}/transform_dialect/examples/accel/matmul_codegen_spec_pad.mlir | ||
// ``` | ||
|
||
// Transform-dialect codegen spec for the padded matmul example: a reusable @cleanup named | ||
// sequence plus the top-level transform.sequence that tiles, pads, bufferizes and maps the matmul. | ||
module attributes { transform.with_named_sequence } { | ||
// @cleanup: matches every func.func nested under %variant_op, applies the pattern sets listed | ||
// in the apply_patterns region, then runs apply_licm and apply_cse on the matched functions. | ||
// %variant_op is annotated transform.readonly. | ||
transform.named_sequence @cleanup(%variant_op: !transform.any_op {transform.readonly}) { | ||
%func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op | ||
transform.apply_patterns to %func { | ||
transform.apply_patterns.linalg.tiling_canonicalization | ||
transform.apply_patterns.iree.fold_fill_into_pad | ||
transform.apply_patterns.scf.for_loop_canonicalization | ||
transform.apply_patterns.canonicalization | ||
} : !transform.any_op | ||
transform.iree.apply_licm %func : !transform.any_op | ||
transform.iree.apply_cse %func : !transform.any_op | ||
transform.yield | ||
} | ||
|
||
// Top-level script. Steps, in order: match the linalg.matmul, tile it to an outer forall, | ||
// tile the third (reduction) dimension, pad, tile again to an inner forall, pad again, | ||
// run @cleanup, bufferize, map the foralls to workgroups and GPU threads, and hoist static allocs. | ||
transform.sequence failures(propagate) { | ||
^bb1(%variant_op: !transform.any_op): | ||
%matmul = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!transform.any_op) -> !transform.any_op | ||
|
||
// First level tile to forall with tile_sizes [16, 32]. | ||
// The two tiled dimensions are mapped to #gpu.block<y> and #gpu.block<x>, in that order. | ||
%forall, %tiled_matmul = | ||
transform.structured.tile_to_forall_op %matmul tile_sizes [16, 32] | ||
( mapping = [#gpu.block<y>, #gpu.block<x>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
transform.iree.populate_workgroup_count_region_using_num_threads_slice %forall | ||
: (!transform.any_op) -> () | ||
|
||
// Tile reduction dimension. | ||
// Sizes [0, 0, 8]: only the third dimension gets a non-zero tile size (8). | ||
%tiled_reduction, %loop = | ||
transform.structured.tile %tiled_matmul [0, 0, 8] | ||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
|
||
// Pad operation (first level, applied to the reduction-tiled matmul). | ||
// Pads dimensions 0, 1 and 2 with 0.0 : f32; pack_paddings is [1, 1, 0] here. | ||
%padded, %pad, %__ = transform.structured.pad %tiled_reduction { | ||
padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], | ||
padding_dimensions=[0, 1, 2], | ||
pack_paddings=[1, 1, 0], | ||
copy_back_op="none" | ||
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) | ||
|
||
// Second level tile to forall with tile_sizes [8, 8]. | ||
// The two tiled dimensions are mapped to #gpu.thread<y> and #gpu.thread<x>, in that order. | ||
%forall_1, %tiled_matmul_1 = | ||
transform.structured.tile_to_forall_op %padded tile_sizes [8, 8] | ||
( mapping = [#gpu.thread<y>, #gpu.thread<x>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
|
||
// Pad operation (second level, applied to the thread-level tiled matmul). | ||
// Same padding values and dimensions as above; pack_paddings is [0, 0, 1] here. | ||
%padded_1, %pad_1, %_ = transform.structured.pad %tiled_matmul_1 { | ||
padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], | ||
padding_dimensions=[0, 1, 2], | ||
pack_paddings=[0, 0, 1], | ||
copy_back_op="none" | ||
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) | ||
|
||
// Clean up. | ||
transform.include @cleanup failures(propagate) (%variant_op) : (!transform.any_op) -> () | ||
transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> () | ||
|
||
// Bufferize and drop HAL descriptor from memref ops. | ||
// Later steps use the new handle %variant_op_3 returned by bufferize. | ||
%variant_op_3 = transform.iree.bufferize %variant_op : (!transform.any_op) -> !transform.any_op | ||
|
||
// Post-bufferization mapping workgroup. | ||
// NOTE(review): workgroup_dims = [4, 2, 1] presumably comes from 32/8 = 4 (thread<x>) and | ||
// 16/8 = 2 (thread<y>) inner tiles per outer tile - confirm if the tile sizes change. | ||
%memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op | ||
transform.iree.forall_to_workgroup %memref_func : (!transform.any_op) -> () | ||
transform.iree.map_nested_forall_to_gpu_threads %memref_func workgroup_dims = [4, 2, 1] subgroup_size = 1 : (!transform.any_op) -> () | ||
transform.iree.hoist_static_alloc %memref_func : (!transform.any_op) -> () | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
// Source program for the examples: a statically shaped f32 linalg.matmul. | ||
// %lhs is 300x200, %rhs is 200x100, and %init (300x100) is passed as the `outs` operand; | ||
// the 300x100 result of the matmul is returned. | ||
func.func @matmul_example(%lhs: tensor<300x200xf32>, %rhs: tensor<200x100xf32>, %init : tensor<300x100xf32>) -> tensor<300x100xf32> | ||
{ | ||
%res = linalg.matmul ins(%lhs, %rhs: tensor<300x200xf32>, tensor<200x100xf32>) | ||
outs(%init: tensor<300x100xf32>) -> tensor<300x100xf32> | ||
return %res : tensor<300x100xf32> | ||
} |