-
Notifications
You must be signed in to change notification settings - Fork 46
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add examples of matmul lowering for a special accelerator
- Loading branch information
Showing
3 changed files
with
152 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
// This script shows a basic example lowering matmul through IREE for a special accelerator. | ||
// | ||
// ``` | ||
// export IREE_DIR=${HOME}/github/iree; \ | ||
// export IREE_SAMPLES_DIR=${HOME}/github/iree-samples; \ | ||
// ${IREE_DIR}/build/tools/iree-opt \ | ||
// ${IREE_SAMPLES_DIR}/transform_dialect/examples/accel/matmul_source.mlir \ | ||
// --iree-hal-target-backends=llvm-cpu \ | ||
// --iree-abi-transformation-pipeline \ | ||
// --iree-flow-transformation-pipeline \ | ||
// --iree-stream-transformation-pipeline \ | ||
// --iree-hal-configuration-pipeline | \ | ||
// ${IREE_DIR}/build/tools/iree-opt \ | ||
// --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target)))' \ | ||
// --iree-codegen-llvmcpu-use-transform-dialect=${IREE_SAMPLES_DIR}/transform_dialect/examples/accel/matmul_codegen_spec.mlir | ||
// ``` | ||
|
||
module attributes { transform.with_named_sequence } { | ||
// Named sequence @cleanup: matches every func.func under %variant_op, applies the listed | ||
// rewrite pattern sets to them, then runs the IREE LICM and CSE transforms on the same handle. | ||
// The %variant_op argument is annotated transform.readonly and nothing is returned. | ||
transform.named_sequence @cleanup(%variant_op: !transform.any_op {transform.readonly}) { | ||
%func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op | ||
transform.apply_patterns to %func { | ||
transform.apply_patterns.linalg.tiling_canonicalization | ||
transform.apply_patterns.iree.fold_fill_into_pad | ||
transform.apply_patterns.scf.for_loop_canonicalization | ||
transform.apply_patterns.canonicalization | ||
} : !transform.any_op | ||
transform.iree.apply_licm %func : !transform.any_op | ||
transform.iree.apply_cse %func : !transform.any_op | ||
transform.yield | ||
}
|
||
// Main transform sequence: matches the linalg.matmul under %variant_op, tiles it to a | ||
// forall mapped to gpu.block ids, tiles the reduction dimension, tiles again to a forall | ||
// mapped to gpu.thread ids, then cleans up, bufferizes and maps the foralls. | ||
transform.sequence failures(propagate) { | ||
^bb1(%variant_op: !transform.any_op): | ||
%matmul = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!transform.any_op) -> !transform.any_op | ||
|
||
// First level tile to forall with tile_sizes [15, 20]. | ||
%forall, %tiled_matmul = | ||
transform.structured.tile_to_forall_op %matmul tile_sizes [15, 20] | ||
( mapping = [#gpu.block<x>, #gpu.block<y>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
transform.iree.populate_workgroup_count_region_using_num_threads_slice %forall | ||
: (!transform.any_op) -> () | ||
|
||
// Tile reduction dimension. | ||
// Only the third entry is non-zero, so only that loop dimension is tiled (by 10). | ||
%tiled_reduction, %loop = | ||
transform.structured.tile %tiled_matmul [0, 0, 10] | ||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
|
||
// Second level tile to forall with tile_sizes [5, 10]. | ||
%forall_1, %tiled_matmul_1 = | ||
transform.structured.tile_to_forall_op %tiled_reduction tile_sizes [5, 10] | ||
( mapping = [#gpu.thread<x>, #gpu.thread<y>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
|
||
// Clean up. | ||
transform.include @cleanup failures(propagate) (%variant_op) : (!transform.any_op) -> () | ||
transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> () | ||
|
||
// Bufferize and drop HAL descriptor from memref ops. | ||
%variant_op_3 = transform.iree.bufferize %variant_op : (!transform.any_op) -> !transform.any_op | ||
|
||
// Post-bufferization mapping workgroup. | ||
// func.func is re-matched under the handle returned by bufferize (%variant_op_3). | ||
%memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op | ||
transform.iree.forall_to_workgroup %memref_func : (!transform.any_op) -> () | ||
// NOTE(review): workgroup_dims [3, 2, 1] presumably corresponds to the 3 x 2 grid of | ||
// [5, 10] tiles inside one [15, 20] first-level tile - confirm. | ||
transform.iree.map_nested_forall_to_gpu_threads %memref_func workgroup_dims = [3, 2, 1] subgroup_size = 8 : (!transform.any_op) -> () | ||
}
} |
81 changes: 81 additions & 0 deletions
81
transform_dialect/examples/accel/matmul_codegen_spec_pad.mlir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
// This script shows an example lowering matmul through IREE for a special accelerator. | ||
// | ||
// ``` | ||
// export IREE_DIR=${HOME}/github/iree; \ | ||
// export IREE_SAMPLES_DIR=${HOME}/github/iree-samples; \ | ||
// ${IREE_DIR}/build/tools/iree-opt \ | ||
// ${IREE_SAMPLES_DIR}/transform_dialect/examples/accel/matmul_source.mlir \ | ||
// --iree-hal-target-backends=llvm-cpu \ | ||
// --iree-abi-transformation-pipeline \ | ||
// --iree-flow-transformation-pipeline \ | ||
// --iree-stream-transformation-pipeline \ | ||
// --iree-hal-configuration-pipeline | \ | ||
// ${IREE_DIR}/build/tools/iree-opt \ | ||
// --pass-pipeline='builtin.module(hal.executable(hal.executable.variant(iree-llvmcpu-lower-executable-target)))' \ | ||
// --iree-codegen-llvmcpu-use-transform-dialect=${IREE_SAMPLES_DIR}/transform_dialect/examples/accel/matmul_codegen_spec_pad.mlir | ||
// ``` | ||
|
||
module attributes { transform.with_named_sequence } { | ||
// Named sequence @cleanup: matches every func.func under %variant_op, applies the listed | ||
// rewrite pattern sets to them, then runs the IREE LICM and CSE transforms on the same handle. | ||
// The %variant_op argument is annotated transform.readonly and nothing is returned. | ||
transform.named_sequence @cleanup(%variant_op: !transform.any_op {transform.readonly}) { | ||
%func = transform.structured.match ops{["func.func"]} in %variant_op : (!transform.any_op) -> !transform.any_op | ||
transform.apply_patterns to %func { | ||
transform.apply_patterns.linalg.tiling_canonicalization | ||
transform.apply_patterns.iree.fold_fill_into_pad | ||
transform.apply_patterns.scf.for_loop_canonicalization | ||
transform.apply_patterns.canonicalization | ||
} : !transform.any_op | ||
transform.iree.apply_licm %func : !transform.any_op | ||
transform.iree.apply_cse %func : !transform.any_op | ||
transform.yield | ||
}
|
||
// Main transform sequence (padded variant): matches the linalg.matmul under %variant_op, | ||
// tiles it to a forall mapped to gpu.block ids, tiles the reduction dimension, pads, tiles | ||
// again to a forall mapped to gpu.thread ids, pads again, then cleans up, bufferizes and | ||
// maps the foralls. | ||
transform.sequence failures(propagate) { | ||
^bb1(%variant_op: !transform.any_op): | ||
%matmul = transform.structured.match ops{["linalg.matmul"]} in %variant_op : (!transform.any_op) -> !transform.any_op | ||
|
||
// First level tile to forall with tile_sizes [32, 16]. | ||
%forall, %tiled_matmul = | ||
transform.structured.tile_to_forall_op %matmul tile_sizes [32, 16] | ||
( mapping = [#gpu.block<x>, #gpu.block<y>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
transform.iree.populate_workgroup_count_region_using_num_threads_slice %forall | ||
: (!transform.any_op) -> () | ||
|
||
// Tile reduction dimension. | ||
// Only the third entry is non-zero, so only that loop dimension is tiled (by 16). | ||
%tiled_reduction, %loop = | ||
transform.structured.tile %tiled_matmul [0, 0, 16] | ||
: (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
|
||
// Pad the reduction-tiled matmul along dimensions 0, 1 and 2 with 0.0 : f32 padding | ||
// values; pack_paddings is set for the first two entries only. | ||
%padded, %pad = transform.structured.pad %tiled_reduction { | ||
padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], | ||
padding_dimensions=[0, 1, 2], | ||
pack_paddings=[1, 1, 0], | ||
copy_back_op="none" | ||
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
|
||
// Second level tile to forall with tile_sizes [5, 10]. | ||
%forall_1, %tiled_matmul_1 = | ||
transform.structured.tile_to_forall_op %padded tile_sizes [5, 10] | ||
( mapping = [#gpu.thread<x>, #gpu.thread<y>] ) : (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
|
||
// Pad the thread-level matmul along dimensions 0, 1 and 2 with 0.0 : f32 padding | ||
// values; pack_paddings is set for the last entry only. | ||
%padded_1, %pad_1 = transform.structured.pad %tiled_matmul_1 { | ||
padding_values=[0.0 : f32, 0.0 : f32, 0.0 : f32], | ||
padding_dimensions=[0, 1, 2], | ||
pack_paddings=[0, 0, 1], | ||
copy_back_op="none" | ||
} : (!transform.any_op) -> (!transform.any_op, !transform.any_op) | ||
|
||
// Clean up. | ||
transform.include @cleanup failures(propagate) (%variant_op) : (!transform.any_op) -> () | ||
transform.iree.eliminate_empty_tensors %variant_op : (!transform.any_op) -> () | ||
|
||
// Bufferize and drop HAL descriptor from memref ops. | ||
%variant_op_3 = transform.iree.bufferize %variant_op : (!transform.any_op) -> !transform.any_op | ||
|
||
// Post-bufferization mapping workgroup. | ||
// func.func is re-matched under the handle returned by bufferize (%variant_op_3). | ||
%memref_func = transform.structured.match ops{["func.func"]} in %variant_op_3 : (!transform.any_op) -> !transform.any_op | ||
transform.iree.forall_to_workgroup %memref_func : (!transform.any_op) -> () | ||
// NOTE(review): workgroup_dims [7, 2, 1] presumably corresponds to ceil(32/5) x ceil(16/10) | ||
// second-level [5, 10] tiles inside one [32, 16] first-level tile - confirm. | ||
transform.iree.map_nested_forall_to_gpu_threads %memref_func workgroup_dims = [7, 2, 1] subgroup_size = 8 : (!transform.any_op) -> () | ||
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
// Source function for the examples: a single linalg.matmul on static f32 tensors, | ||
// taking %lhs (300x200) and %rhs (200x100) as inputs and %init (300x100) as the output | ||
// operand, and returning the 300x100 result. | ||
func.func @matmul_example(%lhs: tensor<300x200xf32>, %rhs: tensor<200x100xf32>, %init : tensor<300x100xf32>) -> tensor<300x100xf32> | ||
{ | ||
%res = linalg.matmul ins(%lhs, %rhs: tensor<300x200xf32>, tensor<200x100xf32>) | ||
outs(%init: tensor<300x100xf32>) -> tensor<300x100xf32> | ||
return %res : tensor<300x100xf32> | ||
}