Skip to content

Commit

Permalink
add benchmark op. support executing region. add BenchmarkStats. (Padd…
Browse files Browse the repository at this point in the history
  • Loading branch information
Shibo Tao authored Mar 17, 2021
1 parent 960f283 commit 5702502
Show file tree
Hide file tree
Showing 21 changed files with 514 additions and 20 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
*~
*.swp
*.swo
*.swn
*.pyc
*.o
*.a
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ add_dependencies(cinncore GEN_LLVM_RUNTIME_IR_HEADER
# MLIR td file generations
ops_inc
basic_kernels_inc
test_kernels_inc
cinn_base_inc
tensor_shape_inc
dense_tensor_inc
Expand Down
3 changes: 3 additions & 0 deletions cinnrt/dialect/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ core_gather_srcs(SRCS
dialect.cc
types.cc
basic_kernels.cc
test_kernels.cc
cinn_base.cc
init_cinn_dialects.cc
tensor_shape.cc
Expand All @@ -16,6 +17,7 @@ core_gather_srcs(SRCS

mlir_tablegen_on(ops)
mlir_tablegen_on(basic_kernels)
mlir_tablegen_on(test_kernels)
mlir_tablegen_on(cinn_base DIALECT cinn)
mlir_tablegen_on(tensor_shape DIALECT ts)
mlir_tablegen_on(dense_tensor DIALECT dt)
Expand Down Expand Up @@ -49,3 +51,4 @@ cc_test(test_mlir_loader SRCS mlir_loader_test.cc DEPS cinncore ${MLIR_IR_LIBS})
# execute mlir and run FileCheck
cinn_exec_check(run_and_check_tensor_type mlir_tests/tensor_type.mlir)
cinn_exec_check(run_and_check_basic mlir_tests/basic.mlir)
cinn_exec_check(run_and_check_benchmark mlir_tests/benchmark.mlir)
3 changes: 2 additions & 1 deletion cinnrt/dialect/basic_kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
#include <mlir/IR/OpDefinition.h>
#include <mlir/Interfaces/SideEffectInterfaces.h>

namespace cinnrt::dialect {
using namespace mlir; // NOLINT

namespace cinnrt::dialect {
#define GET_OP_CLASSES
#include "cinnrt/dialect/basic_kernels.hpp.inc"
} // namespace cinnrt::dialect
8 changes: 6 additions & 2 deletions cinnrt/dialect/cinn_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include "cinnrt/dialect/basic_kernels.h"
#include "cinnrt/dialect/dense_tensor.h"
#include "cinnrt/dialect/test_kernels.h"

namespace cinnrt::dialect {

Expand All @@ -12,11 +13,14 @@ void CINNDialect::initialize() {

addTypes<cinnrt::dt::TensorType>();

#define GET_OP_LIST
addOperations<
#define GET_OP_LIST
#include "cinnrt/dialect/basic_kernels.cpp.inc"
>();
#undef GET_OP_LIST
addOperations<
#define GET_OP_LIST
#include "cinnrt/dialect/test_kernels.cpp.inc"
>();
}

mlir::Type CINNDialect::parseType(mlir::DialectAsmParser &parser) const {
Expand Down
23 changes: 23 additions & 0 deletions cinnrt/dialect/mlir_tests/benchmark.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// CHECK-LABEL: @benchmark
func @benchmark() {
  // Runs the wrapped region up to max_count = 3 times (bounded by
  // duration_secs = 1) after num_warmup_runs = 3 warm-up executions, then
  // prints one statistics line per metric below; FileCheck verifies each
  // metric header appears in order.
  // CHECK-LABEL: BM:add.f32:Duration(ns)
  // CHECK-LABEL: BM:add.f32:Count: 3
  // CHECK-LABEL: BM:add.f32:Time Min(ns)
  // CHECK-LABEL: BM:add.f32:Time 50%(ns)
  // CHECK-LABEL: BM:add.f32:Time 95%(ns)
  // CHECK-LABEL: BM:add.f32:Time 99%(ns)
  // CHECK-LABEL: BM:add.f32:CPU Min(ns)
  // CHECK-LABEL: BM:add.f32:CPU 50%(ns)
  // CHECK-LABEL: BM:add.f32:CPU 95%(ns)
  // CHECK-LABEL: BM:add.f32:CPU 99%(ns)
  // CHECK-LABEL: BM:add.f32:CPU utilization(percent)
  cinn.benchmark "add.f32"() duration_secs = 1, max_count = 3, num_warmup_runs = 3
  {
    // Benchmarked body: adds two f32 constants and prints the sum.
    %0 = cinn.constant.f32 1.0
    %1 = cinn.constant.f32 2.0
    %res = "cinn.add.f32"(%0, %1) : (f32, f32) -> f32
    "cinn.print.f32"(%res) : (f32) -> ()
    // The benchmarked region must return exactly one value (see BenchmarkOp
    // verifier).
    cinn.return %res : f32
  }
  cinn.return
}
138 changes: 138 additions & 0 deletions cinnrt/dialect/test_kernels.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
#include "cinnrt/dialect/test_kernels.h"

#include "mlir/IR/Builders.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/OpImplementation.h"
#include "mlir/IR/StandardTypes.h"
#include "mlir/IR/TypeUtilities.h"

namespace cinnrt::dialect {

//===----------------------------------------------------------------------===//
// BenchmarkOp
//===----------------------------------------------------------------------===//

// Parse the BenchmarkOp in the following format
// cinn.benchmark "add.i32"(%c : i32, %d : f32)
// max_count = 100, duration_secs = 1 {
// ...
// }

// Custom assembly parser for BenchmarkOp. Accepted form:
//   cinn.benchmark "add.i32"(%c : i32, %d : f32)
//       max_count = 100, duration_secs = 1 { ... }
// On success, fills `result` with the name attribute, the resolved operands,
// the keyword attributes, and the trailing region.
static ParseResult parseBenchmarkOp(OpAsmParser &parser, OperationState &result) {
  // Parse the quoted benchmark name (e.g. "add.i32") into the "name" attribute.
  StringAttr nameAttr;
  if (parser.parseAttribute(nameAttr, "name", result.attributes)) return failure();

  // Parse the operands, e.g. (%c : i32, %d : f32)
  if (parser.parseLParen()) return failure();

  SmallVector<OpAsmParser::OperandType, 4> operands;
  SmallVector<Type, 4> types;
  llvm::SMLoc type_loc = parser.getCurrentLocation();

  // parseOptionalRParen() succeeds only when it consumes ')', i.e. the operand
  // list is empty. Otherwise (failure here) fall through and parse a
  // comma-separated, colon-typed operand list, then the closing ')'.
  if (parser.parseOptionalRParen()) {
    // Parse non-empty operands
    do {
      // Parse %c : i32,
      OpAsmParser::OperandType operand;
      Type type;

      if (parser.parseOperand(operand) || parser.parseColonType(type)) return failure();

      operands.push_back(operand);
      types.push_back(type);

    } while (succeeded(parser.parseOptionalComma()));

    if (parser.parseRParen()) return failure();
  }

  // Resolve the parsed SSA names against their declared types.
  if (parser.resolveOperands(operands, types, type_loc, result.operands)) return failure();

  // Parse the keyword attributes, e.g. max_count = 100, duration_secs = 1.
  // NOTE(review): the do-while grammar requires at least one keyword
  // attribute, and every value is parsed as an i32 attribute.
  do {
    StringRef attr;
    Attribute resultAttr;
    if (parser.parseKeyword(&attr) || parser.parseEqual() ||
        parser.parseAttribute(resultAttr, parser.getBuilder().getIntegerType(32), attr, result.attributes))
      return failure();
  } while (succeeded(parser.parseOptionalComma()));

  // Set the default attribute num_warmup_runs to 1 if unset
  auto setDefaultAttrIfUnset = [&](const char *attr_name, int value) {
    bool found =
        llvm::any_of(result.attributes, [attr_name](const NamedAttribute &attr) { return attr.first == attr_name; });
    if (!found) {
      IntegerAttr default_val = parser.getBuilder().getI32IntegerAttr(value);
      result.addAttribute(attr_name, default_val);
    }
  };
  setDefaultAttrIfUnset("num_warmup_runs", 1);

  // Parse the trailing region. enableNameShadowing=true lets the region's
  // entry-block arguments reuse the SSA names of the op operands.
  Region *target = result.addRegion();
  return parser.parseRegion(*target,
                            operands,
                            types,
                            /*enableNameShadowing=*/true);
}

// Print the BenchmarkOp in the following format
// cinn.benchmark "add.i32"(%c : i32, %d : f32)
// max_count = 100, duration_secs = 1 {
// ...
// }
// Custom assembly printer for BenchmarkOp; emits the same form the parser
// accepts:
//   cinn.benchmark "add.i32"(%c : i32, %d : f32)
//       max_count = 100, duration_secs = 1 { ... }
static void print(OpAsmPrinter &p, BenchmarkOp op) {
  // Mnemonic followed by the benchmark name attribute, e.g. "add.i32".
  p << "cinn.benchmark ";
  p << op.getAttr("name");

  // Operand list with types, e.g. (%c : i32, %d : f32).
  p << '(';
  llvm::interleaveComma(llvm::zip(op.getOperands(), op.getOperandTypes()), p, [&](const auto &pair) {
    p << std::get<0>(pair) << " : " << std::get<1>(pair);
  });
  p << ") ";

  // Keyword attributes (everything except "name"), printed as unsigned i32
  // values, e.g. max_count = 100, duration_secs = 1.
  bool printed_any = false;
  for (auto &named_attr : op.getAttrs()) {
    auto id = named_attr.first;
    if (id == "name") continue;
    if (printed_any) p << ", ";
    p << id << " = ";
    auto value = named_attr.second;
    if (auto int_attr = value.dyn_cast<IntegerAttr>()) {
      int_attr.getValue().print(p.getStream(), /*isSigned=*/false);
    } else {
      op.emitOpError("Unexpected attribute");
    }
    printed_any = true;
  }
  p << ' ';

  // Print the region, reusing the operand names for the entry-block
  // arguments so the printed form round-trips through the parser.
  p.shadowRegionArgs(op.region(), op.getOperands());
  p.printRegion(op.region(), /*printEntryBlockArgs=*/false);
}

// Verifier for BenchmarkOp: the benchmarked region must terminate with a
// `cinn.return` carrying exactly one value (the result being measured).
static LogicalResult verify(BenchmarkOp op) {
  // Verify that the target benchmark region has exactly one return value.
  auto &region = op.region();
  // Guard before dereferencing: front()/back() on an empty region or an empty
  // entry block would be undefined behavior, and the SizedRegion<1> trait does
  // not guarantee the block contains any operations.
  if (region.empty() || region.front().empty()) {
    return op.emitOpError("missing return statement");
  }
  auto &last_op = region.front().back();
  if (last_op.getName().getStringRef() != "cinn.return") {
    return op.emitOpError("missing return statement");
  }
  if (last_op.getNumOperands() != 1) {
    return op.emitOpError("incorrect number of return values. One return value is expected");
  }

  return success();
}

#define GET_OP_CLASSES
#include "cinnrt/dialect/test_kernels.cpp.inc"

} // namespace cinnrt::dialect
9 changes: 9 additions & 0 deletions cinnrt/dialect/test_kernels.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Declarations for the CINN test-kernel ops (e.g. cinn.benchmark); the op
// classes themselves are generated by tablegen from test_kernels.td.
#pragma once
#include "mlir/IR/OpDefinition.h"
#include "mlir/Interfaces/SideEffectInterfaces.h"

namespace cinnrt::dialect {
// Imported inside the namespace so the generated declarations below can refer
// to mlir types unqualified; mirrors basic_kernels.h.
using namespace mlir;  // NOLINT
#define GET_OP_CLASSES
#include "cinnrt/dialect/test_kernels.hpp.inc"
}  // namespace cinnrt::dialect
65 changes: 65 additions & 0 deletions cinnrt/dialect/test_kernels.td
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
// Operation definitions for testing.

#ifdef TEST_OPS
#else
#define TEST_OPS

include "cinnrt/dialect/cinn_base.td"
include "mlir/Interfaces/SideEffectInterfaces.td"

// Base class for Test dialect ops.
// All test ops live in the CINN dialect and are IsolatedFromAbove, i.e. their
// regions may not capture SSA values from the enclosing scope.
class Test_Op<string mnemonic, list<OpTrait> traits = []> :
    Op<CINN_Dialect, mnemonic, !listconcat(traits, [IsolatedFromAbove])> {

  // Each registered op in the Test namespace needs to provide all of a printer,
  // parser and verifier. The hooks dispatch to the static free functions in
  // cinnrt/dialect/test_kernels.cc ($cppClass expands to the op's C++ class
  // name, e.g. parseBenchmarkOp).
  let printer = [{ return cinnrt::dialect::print(p, *this); }];
  let verifier = [{ return cinnrt::dialect::verify(*this); }];
  let parser = [{ return cinnrt::dialect::parse$cppClass(parser, result); }];
}

// cinn.benchmark: repeatedly executes its region and reports timing stats.
def BenchmarkOp : Test_Op<"benchmark"> {
  let summary = "benchmark operation";
  let description = [{
    The "cinn.benchmark" operation benchmarks the performance of an MLIR
    region by executing the given MLIR region repeatedly up to the
    `duration_secs` seconds or `max_count` times. `num_warmup_runs` specifies
    the number of warm up runs to run the given MLIR region before the
    benchmark starts.

    The target MLIR region can take an arbitrary number of arguments and
    should return exactly one value. The arguments for the MLIR region are
    provided as the operands of the cinn.benchmark op.

    Example:
      cinn.benchmark "add.i32"(%c : i32, %d : f32) max_count = 100, duration_secs = 1 {
        // code for benchmarking
        ...
      }

      cinn.benchmark "add.i32"(%c : i32)
          duration_secs = 1,
          max_count = 100,
          num_warmup_runs = 10 {
        // The MLIR code to be benchmarked goes here.
        // The following code benchmarks the cinn.add.i32 kernel.
        %x = cinn.add.i32 %c, %c
        // The benchmarked function needs to return exactly one value.
        cinn.return %x : i32
      }
  }];

  // Single-block region holding the code under benchmark.
  let regions = (region SizedRegion<1>:$region);

  // Variadic operands feed the region; the remaining attributes control the
  // benchmark loop. num_warmup_runs defaults to 1 (also enforced by the
  // custom parser).
  let arguments = (ins
    Variadic<AnyType>,
    I32Attr:$duration_secs,
    I32Attr:$max_count,
    StrAttr:$name,
    DefaultValuedAttr<I32Attr, "1">:$num_warmup_runs
  );

  let results = (outs);
}

#endif // TEST_OPS
1 change: 1 addition & 0 deletions cinnrt/host_context/core_runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ SymbolTable* CoreRuntime::symbol_table() { return &impl_->symbol_table; }
CoreRuntime::CoreRuntime(CoreRuntime::Impl* impl) : impl_(impl) { CHECK(impl); }

void CoreRuntime::Execute() {
// std::cout << "CoreRuntime::Execute" << std::endl;
int op_offset = 0;
for (auto& op : impl_->op_executables) {
VLOG(3) << "running op " << op_offset++ << " " << op.name();
Expand Down
4 changes: 3 additions & 1 deletion cinnrt/host_context/function.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ class Function {
size_t num_arguments() const { return num_arguments_; }
size_t num_results() const { return num_results_; }

virtual void Execute(llvm::ArrayRef<Value*> arguments, llvm::MutableArrayRef<ValueRef> results) const {}
virtual void Execute(llvm::ArrayRef<Value*> arguments,
llvm::MutableArrayRef<ValueRef> results,
bool is_region = false) const {}

virtual ~Function() = default;

Expand Down
2 changes: 2 additions & 0 deletions cinnrt/host_context/mlir_exec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "cinnrt/kernel/control_flow_kernels.h"
#include "cinnrt/kernel/tensor_kernels.h"
#include "cinnrt/kernel/tensor_shape_kernels.h"
#include "cinnrt/kernel/test_kernels.h"
#include "llvm/Support/DynamicLibrary.h"

static llvm::cl::list<std::string> cl_shared_libs( // NOLINT
Expand All @@ -32,6 +33,7 @@ int main(int argc, char** argv) {
host_context::KernelRegistry registry;

kernel::RegisterBasicKernels(&registry);
kernel::RegisterTestKernels(&registry);
kernel::RegisterTensorShapeKernels(&registry);
kernel::RegisterTensorKernels(&registry);
kernel::RegisterControlFlowKernels(&registry);
Expand Down
Loading

0 comments on commit 5702502

Please sign in to comment.