// mlir-opt mlir/test/mlir-cpu-runner/async.mlir -pass-pipeline="builtin.module(async-to-async-runtime,func.func(async-runtime-ref-counting,async-runtime-ref-counting-opt),convert-async-to-llvm,func.func(convert-linalg-to-loops,convert-scf-to-cf),finalize-memref-to-llvm,func.func(convert-arith-to-llvm),convert-func-to-llvm,reconcile-unrealized-casts)"
//
// LLVM-dialect module produced by the command above. It contains:
//   * @main                - allocates a 4 x f32 buffer, launches @async_execute_fn_1
//                            and blocks on the token it returns;
//   * @async_execute_fn*   - three functions tagged "presplitcoroutine", each
//                            returning an async runtime token;
//   * declarations of libc and mlirAsyncRuntime* symbols that have no body here.
module {
  llvm.func @abort()
  llvm.func @puts(!llvm.ptr)
  // NUL-terminated ASCII bytes spelling "Awaited async operand is in error state";
  // printed by @main via @puts just before calling @abort.
  llvm.mlir.global private constant @assert_msg(dense<[65, 119, 97, 105, 116, 101, 100, 32, 97, 115, 121, 110, 99, 32, 111, 112, 101, 114, 97, 110, 100, 32, 105, 115, 32, 105, 110, 32, 101, 114, 114, 111, 114, 32, 115, 116, 97, 116, 101, 0]> : tensor<40xi8>) {addr_space = 0 : i32} : !llvm.array<40 x i8>
  llvm.func @malloc(i64) -> !llvm.ptr
  llvm.func @free(!llvm.ptr)
  llvm.func @aligned_alloc(i64, i64) -> !llvm.ptr

  // Entry point: zero-fills a 4 x f32 heap buffer, writes 1.0 to element 0,
  // runs @async_execute_fn_1 on it, waits for the returned token, then either
  // prints and frees the buffer (no error) or prints @assert_msg and aborts.
  llvm.func @main() {
    %0 = llvm.mlir.constant(1 : index) : i64
    %1 = llvm.mlir.constant(4 : index) : i64
    %2 = llvm.mlir.constant(true) : i1
    %3 = llvm.mlir.constant(1 : i64) : i64
    %4 = llvm.mlir.constant(0 : index) : i64
    %5 = llvm.mlir.constant(0.000000e+00 : f32) : f32
    %6 = llvm.mlir.constant(1.000000e+00 : f32) : f32
    %7 = llvm.mlir.constant(4 : index) : i64
    %8 = llvm.mlir.constant(1 : index) : i64
    // Byte size of 4 f32 elements, computed as the address of element 4
    // relative to a null base pointer.
    %9 = llvm.mlir.zero : !llvm.ptr
    %10 = llvm.getelementptr %9[4] : (!llvm.ptr) -> !llvm.ptr, f32
    %11 = llvm.ptrtoint %10 : !llvm.ptr to i64
    // NOTE(review): the malloc result is used without a null check.
    %12 = llvm.call @malloc(%11) : (i64) -> !llvm.ptr
    // Build the 5-field struct {%12, %12, 0, [4], [1]} - presumably the rank-1
    // memref descriptor (allocated ptr, aligned ptr, offset, sizes, strides).
    %13 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %14 = llvm.insertvalue %12, %13[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %15 = llvm.insertvalue %12, %14[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %16 = llvm.mlir.constant(0 : index) : i64
    %17 = llvm.insertvalue %16, %15[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %18 = llvm.insertvalue %7, %17[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %19 = llvm.insertvalue %8, %18[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    // Loop over i = 0 .. 3 storing 0.0 into buffer[i].
    llvm.br ^bb1(%4 : i64)
  ^bb1(%20: i64): // 2 preds: ^bb0, ^bb2
    %21 = llvm.icmp "slt" %20, %1 : i64
    llvm.cond_br %21, ^bb2, ^bb3
  ^bb2: // pred: ^bb1
    %22 = llvm.getelementptr %12[%20] : (!llvm.ptr, i64) -> !llvm.ptr, f32
    llvm.store %5, %22 : f32, !llvm.ptr
    %23 = llvm.add %20, %0 : i64
    llvm.br ^bb1(%23 : i64)
  ^bb3: // pred: ^bb1
    // Spill the descriptor to a stack slot; (%26 = 1, %25 = slot address) is the
    // (i64, ptr) pair handed to @printMemrefF32 and to the async function.
    %24 = llvm.mlir.constant(1 : index) : i64
    %25 = llvm.alloca %24 x !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> : (i64) -> !llvm.ptr
    llvm.store %19, %25 : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>, !llvm.ptr
    %26 = llvm.mlir.constant(1 : index) : i64
    // %27-%29 build the same pair as a struct value; %29 has no use in this function.
    %27 = llvm.mlir.undef : !llvm.struct<(i64, ptr)>
    %28 = llvm.insertvalue %26, %27[0] : !llvm.struct<(i64, ptr)>
    %29 = llvm.insertvalue %25, %28[1] : !llvm.struct<(i64, ptr)>
    llvm.call @printMemrefF32(%26, %25) : (i64, !llvm.ptr) -> ()
    // buffer[0] = 1.0
    %30 = llvm.getelementptr %12[%4] : (!llvm.ptr, i64) -> !llvm.ptr, f32
    llvm.store %6, %30 : f32, !llvm.ptr
    llvm.call @mlirAsyncRuntimePrintCurrentThreadId() : () -> ()
    llvm.call @printMemrefF32(%26, %25) : (i64, !llvm.ptr) -> ()
    // Launch the outer async function with the descriptor fields passed
    // individually, then wait on the token it returns.
    %31 = llvm.call @async_execute_fn_1(%12, %12, %16, %7, %8, %26, %25) : (!llvm.ptr, !llvm.ptr, i64, i64, i64, i64, !llvm.ptr) -> !llvm.ptr
    llvm.call @mlirAsyncRuntimeAwaitToken(%31) : (!llvm.ptr) -> ()
    // The error flag is read before the reference on the token is dropped.
    %32 = llvm.call @mlirAsyncRuntimeIsTokenError(%31) : (!llvm.ptr) -> i1
    llvm.call @mlirAsyncRuntimeDropRef(%31, %3) : (!llvm.ptr, i64) -> ()
    // %33 = !is_error; the true branch (^bb4) is the success path.
    %33 = llvm.xor %32, %2 : i1
    llvm.cond_br %33, ^bb4, ^bb5
  ^bb4: // pred: ^bb3
    llvm.call @mlirAsyncRuntimePrintCurrentThreadId() : () -> ()
    llvm.call @printMemrefF32(%26, %25) : (i64, !llvm.ptr) -> ()
    llvm.call @free(%12) : (!llvm.ptr) -> ()
    llvm.return
  ^bb5: // pred: ^bb3
    // Error path: print @assert_msg and abort.
    %34 = llvm.mlir.addressof @assert_msg : !llvm.ptr
    llvm.call @puts(%34) : (!llvm.ptr) -> ()
    llvm.call @abort() : () -> ()
    llvm.unreachable
  }

  llvm.func @mlirAsyncRuntimePrintCurrentThreadId() attributes {sym_visibility = "private"}

  // Wrapper that packs (%arg0, %arg1) into an (i64, ptr) struct, stores it to a
  // stack slot and passes the slot's address to @_mlir_ciface_printMemrefF32.
  llvm.func private @printMemrefF32(%arg0: i64, %arg1: !llvm.ptr) attributes {llvm.emit_c_interface, sym_visibility = "private"} {
    %0 = llvm.mlir.undef : !llvm.struct<(i64, ptr)>
    %1 = llvm.insertvalue %arg0, %0[0] : !llvm.struct<(i64, ptr)>
    %2 = llvm.insertvalue %arg1, %1[1] : !llvm.struct<(i64, ptr)>
    %3 = llvm.mlir.constant(1 : index) : i64
    %4 = llvm.alloca %3 x !llvm.struct<(i64, ptr)> : (i64) -> !llvm.ptr
    llvm.store %2, %4 : !llvm.struct<(i64, ptr)>, !llvm.ptr
    llvm.call @_mlir_ciface_printMemrefF32(%4) : (!llvm.ptr) -> ()
    llvm.return
  }
  llvm.func @_mlir_ciface_printMemrefF32(!llvm.ptr) attributes {llvm.emit_c_interface, sym_visibility = "private"}

  // Coroutine with no arguments: creates a token, hands its own coroutine
  // handle plus @__resume to mlirAsyncRuntimeExecute, suspends, and on the
  // resume path prints the thread id and emplaces the token. Returns the token.
  llvm.func @async_execute_fn() -> !llvm.ptr attributes {passthrough = ["presplitcoroutine"], sym_visibility = "private"} {
    %0 = llvm.mlir.constant(false) : i1
    %1 = llvm.mlir.addressof @__resume : !llvm.ptr
    %2 = llvm.mlir.constant(0 : i64) : i64
    %3 = llvm.mlir.constant(1 : i64) : i64
    %4 = llvm.mlir.zero : !llvm.ptr
    %5 = llvm.mlir.constant(0 : i32) : i32
    %6 = llvm.call @mlirAsyncRuntimeCreateToken() : () -> !llvm.ptr
    %7 = llvm.intr.coro.id %5, %4, %4, %4 : (i32, !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.token
    // Frame allocation size: (size + align - 1) & (0 - align), i.e. coro.size
    // rounded up to a multiple of coro.align (for a power-of-two align).
    %8 = llvm.intr.coro.size : i64
    %9 = llvm.intr.coro.align : i64
    %10 = llvm.add %8, %9 : i64
    %11 = llvm.sub %10, %3 : i64
    %12 = llvm.sub %2, %9 : i64
    %13 = llvm.and %11, %12 : i64
    %14 = llvm.call @aligned_alloc(%9, %13) : (i64, i64) -> !llvm.ptr
    %15 = llvm.intr.coro.begin %7, %14 : (!llvm.token, !llvm.ptr) -> !llvm.ptr
    %16 = llvm.intr.coro.save %15 : (!llvm.ptr) -> !llvm.token
    llvm.call @mlirAsyncRuntimeExecute(%15, %1) : (!llvm.ptr, !llvm.ptr) -> ()
    %17 = llvm.intr.coro.suspend %16, %0 : i8
    %18 = llvm.sext %17 : i8 to i32
    // Dispatch on the suspend result: 0 -> ^bb1 (body), 1 -> ^bb2 (frame
    // cleanup), anything else -> ^bb3 (coro.end + return). Presumably this is
    // the resume / destroy / suspend convention of llvm.coro.suspend - confirm
    // against the LLVM coroutine docs.
    llvm.switch %18 : i32, ^bb3 [
      0: ^bb1,
      1: ^bb2
    ]
  ^bb1: // pred: ^bb0
    llvm.call @mlirAsyncRuntimePrintCurrentThreadId() : () -> ()
    llvm.call @mlirAsyncRuntimeEmplaceToken(%6) : (!llvm.ptr) -> ()
    llvm.br ^bb2
  ^bb2: // 2 preds: ^bb0, ^bb1
    // Release the coroutine frame memory.
    %19 = llvm.intr.coro.free %7, %15 : (!llvm.token, !llvm.ptr) -> !llvm.ptr
    llvm.call @free(%19) : (!llvm.ptr) -> ()
    llvm.br ^bb3
  ^bb3: // 2 preds: ^bb0, ^bb2
    %20 = llvm.mlir.none : !llvm.token
    %21 = llvm.intr.coro.end %15, %0, %20 : (!llvm.ptr, i1, !llvm.token) -> i1
    llvm.return %6 : !llvm.ptr
  }

  // Coroutine that depends on the token %arg0: after its first resume it
  // suspends again via mlirAsyncRuntimeAwaitTokenAndExecute(%arg0, ...). Once
  // resumed it checks %arg0 for error and drops one reference on it. On success
  // it stores 3.0 at element 2 of the buffer, prints, and emplaces its own
  // token; on error it sets the error state on its own token.
  // %arg1..%arg5 are the five descriptor fields; (%arg6, %arg7) is the
  // (i64, ptr) pair forwarded to @printMemrefF32. Returns the new token.
  llvm.func @async_execute_fn_0(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr, %arg3: i64, %arg4: i64, %arg5: i64, %arg6: i64, %arg7: !llvm.ptr) -> !llvm.ptr attributes {passthrough = ["presplitcoroutine"], sym_visibility = "private"} {
    // %2 is assembled here but has no use in this function.
    %0 = llvm.mlir.undef : !llvm.struct<(i64, ptr)>
    %1 = llvm.insertvalue %arg6, %0[0] : !llvm.struct<(i64, ptr)>
    %2 = llvm.insertvalue %arg7, %1[1] : !llvm.struct<(i64, ptr)>
    // Reassemble the 5-field descriptor struct from the scalar arguments.
    %3 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %4 = llvm.insertvalue %arg1, %3[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %5 = llvm.insertvalue %arg2, %4[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %6 = llvm.insertvalue %arg3, %5[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %7 = llvm.insertvalue %arg4, %6[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %8 = llvm.insertvalue %arg5, %7[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %9 = llvm.mlir.constant(2 : index) : i64
    %10 = llvm.mlir.constant(3.000000e+00 : f32) : f32
    %11 = llvm.mlir.constant(1 : i64) : i64
    %12 = llvm.mlir.constant(false) : i1
    %13 = llvm.mlir.addressof @__resume : !llvm.ptr
    %14 = llvm.mlir.constant(0 : i64) : i64
    %15 = llvm.mlir.constant(1 : i64) : i64
    %16 = llvm.mlir.zero : !llvm.ptr
    %17 = llvm.mlir.constant(0 : i32) : i32
    %18 = llvm.call @mlirAsyncRuntimeCreateToken() : () -> !llvm.ptr
    %19 = llvm.intr.coro.id %17, %16, %16, %16 : (i32, !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.token
    // Frame size rounded up to the frame alignment: (size + align - 1) & (0 - align).
    %20 = llvm.intr.coro.size : i64
    %21 = llvm.intr.coro.align : i64
    %22 = llvm.add %20, %21 : i64
    %23 = llvm.sub %22, %15 : i64
    %24 = llvm.sub %14, %21 : i64
    %25 = llvm.and %23, %24 : i64
    %26 = llvm.call @aligned_alloc(%21, %25) : (i64, i64) -> !llvm.ptr
    %27 = llvm.intr.coro.begin %19, %26 : (!llvm.token, !llvm.ptr) -> !llvm.ptr
    %28 = llvm.intr.coro.save %27 : (!llvm.ptr) -> !llvm.token
    llvm.call @mlirAsyncRuntimeExecute(%27, %13) : (!llvm.ptr, !llvm.ptr) -> ()
    %29 = llvm.intr.coro.suspend %28, %12 : i8
    %30 = llvm.sext %29 : i8 to i32
    // 0 -> continue at ^bb1, 1 -> cleanup (^bb5), otherwise -> ^bb6.
    llvm.switch %30 : i32, ^bb6 [
      0: ^bb1,
      1: ^bb5
    ]
  ^bb1: // pred: ^bb0
    // Second suspension point: pass %arg0, this coroutine's handle and
    // @__resume to the runtime, then suspend.
    %31 = llvm.intr.coro.save %27 : (!llvm.ptr) -> !llvm.token
    llvm.call @mlirAsyncRuntimeAwaitTokenAndExecute(%arg0, %27, %13) : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> ()
    %32 = llvm.intr.coro.suspend %31, %12 : i8
    %33 = llvm.sext %32 : i8 to i32
    llvm.switch %33 : i32, ^bb6 [
      0: ^bb2,
      1: ^bb5
    ]
  ^bb2: // pred: ^bb1
    // Read the dependency's error flag, then drop one reference on it.
    %34 = llvm.call @mlirAsyncRuntimeIsTokenError(%arg0) : (!llvm.ptr) -> i1
    llvm.call @mlirAsyncRuntimeDropRef(%arg0, %11) : (!llvm.ptr, i64) -> ()
    llvm.cond_br %34, ^bb4, ^bb3
  ^bb3: // pred: ^bb2
    // Success: store 3.0 at element 2 through descriptor field 1.
    %35 = llvm.extractvalue %8[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %36 = llvm.getelementptr %35[%9] : (!llvm.ptr, i64) -> !llvm.ptr, f32
    llvm.store %10, %36 : f32, !llvm.ptr
    llvm.call @mlirAsyncRuntimePrintCurrentThreadId() : () -> ()
    llvm.call @printMemrefF32(%arg6, %arg7) : (i64, !llvm.ptr) -> ()
    llvm.call @mlirAsyncRuntimeEmplaceToken(%18) : (!llvm.ptr) -> ()
    llvm.br ^bb5
  ^bb4: // pred: ^bb2
    // Dependency failed: set the error state on this function's token.
    llvm.call @mlirAsyncRuntimeSetTokenError(%18) : (!llvm.ptr) -> ()
    llvm.br ^bb5
  ^bb5: // 4 preds: ^bb0, ^bb1, ^bb3, ^bb4
    %37 = llvm.intr.coro.free %19, %27 : (!llvm.token, !llvm.ptr) -> !llvm.ptr
    llvm.call @free(%37) : (!llvm.ptr) -> ()
    llvm.br ^bb6
  ^bb6: // 3 preds: ^bb0, ^bb1, ^bb5
    %38 = llvm.mlir.none : !llvm.token
    %39 = llvm.intr.coro.end %27, %12, %38 : (!llvm.ptr, i1, !llvm.token) -> i1
    llvm.return %18 : !llvm.ptr
  }

  // Outer coroutine called from @main. After its first resume it stores 2.0 at
  // element 1, prints, then starts @async_execute_fn and @async_execute_fn_0
  // (the latter receiving the former's token) and suspends until the token
  // returned by @async_execute_fn_0 lets it continue. On success it stores 4.0
  // at element 3, prints, and emplaces its own token; on error it sets the
  // error state on its own token.
  // %arg0..%arg4 are the five descriptor fields; (%arg5, %arg6) is the
  // (i64, ptr) pair forwarded to @printMemrefF32. Returns the new token.
  llvm.func @async_execute_fn_1(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg6: !llvm.ptr) -> !llvm.ptr attributes {passthrough = ["presplitcoroutine"], sym_visibility = "private"} {
    // %2 is assembled here but has no use in this function.
    %0 = llvm.mlir.undef : !llvm.struct<(i64, ptr)>
    %1 = llvm.insertvalue %arg5, %0[0] : !llvm.struct<(i64, ptr)>
    %2 = llvm.insertvalue %arg6, %1[1] : !llvm.struct<(i64, ptr)>
    // Reassemble the 5-field descriptor struct from the scalar arguments.
    %3 = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %4 = llvm.insertvalue %arg0, %3[0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %5 = llvm.insertvalue %arg1, %4[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %6 = llvm.insertvalue %arg2, %5[2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %7 = llvm.insertvalue %arg3, %6[3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %8 = llvm.insertvalue %arg4, %7[4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %9 = llvm.mlir.constant(1 : i64) : i64
    %10 = llvm.mlir.constant(3 : index) : i64
    %11 = llvm.mlir.constant(4.000000e+00 : f32) : f32
    %12 = llvm.mlir.constant(1 : index) : i64
    %13 = llvm.mlir.constant(2.000000e+00 : f32) : f32
    %14 = llvm.mlir.constant(false) : i1
    %15 = llvm.mlir.addressof @__resume : !llvm.ptr
    %16 = llvm.mlir.constant(0 : i64) : i64
    %17 = llvm.mlir.constant(1 : i64) : i64
    %18 = llvm.mlir.zero : !llvm.ptr
    %19 = llvm.mlir.constant(0 : i32) : i32
    %20 = llvm.call @mlirAsyncRuntimeCreateToken() : () -> !llvm.ptr
    %21 = llvm.intr.coro.id %19, %18, %18, %18 : (i32, !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.token
    // Frame size rounded up to the frame alignment: (size + align - 1) & (0 - align).
    %22 = llvm.intr.coro.size : i64
    %23 = llvm.intr.coro.align : i64
    %24 = llvm.add %22, %23 : i64
    %25 = llvm.sub %24, %17 : i64
    %26 = llvm.sub %16, %23 : i64
    %27 = llvm.and %25, %26 : i64
    %28 = llvm.call @aligned_alloc(%23, %27) : (i64, i64) -> !llvm.ptr
    %29 = llvm.intr.coro.begin %21, %28 : (!llvm.token, !llvm.ptr) -> !llvm.ptr
    %30 = llvm.intr.coro.save %29 : (!llvm.ptr) -> !llvm.token
    llvm.call @mlirAsyncRuntimeExecute(%29, %15) : (!llvm.ptr, !llvm.ptr) -> ()
    %31 = llvm.intr.coro.suspend %30, %14 : i8
    %32 = llvm.sext %31 : i8 to i32
    // 0 -> continue at ^bb1, 1 -> cleanup (^bb5), otherwise -> ^bb6.
    llvm.switch %32 : i32, ^bb6 [
      0: ^bb1,
      1: ^bb5
    ]
  ^bb1: // pred: ^bb0
    // Store 2.0 at element 1 through descriptor field 1, then print.
    %33 = llvm.extractvalue %8[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %34 = llvm.getelementptr %33[%12] : (!llvm.ptr, i64) -> !llvm.ptr, f32
    llvm.store %13, %34 : f32, !llvm.ptr
    llvm.call @mlirAsyncRuntimePrintCurrentThreadId() : () -> ()
    llvm.call @printMemrefF32(%arg5, %arg6) : (i64, !llvm.ptr) -> ()
    // Start the two nested async functions; %35 is passed to the second one
    // as the token it depends on.
    %35 = llvm.call @async_execute_fn() : () -> !llvm.ptr
    %36 = llvm.call @async_execute_fn_0(%35, %arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6) : (!llvm.ptr, !llvm.ptr, !llvm.ptr, i64, i64, i64, i64, !llvm.ptr) -> !llvm.ptr
    // Second suspension point, keyed on the token %36.
    %37 = llvm.intr.coro.save %29 : (!llvm.ptr) -> !llvm.token
    llvm.call @mlirAsyncRuntimeAwaitTokenAndExecute(%36, %29, %15) : (!llvm.ptr, !llvm.ptr, !llvm.ptr) -> ()
    %38 = llvm.intr.coro.suspend %37, %14 : i8
    %39 = llvm.sext %38 : i8 to i32
    llvm.switch %39 : i32, ^bb6 [
      0: ^bb2,
      1: ^bb5
    ]
  ^bb2: // pred: ^bb1
    // Read the awaited token's error flag, then drop one reference on it.
    %40 = llvm.call @mlirAsyncRuntimeIsTokenError(%36) : (!llvm.ptr) -> i1
    llvm.call @mlirAsyncRuntimeDropRef(%36, %9) : (!llvm.ptr, i64) -> ()
    llvm.cond_br %40, ^bb4, ^bb3
  ^bb3: // pred: ^bb2
    // Success: store 4.0 at element 3 through descriptor field 1.
    %41 = llvm.extractvalue %8[1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
    %42 = llvm.getelementptr %41[%10] : (!llvm.ptr, i64) -> !llvm.ptr, f32
    llvm.store %11, %42 : f32, !llvm.ptr
    llvm.call @mlirAsyncRuntimePrintCurrentThreadId() : () -> ()
    llvm.call @printMemrefF32(%arg5, %arg6) : (i64, !llvm.ptr) -> ()
    llvm.call @mlirAsyncRuntimeEmplaceToken(%20) : (!llvm.ptr) -> ()
    llvm.br ^bb5
  ^bb4: // pred: ^bb2
    // Awaited token failed: set the error state on this function's token.
    llvm.call @mlirAsyncRuntimeSetTokenError(%20) : (!llvm.ptr) -> ()
    llvm.br ^bb5
  ^bb5: // 4 preds: ^bb0, ^bb1, ^bb3, ^bb4
    %43 = llvm.intr.coro.free %21, %29 : (!llvm.token, !llvm.ptr) -> !llvm.ptr
    llvm.call @free(%43) : (!llvm.ptr) -> ()
    llvm.br ^bb6
  ^bb6: // 3 preds: ^bb0, ^bb1, ^bb5
    %44 = llvm.mlir.none : !llvm.token
    %45 = llvm.intr.coro.end %29, %14, %44 : (!llvm.ptr, i1, !llvm.token) -> i1
    llvm.return %20 : !llvm.ptr
  }

  // Reference-counting entry points; declarations only. @mlirAsyncRuntimeAddRef
  // is not called from the functions above.
  llvm.func @mlirAsyncRuntimeAddRef(!llvm.ptr, i64) attributes {sym_visibility = "private"}
  llvm.func @mlirAsyncRuntimeDropRef(!llvm.ptr, i64) attributes {sym_visibility = "private"}
// External async-runtime entry points. These are declarations only (no
// bodies); the definitions are expected to come from the runtime library the
// module is linked or JIT-loaded against. Grouped below by the kind of object
// each one operates on.

// --- Tokens ---
llvm.func @mlirAsyncRuntimeCreateToken() -> !llvm.ptr attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeEmplaceToken(!llvm.ptr) attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeSetTokenError(!llvm.ptr) attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeIsTokenError(!llvm.ptr) -> i1 attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeAwaitToken(!llvm.ptr) attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeAwaitTokenAndExecute(!llvm.ptr, !llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"}

// --- Values ---
llvm.func @mlirAsyncRuntimeCreateValue(i64) -> !llvm.ptr attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeEmplaceValue(!llvm.ptr) attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeSetValueError(!llvm.ptr) attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeIsValueError(!llvm.ptr) -> i1 attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeAwaitValue(!llvm.ptr) attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeAwaitValueAndExecute(!llvm.ptr, !llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeGetValueStorage(!llvm.ptr) -> !llvm.ptr attributes {sym_visibility = "private"}

// --- Groups ---
llvm.func @mlirAsyncRuntimeCreateGroup(i64) -> !llvm.ptr attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeAddTokenToGroup(!llvm.ptr, !llvm.ptr) -> i64 attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeIsGroupError(!llvm.ptr) -> i1 attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeAwaitAllInGroup(!llvm.ptr) attributes {sym_visibility = "private"}
llvm.func @mlirAsyncRuntimeAwaitAllInGroupAndExecute(!llvm.ptr, !llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"}

// --- Execution ---
llvm.func @mlirAsyncRuntimeExecute(!llvm.ptr, !llvm.ptr) attributes {sym_visibility = "private"}
// NOTE(review): the "Runtim" spelling is kept verbatim because the symbol has
// to match whatever name the runtime library exports - confirm before renaming.
llvm.func @mlirAsyncRuntimGetNumWorkerThreads() -> i64 attributes {sym_visibility = "private"}
// Resume trampoline: its address is what the coroutine functions in this
// module pass to the mlirAsyncRuntime*Execute calls together with a coroutine
// handle. It resumes the coroutine identified by the pointer it receives.
llvm.func @__resume(%coro_handle: !llvm.ptr) attributes {sym_visibility = "private"} {
  llvm.intr.coro.resume %coro_handle : !llvm.ptr
  llvm.return
}
} // end of module