#3188: apply review feedback
dongjin-na committed Nov 17, 2023
1 parent fbc0e04 commit 27af785
Showing 12 changed files with 76 additions and 184 deletions.
24 changes: 8 additions & 16 deletions tests/tt_eager/ops/test_moreh_matmul_op.cpp
@@ -16,23 +16,15 @@ using namespace tt;
 using namespace tt_metal;
 using namespace constants;
 
-inline std::vector<bfloat16> create_identity_matrix(int b, int rows, int cols, int num_ones) {
-    std::vector<bfloat16> vec(b * rows * cols, (float)0);
-    int rows_x_cols = rows * cols;
-    // b = b1 x b2
-    for (int i = 0; i < b; ++i) {
-        for (int j = 0; j < num_ones; j++) {
-            vec.at(i * rows_x_cols + j * cols + j) = bfloat16((float)1);
+Tensor diagonal(const Shape &shape, float value) {
+    Tensor tensor = tt::numpy::zeros(shape);
+    auto buffer = owned_buffer::get_as<bfloat16>(tensor);
+    for (int i = 0; i < shape[0] * shape[1]; ++i) {
+        for (int j = 0; j < std::min(shape[2], shape[3]); j++) {
+            buffer[i * shape[2] * shape[3] + j * shape[3] + j] = bfloat16(value);
         }
     }
-    return vec;
-}
-
-Tensor get_identity_tensor(const Shape &shape) {
-    std::vector<bfloat16> identity_bf16_vec =
-        create_identity_matrix(shape[0] * shape[1], shape[2], shape[3], std::min(shape[2], shape[3]));
-    auto owned_buffer = owned_buffer::create<bfloat16>(std::move(identity_bf16_vec));
-    return Tensor(OwnedStorage{owned_buffer}, shape, DataType::BFLOAT16, Layout::ROW_MAJOR);
+    return tensor;
 }
 
 static bool nearly_equal(float a, float b, float epsilon = 1e-5f, float abs_threshold = 1e-5f) {
@@ -162,7 +154,7 @@ int main(int argc, char **argv) {
 
     // Allocates a DRAM buffer on device populated with values specified by initialize
     Tensor a = tt::numpy::random::random(shapea).to(Layout::TILE).to(device);
-    Tensor b = get_identity_tensor(shapeb).to(Layout::TILE).to(device);
+    Tensor b = diagonal(shapeb, 1.0f).to(Layout::TILE).to(device);
     Tensor out_cpu = tt::operations::primary::moreh_matmul(a, b, false, static_cast<bool>(transpose_b)).cpu();
     ////////////////////////////////////////////////////////////////////////////
     // Validation & Teardown
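The new `diagonal(shape, value)` helper subsumes the old `create_identity_matrix`/`get_identity_tensor` pair: with `value = 1.0f` it reproduces the identity tensor, and the flat index `i * shape[2] * shape[3] + j * shape[3] + j` walks the main diagonal of each of the `shape[0] * shape[1]` batched matrices. A standalone sketch of that indexing, using plain `float` instead of the framework's `bfloat16` buffers:

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// Fill the main diagonal of every [rows x cols] matrix inside a flat,
// row-major [b, rows, cols] buffer -- the same indexing diagonal() uses.
std::vector<float> batched_diagonal(int b, int rows, int cols, float value) {
    std::vector<float> buf(static_cast<size_t>(b) * rows * cols, 0.0f);
    for (int i = 0; i < b; ++i) {
        for (int j = 0; j < std::min(rows, cols); ++j) {
            buf[static_cast<size_t>(i) * rows * cols + j * cols + j] = value;
        }
    }
    return buf;
}

int main() {
    auto id = batched_diagonal(/*b=*/2, /*rows=*/3, /*cols=*/4, 1.0f);
    assert(id[0] == 1.0f && id[5] == 1.0f && id[10] == 1.0f);  // batch 0 diagonal
    assert(id[12] == 1.0f);                                    // batch 1, element (0, 0)
    assert(id[1] == 0.0f);                                     // off-diagonal stays zero
    return 0;
}
```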
160 changes: 42 additions & 118 deletions tests/tt_eager/python_api_testing/unit_testing/test_moreh_matmul.py
@@ -71,19 +71,10 @@ def compare(tt_out, torch_out, atol=0.2, rtop=0.2):
     "input_a_shape",
     (
         [1, 1, TILE_HEIGHT, TILE_WIDTH],
-        [1, 1, TILE_HEIGHT - 1, TILE_WIDTH],
-        [1, 1, TILE_HEIGHT - 1, TILE_WIDTH - 11],
-        [1, 1, TILE_HEIGHT - 7, TILE_WIDTH - 17],
-        [1, 1, TILE_HEIGHT * 2, TILE_WIDTH * 2],
-        [1, 1, TILE_HEIGHT * 2 - 1, TILE_WIDTH * 2],
-        [1, 1, TILE_HEIGHT * 2 - 1, TILE_WIDTH * 2 - 11],
-        [1, 1, TILE_HEIGHT * 2 - 7, TILE_WIDTH * 2 - 17],
-        [1, 1, TILE_HEIGHT * 3, TILE_WIDTH * 3],
-        [1, 1, TILE_HEIGHT * 3 - 1, TILE_WIDTH * 3],
-        [1, 1, TILE_HEIGHT * 3 - 1, TILE_WIDTH * 3 - 11],
-        [1, 1, TILE_HEIGHT * 3 - 7, TILE_WIDTH * 3 - 17],
-        [1, 1, TILE_HEIGHT * 9 - 7, TILE_WIDTH * 3 - 1],
-        [1, 1, TILE_HEIGHT * 18 - 7, TILE_WIDTH * 2 - 1],
+        [1, 1, TILE_HEIGHT * 9 - 7, TILE_WIDTH * 3 - 10],
+        [1, 1, TILE_HEIGHT * 18 - 17, TILE_WIDTH * 2 - 10],
     ),
 )
@@ -93,18 +84,9 @@ def compare(tt_out, torch_out, atol=0.2, rtop=0.2):
     (
         [1, 1, TILE_HEIGHT, TILE_WIDTH],
         [1, 1, TILE_HEIGHT, TILE_WIDTH - 1],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH - 13],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH - 31],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 2],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 2 - 1],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 2 - 13],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 2 - 31],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 3],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 3 - 1],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 3 - 13],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 3 - 31],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 12 - 1],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 24 - 1],
+        [1, 1, TILE_HEIGHT, TILE_WIDTH * 12 - 10],
+        [1, 1, TILE_HEIGHT, TILE_WIDTH * 24 - 20],
     ),
 )
 def test_moreh_matmul(input_a_shape, input_b_shape, device):
@@ -148,19 +130,12 @@ def test_moreh_matmul(input_a_shape, input_b_shape, device):
     "input_a_shape",
     (
         [1, 1, TILE_HEIGHT, TILE_WIDTH],
-        [1, 2, TILE_HEIGHT - 1, TILE_WIDTH],
-        [2, 1, TILE_HEIGHT - 1, TILE_WIDTH - 11],
-        [2, 2, TILE_HEIGHT - 7, TILE_WIDTH - 17],
-        [1, 1, TILE_HEIGHT * 2, TILE_WIDTH * 2],
-        [1, 3, TILE_HEIGHT * 2 - 1, TILE_WIDTH * 2],
-        [3, 1, TILE_HEIGHT * 2 - 1, TILE_WIDTH * 2 - 11],
-        [3, 3, TILE_HEIGHT * 2 - 7, TILE_WIDTH * 2 - 17],
-        [1, 1, TILE_HEIGHT * 3, TILE_WIDTH * 3],
-        [1, 5, TILE_HEIGHT * 3 - 1, TILE_WIDTH * 3],
-        [5, 1, TILE_HEIGHT * 3 - 1, TILE_WIDTH * 3 - 11],
-        [5, 5, TILE_HEIGHT * 3 - 7, TILE_WIDTH * 3 - 17],
-        [5, 5, TILE_HEIGHT * 9 - 7, TILE_WIDTH * 3 - 1],
-        [5, 5, TILE_HEIGHT * 18 - 7, TILE_WIDTH * 2 - 1],
+        [1, 2, TILE_HEIGHT * 5 - 1, TILE_WIDTH],
+        [2, 1, TILE_HEIGHT * 5 - 1, TILE_WIDTH - 11],
+        [2, 2, TILE_HEIGHT * 5 - 7, TILE_WIDTH - 17],
+        [1, 5, TILE_HEIGHT * 10 - 1, TILE_WIDTH * 3],
+        [5, 1, TILE_HEIGHT * 10 - 1, TILE_WIDTH * 3 - 11],
+        [5, 5, TILE_HEIGHT * 20 - 7, TILE_WIDTH * 3 - 1],
     ),
 )

@@ -169,19 +144,12 @@ def test_moreh_matmul(input_a_shape, input_b_shape, device):
     "input_b_shape",
     (
         [1, 1, TILE_HEIGHT, TILE_WIDTH],
-        [1, 2, TILE_HEIGHT, TILE_WIDTH - 1],
-        [2, 1, TILE_HEIGHT, TILE_WIDTH - 13],
-        [2, 2, TILE_HEIGHT, TILE_WIDTH - 31],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 2],
-        [1, 3, TILE_HEIGHT, TILE_WIDTH * 2 - 1],
-        [3, 1, TILE_HEIGHT, TILE_WIDTH * 2 - 13],
-        [3, 3, TILE_HEIGHT, TILE_WIDTH * 2 - 31],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 3],
-        [1, 5, TILE_HEIGHT, TILE_WIDTH * 3 - 1],
-        [5, 1, TILE_HEIGHT, TILE_WIDTH * 3 - 13],
-        [5, 5, TILE_HEIGHT, TILE_WIDTH * 3 - 31],
-        [5, 5, TILE_HEIGHT, TILE_WIDTH * 12 - 1],
-        [5, 5, TILE_HEIGHT, TILE_WIDTH * 24 - 1],
+        [1, 2, TILE_HEIGHT, TILE_WIDTH * 5 - 1],
+        [2, 1, TILE_HEIGHT, TILE_WIDTH * 5 - 13],
+        [2, 2, TILE_HEIGHT, TILE_WIDTH * 5 - 31],
+        [1, 5, TILE_HEIGHT, TILE_WIDTH * 10 - 1],
+        [5, 1, TILE_HEIGHT, TILE_WIDTH * 10 - 13],
+        [5, 5, TILE_HEIGHT, TILE_WIDTH * 20 - 1],
     ),
 )
 def test_batched_moreh_matmul(input_a_shape, input_b_shape, device):
@@ -226,40 +194,22 @@ def test_batched_moreh_matmul(input_a_shape, input_b_shape, device):
     "input_a_shape",
     (
         [1, 1, TILE_HEIGHT, TILE_WIDTH],
-        [1, 1, TILE_HEIGHT - 1, TILE_WIDTH],
-        [1, 1, TILE_HEIGHT - 1, TILE_WIDTH - 11],
-        [1, 1, TILE_HEIGHT - 7, TILE_WIDTH - 17],
-        [1, 1, TILE_HEIGHT * 2, TILE_WIDTH * 2],
-        [1, 1, TILE_HEIGHT * 2 - 1, TILE_WIDTH * 2],
-        [1, 1, TILE_HEIGHT * 2 - 1, TILE_WIDTH * 2 - 11],
-        [1, 1, TILE_HEIGHT * 2 - 7, TILE_WIDTH * 2 - 17],
-        [1, 1, TILE_HEIGHT * 3, TILE_WIDTH * 3],
-        [1, 1, TILE_HEIGHT * 3 - 1, TILE_WIDTH * 3],
-        [1, 1, TILE_HEIGHT * 3 - 1, TILE_WIDTH * 3 - 11],
-        [1, 1, TILE_HEIGHT * 3 - 7, TILE_WIDTH * 3 - 17],
-        [1, 1, TILE_HEIGHT * 9 - 7, TILE_WIDTH * 3 - 1],
-        [1, 1, TILE_HEIGHT * 18 - 7, TILE_WIDTH * 2 - 1],
+        [1, 1, TILE_HEIGHT * 9 - 7, TILE_WIDTH * 3 - 10],
+        [1, 1, TILE_HEIGHT * 18 - 17, TILE_WIDTH * 2 - 10],
     ),
 )
 
 # input_b_shape[2] is dummy
 # input_b_shape[3] is dummy
 @pytest.mark.parametrize(
     "input_b_shape",
     (
-        [1, 1, TILE_HEIGHT, TILE_WIDTH],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH - 1],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH - 13],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH - 31],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 2],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 2 - 1],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 2 - 13],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 2 - 31],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 3],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 3 - 1],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 3 - 13],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 3 - 31],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 12 - 1],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 24 - 1],
+        [1, 1, TILE_WIDTH, TILE_HEIGHT],
+        [1, 1, TILE_WIDTH - 1, TILE_HEIGHT],
+        [1, 1, TILE_WIDTH * 2 - 1, TILE_HEIGHT],
+        [1, 1, TILE_WIDTH * 12 - 10, TILE_HEIGHT],
+        [1, 1, TILE_WIDTH * 24 - 20, TILE_HEIGHT],
     ),
 )
 def test_moreh_matmul_transpose_b(input_a_shape, input_b_shape, device):
@@ -304,40 +254,26 @@ def test_moreh_matmul_transpose_b(input_a_shape, input_b_shape, device):
     "input_a_shape",
     (
         [1, 1, TILE_HEIGHT, TILE_WIDTH],
-        [1, 2, TILE_HEIGHT - 1, TILE_WIDTH],
-        [2, 1, TILE_HEIGHT - 1, TILE_WIDTH - 11],
-        [2, 2, TILE_HEIGHT - 7, TILE_WIDTH - 17],
-        [1, 1, TILE_HEIGHT * 2, TILE_WIDTH * 2],
-        [1, 3, TILE_HEIGHT * 2 - 1, TILE_WIDTH * 2],
-        [3, 1, TILE_HEIGHT * 2 - 1, TILE_WIDTH * 2 - 11],
-        [3, 3, TILE_HEIGHT * 2 - 7, TILE_WIDTH * 2 - 17],
-        [1, 1, TILE_HEIGHT * 3, TILE_WIDTH * 3],
-        [1, 5, TILE_HEIGHT * 3 - 1, TILE_WIDTH * 3],
-        [5, 1, TILE_HEIGHT * 3 - 1, TILE_WIDTH * 3 - 11],
-        [5, 5, TILE_HEIGHT * 3 - 7, TILE_WIDTH * 3 - 17],
-        [5, 5, TILE_HEIGHT * 9 - 7, TILE_WIDTH * 3 - 1],
-        [5, 5, TILE_HEIGHT * 18 - 7, TILE_WIDTH * 2 - 1],
+        [1, 2, TILE_HEIGHT * 5 - 1, TILE_WIDTH],
+        [2, 1, TILE_HEIGHT * 5 - 1, TILE_WIDTH - 11],
+        [2, 2, TILE_HEIGHT * 5 - 7, TILE_WIDTH - 17],
+        [1, 5, TILE_HEIGHT * 10 - 1, TILE_WIDTH * 3],
+        [5, 1, TILE_HEIGHT * 10 - 1, TILE_WIDTH * 3 - 11],
+        [5, 5, TILE_HEIGHT * 20 - 7, TILE_WIDTH * 3 - 1],
     ),
 )
 
 # input_b_shape[2] is dummy
 # input_b_shape[3] is dummy
 @pytest.mark.parametrize(
     "input_b_shape",
     (
-        [1, 1, TILE_HEIGHT, TILE_WIDTH],
-        [1, 2, TILE_HEIGHT, TILE_WIDTH - 1],
-        [2, 1, TILE_HEIGHT, TILE_WIDTH - 13],
-        [2, 2, TILE_HEIGHT, TILE_WIDTH - 31],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 2],
-        [1, 3, TILE_HEIGHT, TILE_WIDTH * 2 - 1],
-        [3, 1, TILE_HEIGHT, TILE_WIDTH * 2 - 13],
-        [3, 3, TILE_HEIGHT, TILE_WIDTH * 2 - 31],
-        [1, 1, TILE_HEIGHT, TILE_WIDTH * 3],
-        [1, 5, TILE_HEIGHT, TILE_WIDTH * 3 - 1],
-        [5, 1, TILE_HEIGHT, TILE_WIDTH * 3 - 13],
-        [5, 5, TILE_HEIGHT, TILE_WIDTH * 3 - 31],
-        [5, 5, TILE_HEIGHT, TILE_WIDTH * 12 - 1],
-        [5, 5, TILE_HEIGHT, TILE_WIDTH * 24 - 1],
+        [1, 1, TILE_WIDTH, TILE_HEIGHT],
+        [1, 2, TILE_WIDTH * 5 - 1, TILE_HEIGHT],
+        [2, 1, TILE_WIDTH * 5 - 13, TILE_HEIGHT],
+        [2, 2, TILE_WIDTH * 5 - 31, TILE_HEIGHT],
+        [1, 5, TILE_WIDTH * 10 - 1, TILE_HEIGHT],
+        [5, 1, TILE_WIDTH * 10 - 13, TILE_HEIGHT],
+        [5, 5, TILE_WIDTH * 20 - 1, TILE_HEIGHT],
     ),
 )
 def test_batched_moreh_matmul_transpose_b(input_a_shape, input_b_shape, device):
@@ -381,22 +317,10 @@ def test_batched_moreh_matmul_transpose_b(input_a_shape, input_b_shape, device):
 @pytest.mark.parametrize(
     "input_shape",
     (
-        [1, 1, 1, 1],
-        [1, 1, 1, 10],
-        [1, 1, 1, 31],
-        [1, 1, 1, TILE_WIDTH],
-        [1, 1, 1, TILE_WIDTH * 2],
-        [1, 1, 1, TILE_WIDTH * 2 - 1],
-        [1, 1, 1, TILE_WIDTH * 2 - 21],
-        [1, 1, 1, TILE_WIDTH * 3],
-        [1, 1, 1, TILE_WIDTH * 3 - 3],
-        [1, 1, 1, TILE_WIDTH * 3 - 17],
-        [1, 1, 1, TILE_WIDTH * 10],
-        [1, 1, 1, TILE_WIDTH * 10 - 3],
-        [1, 1, 1, TILE_WIDTH * 10 - 17],
-        [1, 1, 1, TILE_WIDTH * 20],
-        [1, 1, 1, TILE_WIDTH * 20 - 3],
-        [1, 1, 1, TILE_WIDTH * 20 - 17],
+        [1, 1, 1, 10],  # test not multiple of 32 case
+        [1, 1, 1, TILE_WIDTH],  # test single tile
+        [1, 1, 1, TILE_WIDTH * 20],  # test multiple tiles
+        [1, 1, 1, TILE_WIDTH * 20 - 17],  # test multiple tiles, not a multiple of 32
     ),
 )
 def test_moreh_matmul_1d(input_shape, device):
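The trimmed list keeps one representative per boundary class: under one tile, exactly one tile, many tiles, and many tiles with a ragged edge. Widths that are not multiples of 32 force the op to pad the final tile and mask the padded lanes, which is what these cases exercise. A small sketch of the tile-count and last-tile arithmetic involved (helper logic is illustrative, not taken from the codebase):

```cpp
#include <cstdint>
#include <cstdio>

constexpr uint32_t TILE_WIDTH = 32;

int main() {
    // The four parametrized widths: 10, 32, 640 (= 32 * 20), 623 (= 32 * 20 - 17).
    for (uint32_t w : {10u, 32u, 640u, 623u}) {
        uint32_t num_tiles = (w + TILE_WIDTH - 1) / TILE_WIDTH;                 // ceil-div
        uint32_t mask_w = (w % TILE_WIDTH == 0) ? TILE_WIDTH : w % TILE_WIDTH;  // valid lanes in last tile
        std::printf("w=%3u -> %2u tile(s), last tile has %2u valid column(s)\n", w, num_tiles, mask_w);
    }
    return 0;
}
```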
2 changes: 0 additions & 2 deletions tt_eager/tt_dnn/op_library/moreh_dot/moreh_dot_op.hpp
@@ -5,8 +5,6 @@
  */
 
 #pragma once
-#include <optional>
-
 #include "tensor/tensor.hpp"
 #include "tt_dnn/op_library/run_operation.hpp"
tt_eager/tt_dnn/op_library/moreh_dot/single_core/kernels/reader_moreh_dot.cpp (renamed from reader_binary_interleaved_start_id.cpp)
@@ -7,7 +7,7 @@
 #include "dataflow_api.h"
 #include "debug_print.h"
 
-void mask_tile(uint32_t l1_addr, uint32_t mask_w = 32, uint32_t mask_h = 32) {
+void mask_tile_in_reader(uint32_t l1_addr, uint32_t mask_w = 32, uint32_t mask_h = 32) {
     union {
         float f;
         uint32_t u;
@@ -105,8 +105,8 @@ void kernel_main() {
     noc_async_read_barrier();
 
     if (last_tile) {
-        mask_tile(l1_write_addr_in0, mask_w, mask_h);
-        mask_tile(l1_write_addr_in1, mask_w, mask_h);
+        mask_tile_in_reader(l1_write_addr_in0, mask_w, mask_h);
+        mask_tile_in_reader(l1_write_addr_in1, mask_w, mask_h);
     }
 
     cb_push_back(cb_id_in0, onetile);
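For context, `mask_tile_in_reader` zeroes the lanes of the last tile that fall outside the valid `mask_w` x `mask_h` region, so padded rows and columns cannot contaminate the dot product. A host-side sketch of the same idea, assuming a plain row-major 32x32 float tile (the actual kernel writes bfloat16 data through an L1 address, hence the `float`/`uint32_t` union in the hunk above):

```cpp
#include <cstdint>

// Zero everything outside the top-left mask_h x mask_w region of a
// 32x32 tile. Illustration only: assumes a row-major float layout,
// not the device's tilized bfloat16 format.
void mask_tile_row_major(float* tile, uint32_t mask_w = 32, uint32_t mask_h = 32) {
    for (uint32_t h = 0; h < 32; ++h) {
        for (uint32_t w = 0; w < 32; ++w) {
            if (h >= mask_h || w >= mask_w) {
                tile[h * 32 + w] = 0.0f;
            }
        }
    }
}
```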
@@ -42,20 +42,20 @@ operation::ProgramWithCallbacks moreh_dot_single_core(const Tensor &a, const Ten
     tt_metal::Buffer *dst_buffer = output.buffer();
     TT_ASSERT(dst_buffer != nullptr, "Output buffer should be allocated on device!");
 
-    uint32_t src0_cb_index = 0;
+    uint32_t src0_cb_index = CB::c_in0;
     uint32_t num_input_tiles = 2;
     tt_metal::CircularBufferConfig cb_src0_config =
         tt_metal::CircularBufferConfig(num_input_tiles * src0_single_tile_size, {{src0_cb_index, src0_cb_data_format}})
             .set_page_size(src0_cb_index, src0_single_tile_size);
     auto cb_src0 = tt_metal::CreateCircularBuffer(program, core, cb_src0_config);
 
-    uint32_t src1_cb_index = 1;
+    uint32_t src1_cb_index = CB::c_in1;
     tt_metal::CircularBufferConfig cb_src1_config =
         tt_metal::CircularBufferConfig(num_input_tiles * src1_single_tile_size, {{src1_cb_index, src1_cb_data_format}})
             .set_page_size(src1_cb_index, src1_single_tile_size);
     auto cb_src1 = tt_metal::CreateCircularBuffer(program, core, cb_src1_config);
 
-    uint32_t output_cb_index = 16;  // output operands start at index 16
+    uint32_t output_cb_index = CB::c_out0;  // output operands start at index 16
     uint32_t num_output_tiles = 2;
     tt_metal::CircularBufferConfig cb_output_config =
         tt_metal::CircularBufferConfig(num_output_tiles * dst_single_tile_size, {{output_cb_index, dst_cb_data_format}})
@@ -67,13 +67,13 @@ operation::ProgramWithCallbacks moreh_dot_single_core(const Tensor &a, const Ten
             .set_page_size(CB::c_in2, dst_single_tile_size);
     auto cb_src2 = tt_metal::CreateCircularBuffer(program, core, cb_scaler_config);
 
-    uint32_t interm0_cb_index = 24;
+    uint32_t interm0_cb_index = CB::c_intermed0;
     tt_metal::CircularBufferConfig interm0_cb_config =
         tt_metal::CircularBufferConfig(dst_single_tile_size, {{interm0_cb_index, dst_cb_data_format}})
             .set_page_size(interm0_cb_index, dst_single_tile_size);
     auto cb_interm0 = tt_metal::CreateCircularBuffer(program, core, interm0_cb_config);
 
-    uint32_t interm1_cb_index = 25;
+    uint32_t interm1_cb_index = CB::c_intermed1;
     tt_metal::CircularBufferConfig interm1_cb_config =
         tt_metal::CircularBufferConfig(dst_single_tile_size, {{interm1_cb_index, dst_cb_data_format}})
             .set_page_size(interm1_cb_index, dst_single_tile_size);
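Swapping the magic numbers for named `CB` indices makes the host code self-describing without changing behavior: the literals being replaced are exactly the values the enum members carry. A sketch of the correspondence (the real enum lives in the tt-metal headers; `c_in2`'s value is an assumption, since this diff never shows it as a literal):

```cpp
#include <cstdint>

// Correspondence between the old literals and the named CB indices,
// reconstructed from this diff; not the actual tt-metal definition.
enum CBIndex : std::uint32_t {
    c_in0 = 0,         // was: src0_cb_index = 0
    c_in1 = 1,         // was: src1_cb_index = 1
    c_in2 = 2,         // scaler buffer (value assumed, not shown in the diff)
    c_out0 = 16,       // "output operands start at index 16"
    c_intermed0 = 24,  // was: interm0_cb_index = 24
    c_intermed1 = 25,  // was: interm1_cb_index = 25
};

static_assert(c_out0 == 16, "output CB indices start at 16");
```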
@@ -89,7 +89,7 @@ operation::ProgramWithCallbacks moreh_dot_single_core(const Tensor &a, const Ten
 
     KernelID binary_reader_kernel_id = tt_metal::CreateDataMovementKernel(
         program,
-        "tt_eager/tt_dnn/op_library/moreh_dot/single_core/kernels/reader_binary_interleaved_start_id.cpp",
+        "tt_eager/tt_dnn/op_library/moreh_dot/single_core/kernels/reader_moreh_dot.cpp",
         core,
         tt_metal::DataMovementConfig{
             .processor = tt_metal::DataMovementProcessor::RISCV_1,
@@ -98,7 +98,7 @@ operation::ProgramWithCallbacks moreh_dot_single_core(const Tensor &a, const Ten
 
     KernelID unary_writer_kernel_id = tt_metal::CreateDataMovementKernel(
         program,
-        "tt_eager/tt_dnn/op_library/moreh_dot/single_core/kernels/writer_unary_interleaved_start_id.cpp",
+        "tt_eager/tt_dnn/op_library/moreh_dot/single_core/kernels/writer_moreh_dot.cpp",
         core,
         tt_metal::DataMovementConfig{
             .processor = tt_metal::DataMovementProcessor::RISCV_0,
@@ -109,12 +109,10 @@ operation::ProgramWithCallbacks moreh_dot_single_core(const Tensor &a, const Ten
     std::map<string, string> defines;
     defines["REDUCE_OP"] = "PoolType::SUM";
     defines["REDUCE_DIM"] = "ReduceDim::REDUCE_ROW";
-    // defines["ELTWISE_OP"] = "mul_tiles";
-    // defines["ELTWISE_OP_CODE"] = "2";
 
     auto dot_kernel = tt_metal::CreateComputeKernel(
         program,
-        "tt_eager/tt_dnn/op_library/moreh_dot/single_core/kernels/dot.cpp",
+        "tt_eager/tt_dnn/op_library/moreh_dot/single_core/kernels/moreh_dot.cpp",
         core,
         tt_metal::ComputeConfig{.compile_args = compute_kernel_args, .defines = defines});
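The `defines` map is applied when the compute kernel is compiled, which is why the commented-out `ELTWISE_OP` entries could simply be deleted rather than kept as dead weight. Conceptually, each entry becomes a `-D` flag on the kernel's compile line, roughly like this host-side sketch (illustrative; not the actual tt-metal build code):

```cpp
#include <map>
#include <string>

// Turn a defines map into preprocessor flags, e.g.
// {"REDUCE_OP", "PoolType::SUM"} -> " -DREDUCE_OP=PoolType::SUM".
std::string to_compile_flags(const std::map<std::string, std::string>& defines) {
    std::string flags;
    for (const auto& [name, value] : defines) {
        flags += " -D" + name + "=" + value;
    }
    return flags;
}
```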
2 changes: 0 additions & 2 deletions tt_eager/tt_dnn/op_library/moreh_matmul/moreh_matmul_op.hpp
@@ -5,8 +5,6 @@
  */
 
 #pragma once
-#include <optional>
-
 #include "tensor/tensor.hpp"
 #include "tt_dnn/op_library/run_operation.hpp"