-
Notifications
You must be signed in to change notification settings - Fork 75
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add dynamic NoC support for GS, WH, and BH (#13376)
* #0: add noc modes to kernel config * #0: remove all hardcoded noc_index * #0: add u-bench for read DRAM and write to remote L1 * #0: u-bench code clean up + add constexpr to cmd_bufs * #0: rename to DM_DEDICATED_NOC and DM_DYNAMIC_NOC * #0: fix soc descriptor for moving FD cores * #0: add fix to fast div, change back soc desc * #0: fix dram read l1 write for GS * #0: #0: fix dram read l1 write for BH * #0: minor fix after rebase * #0: re-calculate ret addr for atomic cmd * #0: code clean up + add dynamic noc for eth * #0: add read dram write l1 u-bench to CI * #0: reduce code size, remove dynamic noc for eth, rename dynamic_noc_init * #0: reduce code size by not using extern for noc_index when compile kernel * #0: remove NOC_MODE FW define, move NOC_MODE outside of noc_nonblocking_api * #0: reduce code size by remove else condition for noc_local_state_init
- Loading branch information
Showing
24 changed files
with
1,721 additions
and
260 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
117 changes: 117 additions & 0 deletions
117
...tt_metal/perf_microbenchmark/9_dram_adjacent_read_remote_l1_write/kernels/reader_dram.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include <stdint.h> | ||
|
||
#include "dataflow_api.h" | ||
|
||
#include "debug/dprint.h" | ||
|
||
// Issues a single NoC read of `page_size` bytes from a DRAM bank into local address `dest_addr`. | ||
// | ||
// Template parameters: | ||
//   bank_base_address - constant added to `src_addr` to form the address inside the bank. | ||
//   page_size         - number of bytes requested (written to NOC_AT_LEN_BE). | ||
//   use_vc            - when true, NOC_CTRL is reprogrammed with a static VC taken from `vc`; | ||
//                       when false, NOC_CTRL is not written by this function. | ||
// Parameters: | ||
//   src_addr  - offset of the data; bank_base_address and bank_to_dram_offset[bank_id] are added. | ||
//   dest_addr - destination address written to NOC_RET_ADDR_LO. | ||
//   bank_id   - index into bank_to_dram_offset and dram_bank_to_noc_xy[noc_index]. | ||
//   vc        - virtual channel number, only used when use_vc is true. | ||
// | ||
// The function only waits for the command buffer to become ready before issuing; it contains no | ||
// completion barrier. It increments noc_reads_num_issued[noc_index] by one. | ||
// NOTE(review): kernel_main in this file uses the *_set_state / *_with_state API variants and | ||
// does not call this helper. | ||
template <uint32_t bank_base_address, uint32_t page_size, bool use_vc> | ||
FORCE_INLINE | ||
void noc_async_read_tile_dram_sharded(uint32_t src_addr, uint32_t dest_addr, uint32_t bank_id = 0, const uint32_t vc = 0) { | ||
uint32_t src_addr_; | ||
uint32_t src_noc_xy; | ||
|
||
// Resolve the bank-relative address and the NoC coordinate of the bank for the active NoC. | ||
src_addr_ = src_addr + bank_base_address; | ||
src_addr_ += bank_to_dram_offset[bank_id]; | ||
src_noc_xy = dram_bank_to_noc_xy[noc_index][bank_id]; | ||
|
||
WAYPOINT("NRTW"); | ||
DEBUG_SANITIZE_NOC_READ_TRANSACTION(noc_index, get_noc_addr_helper(src_noc_xy, src_addr_), dest_addr, page_size); | ||
// Spin until noc_cmd_buf_ready reports that the read command buffer can be programmed. | ||
while (!noc_cmd_buf_ready(noc_index, NCRISC_RD_CMD_BUF)); | ||
WAYPOINT("NRTD"); | ||
|
||
// Optional: select a static virtual channel for this read. | ||
// NOTE(review): with use_vc == false the previous NOC_CTRL contents are reused - confirm that | ||
// the command buffer was initialised for reads elsewhere. | ||
if constexpr(use_vc) { | ||
uint32_t noc_rd_cmd_field = NOC_CMD_CPY | NOC_CMD_RD | NOC_CMD_RESP_MARKED | NOC_CMD_VC_STATIC | NOC_CMD_STATIC_VC(vc); | ||
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_CTRL, noc_rd_cmd_field); | ||
} | ||
|
||
// Program destination, source address, source coordinate and length; the NOC_CMD_CTRL write | ||
// with NOC_CTRL_SEND_REQ comes last, after all other fields have been written. | ||
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_RET_ADDR_LO, dest_addr); | ||
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_LO, src_addr_); // (uint32_t)src_addr | ||
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_TARG_ADDR_COORDINATE, src_noc_xy); // src_addr >> 32 | ||
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_AT_LEN_BE, page_size); // len_bytes | ||
NOC_CMD_BUF_WRITE_REG(noc_index, NCRISC_RD_CMD_BUF, NOC_CMD_CTRL, NOC_CTRL_SEND_REQ); | ||
// Account for the outstanding read in the per-NoC issued-reads counter. | ||
noc_reads_num_issued[noc_index] += 1; | ||
} | ||
|
||
// Reader kernel entry point: issues num_blocks * num_pages reads of page_size bytes each through | ||
// the noc_async_read_tile_dram_sharded_* API and publishes every block to circular buffer 0. | ||
// | ||
// Compile-time args: 0 input_addr, 1 input_start_tile_id, 2 num_blocks, 3 num_pages, | ||
//                    4 block_num_tiles, 5 page_size. | ||
// Runtime args:      0 bank_id, 1 vc. | ||
// | ||
// Two code paths are selected at compile time: | ||
//  - ARCH_GRAYSKULL: for each block, reserve, issue all page reads, full read barrier, push. | ||
//  - otherwise: reads are tagged with a transaction id (1..3) and the barrier for a block is | ||
//    only taken after the reads of the following block have been issued. | ||
// NOTE(review): input_start_tile_id and total_num_trid are not used anywhere in this function. | ||
void kernel_main() { | ||
constexpr uint32_t input_addr = get_compile_time_arg_val(0); | ||
constexpr uint32_t input_start_tile_id = get_compile_time_arg_val(1); | ||
constexpr uint32_t num_blocks = get_compile_time_arg_val(2); | ||
constexpr uint32_t num_pages = get_compile_time_arg_val(3); | ||
constexpr uint32_t block_num_tiles = get_compile_time_arg_val(4); | ||
constexpr uint32_t page_size = get_compile_time_arg_val(5); | ||
|
||
// Bytes occupied by one block in the destination buffer. | ||
constexpr uint32_t block_size_bytes = page_size * num_pages; | ||
|
||
const uint32_t bank_id = get_arg_val<uint32_t>(0); | ||
const uint32_t vc = get_arg_val<uint32_t>(1); | ||
|
||
constexpr uint32_t cb_id = 0; | ||
|
||
// One-time read setup for (input_addr, bank_id, vc); the returned base address is passed to | ||
// every *_with_state read below. src_read_addr advances by page_size per read and is never | ||
// reset, so blocks are read from consecutive source offsets. | ||
uint32_t src_base_addr = noc_async_read_tile_dram_sharded_set_state<page_size, true>(input_addr, bank_id, vc); | ||
uint32_t src_read_addr = 0; | ||
|
||
#ifdef ARCH_GRAYSKULL | ||
for (uint32_t block = 0; block < num_blocks; ++block) { | ||
// Operand 1 | ||
cb_reserve_back(cb_id, block_num_tiles); | ||
auto l1_write_addr = get_write_ptr(cb_id); | ||
|
||
for (uint32_t h = 0; h < num_pages; ++h) { | ||
noc_async_read_tile_dram_sharded_with_state(src_base_addr, src_read_addr, l1_write_addr); | ||
src_read_addr += page_size; | ||
l1_write_addr += page_size; | ||
} | ||
|
||
// Wait for all reads of this block before handing it to the consumer. | ||
noc_async_read_barrier(); | ||
cb_push_back(cb_id, block_num_tiles); | ||
} | ||
#else | ||
constexpr uint32_t total_num_blocks_in_buffer = 3; | ||
constexpr uint32_t total_num_trid = 4; | ||
uint32_t num_free_blocks_in_buffer = total_num_blocks_in_buffer; | ||
uint32_t curr_block_trid = 1; | ||
uint32_t block_trid_to_wait = 1; | ||
|
||
cb_reserve_back(cb_id, block_num_tiles); | ||
// Destination addresses are computed manually from the write pointer captured once here; | ||
// the offset cycles through three block-sized slots (0, 1x, 2x block_size_bytes). | ||
uint32_t l1_write_addr_offset = 0; | ||
uint32_t l1_write_addr_start = get_write_ptr(cb_id); | ||
uint32_t l1_write_addr = l1_write_addr_start; | ||
for (uint32_t block = 0; block < num_blocks; ++block) { | ||
noc_async_read_tile_dram_sharded_set_trid(curr_block_trid); | ||
|
||
for (uint32_t h = 0; h < num_pages; ++h) { | ||
noc_async_read_tile_dram_sharded_with_state_with_trid( | ||
src_base_addr, src_read_addr, l1_write_addr, curr_block_trid); | ||
src_read_addr += page_size; | ||
l1_write_addr += page_size; | ||
} | ||
|
||
// The first iteration only drops the counter from 3 to 2. From the second iteration on the | ||
// counter stays at 2, so each iteration waits for and pushes the previously issued block. | ||
if (num_free_blocks_in_buffer == 2) { | ||
noc_async_read_barrier_with_trid(block_trid_to_wait); | ||
cb_push_back(cb_id, block_num_tiles); | ||
// wait for next block trid | ||
// NOTE(review): the literal 3 equals total_num_blocks_in_buffer; keep them in sync. | ||
block_trid_to_wait = block_trid_to_wait == 3 ? 1 : (block_trid_to_wait + 1); | ||
// reserve for next block | ||
cb_reserve_back(cb_id, block_num_tiles * 2); | ||
} else { | ||
num_free_blocks_in_buffer -= 1; | ||
} | ||
|
||
// Advance to the next transaction id / destination slot, wrapping after the third one. | ||
if (curr_block_trid == total_num_blocks_in_buffer) { | ||
l1_write_addr_offset = 0; | ||
curr_block_trid = 1; | ||
} else { | ||
l1_write_addr_offset += block_size_bytes; | ||
curr_block_trid += 1; | ||
} | ||
l1_write_addr = l1_write_addr_start + l1_write_addr_offset; | ||
} | ||
// last block to wait | ||
// NOTE(review): this barrier and push run unconditionally, so num_blocks == 0 would still | ||
// push one block for which no reads were issued - confirm the host never passes 0. | ||
noc_async_read_barrier_with_trid(block_trid_to_wait); | ||
cb_push_back(cb_id, block_num_tiles); | ||
#endif | ||
} |
50 changes: 50 additions & 0 deletions
50
...l/tt_metal/perf_microbenchmark/9_dram_adjacent_read_remote_l1_write/kernels/writer_l1.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
// SPDX-FileCopyrightText: © 2024 Tenstorrent Inc. | ||
// | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
#include <stdint.h> | ||
|
||
#include "dataflow_api.h" | ||
|
||
#include "debug/dprint.h" | ||
|
||
|
||
// Writer kernel entry point: for each of num_blocks blocks, waits for the block in circular | ||
// buffer 0 and writes its num_pages pages (page_size bytes each) to the core at (noc_x, noc_y). | ||
// | ||
// Compile-time args: 0 num_blocks, 1 num_pages, 2 block_num_tiles, 3 page_size, 4 noc. | ||
// Runtime args:      0 vc, 1 noc_x, 2 noc_y. | ||
void kernel_main() { | ||
constexpr uint32_t num_blocks = get_compile_time_arg_val(0); | ||
constexpr uint32_t num_pages = get_compile_time_arg_val(1); | ||
constexpr uint32_t block_num_tiles = get_compile_time_arg_val(2); | ||
constexpr uint32_t page_size = get_compile_time_arg_val(3); | ||
constexpr uint32_t noc = get_compile_time_arg_val(4); | ||
|
||
const uint32_t vc = get_arg_val<uint32_t>(0); | ||
const uint32_t noc_x = get_arg_val<uint32_t>(1); | ||
const uint32_t noc_y = get_arg_val<uint32_t>(2); | ||
|
||
constexpr uint32_t cb_id = 0; | ||
|
||
// The remote destination uses this core's own CB write pointer (sampled once, here) as the | ||
// address on the target core. | ||
// NOTE(review): presumably the target core has the same buffer layout - confirm against host. | ||
uint32_t l1_write_addr = get_write_ptr(cb_id); | ||
const uint64_t l1_noc_write_addr = get_noc_addr(noc_x, noc_y, l1_write_addr, noc); | ||
|
||
// One-time write setup for (destination, page_size, noc, vc), used by the *_with_state | ||
// writes in the loop. | ||
noc_async_write_one_packet_set_state(l1_noc_write_addr, page_size, noc, vc); | ||
|
||
for (uint32_t block = 0; block < num_blocks; ++block) { | ||
|
||
// The destination restarts at the same base address for every block, so each block | ||
// overwrites the same remote range. | ||
auto remote_l1_write_addr = l1_noc_write_addr; | ||
|
||
cb_wait_front(cb_id, block_num_tiles); | ||
auto l1_read_addr = get_read_ptr(cb_id); | ||
|
||
for (uint32_t h = 0; h < num_pages; ++h) { | ||
noc_async_write_one_packet_with_state(l1_read_addr, remote_l1_write_addr, noc); | ||
l1_read_addr += page_size; | ||
remote_l1_write_addr += page_size; | ||
} | ||
|
||
// The write barrier precedes cb_pop_front, so the block is only released after the | ||
// barrier has returned. | ||
noc_async_write_barrier(noc); | ||
|
||
cb_pop_front(cb_id, block_num_tiles); | ||
|
||
} | ||
|
||
|
||
} |
Oops, something went wrong.