Skip to content

Commit

Permalink
#4984: Move dprint to dev_mem_map/msgs
Browse files Browse the repository at this point in the history
  • Loading branch information
pgkeller committed Aug 4, 2024
1 parent 1b0854b commit 5aa33ef
Show file tree
Hide file tree
Showing 13 changed files with 67 additions and 50 deletions.
14 changes: 10 additions & 4 deletions tests/tt_metal/tt_metal/perf_microbenchmark/common/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "tt_metal/detail/tt_metal.hpp"
#include "tt_metal/host_api.hpp"
#include "debug/dprint_buffer.h"

inline uint64_t get_t0_to_any_riscfw_end_cycle(tt::tt_metal::Device *device, const tt::tt_metal::Program &program) {
#if defined(TRACY_ENABLE)
Expand All @@ -21,17 +22,22 @@ inline uint64_t get_t0_to_any_riscfw_end_cycle(tt::tt_metal::Device *device, con
auto device_id = device->id();
uint64_t min_cycle = -1;
uint64_t max_cycle = 0;
vector<uint32_t> print_buffer_addrs = {
PRINT_BUFFER_NC, PRINT_BUFFER_BR, PRINT_BUFFER_T0, PRINT_BUFFER_T1, PRINT_BUFFER_T2};
vector<uint64_t> print_buffer_addrs = {
static_cast<uint64_t>(GET_MAILBOX_ADDRESS_HOST(dprint_buf.data[DPRINT_RISCV_INDEX_NC])),
static_cast<uint64_t>(GET_MAILBOX_ADDRESS_HOST(dprint_buf.data[DPRINT_RISCV_INDEX_BR])),
static_cast<uint64_t>(GET_MAILBOX_ADDRESS_HOST(dprint_buf.data[DPRINT_RISCV_INDEX_TR0])),
static_cast<uint64_t>(GET_MAILBOX_ADDRESS_HOST(dprint_buf.data[DPRINT_RISCV_INDEX_TR1])),
static_cast<uint64_t>(GET_MAILBOX_ADDRESS_HOST(dprint_buf.data[DPRINT_RISCV_INDEX_TR2]))
};
for (const auto &worker_core : worker_cores_used_in_program) {
for (const auto &buffer_addr : print_buffer_addrs) {
vector<std::uint32_t> profile_buffer;
uint32_t end_index;
uint32_t dropped_marker_counter;
profile_buffer = tt::llrt::read_hex_vec_from_core(device_id, worker_core, buffer_addr, PRINT_BUFFER_SIZE);
profile_buffer = tt::llrt::read_hex_vec_from_core(device_id, worker_core, buffer_addr, DPRINT_BUFFER_SIZE);

end_index = profile_buffer[BUFFER_END_INDEX];
TT_ASSERT(end_index < (PRINT_BUFFER_SIZE / sizeof(uint32_t)));
TT_ASSERT(end_index < (DPRINT_BUFFER_SIZE / sizeof(uint32_t)));
dropped_marker_counter = profile_buffer[DROPPED_MARKER_COUNTER];

uint32_t step = (end_index - MARKER_DATA_START) / TIMER_DATA_UINT32_SIZE;
Expand Down
15 changes: 1 addition & 14 deletions tt_metal/hostdevcommon/common_runtime_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,20 +77,7 @@ constexpr static std::uint32_t SEMAPHORE_BASE = PROFILER_L1_BUFFER_CONTROL + PRO
constexpr static std::uint32_t NUM_SEMAPHORES = 4;
constexpr static std::uint32_t SEMAPHORE_SIZE = NUM_SEMAPHORES * L1_ALIGNMENT;

// Debug printer buffers - A total of 5*PRINT_BUFFER_SIZE starting at PRINT_BUFFER_NC address
constexpr static std::uint32_t PRINT_BUFFER_START = SEMAPHORE_BASE + SEMAPHORE_SIZE; // per thread
constexpr static std::uint32_t PRINT_BUFFER_MAX_SIZE = 1024; // per thread

constexpr static std::uint32_t PRINT_BUFFER_SIZE = 204; // per thread
constexpr static std::uint32_t PRINT_BUFFERS_COUNT = 5; // one for each thread
constexpr static std::uint32_t PRINT_BUFFER_NC = PRINT_BUFFER_START; // NCRISC, address in bytes
constexpr static std::uint32_t PRINT_BUFFER_T0 = PRINT_BUFFER_NC + PRINT_BUFFER_SIZE; // TRISC0
constexpr static std::uint32_t PRINT_BUFFER_T1 = PRINT_BUFFER_T0 + PRINT_BUFFER_SIZE; // TRISC1
constexpr static std::uint32_t PRINT_BUFFER_T2 = PRINT_BUFFER_T1 + PRINT_BUFFER_SIZE; // TRISC2
constexpr static std::uint32_t PRINT_BUFFER_BR = PRINT_BUFFER_T2 + PRINT_BUFFER_SIZE; // BRISC
constexpr static std::uint32_t PRINT_BUFFER_IDLE_ER = PRINT_BUFFER_START; // Idle ERISC

constexpr static std::uint32_t L1_UNRESERVED_BASE = (((PRINT_BUFFER_START + PRINT_BUFFER_MAX_SIZE) - 1) | (DRAM_ALIGNMENT - 1)) + 1;
constexpr static std::uint32_t L1_UNRESERVED_BASE = ((SEMAPHORE_BASE + SEMAPHORE_SIZE - 1) | (DRAM_ALIGNMENT - 1)) + 1;
constexpr static std::uint32_t ERISC_L1_UNRESERVED_BASE = L1_UNRESERVED_BASE; // Start of unreserved space

// Helper functions to convert NoC coordinates to NoC-0 coordinates, used in metal as "physical" coordinates.
Expand Down
5 changes: 3 additions & 2 deletions tt_metal/hostdevcommon/dprint_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#pragma once

#include <cstddef>
#include <dev_msgs.h>

#define DPRINT_TYPES \
DPRINT_PREFIX(CSTR) \
Expand Down Expand Up @@ -78,7 +79,7 @@ struct DebugPrintMemLayout {
uint16_t core_x ATTR_ALIGN2;
uint16_t core_y ATTR_ALIGN2;
} aux ATTR_ALIGN4;
uint8_t data[PRINT_BUFFER_SIZE-sizeof(Aux)];
uint8_t data[DPRINT_BUFFER_SIZE-sizeof(Aux)];

static size_t rpos_offs() { return offsetof(DebugPrintMemLayout::Aux, rpos) + offsetof(DebugPrintMemLayout, aux); }

Expand Down Expand Up @@ -107,4 +108,4 @@ enum TypedU32_ARRAY_Format {
TypedU32_ARRAY_Format_COUNT,
};

static_assert(sizeof(DebugPrintMemLayout) == PRINT_BUFFER_SIZE);
static_assert(sizeof(DebugPrintMemLayout) == DPRINT_BUFFER_SIZE);
4 changes: 2 additions & 2 deletions tt_metal/hw/inc/blackhole/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,10 @@

#define MEM_BOOT_CODE_BASE 0
#define MEM_MAILBOX_BASE 16
#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + 288)
#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + 1312)
#define MEM_IERISC_MAILBOX_BASE 1024
#define MEM_IERISC_MAILBOX_END (MEM_IERISC_MAILBOX_BASE + 128)
#define MEM_ZEROS_BASE 768
#define MEM_ZEROS_BASE ((MEM_MAILBOX_END + 31) & ~31)
#define MEM_BRISC_FIRMWARE_BASE (MEM_ZEROS_BASE + MEM_ZEROS_SIZE)
#define MEM_NCRISC_FIRMWARE_BASE (MEM_BRISC_FIRMWARE_BASE + MEM_BRISC_FIRMWARE_SIZE)
#define MEM_IERISC_FIRMWARE_BASE 8192
Expand Down
4 changes: 1 addition & 3 deletions tt_metal/hw/inc/blackhole/eth_l1_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,11 @@ struct address_map {
// erisc early exit functionality re-uses mailboxes_t::ncrisc_halt_msg_t::stack_save memory
static constexpr std::int32_t ERISC_MEM_MAILBOX_STACK_SAVE = ERISC_MEM_MAILBOX_BASE + 4;

static constexpr std::int32_t PRINT_BUFFER_ER = ERISC_MEM_MAILBOX_BASE + 288 + 16;
static constexpr std::uint32_t PROFILER_L1_BUFFER_ER = PRINT_BUFFER_ER + 256;
static constexpr std::uint32_t PROFILER_L1_BUFFER_ER = ERISC_MEM_MAILBOX_BASE + 288 + 256 + 16;
static constexpr std::uint32_t PROFILER_L1_BUFFER_CONTROL = PROFILER_L1_BUFFER_ER + PROFILER_L1_BUFFER_SIZE;

static constexpr std::int32_t ERISC_L1_KERNEL_CONFIG_BASE = PROFILER_L1_BUFFER_CONTROL + PROFILER_L1_CONTROL_BUFFER_SIZE;

static_assert((PRINT_BUFFER_ER % 32) == 0);
static_assert((PROFILER_L1_BUFFER_ER % 32) == 0);
static_assert((PROFILER_L1_BUFFER_CONTROL % 32) == 0);

Expand Down
2 changes: 1 addition & 1 deletion tt_metal/hw/inc/debug/dprint.h
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ struct DebugPrinter {
}
uint8_t* buf() { return get_debug_print_buffer(); }
uint8_t* data() { return reinterpret_cast<DebugPrintMemLayout*>(buf())->data; }
uint8_t* bufend() { return buf() + PRINT_BUFFER_SIZE; }
uint8_t* bufend() { return buf() + DPRINT_BUFFER_SIZE; }

DebugPrinter() {
#if defined(DEBUG_PRINT_ENABLED)
Expand Down
28 changes: 21 additions & 7 deletions tt_metal/hw/inc/debug/dprint_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,35 @@

#pragma once

#include <dev_msgs.h>

// TODO: remove the HartFlags in dprint_server.hpp and drive from this
enum DebugPrintHartIndex : unsigned int {
DPRINT_RISCV_INDEX_NC = 0,
DPRINT_RISCV_INDEX_TR0 = 1,
DPRINT_RISCV_INDEX_TR1 = 2,
DPRINT_RISCV_INDEX_TR2 = 3,
DPRINT_RISCV_INDEX_BR = 4,
DPRINT_RISCV_INDEX_ER = 0,
};

// Returns the buffer address for current thread+core. Differs for NC/BR/ER/TR0-2.
inline uint8_t* get_debug_print_buffer() {
#if defined(COMPILE_FOR_NCRISC)
return reinterpret_cast<uint8_t*>(PRINT_BUFFER_NC);
return reinterpret_cast<uint8_t*>(GET_MAILBOX_ADDRESS_DEV(dprint_buf.data[DPRINT_RISCV_INDEX_NC]));
#elif defined(COMPILE_FOR_BRISC)
return reinterpret_cast<uint8_t*>(PRINT_BUFFER_BR);
return reinterpret_cast<uint8_t*>(GET_MAILBOX_ADDRESS_DEV(dprint_buf.data[DPRINT_RISCV_INDEX_BR]));
#elif defined(COMPILE_FOR_ERISC)
return reinterpret_cast<uint8_t*>(eth_l1_mem::address_map::PRINT_BUFFER_ER);
return reinterpret_cast<uint8_t*>(GET_MAILBOX_ADDRESS_DEV(dprint_buf.data[DPRINT_RISCV_INDEX_ER]));
#elif defined(COMPILE_FOR_IDLE_ERISC)
return reinterpret_cast<uint8_t*>(PRINT_BUFFER_IDLE_ER);
return reinterpret_cast<uint8_t*>(GET_MAILBOX_ADDRESS_DEV(dprint_buf.data[DPRINT_RISCV_INDEX_ER]));
#elif defined(UCK_CHLKC_UNPACK)
return reinterpret_cast<uint8_t*>(PRINT_BUFFER_T0);
return reinterpret_cast<uint8_t*>(GET_MAILBOX_ADDRESS_DEV(dprint_buf.data[DPRINT_RISCV_INDEX_TR0]));
#elif defined(UCK_CHLKC_MATH)
return reinterpret_cast<uint8_t*>(PRINT_BUFFER_T1);
return reinterpret_cast<uint8_t*>(GET_MAILBOX_ADDRESS_DEV(dprint_buf.data[DPRINT_RISCV_INDEX_TR1]));
#elif defined(UCK_CHLKC_PACK)
return reinterpret_cast<uint8_t*>(PRINT_BUFFER_T2);
return reinterpret_cast<uint8_t*>(GET_MAILBOX_ADDRESS_DEV(dprint_buf.data[DPRINT_RISCV_INDEX_TR2]));
#else
return 0;
#endif
}
13 changes: 13 additions & 0 deletions tt_metal/hw/inc/dev_msgs.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,18 @@ struct debug_ring_buf_msg_t {
uint32_t data[DEBUG_RING_BUFFER_ELEMENTS];
};

constexpr static std::uint32_t DPRINT_BUFFER_SIZE = 204; // per thread
#if defined(COMPILE_FOR_ERISC) || defined (COMPILE_FOR_IDLE_ERISC)
constexpr static std::uint32_t DPRINT_BUFFERS_COUNT = 1;
#else
constexpr static std::uint32_t DPRINT_BUFFERS_COUNT = 5;
#endif

struct dprint_buf_msg_t {
uint8_t data[DPRINT_BUFFERS_COUNT][DPRINT_BUFFER_SIZE];
uint32_t pad; // to 1024 bytes
};

enum watcher_enable_msg_t {
WatcherDisabled = 2,
WatcherEnabled = 3,
Expand All @@ -210,6 +222,7 @@ struct mailboxes_t {
struct debug_pause_msg_t pause_status;
struct debug_insert_delays_msg_t debug_insert_delays;
struct debug_ring_buf_msg_t debug_ring_buf;
struct dprint_buf_msg_t dprint_buf;
};

static_assert(sizeof(kernel_config_msg_t) % sizeof(uint32_t) == 0);
Expand Down
4 changes: 2 additions & 2 deletions tt_metal/hw/inc/grayskull/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@

#define MEM_BOOT_CODE_BASE 0
#define MEM_MAILBOX_BASE 16
#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + 288)
#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + 1312)
#define MEM_IERISC_MAILBOX_BASE 0
#define MEM_IERISC_MAILBOX_END 0
#define MEM_ZEROS_BASE 768
#define MEM_ZEROS_BASE ((MEM_MAILBOX_END + 31) & ~31)
#define MEM_BRISC_FIRMWARE_BASE (MEM_ZEROS_BASE + MEM_ZEROS_SIZE)
#define MEM_NCRISC_FIRMWARE_BASE (MEM_NCRISC_IRAM_BASE)
#define MEM_IERISC_FIRMWARE_BASE 0
Expand Down
1 change: 0 additions & 1 deletion tt_metal/hw/inc/grayskull/eth_l1_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ struct address_map {
static constexpr std::uint32_t COMPLETION_CQ_CB_BASE = 0;
static constexpr std::int32_t LAUNCH_ERISC_APP_FLAG = 0;
static constexpr std::uint32_t FW_VERSION_ADDR = 0;
static constexpr std::int32_t PRINT_BUFFER_ER = 0;

static constexpr std::int32_t ERISC_BARRIER_BASE = 0;
static constexpr std::int32_t MAX_L1_LOADING_SIZE = 1;
Expand Down
4 changes: 2 additions & 2 deletions tt_metal/hw/inc/wormhole/dev_mem_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,10 @@

#define MEM_BOOT_CODE_BASE 0
#define MEM_MAILBOX_BASE 16
#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + 288)
#define MEM_MAILBOX_END (MEM_MAILBOX_BASE + 1312)
#define MEM_IERISC_MAILBOX_BASE 1024
#define MEM_IERISC_MAILBOX_END (MEM_IERISC_MAILBOX_BASE + 128)
#define MEM_ZEROS_BASE 768
#define MEM_ZEROS_BASE ((MEM_MAILBOX_END + 31) & ~31)
#define MEM_BRISC_FIRMWARE_BASE (MEM_ZEROS_BASE + MEM_ZEROS_SIZE)
#define MEM_NCRISC_FIRMWARE_BASE (MEM_NCRISC_IRAM_BASE)
#define MEM_IERISC_FIRMWARE_BASE 8192
Expand Down
5 changes: 1 addition & 4 deletions tt_metal/hw/inc/wormhole/eth_l1_address_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,11 @@ struct address_map {
// erisc early exit functionality re-uses mailboxes_t::ncrisc_halt_msg_t::stack_save memory
static constexpr std::int32_t ERISC_MEM_MAILBOX_STACK_SAVE = ERISC_MEM_MAILBOX_BASE + 4;

static constexpr std::int32_t PRINT_BUFFER_ER = ERISC_MEM_MAILBOX_BASE + 288 + 16;

static constexpr std::uint32_t PROFILER_L1_BUFFER_ER = PRINT_BUFFER_ER + 256;
static constexpr std::uint32_t PROFILER_L1_BUFFER_ER = ERISC_MEM_MAILBOX_BASE + 288 + 256 + 16;
static constexpr std::uint32_t PROFILER_L1_BUFFER_CONTROL = PROFILER_L1_BUFFER_ER + PROFILER_L1_BUFFER_SIZE;

static constexpr std::int32_t ERISC_L1_KERNEL_CONFIG_BASE = PROFILER_L1_BUFFER_CONTROL + PROFILER_L1_CONTROL_BUFFER_SIZE;

static_assert((PRINT_BUFFER_ER % 32) == 0);
static_assert((PROFILER_L1_BUFFER_ER % 32) == 0);
static_assert((PROFILER_L1_BUFFER_CONTROL % 32) == 0);

Expand Down
18 changes: 10 additions & 8 deletions tt_metal/impl/debug/dprint_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,21 +48,23 @@ static inline float bfloat16_to_float(uint16_t bfloat_val) {
return f;
}

static inline uint32_t GetBaseAddr(Device *device, const CoreCoord &phys_core, int hart_id) {
static inline uint64_t GetBaseAddr(Device *device, const CoreCoord &phys_core, int hart_id) {
// For tensix cores, compute the buffer address for the requested hart.
uint32_t base_addr = PRINT_BUFFER_START + hart_id*PRINT_BUFFER_SIZE;
uint64_t base_addr = GET_MAILBOX_ADDRESS_HOST(dprint_buf);

// Ethernet cores have a different address mapping.
if (tt::llrt::is_ethernet_core(phys_core, device->id())) {
CoreCoord logical_core = device->logical_core_from_ethernet_core(phys_core);
if (device->is_active_ethernet_core(logical_core)) {
base_addr = eth_l1_mem::address_map::PRINT_BUFFER_ER;
base_addr = GET_ETH_MAILBOX_ADDRESS_HOST(dprint_buf);
} else {
base_addr = PRINT_BUFFER_IDLE_ER;
base_addr = GET_IERISC_MAILBOX_ADDRESS_HOST(dprint_buf);
}
}

return base_addr;
dprint_buf_msg_t *buf = reinterpret_cast<dprint_buf_msg_t *>(base_addr);

return reinterpret_cast<uint64_t>(buf->data[hart_id]);
}

static inline int GetNumRiscs(int chip_id, const CoreCoord &core) {
Expand Down Expand Up @@ -325,11 +327,11 @@ static void PrintTypedUint32Array(ostream& stream, int setwidth, uint32_t raw_el
// Used for debug print server startup sequence.
void WriteInitMagic(Device *device, const CoreCoord& core, int hart_id, bool enabled) {
// compute the buffer address for the requested hart
uint32_t base_addr = GetBaseAddr(device, core, hart_id);
uint64_t base_addr = GetBaseAddr(device, core, hart_id);

// TODO(AP): this could use a cleanup - need a different mechanism to know if a kernel is running on device.
// Force wait for first kernel launch by first writing a non-zero and waiting for a zero.
vector<uint32_t> initbuf = vector<uint32_t>(PRINT_BUFFER_SIZE / sizeof(uint32_t), 0);
vector<uint32_t> initbuf = vector<uint32_t>(DPRINT_BUFFER_SIZE / sizeof(uint32_t), 0);
initbuf[0] = uint32_t(enabled ? DEBUG_PRINT_SERVER_STARTING_MAGIC : DEBUG_PRINT_SERVER_DISABLED_MAGIC);
tt::llrt::write_hex_vec_to_core(device->id(), core, initbuf, base_addr);
} // WriteInitMagic
Expand Down Expand Up @@ -677,7 +679,7 @@ bool DebugPrintServerContext::PeekOneHartNonBlocking(

if (rpos < wpos) {
// Now read the entire buffer
from_dev = tt::llrt::read_hex_vec_from_core(chip_id, core, base_addr, PRINT_BUFFER_SIZE);
from_dev = tt::llrt::read_hex_vec_from_core(chip_id, core, base_addr, DPRINT_BUFFER_SIZE);
// at this point rpos,wpos can be stale but not reset to 0 by the producer
// it's ok for the consumer to be behind the latest wpos+rpos from producer
// since the corresponding data in buffer for stale rpos+wpos will not be overwritten
Expand Down

0 comments on commit 5aa33ef

Please sign in to comment.