Skip to content

Commit

Permalink
#10052: [Blackhole bringup] Add pack untilize
Browse files: browse the repository at this point in the history
  • Loading branch information
rtawfik01 committed Jul 17, 2024
1 parent 66b8f3f commit 4a8dd01
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 15 deletions.
50 changes: 37 additions & 13 deletions tt_metal/hw/ckernels/blackhole/metal/llk_api/llk_pack_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,25 +184,49 @@ inline void llk_pack(std::uint32_t tile_index, std::uint32_t output, std::uint32
/*************************************************************************
* LLK PACK UNTILIZE
*************************************************************************/
template <std::uint32_t block_ct_dim = 8>
inline void llk_pack_untilize_init() {
_llk_pack_untilize_init_<block_ct_dim>();
template <std::uint32_t block_ct_dim = 8, std::uint32_t full_ct_dim = block_ct_dim, bool diagonal = false>
inline void llk_pack_untilize_init(std::uint32_t output, const std::uint32_t face_r_dim = FACE_R_DIM, const std::uint32_t num_faces = 4) {
static_assert(diagonal==false && "Diagonal packing is not supported for BH!");
const std::uint32_t output_id = get_output_id(output);

_llk_pack_untilize_init_<block_ct_dim, full_ct_dim, diagonal>(
pack_src_format[output_id],
pack_dst_format[output_id],
face_r_dim,
num_faces
);

// Pack row by row
// if constexpr (diagonal) {
// TT_SETADCXX(p_setadc::PAC, 1-1, 0x0);
// } else {
TT_SETADCXX(p_setadc::PAC, FACE_R_DIM-1, 0x0);
// }
}

template <std::uint32_t block_ct_dim = 8>
inline void llk_pack_untilize(std::uint32_t num_blocks, std::uint32_t output, const std::uint32_t face_r_dim = FACE_R_DIM, const std::uint32_t num_faces = 4, const std::uint32_t block_c_index = 0) {
template <std::uint32_t block_ct_dim = 8, std::uint32_t full_ct_dim = block_ct_dim, bool diagonal = false>
inline void llk_pack_untilize(
std::uint32_t block_rt_dim,
std::uint32_t output,
const std::uint32_t face_r_dim = FACE_R_DIM,
const std::uint32_t num_faces = 4,
const std::uint32_t block_c_index = 0) {

static_assert(diagonal==false && "Diagonal packing is not supported for BH!");
const std::uint32_t output_id = get_output_id(output);
std::uint32_t pack_tile_addr = cb_interface[output_id].fifo_wr_ptr - 1 + SCALE_DATUM_SIZE(pack_dst_format[output_id], (block_c_index * ((num_faces>1) ? num_faces/2 : 1) * block_ct_dim * FACE_R_DIM))/16;
std::uint32_t pack_tile_addr = cb_interface[output_id].fifo_wr_ptr - 1 + SCALE_DATUM_SIZE(pack_dst_format[output_id], (block_c_index * ((num_faces>2) ? num_faces/2 : num_faces) * block_ct_dim * FACE_C_DIM))/16;

for (std::uint32_t block=0; block<num_blocks; block++) {
for (std::uint32_t block_rt=0; block_rt<block_rt_dim; block_rt++) {

_llk_pack_untilize_<block_ct_dim>(
_llk_pack_untilize_<block_ct_dim, full_ct_dim, diagonal>(
pack_tile_addr,
pack_dst_format[output_id]
pack_dst_format[output_id],
face_r_dim,
num_faces,
block_rt*block_ct_dim
);

pack_tile_addr += block_ct_dim*cb_interface[output_id].fifo_page_size;
pack_tile_addr += full_ct_dim*cb_interface[output_id].fifo_page_size;
}
}

Expand Down Expand Up @@ -241,13 +265,13 @@ inline void llk_pack_dest_section_done() {
_llk_pack_dest_section_done_<DstSync::SyncHalf, is_fp32_dest_acc_en>();
}

template <bool untilize = false>
template <bool untilize = false, bool diagonal = false>
inline void llk_init_packer_dest_offset_registers(const std::uint32_t pack_output = 16) {
const std::uint32_t output_id = get_output_id(pack_output);
const std::uint32_t face_r_dim = get_output_face_r_dim(output_id);
const bool narrow_tile = get_output_narrow_tile(output_id);

_llk_init_packer_dest_offset_registers_<DstSync::SyncHalf, DstTileFaceLayout::RowMajor, untilize>(
_llk_init_packer_dest_offset_registers_<DstSync::SyncHalf, DstTileFaceLayout::RowMajor>(
face_r_dim,
narrow_tile
);
Expand All @@ -259,7 +283,7 @@ inline void llk_pack_dest_init(const std::uint32_t pack_output = 16) {
const std::uint32_t face_r_dim = get_output_face_r_dim(output_id);
const bool narrow_tile = get_output_narrow_tile(output_id);

_llk_pack_dest_init_<DstSync::SyncHalf, DstTileFaceLayout::RowMajor, untilize, is_fp32_dest_acc_en>(
_llk_pack_dest_init_<DstSync::SyncHalf, DstTileFaceLayout::RowMajor, is_fp32_dest_acc_en>(
face_r_dim,
narrow_tile
);
Expand Down
2 changes: 1 addition & 1 deletion tt_metal/include/compute_kernel_api/pack_untilize.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ ALWI void pack_untilize_init(uint32_t icb, uint32_t ocb)
{
MATH(( llk_math_eltwise_unary_datacopy_init<A2D, BroadcastType::NONE, DST_ACCUM_MODE>(false /*transpose of faces*/, false /*transpose within 16x16 face*/, icb) ));
MATH(( llk_math_pack_sync_init<DST_ACCUM_MODE>() ));
MATH(( llk_math_hw_configure_disaggregated() ));
MATH(( llk_math_hw_configure_disaggregated<true>() ));

PACK(( llk_pack_hw_configure_disaggregated<false, DST_ACCUM_MODE>(ocb) ));
PACK(( llk_pack_untilize_init<block_ct_dim>(ocb) ));
Expand Down

0 comments on commit 4a8dd01

Please sign in to comment.