Skip to content

Commit

Permalink
#0: Cleanup 1 - Remove unnecessary dispatch_d and prefetch RTAs
Browse files Browse the repository at this point in the history
  • Loading branch information
tt-asaigal committed Sep 21, 2024
1 parent e60944d commit c966ecc
Show file tree
Hide file tree
Showing 7 changed files with 97 additions and 124 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -469,18 +469,13 @@ int main(int argc, char **argv) {
0, // prefetch_local_downstream_sem_addr
0, // prefetch_downstream_buffer_pages
num_compute_cores, // max_write_packed_cores
true, // is_dram_variant
true, // is_host_variant
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
true, // is_dram_variant
true, // is_host_variant
};
std::vector<uint32_t> spoof_prefetch_compile_args =
{l1_buf_base,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1640,13 +1640,11 @@ void configure_for_single_chip(Device *device,
prefetch_downstream_cb_sem, // prefetch_d only
dispatch_constants::PREFETCH_D_BUFFER_LOG_PAGE_SIZE,
dispatch_constants::PREFETCH_D_BUFFER_BLOCKS, // prefetch_d only
true,
true,
0,
0,
0,
0,
0
0, // unused: for prefetch_hd <--> dispatch_hd
0, // unused: for prefetch_hd <--> dispatch_hd
0, // unused: for prefetch_hd <--> dispatch_hd
0, // unused: for prefetch_hd <--> dispatch_hd
0 // unused: for prefetch_hd <--> dispatch_hd
};

constexpr NOC my_noc_index = NOC::NOC_0;
Expand All @@ -1665,8 +1663,6 @@ void configure_for_single_chip(Device *device,
prefetch_compile_args[11] = prefetch_d_buffer_base;
prefetch_compile_args[12] = prefetch_d_buffer_pages * (1 << dispatch_constants::PREFETCH_D_BUFFER_LOG_PAGE_SIZE);
prefetch_compile_args[13] = scratch_db_base;
prefetch_compile_args[21] = true;
prefetch_compile_args[22] = false;
CoreCoord phys_prefetch_d_upstream_core =
packetized_path_en_g ? phys_prefetch_relay_demux_core : phys_prefetch_core_g;
configure_kernel_variant<true, false>(program,
Expand All @@ -1690,8 +1686,6 @@ void configure_for_single_chip(Device *device,
prefetch_compile_args[11] = cmddat_q_base;
prefetch_compile_args[12] = cmddat_q_size_g;
prefetch_compile_args[13] = 0;
prefetch_compile_args[21] = false;
prefetch_compile_args[22] = true;
CoreCoord phys_prefetch_h_downstream_core =
packetized_path_en_g ? phys_prefetch_relay_mux_core : phys_prefetch_d_core;
configure_kernel_variant<false, true>(program,
Expand Down Expand Up @@ -1899,18 +1893,11 @@ void configure_for_single_chip(Device *device,
prefetch_downstream_cb_sem,
prefetch_downstream_buffer_pages,
num_compute_cores, // max_write_packed_cores
true,
true,
0,
0,
0,
0,
0,
0,
0,
0,
0,
0
};

CoreCoord phys_upstream_from_dispatch_core = split_prefetcher_g ? phys_prefetch_d_core : phys_prefetch_core_g;
Expand All @@ -1924,8 +1911,6 @@ void configure_for_single_chip(Device *device,
dispatch_compile_args[12] = dispatch_downstream_cb_sem;
dispatch_compile_args[13] = dispatch_h_cb_sem;
dispatch_compile_args[14] = dispatch_d_preamble_size;
dispatch_compile_args[20] = true;
dispatch_compile_args[21] = false;
CoreCoord phys_dispatch_d_downstream_core =
packetized_path_en_g ? phys_dispatch_relay_mux_core : phys_dispatch_h_core;
configure_kernel_variant<true, false>(program,
Expand All @@ -1946,8 +1931,6 @@ void configure_for_single_chip(Device *device,
dispatch_compile_args[12] = dispatch_h_cb_sem;
dispatch_compile_args[13] = dispatch_downstream_cb_sem;
dispatch_compile_args[14] = 0; // preamble size
dispatch_compile_args[20] = false;
dispatch_compile_args[21] = true;
CoreCoord phys_dispatch_h_upstream_core =
packetized_path_en_g ? phys_dispatch_relay_demux_core : phys_dispatch_core;
configure_kernel_variant<false, true>(program,
Expand Down Expand Up @@ -2289,6 +2272,11 @@ void configure_for_multi_chip(Device *device,
prefetch_downstream_cb_sem, // prefetch_d only
dispatch_constants::PREFETCH_D_BUFFER_LOG_PAGE_SIZE,
dispatch_constants::PREFETCH_D_BUFFER_BLOCKS, // prefetch_d only
0, // unused: for prefetch_d <--> dispatch_d
0, // unused: for prefetch_d <--> dispatch_d
0, // unused: for prefetch_d <--> dispatch_d
0, // unused: for prefetch_d <--> dispatch_d
0 // unused: for prefetch_d <--> dispatch_d
};

constexpr NOC my_noc_index = NOC::NOC_0;
Expand Down Expand Up @@ -2623,7 +2611,12 @@ void configure_for_multi_chip(Device *device,
NOC_XY_ENCODING(phys_prefetch_core_g.x, phys_prefetch_core_g.y),
prefetch_downstream_cb_sem,
prefetch_downstream_buffer_pages,
num_compute_cores
num_compute_cores,
0,
0,
0,
0,
0,
};

CoreCoord phys_upstream_from_dispatch_core = split_prefetcher_g ? phys_prefetch_d_core : phys_prefetch_core_g;
Expand Down
2 changes: 1 addition & 1 deletion tests/ttnn/unit_tests/test_single_device_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@


@pytest.mark.parametrize("shape", [(3, 1, 512, 512)])
@pytest.mark.parametrize("device_params", [{"num_command_queues": 2}], indirect=True)
@pytest.mark.parametrize("device_params", [{"num_command_queues": 1}], indirect=True)
def test_single_device_events(device, shape):
pytest.skip("Needs Eth dispatch to run on WH")
# Enable Program Cache and Async Mode
Expand Down
13 changes: 13 additions & 0 deletions tt_metal/common/core_descriptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,4 +154,17 @@ inline CoreCoord get_physical_core_coordinate(const tt_cxy_pair &logical_locatio
return soc_desc.get_physical_core_from_logical_core(CoreCoord(logical_location.x, logical_location.y), core_type);
}

// Computes the worker-core count and the physical extent of the worker grid for a chip.
//
//   chip               - chip whose compute grid size and SoC descriptor are queried
//   num_hw_cqs         - forwarded unchanged to tt::get_compute_grid_size
//   dispatch_core_type - forwarded unchanged to tt::get_compute_grid_size
//
// Returns a tuple of:
//   - the number of worker cores (compute grid x * compute grid y), and
//   - the CoreRange running from the physical location of logical worker (0, 0) to the
//     physical location of logical worker (x - 1, y - 1).
inline std::tuple<uint32_t, CoreRange> get_physical_worker_grid_config(chip_id_t chip, uint8_t num_hw_cqs, CoreType dispatch_core_type) {
    // Logical compute grid dimensions for this chip / CQ count / dispatch core type
    const auto logical_grid = tt::get_compute_grid_size(chip, num_hw_cqs, dispatch_core_type);
    const std::size_t grid_cols = logical_grid.x;
    const std::size_t grid_rows = logical_grid.y;

    const metal_SocDescriptor &soc = tt::Cluster::instance().get_soc_desc(chip);

    // Translate the two logical corners of the worker grid into physical coordinates.
    // Logical worker coordinates start at (0, 0).
    // NOTE(review): grid_cols - 1 / grid_rows - 1 wrap around if either dimension is 0;
    // presumably the compute grid is never empty - confirm against callers.
    const CoreCoord first_phys =
        soc.get_physical_core_from_logical_core(CoreCoord(0, 0), CoreType::WORKER);
    const CoreCoord last_phys =
        soc.get_physical_core_from_logical_core(CoreCoord(grid_cols - 1, grid_rows - 1), CoreType::WORKER);

    // Product is computed in std::size_t and then narrowed to uint32_t
    const uint32_t worker_count = grid_cols * grid_rows;
    return std::make_tuple(worker_count, CoreRange(first_phys, last_phys));
}
} // namespace tt
Loading

0 comments on commit c966ecc

Please sign in to comment.