From 3d062e9ac4eb87d12850d130b358eff3cd6f3326 Mon Sep 17 00:00:00 2001 From: beomki-yeo Date: Sat, 27 Apr 2024 17:09:07 +0200 Subject: [PATCH] Skip holes and continue propagation for GPU CKF --- .../include/traccc/finding/candidate_link.hpp | 5 +- core/include/traccc/finding/ckf_aborter.hpp | 53 ++++ .../traccc/finding/finding_algorithm.hpp | 3 +- .../traccc/finding/finding_algorithm.ipp | 288 ++++++++---------- .../include/traccc/finding/finding_config.hpp | 5 +- device/common/CMakeLists.txt | 4 + .../edm/device/finding_global_counter.hpp | 3 + .../finding/device/add_links_for_holes.hpp | 29 ++ .../traccc/finding/device/build_tracks.hpp | 10 +- .../traccc/finding/device/find_tracks.hpp | 7 +- .../device/impl/add_links_for_holes.ipp | 63 ++++ .../finding/device/impl/build_tracks.ipp | 64 +++- .../device/impl/count_measurements.ipp | 6 + .../finding/device/impl/find_tracks.ipp | 24 +- .../device/impl/propagate_to_next_surface.ipp | 6 + .../finding/device/impl/prune_tracks.ipp | 43 +++ .../traccc/finding/device/prune_tracks.hpp | 32 ++ .../traccc/cuda/finding/finding_algorithm.hpp | 3 +- device/cuda/src/finding/finding_algorithm.cu | 129 ++++++-- examples/run/cuda/seq_example_cuda.cpp | 8 + 20 files changed, 576 insertions(+), 209 deletions(-) create mode 100644 core/include/traccc/finding/ckf_aborter.hpp create mode 100644 device/common/include/traccc/finding/device/add_links_for_holes.hpp create mode 100644 device/common/include/traccc/finding/device/impl/add_links_for_holes.ipp create mode 100644 device/common/include/traccc/finding/device/impl/prune_tracks.ipp create mode 100644 device/common/include/traccc/finding/device/prune_tracks.hpp diff --git a/core/include/traccc/finding/candidate_link.hpp b/core/include/traccc/finding/candidate_link.hpp index 4852fcc81f..72414872cd 100644 --- a/core/include/traccc/finding/candidate_link.hpp +++ b/core/include/traccc/finding/candidate_link.hpp @@ -20,7 +20,7 @@ namespace traccc { struct candidate_link { // Type of index - using link_index_type = thrust::pair; + using link_index_type = thrust::pair; // Index of link from the previous step link_index_type previous; @@ -30,9 +30,6 @@ struct candidate_link { // Index to the initial seed unsigned int seed_idx; - - // How many times it skipped a surface - unsigned int n_skipped; }; } // namespace traccc diff --git a/core/include/traccc/finding/ckf_aborter.hpp b/core/include/traccc/finding/ckf_aborter.hpp new file mode 100644 index 0000000000..8a1e55fabe --- /dev/null +++ b/core/include/traccc/finding/ckf_aborter.hpp @@ -0,0 +1,53 @@ +/** TRACCC library, part of the ACTS project (R&D line) + * + * (c) 2023-2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// Project include(s) +#include "detray/definitions/detail/qualifiers.hpp" +#include "detray/propagator/base_actor.hpp" +#include "detray/propagator/base_stepper.hpp" + +// System include(s) +#include + +namespace traccc { + +/// Aborter triggered when the next surface is reached +struct ckf_aborter : detray::actor { + struct state { + // minimal step length to prevent from staying on the same surface + scalar min_step_length = 0.f; + bool success = false; + + unsigned int count = 0; + unsigned int max_count = 100; + }; + + template + DETRAY_HOST_DEVICE void operator()(state &abrt_state, + propagator_state_t &prop_state) const { + + auto &navigation = prop_state._navigation; + auto &stepping = prop_state._stepping; + + abrt_state.count++; + + // Abort at the next sensitive surface + if (navigation.is_on_sensitive() && + stepping._s > abrt_state.min_step_length) { + prop_state._heartbeat &= navigation.abort(); + abrt_state.success = true; + } + + if (abrt_state.count > abrt_state.max_count) { + prop_state._heartbeat &= navigation.abort(); + } + } +}; + +} // namespace traccc \ No newline at end of file diff --git a/core/include/traccc/finding/finding_algorithm.hpp b/core/include/traccc/finding/finding_algorithm.hpp index fc9e96bfa3..cb8cfb28c5 100644 --- a/core/include/traccc/finding/finding_algorithm.hpp +++ b/core/include/traccc/finding/finding_algorithm.hpp @@ -12,6 +12,7 @@ #include "traccc/edm/measurement.hpp" #include "traccc/edm/track_candidate.hpp" #include "traccc/edm/track_state.hpp" +#include "traccc/finding/ckf_aborter.hpp" #include "traccc/finding/finding_config.hpp" #include "traccc/finding/interaction_register.hpp" #include "traccc/fitting/kalman_filter/gain_matrix_updater.hpp" @@ -62,7 +63,7 @@ class finding_algorithm using actor_type = detray::actor_chain, interactor, - detray::next_surface_aborter>; + ckf_aborter>; using propagator_type = detray::propagator; diff --git a/core/include/traccc/finding/finding_algorithm.ipp b/core/include/traccc/finding/finding_algorithm.ipp index a01167f089..15b8c4eb12 100644 --- a/core/include/traccc/finding/finding_algorithm.ipp +++ b/core/include/traccc/finding/finding_algorithm.ipp @@ -14,6 +14,7 @@ // System include #include +#include #include namespace traccc { @@ -25,8 +26,6 @@ finding_algorithm::operator()( const measurement_collection_types::host& measurements, const bound_track_parameters_collection_types::host& seeds) const { - track_candidate_container_types::host output_candidates; - /***************************************************************** * Measurement Operations *****************************************************************/ @@ -47,6 +46,7 @@ finding_algorithm::operator()( uniques[i], measurement_sort_comp()); upper_bounds.push_back(std::distance(measurements.begin(), up)); } + const auto n_meas = measurements.size(); // Get the number of measurements of each module std::vector sizes(n_modules); @@ -60,10 +60,6 @@ finding_algorithm::operator()( barcodes.push_back(uniques[i].surface_link); } - /********************** - * Find tracks - **********************/ - std::vector> links; links.resize(m_cfg.max_track_candidates_per_track); @@ -86,7 +82,8 @@ finding_algorithm::operator()( std::vector out_params; - for (unsigned int step = 0; step < m_cfg.max_track_candidates_per_track; + for (int step = 0; + step < static_cast(m_cfg.max_track_candidates_per_track); step++) { // Iterate over input parameters @@ -101,11 +98,14 @@ finding_algorithm::operator()( out_params.reserve(n_in_params); // Previous step ID - const unsigned int previous_step = - (step == 0) ? std::numeric_limits::max() : step - 1; + const int previous_step = + (step == 0) ? std::numeric_limits::max() : step - 1; std::fill(n_trks_per_seed.begin(), n_trks_per_seed.end(), 0); + // Parameters updated by Kalman fitter + std::vector updated_params; + for (unsigned int in_param_id = 0; in_param_id < n_in_params; in_param_id++) { @@ -115,11 +115,6 @@ finding_algorithm::operator()( ? in_param_id : links[step - 1][param_to_link[step - 1][in_param_id]] .seed_idx); - unsigned int skip_counter = - (step == 0 - ? 0 - : links[step - 1][param_to_link[step - 1][in_param_id]] - .n_skipped); /************************* * Material interaction @@ -138,35 +133,34 @@ finding_algorithm::operator()( std::abs( sf.cos_angle(ctx, in_param.dir(), in_param.bound_local()))); - /************************* - * CKF - *************************/ - // Get barcode and measurements range on surface const auto bcd = in_param.surface_link(); std::pair range; // Find the corresponding index of bcd in barcode vector - unsigned int bcd_id; - if (std::binary_search(barcodes.begin(), barcodes.end(), bcd)) { - const auto lo2 = - std::lower_bound(barcodes.begin(), barcodes.end(), bcd); - bcd_id = std::distance(barcodes.begin(), lo2); - - if (lo2 == barcodes.begin()) { - range.first = 0u; - range.second = upper_bounds[bcd_id]; - } else { - range.first = upper_bounds[bcd_id - 1]; - range.second = upper_bounds[bcd_id]; - } - } else { + + const auto lo2 = + std::lower_bound(barcodes.begin(), barcodes.end(), bcd); + + const auto bcd_id = std::distance(barcodes.begin(), lo2); + + if (lo2 == barcodes.begin()) { + range.first = 0u; + range.second = upper_bounds[bcd_id]; + } else if (lo2 == barcodes.end()) { range.first = 0u; range.second = 0u; + } else { + range.first = upper_bounds[bcd_id - 1]; + range.second = upper_bounds[bcd_id]; } unsigned int n_branches = 0; + /***************************************************************** + * Find tracks (CKF) + *****************************************************************/ + // Iterate over the measurements for (unsigned int item_id = range.first; item_id < range.second; item_id++) { @@ -194,149 +188,88 @@ finding_algorithm::operator()( // Found a good measurement if (chi2 < m_cfg.chi2_max) { - - // Current link ID - unsigned int cur_link_id = - static_cast(links[step].size()); - n_branches++; n_trks_per_seed[orig_param_id]++; - links[step].push_back({{previous_step, in_param_id}, - item_id, - orig_param_id, - skip_counter}); - - /********************************* - * Propagate to the next surface - *********************************/ - - // Create propagator state - typename propagator_type::state propagation( - trk_state.filtered(), field, det); - propagation._stepping.template set_constraint< - detray::step::constraint::e_accuracy>( - m_cfg.propagation.stepping.step_constraint); - - typename detray::pathlimit_aborter::state s0; - typename detray::parameter_transporter< - transform3_type>::state s1; - typename interactor::state s3; - typename interaction_register::state s2{s3}; - typename detray::next_surface_aborter::state s4{ - m_cfg.min_step_length_for_surface_aborter}; - // typename propagation::print_inspector::state s5{}; - - // @TODO: Should be removed once detray is fixed to set the - // volume in the constructor - propagation._navigation.set_volume( - trk_state.filtered().surface_link().volume()); - - // Propagate to the next surface - propagator.propagate_sync(propagation, - std::tie(s0, s1, s2, s3, s4)); - - /* - propagator.propagate_sync(propagation, - std::tie(s0, s1, s2, s3, s4, s5)); - */ - - // If a surface found, add the parameter for the next - // step - if (s4.success) { - out_params.push_back( - propagation._stepping._bound_params); - param_to_link[step].push_back(cur_link_id); - } - // Unless the track found a surface, it is considered a - // tip - else if (!s4.success && - step >= m_cfg.min_track_candidates_per_track - 1) { - tips.push_back({step, cur_link_id}); - } - - // If no more CKF step is expected, current candidate is - // kept as a tip - if (s4.success && - step == m_cfg.max_track_candidates_per_track - 1) { - tips.push_back({step, cur_link_id}); - } + links[step].push_back( + {{previous_step, in_param_id}, item_id, orig_param_id}); + updated_params.push_back(trk_state.filtered()); } } - // After the loop over the measurements + + /***************************************************************** + * Add a dummy links in case of no branches + *****************************************************************/ if (n_branches == 0) { // let's skip this CKF step for the current track candidate if (n_trks_per_seed[orig_param_id] >= m_cfg.max_num_branches_per_initial_seed) { - continue; } + // Put an invalid link with max item id + links[step].push_back({{previous_step, in_param_id}, + std::numeric_limits::max(), + orig_param_id}); bound_track_parameters bound_param(in_param.surface_link(), in_param.vector(), in_param.covariance()); + updated_params.push_back(bound_param); + n_branches++; + } + } - measurement dummy_meas; - - dummy_meas.local = in_param.bound_local(); - dummy_meas.variance = dummy_meas.variance + point2{10., 10.}; - dummy_meas.surface_link = in_param.surface_link(); - - track_state trk_state(dummy_meas); - - // Run the Kalman update - sf.template visit_mask>( - trk_state, bound_param); - - unsigned int cur_link_id = - static_cast(links[step].size()); - - links[step].push_back({{previous_step, in_param_id}, - std::numeric_limits::max(), - orig_param_id, - skip_counter + 1}); + /********************************* + * Propagate to the next surface + *********************************/ - if (skip_counter + 1 > m_cfg.max_num_skipping_per_cand) { - tips.push_back({step, cur_link_id}); - continue; - // exit from param_id loop - } + const unsigned int n_links = links[step].size(); + for (unsigned int link_id = 0; link_id < n_links; link_id++) { - // Create propagator state - typename propagator_type::state propagation( - trk_state.filtered(), field, det); - propagation._stepping.template set_constraint< - detray::step::constraint::e_accuracy>( + const auto& param = updated_params[link_id]; + // Create propagator state + typename propagator_type::state propagation(param, field, det); + propagation._stepping + .template set_constraint( m_cfg.propagation.stepping.step_constraint); - typename detray::pathlimit_aborter::state s0; - typename detray::parameter_transporter::state - s1; - typename interactor::state s3; - typename interaction_register::state s2{s3}; - typename detray::next_surface_aborter::state s4{ - m_cfg.min_step_length_for_surface_aborter}; - - propagation._navigation.set_volume( - trk_state.filtered().surface_link().volume()); - - // Propagate to the next surface - propagator.propagate_sync(propagation, - std::tie(s0, s1, s2, s3, s4)); - - // If a surface found, add the parameter for the next - // step - if (s4.success) { - out_params.push_back(propagation._stepping._bound_params); - param_to_link[step].push_back(cur_link_id); - } - // Unless the track found a surface, it is considered a - // tip - else if (!s4.success && - step >= m_cfg.min_track_candidates_per_track - 1) { - tips.push_back({step, cur_link_id}); - } + typename detray::pathlimit_aborter::state s0; + typename detray::parameter_transporter::state s1; + typename interactor::state s3; + typename interaction_register::state s2{s3}; + typename ckf_aborter::state s4{ + m_cfg.min_step_length_for_surface_aborter}; + + // @TODO: Should be removed once detray is fixed to set the + // volume in the constructor + propagation._navigation.set_volume(param.surface_link().volume()); + + // Propagate to the next surface + propagator.propagate_sync(propagation, + std::tie(s0, s1, s2, s3, s4)); + + // If a surface found, add the parameter for the next + // step + if (s4.success) { + out_params.push_back(propagation._stepping._bound_params); + param_to_link[step].push_back(link_id); + } + // Unless the track found a surface, it is considered a + // tip + else if (!s4.success && + step >= static_cast( + m_cfg.min_track_candidates_per_track) - + 1) { + tips.push_back({step, link_id}); + } + + // If no more CKF step is expected, current candidate is + // kept as a tip + if (s4.success && + step == static_cast(m_cfg.max_track_candidates_per_track) - + 1) { + tips.push_back({step, link_id}); } } @@ -349,35 +282,52 @@ finding_algorithm::operator()( **********************/ // Number of found tracks = number of tips + track_candidate_container_types::host output_candidates; output_candidates.reserve(tips.size()); for (const auto& tip : tips) { + // Get the link corresponding to tip + auto L = links[tip.first][tip.second]; - // Skip if the number of tracks candidates is too small - if (tip.first + 1 < m_cfg.min_track_candidates_per_track) { - continue; + // Count the number of skipped steps + unsigned int n_skipped{0u}; + while (true) { + + if (L.meas_idx > n_meas) { + n_skipped++; + } + + if (L.previous.first == 0u) { + break; + } + + const unsigned int link_pos = + param_to_link[L.previous.first][L.previous.second]; + L = links[L.previous.first][link_pos]; } - // Get the link corresponding to tip - auto L = links[tip.first][tip.second]; + const unsigned int n_cands = tip.first + 1 - n_skipped; // Skip if the number of tracks candidates is too small - if (tip.first + 1 - L.n_skipped < - m_cfg.min_track_candidates_per_track) { + if (n_cands < m_cfg.min_track_candidates_per_track || + n_cands > m_cfg.max_track_candidates_per_track) { continue; } + // Retrieve tip + L = links[tip.first][tip.second]; + vecmem::vector cands_per_track; - cands_per_track.resize(tip.first + 1 - L.n_skipped); + cands_per_track.resize(n_cands); // Reversely iterate to fill the track candidates for (auto it = cands_per_track.rbegin(); it != cands_per_track.rend(); it++) { - while (L.meas_idx > measurements.size()) { - - if (L.previous.first > tip.first + 1) - break; // should not happen. + while (L.meas_idx > n_meas) { + if (L.previous.first < 0) { + break; + } const auto link_pos = param_to_link[L.previous.first][L.previous.second]; @@ -385,8 +335,12 @@ finding_algorithm::operator()( L = links[L.previous.first][link_pos]; } - auto& cand = *it; + // Break if the measurement is still invalid + if (L.meas_idx > measurements.size()) { + break; + } + auto& cand = *it; cand = measurements.at(L.meas_idx); // Break the loop if the iterator is at the first candidate and diff --git a/core/include/traccc/finding/finding_config.hpp b/core/include/traccc/finding/finding_config.hpp index 8b6ff41b3c..a7d997c3f8 100644 --- a/core/include/traccc/finding/finding_config.hpp +++ b/core/include/traccc/finding/finding_config.hpp @@ -30,14 +30,11 @@ struct finding_config { unsigned int max_num_branches_per_initial_seed = std::numeric_limits::max(); - /// Maximum allowed number of skipped steps per candidate - unsigned int max_num_skipping_per_cand = 3; - /// Minimum step length that track should make to reach the next surface. It /// should be set higher than the overstep tolerance not to make it stay on /// the same surface scalar_t min_step_length_for_surface_aborter = - 0.1f * detray::unit::mm; + 1.f * detray::unit::mm; /// Maximum Chi-square that is allowed for branching scalar_t chi2_max = 30.f; diff --git a/device/common/CMakeLists.txt b/device/common/CMakeLists.txt index f33903f976..31aeef8e17 100644 --- a/device/common/CMakeLists.txt +++ b/device/common/CMakeLists.txt @@ -63,14 +63,18 @@ traccc_add_library( traccc_device_common device_common TYPE SHARED "include/traccc/finding/device/build_tracks.hpp" "include/traccc/finding/device/count_measurements.hpp" "include/traccc/finding/device/find_tracks.hpp" + "include/traccc/finding/device/add_links_for_holes.hpp" "include/traccc/finding/device/make_barcode_sequence.hpp" "include/traccc/finding/device/propagate_to_next_surface.hpp" + "include/traccc/finding/device/prune_tracks.hpp" "include/traccc/finding/device/impl/apply_interaction.ipp" "include/traccc/finding/device/impl/build_tracks.ipp" "include/traccc/finding/device/impl/count_measurements.ipp" "include/traccc/finding/device/impl/find_tracks.ipp" + "include/traccc/finding/device/impl/add_links_for_holes.ipp" "include/traccc/finding/device/impl/make_barcode_sequence.ipp" "include/traccc/finding/device/impl/propagate_to_next_surface.ipp" + "include/traccc/finding/device/impl/prune_tracks.ipp" # Track fitting funtions(s). "include/traccc/fitting/device/fit.hpp" "include/traccc/fitting/device/impl/fit.ipp" diff --git a/device/common/include/traccc/edm/device/finding_global_counter.hpp b/device/common/include/traccc/edm/device/finding_global_counter.hpp index 977052ad12..81143854cf 100644 --- a/device/common/include/traccc/edm/device/finding_global_counter.hpp +++ b/device/common/include/traccc/edm/device/finding_global_counter.hpp @@ -19,6 +19,9 @@ struct finding_global_counter { // Number of parameters for the next step unsigned int n_out_params; + + // Number of valid tracks that meet criteria + unsigned int n_valid_tracks; }; } // namespace traccc::device diff --git a/device/common/include/traccc/finding/device/add_links_for_holes.hpp b/device/common/include/traccc/finding/device/add_links_for_holes.hpp new file mode 100644 index 0000000000..409349abcd --- /dev/null +++ b/device/common/include/traccc/finding/device/add_links_for_holes.hpp @@ -0,0 +1,29 @@ +/** TRACCC library, part of the ACTS project (R&D line) + * + * (c) 2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// Project include(s). +#include "traccc/definitions/qualifiers.hpp" + +namespace traccc::device { + +/// Function to add a dummy link in case of a hole + +TRACCC_DEVICE inline void add_links_for_holes( + std::size_t globalIndex, + vecmem::data::vector_view n_candidates_view, + bound_track_parameters_collection_types::const_view in_params_view, + const unsigned int step, const unsigned int& n_max_candidates, + bound_track_parameters_collection_types::view out_params_view, + vecmem::data::vector_view links_view, + unsigned int& n_total_candidates); + +} // namespace traccc::device + +// Include the implementation. +#include "traccc/finding/device/impl/add_links_for_holes.ipp" \ No newline at end of file diff --git a/device/common/include/traccc/finding/device/build_tracks.hpp b/device/common/include/traccc/finding/device/build_tracks.hpp index ef4dda7657..0039baf0f3 100644 --- a/device/common/include/traccc/finding/device/build_tracks.hpp +++ b/device/common/include/traccc/finding/device/build_tracks.hpp @@ -18,22 +18,28 @@ namespace traccc::device { /// we can build a full track by iterating from a tip link backwardly. /// /// @param[in] globalIndex The index of the current thread +/// @param[in] cfg Track finding config object /// @param[in] measurements_view Measurements container view /// @param[in] seeds_view Seed container view /// @param[in] link_view Link container view /// @param[in] param_to_link_view Container for param index -> link index /// @param[in] tips_view Tip link container view /// @param[out] track_candidates_view Track candidate container view +/// @param[out] valid_indices_view Valid indices meeting criteria +/// @param[out] n_valid_tracks The number of valid tracks meeting criteria +template TRACCC_DEVICE inline void build_tracks( - std::size_t globalIndex, + std::size_t globalIndex, const config_t cfg, measurement_collection_types::const_view measurements_view, bound_track_parameters_collection_types::const_view seeds_view, vecmem::data::jagged_vector_view links_view, vecmem::data::jagged_vector_view param_to_link_view, vecmem::data::vector_view tips_view, - track_candidate_container_types::view track_candidates_view); + track_candidate_container_types::view track_candidates_view, + vecmem::data::vector_view valid_indices_view, + unsigned int& n_valid_tracks); } // namespace traccc::device diff --git a/device/common/include/traccc/finding/device/find_tracks.hpp b/device/common/include/traccc/finding/device/find_tracks.hpp index ea0a9f27f3..210e7559a0 100644 --- a/device/common/include/traccc/finding/device/find_tracks.hpp +++ b/device/common/include/traccc/finding/device/find_tracks.hpp @@ -32,8 +32,10 @@ namespace traccc::device { /// @param[in] step Step index /// @param[in] n_max_candidates Number of maximum candidates /// @param[out] out_params_view Output parameters +/// @param[out] n_candidates_view Number of candidates per input parameter /// @param[out] links_view link container for the current step -/// @param[out] n_candidates The number of candidates for the current step +/// @param[out] n_total_candidates The number of total candidates for the +/// current step /// template TRACCC_DEVICE inline void find_tracks( @@ -46,8 +48,9 @@ TRACCC_DEVICE inline void find_tracks( vecmem::data::vector_view ref_meas_idx_view, const unsigned int step, const unsigned int& n_max_candidates, bound_track_parameters_collection_types::view out_params_view, + vecmem::data::vector_view n_candidates_view, vecmem::data::vector_view links_view, - unsigned int& n_candidates); + unsigned int& n_total_candidates); } // namespace traccc::device diff --git a/device/common/include/traccc/finding/device/impl/add_links_for_holes.ipp b/device/common/include/traccc/finding/device/impl/add_links_for_holes.ipp new file mode 100644 index 0000000000..9e71eb1b88 --- /dev/null +++ b/device/common/include/traccc/finding/device/impl/add_links_for_holes.ipp @@ -0,0 +1,63 @@ +/** TRACCC library, part of the ACTS project (R&D line) + * + * (c) 2023 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +namespace traccc::device { + +TRACCC_DEVICE inline void add_links_for_holes( + std::size_t globalIndex, + vecmem::data::vector_view n_candidates_view, + bound_track_parameters_collection_types::const_view in_params_view, + const unsigned int step, const unsigned int& n_max_candidates, + bound_track_parameters_collection_types::view out_params_view, + vecmem::data::vector_view links_view, + unsigned int& n_total_candidates) { + + // Number of candidates per parameter + vecmem::device_vector n_candidates(n_candidates_view); + + if (globalIndex >= n_candidates.size()) { + return; + } + + // Input parameters + bound_track_parameters_collection_types::const_device in_params( + in_params_view); + + // Output parameters + bound_track_parameters_collection_types::device out_params(out_params_view); + + // Links + vecmem::device_vector links(links_view); + + // Last step ID + const unsigned int previous_step = + (step == 0) ? std::numeric_limits::max() : step - 1; + + if (n_candidates[globalIndex] == 0u) { + + // Add measurement candidates to link + vecmem::device_atomic_ref num_total_candidates( + n_total_candidates); + + const unsigned int l_pos = num_total_candidates.fetch_add(1); + + if (l_pos >= n_max_candidates) { + + n_total_candidates = n_max_candidates; + return; + } + + // Add a dummy link + links[l_pos] = {{previous_step, globalIndex}, + std::numeric_limits::max()}; + out_params[l_pos] = in_params[globalIndex]; + } +} + +} // namespace traccc::device \ No newline at end of file diff --git a/device/common/include/traccc/finding/device/impl/build_tracks.ipp b/device/common/include/traccc/finding/device/impl/build_tracks.ipp index 84899eca21..bd5b12a5d7 100644 --- a/device/common/include/traccc/finding/device/impl/build_tracks.ipp +++ b/device/common/include/traccc/finding/device/impl/build_tracks.ipp @@ -9,15 +9,18 @@ namespace traccc::device { +template TRACCC_DEVICE inline void build_tracks( - std::size_t globalIndex, + std::size_t globalIndex, const config_t cfg, measurement_collection_types::const_view measurements_view, bound_track_parameters_collection_types::const_view seeds_view, vecmem::data::jagged_vector_view links_view, vecmem::data::jagged_vector_view param_to_link_view, vecmem::data::vector_view tips_view, - track_candidate_container_types::view track_candidates_view) { + track_candidate_container_types::view track_candidates_view, + vecmem::data::vector_view valid_indices_view, + unsigned int& n_valid_tracks) { measurement_collection_types::const_device measurements(measurements_view); @@ -34,6 +37,8 @@ TRACCC_DEVICE inline void build_tracks( track_candidate_container_types::device track_candidates( track_candidates_view); + vecmem::device_vector valid_indices(valid_indices_view); + if (globalIndex >= tips.size()) { return; } @@ -44,13 +49,55 @@ TRACCC_DEVICE inline void build_tracks( // Get the link corresponding to tip auto L = links[tip.first][tip.second]; + const unsigned int n_meas = measurements.size(); + + // Count the number of skipped steps + unsigned int n_skipped{0u}; + while (true) { + + if (L.meas_idx > n_meas) { + n_skipped++; + } + + if (L.previous.first == 0u) { + break; + } + + const unsigned int link_pos = + param_to_link[L.previous.first][L.previous.second]; + L = links[L.previous.first][link_pos]; + } + + // Retrieve tip + L = links[tip.first][tip.second]; + + const unsigned int n_cands = tip.first + 1 - n_skipped; // Resize the candidates with the exact size - cands_per_track.resize(tip.first + 1); + cands_per_track.resize(n_cands); + + unsigned int i = 0; // Reversely iterate to fill the track candidates for (auto it = cands_per_track.rbegin(); it != cands_per_track.rend(); it++) { + i++; + + while (L.meas_idx > n_meas) { + if (L.previous.first < 0) { + break; + } + + const auto link_pos = + param_to_link[L.previous.first][L.previous.second]; + + L = links[L.previous.first][link_pos]; + } + + // Break if the measurement is still invalid + if (L.meas_idx > measurements.size()) { + break; + } auto& cand = *it; cand = {measurements.at(L.meas_idx)}; @@ -66,6 +113,17 @@ TRACCC_DEVICE inline void build_tracks( L = links[L.previous.first][l_pos]; } + + // Criteria for valid tracks + if (n_cands >= cfg.min_track_candidates_per_track && + n_cands <= cfg.max_track_candidates_per_track) { + + vecmem::device_atomic_ref num_valid_tracks( + n_valid_tracks); + + const unsigned int pos = num_valid_tracks.fetch_add(1); + valid_indices[pos] = globalIndex; + } } } // namespace traccc::device \ No newline at end of file diff --git a/device/common/include/traccc/finding/device/impl/count_measurements.ipp b/device/common/include/traccc/finding/device/impl/count_measurements.ipp index 058b76445e..0fe0402532 100644 --- a/device/common/include/traccc/finding/device/impl/count_measurements.ipp +++ b/device/common/include/traccc/finding/device/impl/count_measurements.ipp @@ -34,6 +34,12 @@ TRACCC_DEVICE inline void count_measurements( const auto bcd = params.at(globalIndex).surface_link(); const auto lo = thrust::lower_bound(thrust::seq, barcodes.begin(), barcodes.end(), bcd); + + // If barcode is not found (no measurement) + if (lo == barcodes.end()) { + return; + } + const auto bcd_id = std::distance(barcodes.begin(), lo); // Get the reference measurement index and the number of measurements per diff --git a/device/common/include/traccc/finding/device/impl/find_tracks.ipp b/device/common/include/traccc/finding/device/impl/find_tracks.ipp index 20d4c1ef0f..64ebd64167 100644 --- a/device/common/include/traccc/finding/device/impl/find_tracks.ipp +++ b/device/common/include/traccc/finding/device/impl/find_tracks.ipp @@ -26,8 +26,9 @@ TRACCC_DEVICE inline void find_tracks( vecmem::data::vector_view ref_meas_idx_view, const unsigned int step, const unsigned int& n_max_candidates, bound_track_parameters_collection_types::view out_params_view, + vecmem::data::vector_view n_candidates_view, vecmem::data::vector_view links_view, - unsigned int& n_candidates) { + unsigned int& n_total_candidates) { // Detector detector_t det(det_data); @@ -42,6 +43,9 @@ TRACCC_DEVICE inline void find_tracks( // Output parameters bound_track_parameters_collection_types::device out_params(out_params_view); + // Number of candidates per parameter + vecmem::device_vector n_candidates(n_candidates_view); + // Links vecmem::device_vector links(links_view); @@ -53,8 +57,8 @@ TRACCC_DEVICE inline void find_tracks( vecmem::device_vector ref_meas_idx(ref_meas_idx_view); // Last step ID - const unsigned int previous_step = - (step == 0) ? std::numeric_limits::max() : step - 1; + const int previous_step = + (step == 0) ? std::numeric_limits::max() : step - 1; const unsigned int n_measurements_sum = n_measurements_prefix_sum.back(); const unsigned int stride = globalIndex * cfg.n_measurements_per_thread; @@ -98,18 +102,24 @@ TRACCC_DEVICE inline void find_tracks( if (chi2 < cfg.chi2_max) { // Add measurement candidates to link - vecmem::device_atomic_ref num_candidates( - n_candidates); + vecmem::device_atomic_ref num_total_candidates( + n_total_candidates); - const unsigned int l_pos = num_candidates.fetch_add(1); + const unsigned int l_pos = num_total_candidates.fetch_add(1); if (l_pos >= n_max_candidates) { - n_candidates = n_max_candidates; + n_total_candidates = n_max_candidates; return; } links[l_pos] = {{previous_step, in_param_id}, meas_idx}; + // Increase the number of candidates (or branches) per input + // parameter + vecmem::device_atomic_ref num_candidates( + n_candidates[in_param_id]); + num_candidates.fetch_add(1); + out_params[l_pos] = trk_state.filtered(); } } diff --git a/device/common/include/traccc/finding/device/impl/propagate_to_next_surface.ipp b/device/common/include/traccc/finding/device/impl/propagate_to_next_surface.ipp index 68610a3f8a..ea79f128bc 100644 --- a/device/common/include/traccc/finding/device/impl/propagate_to_next_surface.ipp +++ b/device/common/include/traccc/finding/device/impl/propagate_to_next_surface.ipp @@ -102,6 +102,12 @@ TRACCC_DEVICE inline void propagate_to_next_surface( else if (!s4.success && step >= cfg.min_track_candidates_per_track - 1) { tips.push_back({step, globalIndex}); } + + // If no more CKF step is expected, current candidate is + // kept as a tip + if (s4.success && step == cfg.max_track_candidates_per_track - 1) { + tips.push_back({step, globalIndex}); + } } } // namespace traccc::device diff --git a/device/common/include/traccc/finding/device/impl/prune_tracks.ipp b/device/common/include/traccc/finding/device/impl/prune_tracks.ipp new file mode 100644 index 0000000000..b25335b504 --- /dev/null +++ b/device/common/include/traccc/finding/device/impl/prune_tracks.ipp @@ -0,0 +1,43 @@ +/** TRACCC library, part of the ACTS project (R&D line) + * + * (c) 2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +namespace traccc::device { + +TRACCC_DEVICE inline void prune_tracks( + std::size_t globalIndex, + track_candidate_container_types::const_view track_candidates_view, + vecmem::data::vector_view valid_indices_view, + track_candidate_container_types::view prune_candidates_view) { + + track_candidate_container_types::const_device track_candidates( + track_candidates_view); + vecmem::device_vector valid_indices(valid_indices_view); + track_candidate_container_types::device prune_candidates( + prune_candidates_view); + + if (globalIndex >= prune_candidates.size()) { + return; + } + + const auto idx = valid_indices[globalIndex]; + + auto& seed = track_candidates[idx].header; + auto cands_per_track = track_candidates[idx].items; + + // Copy candidates + prune_candidates[globalIndex].header = seed; + const unsigned int n_cands = cands_per_track.size(); + prune_candidates[globalIndex].items.resize(n_cands); + + for (unsigned int i = 0; i < n_cands; i++) { + prune_candidates[globalIndex].items[i] = cands_per_track[i]; + } +} + +} // namespace traccc::device \ No newline at end of file diff --git a/device/common/include/traccc/finding/device/prune_tracks.hpp b/device/common/include/traccc/finding/device/prune_tracks.hpp new file mode 100644 index 0000000000..5ff52e1ac1 --- /dev/null +++ b/device/common/include/traccc/finding/device/prune_tracks.hpp @@ -0,0 +1,32 @@ +/** TRACCC library, part of the ACTS project (R&D line) + * + * (c) 2024 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// Project include(s). +#include "traccc/definitions/primitives.hpp" +#include "traccc/definitions/qualifiers.hpp" +#include "traccc/edm/track_candidate.hpp" + +namespace traccc::device { + +/// Return a new track_candidates based on the criteria in configuration +/// +/// @param[in] globalIndex The index of the current thread +/// @param[in] track_candidates_view Track candidate container view +/// @param[in] valid_indices_view Valid indices meeting criteria +/// @param[out] prune_candidates_view Track candidate container view +TRACCC_DEVICE inline void prune_tracks( + std::size_t globalIndex, + track_candidate_container_types::const_view track_candidates_view, + vecmem::data::vector_view valid_indices_view, + track_candidate_container_types::view prune_candidates_view); + +} // namespace traccc::device + +// Include the implementation. +#include "traccc/finding/device/impl/prune_tracks.ipp" \ No newline at end of file diff --git a/device/cuda/include/traccc/cuda/finding/finding_algorithm.hpp b/device/cuda/include/traccc/cuda/finding/finding_algorithm.hpp index 576659e3e5..aa9ae6b914 100644 --- a/device/cuda/include/traccc/cuda/finding/finding_algorithm.hpp +++ b/device/cuda/include/traccc/cuda/finding/finding_algorithm.hpp @@ -12,6 +12,7 @@ #include "traccc/definitions/qualifiers.hpp" #include "traccc/edm/measurement.hpp" #include "traccc/edm/track_candidate.hpp" +#include "traccc/finding/ckf_aborter.hpp" #include "traccc/finding/finding_config.hpp" #include "traccc/finding/interaction_register.hpp" #include "traccc/utils/algorithm.hpp" @@ -65,7 +66,7 @@ class finding_algorithm detray::actor_chain, interaction_register, interactor, - detray::next_surface_aborter>; + ckf_aborter>; using propagator_type = detray::propagator; diff --git a/device/cuda/src/finding/finding_algorithm.cu b/device/cuda/src/finding/finding_algorithm.cu index 6eebaa891c..a9630a0817 100644 --- a/device/cuda/src/finding/finding_algorithm.cu +++ b/device/cuda/src/finding/finding_algorithm.cu @@ -12,12 +12,14 @@ #include "traccc/definitions/primitives.hpp" #include "traccc/edm/device/finding_global_counter.hpp" #include "traccc/finding/candidate_link.hpp" +#include "traccc/finding/device/add_links_for_holes.hpp" #include "traccc/finding/device/apply_interaction.hpp" #include "traccc/finding/device/build_tracks.hpp" #include "traccc/finding/device/count_measurements.hpp" #include "traccc/finding/device/find_tracks.hpp" #include "traccc/finding/device/make_barcode_sequence.hpp" #include "traccc/finding/device/propagate_to_next_surface.hpp" +#include "traccc/finding/device/prune_tracks.hpp" // detray include(s). #include "detray/core/detector.hpp" @@ -96,6 +98,7 @@ __global__ void find_tracks( vecmem::data::vector_view ref_meas_idx_view, const unsigned int step, const unsigned int n_max_candidates, bound_track_parameters_collection_types::view out_params_view, + vecmem::data::vector_view n_candidates_view, vecmem::data::vector_view links_view, unsigned int& n_candidates) { @@ -104,7 +107,24 @@ __global__ void find_tracks( device::find_tracks( gid, cfg, det_data, measurements_view, in_params_view, n_measurements_prefix_sum_view, ref_meas_idx_view, step, - n_max_candidates, out_params_view, links_view, n_candidates); + n_max_candidates, out_params_view, n_candidates_view, links_view, + n_candidates); +} + +/// CUDA kernel for running @c traccc::device::add_links_for_holes +__global__ void add_links_for_holes( + vecmem::data::vector_view n_candidates_view, + bound_track_parameters_collection_types::const_view in_params_view, + const unsigned int step, const unsigned int n_max_candidates, + bound_track_parameters_collection_types::view out_params_view, + vecmem::data::vector_view links_view, + unsigned int& n_total_candidates) { + + int gid = threadIdx.x + blockIdx.x * blockDim.x; + + device::add_links_for_holes(gid, n_candidates_view, in_params_view, step, + n_max_candidates, out_params_view, links_view, + n_total_candidates); } /// CUDA kernel for running @c traccc::device::propagate_to_next_surface @@ -133,19 +153,36 @@ __global__ void propagate_to_next_surface( } /// CUDA kernel for running @c traccc::device::build_tracks +template __global__ void build_tracks( + const config_t cfg, measurement_collection_types::const_view measurements_view, bound_track_parameters_collection_types::const_view seeds_view, vecmem::data::jagged_vector_view links_view, vecmem::data::jagged_vector_view param_to_link_view, vecmem::data::vector_view tips_view, - track_candidate_container_types::view track_candidates_view) { + track_candidate_container_types::view track_candidates_view, + vecmem::data::vector_view valid_indices_view, + unsigned int& n_valid_tracks) { int gid = threadIdx.x + blockIdx.x * blockDim.x; - device::build_tracks(gid, measurements_view, seeds_view, links_view, - param_to_link_view, tips_view, track_candidates_view); + device::build_tracks(gid, cfg, measurements_view, seeds_view, links_view, + param_to_link_view, tips_view, track_candidates_view, + valid_indices_view, n_valid_tracks); +} + +/// CUDA kernel for running @c traccc::device::prune_tracks +__global__ void prune_tracks( + track_candidate_container_types::const_view track_candidates_view, + vecmem::data::vector_view valid_indices_view, + track_candidate_container_types::view prune_candidates_view) { + + int gid = threadIdx.x + blockIdx.x * blockDim.x; + + device::prune_tracks(gid, track_candidates_view, valid_indices_view, + prune_candidates_view); } } // namespace kernels @@ -300,6 +337,11 @@ finding_algorithm::operator()( vecmem::data::vector_buffer n_measurements_buffer( n_in_params, m_mr.main); + vecmem::device_vector n_measurements_device( + n_measurements_buffer); + thrust::fill(thrust::cuda::par.on(stream), + n_measurements_device.begin(), n_measurements_device.end(), + 0u); // Create a buffer for the first measurement index of parameter vecmem::data::vector_buffer ref_meas_idx_buffer( @@ -323,15 +365,13 @@ finding_algorithm::operator()( // Create the buffer for the prefix sum of the number of measurements // per parameter - vecmem::device_vector n_measurements( - n_measurements_buffer); vecmem::data::vector_buffer n_measurements_prefix_sum_buffer(n_in_params, m_mr.main); vecmem::device_vector n_measurements_prefix_sum( n_measurements_prefix_sum_buffer); - thrust::inclusive_scan(thrust::cuda::par.on(stream), - n_measurements.begin(), n_measurements.end(), - n_measurements_prefix_sum.begin()); + thrust::inclusive_scan( + thrust::cuda::par.on(stream), n_measurements_device.begin(), + n_measurements_device.end(), n_measurements_prefix_sum.begin()); /***************************************************************** * Kernel4: Find valid tracks @@ -343,6 +383,13 @@ finding_algorithm::operator()( std::min(n_in_params * m_cfg.max_num_branches_per_surface, seeds.size() * m_cfg.max_num_branches_per_seed); + vecmem::data::vector_buffer n_candidates_buffer{ + n_in_params, m_mr.main}; + vecmem::device_vector n_candidates_device( + n_candidates_buffer); + thrust::fill(thrust::cuda::par.on(stream), n_candidates_device.begin(), + n_candidates_device.end(), 0u); + bound_track_parameters_collection_types::buffer updated_params_buffer( n_in_params * m_cfg.max_num_branches_per_surface, m_mr.main); @@ -359,11 +406,26 @@ finding_algorithm::operator()( <<>>( m_cfg, det_view, measurements, in_params_buffer, n_measurements_prefix_sum_buffer, ref_meas_idx_buffer, step, - n_max_candidates, updated_params_buffer, link_map[step], + n_max_candidates, updated_params_buffer, + n_candidates_buffer, link_map[step], (*global_counter_device).n_candidates); TRACCC_CUDA_ERROR_CHECK(cudaGetLastError()); } + /***************************************************************** + * Kernel5: Add a dummy links in case of no branches + *****************************************************************/ + + nBlocks = (n_in_params + nThreads - 1) / nThreads; + + if (nBlocks > 0) { + kernels::add_links_for_holes<<>>( + n_candidates_buffer, in_params_buffer, step, n_max_candidates, + updated_params_buffer, link_map[step], + (*global_counter_device).n_candidates); + TRACCC_CUDA_ERROR_CHECK(cudaGetLastError()); + } + // Global counter object: Device -> Host TRACCC_CUDA_ERROR_CHECK( cudaMemcpyAsync(&global_counter_host, global_counter_device.get(), @@ -373,7 +435,7 @@ finding_algorithm::operator()( m_stream.synchronize(); /***************************************************************** - * Kernel5: Propagate to the next surface + * Kernel6: Propagate to the next surface *****************************************************************/ // Buffer for out parameters for the next step @@ -471,9 +533,8 @@ finding_algorithm::operator()( tips_buffer); unsigned int prefix_sum = 0; - for (unsigned int it = m_cfg.min_track_candidates_per_track - 1; - it < n_steps; it++) { + for (unsigned int it = 0; it < n_steps; it++) { vecmem::device_vector in( tips_map[it]); @@ -486,7 +547,7 @@ finding_algorithm::operator()( } /***************************************************************** - * Kernel6: Build tracks + * Kernel7: Build tracks *****************************************************************/ // Create track candidate buffer @@ -499,20 +560,52 @@ finding_algorithm::operator()( m_copy.setup(track_candidates_buffer.headers); m_copy.setup(track_candidates_buffer.items); + // Create buffer for valid indices + vecmem::data::vector_buffer valid_indices_buffer(n_tips_total, + m_mr.main); + // @Note: nBlocks can be zero in case there is no tip. This happens when // chi2_max config is set tightly and no tips are found if (n_tips_total > 0) { nThreads = m_warp_size * 2; nBlocks = (n_tips_total + nThreads - 1) / nThreads; kernels::build_tracks<<>>( - measurements, seeds_buffer, links_buffer, param_to_link_buffer, - tips_buffer, track_candidates_buffer); - + m_cfg, measurements, seeds_buffer, links_buffer, + param_to_link_buffer, tips_buffer, track_candidates_buffer, + valid_indices_buffer, (*global_counter_device).n_valid_tracks); TRACCC_CUDA_ERROR_CHECK(cudaGetLastError()); } + + // Global counter object: Device -> Host + TRACCC_CUDA_ERROR_CHECK( + cudaMemcpyAsync(&global_counter_host, global_counter_device.get(), + sizeof(device::finding_global_counter), + cudaMemcpyDeviceToHost, stream)); + m_stream.synchronize(); - return track_candidates_buffer; + // Create pruned candidate buffer + track_candidate_container_types::buffer prune_candidates_buffer{ + {global_counter_host.n_valid_tracks, m_mr.main}, + {std::vector(global_counter_host.n_valid_tracks, + m_cfg.max_track_candidates_per_track), + m_mr.main, m_mr.host, vecmem::data::buffer_type::resizable}}; + + m_copy.setup(prune_candidates_buffer.headers); + m_copy.setup(prune_candidates_buffer.items); + + if (global_counter_host.n_valid_tracks > 0) { + nThreads = m_warp_size * 2; + nBlocks = + (global_counter_host.n_valid_tracks + nThreads - 1) / nThreads; + + kernels::prune_tracks<<>>( + track_candidates_buffer, valid_indices_buffer, + prune_candidates_buffer); + TRACCC_CUDA_ERROR_CHECK(cudaGetLastError()); + } + + return prune_candidates_buffer; } // Explicit template instantiation diff --git a/examples/run/cuda/seq_example_cuda.cpp b/examples/run/cuda/seq_example_cuda.cpp index 28ec730413..115216e144 100644 --- a/examples/run/cuda/seq_example_cuda.cpp +++ b/examples/run/cuda/seq_example_cuda.cpp @@ -390,10 +390,12 @@ int seq_run(const traccc::opts::detector& detector_opts, compare cpu and cuda result ----------------------------------*/ + traccc::measurement_collection_types::host measurements_per_event_cuda; traccc::spacepoint_collection_types::host spacepoints_per_event_cuda; traccc::seed_collection_types::host seeds_cuda; traccc::bound_track_parameters_collection_types::host params_cuda; + copy(measurements_cuda_buffer, measurements_per_event_cuda)->wait(); copy(spacepoints_cuda_buffer, spacepoints_per_event_cuda)->wait(); copy(seeds_cuda_buffer, seeds_cuda)->wait(); copy(params_cuda_buffer, params_cuda)->wait(); @@ -407,6 +409,12 @@ int seq_run(const traccc::opts::detector& detector_opts, // Show which event we are currently presenting the results for. std::cout << "===>>> Event " << event << " <<<===" << std::endl; + // Compare the measurements made on the host and on the device. + traccc::collection_comparator + compare_measurements{"measurements"}; + compare_measurements(vecmem::get_data(measurements_per_event), + vecmem::get_data(measurements_per_event_cuda)); + // Compare the spacepoints made on the host and on the device. traccc::collection_comparator compare_spacepoints{"spacepoints"};