From 91d116007a5cb8c2ea4ab81f033b356e9810b3fd Mon Sep 17 00:00:00 2001 From: Jae-Seung Yeom Date: Thu, 8 Aug 2024 10:22:34 -0700 Subject: [PATCH] Compute error statistics on the points originally evaluated by the physics model --- src/AMSlib/wf/validator.hpp | 274 +++++++++++++++++++++--------------- 1 file changed, 161 insertions(+), 113 deletions(-) diff --git a/src/AMSlib/wf/validator.hpp b/src/AMSlib/wf/validator.hpp index e467256a..5099a840 100644 --- a/src/AMSlib/wf/validator.hpp +++ b/src/AMSlib/wf/validator.hpp @@ -2,9 +2,12 @@ #define __AMS_VALIDATOR_HPP__ #include #include +#include // memcpy #include +#include #include #include +#include // is_same #if __STANDALONE_TEST__ namespace ams @@ -53,18 +56,20 @@ template struct ValStats { ValStats() - : m_cnt(0u), - m_avg(static_cast(0)), - m_var(static_cast(0)) + : m_cnt({0u,0u}), + m_avg({static_cast(0), static_cast(0)}), + m_var({static_cast(0), static_cast(0)}) {} - ValStats(unsigned int cnt, FPTypeValue avg, FPTypeValue var) + ValStats(const std::array& cnt, + const std::array& avg, + const std::array& var) : m_cnt(cnt), m_avg(avg), m_var(var) {} - unsigned int m_cnt; - FPTypeValue m_avg; - FPTypeValue m_var; + std::array m_cnt; + std::array m_avg; + std::array m_var; }; template @@ -122,13 +127,21 @@ class VPCollector { unsigned k_v); #if __STANDALONE_TEST__ void backup_surrogate_outs(const FPTypeValue* sur_out, - StepValPoints& step_valpoints) const; + StepValPoints& step_valpoints); + std::array backup_validation_outs(const FPTypeValue* phy_out, + StepValPoints& step_valpoints); + ValStats get_error_stats(const FPTypeValue* phy_out, + StepValPoints& step_valpoints); #endif void backup_surrogate_outs(const std::vector sur_out, - std::vector>& step_valpoints) const; + std::vector>& step_valpoints); const bool* predicate() const { return m_pred_loc_new; } + std::vector> get_error_stats( + const std::vector phy_out, + std::vector>& step_valpoints); + protected: int gather_predicate(const bool* pred_loc, size_t num_pred_loc); size_t pick_num_val_pts(const size_t n_T, const size_t n_F, unsigned k) const; @@ -137,6 +150,13 @@ class VPCollector { bool* distribute_predicate(const AMSResourceType appDataLoc); /// Clear intermediate data for root rank to handle predicate update void clear_intermediate_info(); + std::array sum_sqdev(const FPTypeValue* phy_out, + const FPTypeValue* sur_out, + const StepValPoints& step_valpoints, + const std::array& avg); + std::vector> backup_validation_outs( + const std::vector phy_out, + std::vector>& step_valpoints); protected: MPI_Comm m_comm; @@ -146,6 +166,7 @@ class VPCollector { const bool* m_pred_loc; bool* m_pred_loc_new; AMSResourceType m_appDataLoc; + std::vector m_sur; ///< surrogate output backup // For root rank std::vector m_predicate_all; @@ -181,11 +202,18 @@ VPCollector::~VPCollector() if (m_pred_loc_new) { #if __STANDALONE_TEST__ ams::ResourceManager::deallocate(m_pred_loc_new, m_appDataLoc); + for (size_t i = 0u; i < m_sur.size(); ++i) { + ams::ResourceManager::deallocate(m_sur[i], m_appDataLoc); + } #else auto &rm_d = ams::ResourceManager::getInstance(); rm_d.deallocate(m_pred_loc_new, m_appDataLoc); + for (size_t i = 0u; i < m_sur.size(); ++i) { + rm_d.deallocate(m_sur[i], m_appDataLoc); + } #endif // __STANDALONE_TEST__ m_pred_loc_new = nullptr; + m_sur.clear(); } m_pred_loc = nullptr; m_num_pred_loc = 0ul; @@ -378,15 +406,25 @@ bool VPCollector::set_validation( } template -FPTypeValue sum_sqdev( +std::array VPCollector::sum_sqdev( + const FPTypeValue* phy_out, + const FPTypeValue* sur_out, const StepValPoints& step_valpoints, - const FPTypeValue avg) + const std::array& avg) { - FPTypeValue sum = static_cast(0); + std::array sum{static_cast(0), static_cast(0)}; + size_t k = 0ul; for (auto& pt: step_valpoints.val_pts()) { - auto diff = std::abs(pt.m_o_phy - pt.m_o_sur) - avg; - sum += diff*diff; + for (; k < pt.m_pos; ++k) { + sum[0] += std::abs(phy_out[k] - sur_out[k]); + } + k++; + auto diff = std::abs(pt.m_o_phy - pt.m_o_sur) - avg[1]; + sum[1] += diff*diff; + } + for (; k < m_num_pred_loc; ++k) { + sum[0] += std::abs(phy_out[k] - sur_out[k]); } return sum; @@ -396,8 +434,14 @@ FPTypeValue sum_sqdev( template void VPCollector::backup_surrogate_outs( const FPTypeValue* sur_out, - StepValPoints& step_valpoints) const + StepValPoints& step_valpoints) { + m_sur.resize(1u, nullptr); +#if __STANDALONE_TEST__ + m_sur[0] = ams::ResourceManager::allocate(m_num_pred_loc, m_appDataLoc); +#endif // __STANDALONE_TEST__ + std::memcpy(m_sur[0], sur_out, m_num_pred_loc); + for (size_t i = 0ul; i < m_num_pred_loc; ++i) { if (m_pred_loc_new[i] != m_pred_loc[i]) { step_valpoints.add_pt(ValPoint(i, sur_out[i])); @@ -408,80 +452,89 @@ void VPCollector::backup_surrogate_outs( } template -FPTypeValue backup_validation_outs( +std::array VPCollector::backup_validation_outs( const FPTypeValue* phy_out, StepValPoints& step_valpoints) { - FPTypeValue sum = static_cast(0); + std::array sum {static_cast(0), static_cast(0)}; + size_t k = 0ul; for (auto& pt: step_valpoints.val_pts()) { + for (; k < pt.m_pos; ++k) { + sum[0] += std::abs(phy_out[k] - m_sur[0][k]); + } + k++; pt.m_o_phy = phy_out[pt.m_pos]; - sum += std::abs(pt.m_o_phy - pt.m_o_sur); + sum[1] += std::abs(pt.m_o_phy - pt.m_o_sur); } - return sum; -} -ValStats get_error_stats( - const float* phy_out, - StepValPoints& step_valpoints, - MPI_Comm comm = MPI_COMM_WORLD) -{ - const auto err_sum_loc = backup_validation_outs(phy_out, step_valpoints); - float err_sum_glo = static_cast(0); - float err_avg_glo = static_cast(0); - - unsigned err_cnt_loc = step_valpoints.get_num_points(); - unsigned err_cnt_glo = 0u; - - MPI_Allreduce(&err_cnt_loc, &err_cnt_glo, 1, MPI_UNSIGNED, MPI_SUM, comm); - MPI_Allreduce(&err_sum_loc, &err_sum_glo, 1, MPI_FLOAT, MPI_SUM, comm); - err_avg_glo = err_sum_glo / static_cast(err_cnt_glo); - - float err_var_loc = sum_sqdev(step_valpoints, err_avg_glo) - / static_cast(err_cnt_glo); - float err_var_glo = 0; - MPI_Allreduce(&err_var_loc, &err_var_glo, 1, MPI_FLOAT, MPI_SUM, comm); - err_var_glo = err_var_glo/static_cast(err_cnt_glo); + for (; k < m_num_pred_loc; ++k) { + sum[0] += std::abs(phy_out[k] - m_sur[0][k]); + } - return ValStats(err_cnt_glo, err_avg_glo, err_var_glo); + return sum; } -ValStats get_error_stats( - const double* phy_out, - StepValPoints& step_valpoints, - MPI_Comm comm = MPI_COMM_WORLD) +template +ValStats VPCollector::get_error_stats( + const FPTypeValue* phy_out, + StepValPoints& step_valpoints) { const auto err_sum_loc = backup_validation_outs(phy_out, step_valpoints); - double err_sum_glo = static_cast(0); - double err_avg_glo = static_cast(0); + std::array err_sum_glo {static_cast(0), static_cast(0)}; + std::array err_avg_glo {static_cast(0), static_cast(0)}; + + std::array err_cnt_loc {m_num_pred_loc - step_valpoints.get_num_points(), + step_valpoints.get_num_points()}; + std::array err_cnt_glo {0u, 0u}; + + MPI_Allreduce(&err_cnt_loc[0], &err_cnt_glo[0], 2, MPI_UNSIGNED, MPI_SUM, m_comm); + MPI_Datatype mpi_dtype = MPI_FLOAT; + if (std::is_same::value) { + mpi_dtype = MPI_DOUBLE; + } else //if (std::is_same::value) + { + mpi_dtype = MPI_FLOAT; + } - unsigned err_cnt_loc = step_valpoints.get_num_points(); - unsigned err_cnt_glo = 0u; + MPI_Allreduce(&err_sum_loc[0], &err_sum_glo[0], 2, mpi_dtype, MPI_SUM, m_comm); + err_avg_glo[0] = err_sum_glo[0] / static_cast(err_cnt_glo[0]); + err_avg_glo[1] = err_sum_glo[1] / static_cast(err_cnt_glo[1]); - MPI_Allreduce(&err_cnt_loc, &err_cnt_glo, 1, MPI_UNSIGNED, MPI_SUM, comm); - MPI_Allreduce(&err_sum_loc, &err_sum_glo, 1, MPI_DOUBLE, MPI_SUM, comm); - err_avg_glo = err_sum_glo / static_cast(err_cnt_glo); + const auto err_var_loc = sum_sqdev(phy_out, m_sur[0], step_valpoints, err_avg_glo); + //err_var_loc[0] /= static_cast(err_cnt_glo[0]); + //err_var_loc[1] /= static_cast(err_cnt_glo[1]); - double err_var_loc = sum_sqdev(step_valpoints, err_avg_glo) - / static_cast(err_cnt_glo); - double err_var_glo = 0; - MPI_Allreduce(&err_var_loc, &err_var_glo, 1, MPI_DOUBLE, MPI_SUM, comm); - err_var_glo = err_var_glo/static_cast(err_cnt_glo); + std::array err_var_glo{0,0}; + MPI_Allreduce(&err_var_loc[0], &err_var_glo[0], 2, mpi_dtype, MPI_SUM, m_comm); + err_var_glo[0] /= static_cast(err_cnt_glo[0]); + err_var_glo[1] /= static_cast(err_cnt_glo[1]); - return ValStats(err_cnt_glo, err_avg_glo, err_var_glo); + return ValStats(err_cnt_glo, err_avg_glo, err_var_glo); } - #endif // __STANDALONE_TEST__ + template void VPCollector::backup_surrogate_outs( const std::vector sur_out, - std::vector>& step_valpoints) const + std::vector>& step_valpoints) { const size_t dim = sur_out.size(); step_valpoints.clear(); step_valpoints.resize(dim); + m_sur.resize(dim); + for (size_t j = 0ul; j < dim; ++j) { +#if __STANDALONE_TEST__ + m_sur[j] = ams::ResourceManager::allocate(m_num_pred_loc, m_appDataLoc); +#else + auto &rm_a = ams::ResourceManager::getInstance(); + m_sur[j] = rm_a.allocate(m_num_pred_loc, m_appDataLoc); +#endif // __STANDALONE_TEST__ + std::memcpy(m_sur[j], sur_out[j], m_num_pred_loc); + } + for (size_t i = 0ul; i < m_num_pred_loc; ++i) { if (m_pred_loc_new[i] != m_pred_loc[i]) { for (size_t j = 0ul; j < dim; ++j) { @@ -496,13 +549,16 @@ void VPCollector::backup_surrogate_outs( } template -std::vector backup_validation_outs( +std::vector> VPCollector::backup_validation_outs( const std::vector phy_out, std::vector>& step_valpoints) { const size_t dim = step_valpoints.size(); - std::vector sum(dim, static_cast(0)); + std::vector, 2>> sum( + dim, + std::array({static_cast(0), + static_cast(0)}); if (phy_out.size() != dim) { // exception @@ -511,69 +567,61 @@ std::vector backup_validation_outs( } for (size_t j = 0ul; j < dim; ++j) { + size_t k = 0ul; for (auto& pt: step_valpoints[j].val_pts()) { + for ( ; k < pt.m_pos; ++k) { + sum[j][0] += std::abs(phy_out[j][k] - m_sur[j][k]); + } + k++; pt.m_o_phy = phy_out[j][pt.m_pos]; - sum[j] += std::abs(pt.m_o_phy - pt.m_o_sur); + sum[j][1] += std::abs(pt.m_o_phy - pt.m_o_sur); + } + for ( ; k < m_num_pred_loc; ++k) { + sum[j][0] += std::abs(phy_out[j][k] - m_sur[j][k]); } } return sum; } -std::vector> get_error_stats( - const std::vector phy_out, - std::vector>& step_valpoints, - MPI_Comm comm = MPI_COMM_WORLD) -{ - const auto err_sum_loc = backup_validation_outs(phy_out, step_valpoints); - const size_t dim = err_sum_loc.size(); - std::vector err_sum_glo(dim, static_cast(0)); - std::vector err_avg_glo(dim, static_cast(0)); - std::vector err_var_glo(dim, static_cast(0)); - std::vector> stats(dim); - - unsigned err_cnt_loc = step_valpoints.at(0).get_num_points(); - unsigned err_cnt_glo = 0u; - MPI_Allreduce(&err_cnt_loc, &err_cnt_glo, 1, MPI_UNSIGNED, MPI_SUM, comm); - - for (size_t j = 0ul; j < dim; ++j) { - MPI_Allreduce(&err_sum_loc[j], &err_sum_glo[j], 1, MPI_FLOAT, MPI_SUM, comm); - err_avg_glo[j] = err_sum_glo[j] / static_cast(err_cnt_glo); - float err_var_loc = sum_sqdev(step_valpoints[j], err_avg_glo[j]) - / static_cast(err_cnt_glo); - MPI_Allreduce(&err_var_loc, &err_var_glo[j], 1, MPI_FLOAT, MPI_SUM, comm); - err_var_glo[j] = err_var_glo[j]/static_cast(err_cnt_glo); - stats[j] = ValStats(err_cnt_glo, err_avg_glo[j], err_var_glo[j]); - } - - return stats; -} - -std::vector> get_error_stats( - const std::vector phy_out, - std::vector>& step_valpoints, - MPI_Comm comm = MPI_COMM_WORLD) +template +std::vector> VPCollector::get_error_stats( + const std::vector phy_out, + std::vector>& step_valpoints) { const auto err_sum_loc = backup_validation_outs(phy_out, step_valpoints); const size_t dim = err_sum_loc.size(); - std::vector err_sum_glo(dim, static_cast(0)); - std::vector err_avg_glo(dim, static_cast(0)); - std::vector err_var_glo(dim, static_cast(0)); - std::vector> stats(dim); - - unsigned err_cnt_loc = step_valpoints.at(0).get_num_points(); - unsigned err_cnt_glo = 0u; - - MPI_Allreduce(&err_cnt_loc, &err_cnt_glo, 1, MPI_UNSIGNED, MPI_SUM, comm); + std::vector> err_sum_glo(dim, {static_cast(0), static_cast(0)}); + std::vector> err_avg_glo(dim, {static_cast(0), static_cast(0)}); + std::vector> err_var_glo(dim, {static_cast(0), static_cast(0)}); + std::vector> stats(dim); + + std::array err_cnt_loc {m_num_pred_loc - step_valpoints.at(0).get_num_points(), + step_valpoints.at(0).get_num_points()}; + std::array err_cnt_glo {0u, 0u}; + + MPI_Allreduce(&err_cnt_loc[0], &err_cnt_glo[0], 2, MPI_UNSIGNED, MPI_SUM, m_comm); + + MPI_Datatype mpi_dtype = MPI_FLOAT; + if (std::is_same::value) { + mpi_dtype = MPI_DOUBLE; + } else //if (std::is_same::value) + { + mpi_dtype = MPI_FLOAT; + } for (size_t j = 0ul; j < dim; ++j) { - MPI_Allreduce(&err_sum_loc[j], &err_sum_glo[j], 1, MPI_DOUBLE, MPI_SUM, comm); - err_avg_glo[j] = err_sum_glo[j] / static_cast(err_cnt_glo); - double err_var_loc = sum_sqdev(step_valpoints[j], err_avg_glo[j]) - / static_cast(err_cnt_glo); - MPI_Allreduce(&err_var_loc, &err_var_glo[j], 1, MPI_DOUBLE, MPI_SUM, comm); - err_var_glo[j] = err_var_glo[j]/static_cast(err_cnt_glo); - stats[j] = ValStats(err_cnt_glo, err_avg_glo[j], err_var_glo[j]); + MPI_Allreduce(&err_sum_loc[j][0], &err_sum_glo[j][0], 2, mpi_dtype, MPI_SUM, m_comm); + err_avg_glo[j][0] = err_sum_glo[j][0] / static_cast(err_cnt_glo[0]); + err_avg_glo[j][1] = err_sum_glo[j][1] / static_cast(err_cnt_glo[1]); + std::array err_var_loc = + sum_sqdev(phy_out[j], step_valpoints[j], err_avg_glo[j], j); + //err_var_loc[0] /= static_cast(err_cnt_glo[0]); + //err_var_loc[1] /= static_cast(err_cnt_glo[1]); + MPI_Allreduce(&err_var_loc, &err_var_glo[j], 1, mpi_dtype, MPI_SUM, m_comm); + err_var_glo[j][0] /= static_cast(err_cnt_glo[0]); + err_var_glo[j][1] /= static_cast(err_cnt_glo[1]); + stats[j] = ValStats(err_cnt_glo, err_avg_glo[j], err_var_glo[j]); } return stats;