diff --git a/src/QMCWaveFunctions/lcao/CuspCorrectionConstruction.cpp b/src/QMCWaveFunctions/lcao/CuspCorrectionConstruction.cpp index 049997ba08..8787953e70 100644 --- a/src/QMCWaveFunctions/lcao/CuspCorrectionConstruction.cpp +++ b/src/QMCWaveFunctions/lcao/CuspCorrectionConstruction.cpp @@ -27,6 +27,11 @@ void applyCuspCorrection(const Matrix& info, { typedef QMCTraits::RealType RealType; + NewTimer* cuspApplyTimer = + TimerManager.createTimer("CuspCorrectionConstruction::applyCuspCorrection", timer_level_medium); + + ScopedTimer cuspApplyTimerWrapper(cuspApplyTimer); + LCAOrbitalSet phi = LCAOrbitalSet(lcwc.myBasisSet); phi.setOrbitalSetSize(lcwc.OrbitalSetSize); phi.BasisSetSize = lcwc.BasisSetSize; @@ -110,7 +115,7 @@ void applyCuspCorrection(const Matrix& info, removeSTypeOrbitals(corrCenter, lcwc); } -void saveCusp(int orbital_set_size, int num_centers, Matrix& info, std::string id) +void saveCusp(int orbital_set_size, int num_centers, Matrix& info, const std::string& id) { xmlDocPtr doc = xmlNewDoc((const xmlChar*)"1.0"); xmlNodePtr cuspRoot = xmlNewNode(NULL, BAD_CAST "qmcsystem"); @@ -180,14 +185,21 @@ void saveCusp(int orbital_set_size, int num_centers, Matrix& info, - ParticleSet& targetPtcl, - ParticleSet& sourcePtcl, - LCAOrbitalSetWithCorrection& lcwc, - std::string id, + const ParticleSet& targetPtcl, + const ParticleSet& sourcePtcl, + const LCAOrbitalSetWithCorrection& lcwc, + const std::string& id, Communicate& Comm) { typedef QMCTraits::RealType RealType; + NewTimer* cuspCreateTimer = + TimerManager.createTimer("CuspCorrectionConstruction::createCuspParameters", timer_level_medium); + NewTimer* splitPhiEtaTimer = TimerManager.createTimer("CuspCorrectionConstruction::splitPhiEta", timer_level_fine); + NewTimer* computeTimer = TimerManager.createTimer("CuspCorrectionConstruction::computeCorrection", timer_level_fine); + + ScopedTimer createCuspTimerWrapper(cuspCreateTimer); + LCAOrbitalSet phi = LCAOrbitalSet(lcwc.myBasisSet); phi.setOrbitalSetSize(lcwc.OrbitalSetSize); phi.BasisSetSize = lcwc.BasisSetSize; @@ -211,17 +223,42 @@ void generateCuspInfo(int orbital_set_size, int start_mo = offset[Comm.rank()]; int end_mo = offset[Comm.rank() + 1]; app_log() << " Number of molecular orbitals to compute correction on this rank: " << end_mo - start_mo << std::endl; - for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++) + +// Specify dynamic scheduling explicitly for load balancing. Each iteration should take enough +// time that scheduling overhead is not an issue. +#pragma omp parallel for schedule(dynamic) collapse(2) + for (int center_idx = 0; center_idx < num_centers; center_idx++) { - app_log() << " Working on MO: " << mo_idx << std::endl; - for (int center_idx = 0; center_idx < num_centers; center_idx++) + for (int mo_idx = start_mo; mo_idx < end_mo; mo_idx++) { - *(eta.C) = *(lcwc.C); - *(phi.C) = *(lcwc.C); - splitPhiEta(center_idx, corrCenter, phi, eta); + ParticleSet localTargetPtcl(targetPtcl); + ParticleSet localSourcePtcl(sourcePtcl); + + LCAOrbitalSet local_phi(phi); + local_phi.myBasisSet = phi.myBasisSet->makeClone(); + local_phi.IsCloned = true; + local_phi.C = nullptr; + local_phi.setIdentity(false); + + LCAOrbitalSet local_eta(eta); + local_eta.myBasisSet = eta.myBasisSet->makeClone(); + local_eta.IsCloned = true; + local_eta.C = nullptr; + local_eta.setIdentity(false); + + #pragma omp critical + app_log() << " Working on MO: " << mo_idx << " Center: " << center_idx << std::endl; + + splitPhiEtaTimer->start(); + + *(local_eta.C) = *(lcwc.C); + *(local_phi.C) = *(lcwc.C); + splitPhiEta(center_idx, corrCenter, local_phi, local_eta); + + splitPhiEtaTimer->stop(); bool corrO = false; - auto& cref(*(phi.C)); + auto& cref(*(local_phi.C)); for (int ip = 0; ip < cref.cols(); ip++) { if (std::abs(cref(mo_idx, ip)) > 0) @@ -233,15 +270,15 @@ void generateCuspInfo(int orbital_set_size, if (corrO) { - OneMolecularOrbital etaMO(&targetPtcl, &sourcePtcl, &eta); + OneMolecularOrbital etaMO(&localTargetPtcl, &localSourcePtcl, &local_eta); etaMO.changeOrbital(center_idx, mo_idx); - OneMolecularOrbital phiMO(&targetPtcl, &sourcePtcl, &phi); + OneMolecularOrbital phiMO(&localTargetPtcl, &localSourcePtcl, &local_phi); phiMO.changeOrbital(center_idx, mo_idx); - SpeciesSet& tspecies(sourcePtcl.getSpeciesSet()); + SpeciesSet& tspecies(localSourcePtcl.getSpeciesSet()); int iz = tspecies.addAttribute("charge"); - RealType Z = tspecies(iz, sourcePtcl.GroupID[center_idx]); + RealType Z = tspecies(iz, localSourcePtcl.GroupID[center_idx]); RealType Rc_max = 0.2; RealType rc = 0.1; @@ -258,7 +295,12 @@ void generateCuspInfo(int orbital_set_size, RealType eta0 = etaMO.phi(0.0); ValueVector_t ELorig(npts); CuspCorrection cusp(info(center_idx, mo_idx)); + computeTimer->start(); minimizeForRc(cusp, phiMO, Z, rc, Rc_max, eta0, pos, ELcurr, ELideal); + computeTimer->stop(); + // Update shared object. Each iteration accesses a different element and + // this is an array (no bookkeeping data to update), so no synchronization + // is necessary. info(center_idx, mo_idx) = cusp.cparam; } } diff --git a/src/QMCWaveFunctions/lcao/CuspCorrectionConstruction.h b/src/QMCWaveFunctions/lcao/CuspCorrectionConstruction.h index 970c40464c..1f185c9ae9 100644 --- a/src/QMCWaveFunctions/lcao/CuspCorrectionConstruction.h +++ b/src/QMCWaveFunctions/lcao/CuspCorrectionConstruction.h @@ -30,15 +30,15 @@ void applyCuspCorrection(const Matrix& info, LCAOrbitalSetWithCorrection& lcwc, const std::string& id); -void saveCusp(int orbital_set_size, int num_centers, Matrix& info, std::string id); +void saveCusp(int orbital_set_size, int num_centers, Matrix& info, const std::string &id); void generateCuspInfo(int orbital_set_size, int num_centers, Matrix& info, - ParticleSet& targetPtcl, - ParticleSet& sourcePtcl, - LCAOrbitalSetWithCorrection& lcwc, - std::string id, + const ParticleSet& targetPtcl, + const ParticleSet& sourcePtcl, + const LCAOrbitalSetWithCorrection& lcwc, + const std::string& id, Communicate& Comm); } // namespace qmcplusplus