Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New AFQMC estimators #2097

Merged
merged 12 commits into from
Nov 14, 2019
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ IF(MIXED_PRECISION AND BUILD_LMYENGINE_INTERFACE)
SET(BUILD_LMYENGINE_INTERFACE 0)
ENDIF()
SET(BUILD_AFQMC 0 CACHE BOOL "Build with AFQMC")
SET(BUILD_AFQMC_WITH_NCCL 0 CACHE BOOL "Build AFQMC with NCCL library.")
# AFQMC requires MPI.
If (BUILD_AFQMC AND NOT QMC_MPI)
MESSAGE(FATAL_ERROR "AFQMC requires building with MPI (QMC_MPI=1). Set BUILD_AFQMC=0 or configure MPI.")
Expand Down
4 changes: 2 additions & 2 deletions external_codes/mpi_wrapper/mpi3/shared_window.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ namespace mpi3{
template<class T>
struct shared_window : window<T>{
// shared_communicator& comm_;
shared_window(shared_communicator& comm, mpi3::size_t n, int disp_unit = alignof(T)) : //sizeof(T)) : // here we assume that disp_unit is used for align
window<T>{}//, comm_{comm}
shared_window(shared_communicator& comm, mpi3::size_t n, int disp_unit = alignof(T)) //: //sizeof(T)) : // here we assume that disp_unit is used for align
//window<T>()//, comm_{comm}
{
void* base_ptr = nullptr;
auto e = static_cast<enum error>(
Expand Down
16 changes: 9 additions & 7 deletions src/AFQMC/Estimators/BackPropagatedEstimator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,13 +140,15 @@ class BackPropagatedEstimator: public EstimatorBase
if(iav < 0) return;

using std::fill_n;
// 0. skip if requested
if(bp_step == max_nback_prop && iblock < nblocks_skip) {
if( iblock+1 == nblocks_skip )
for(auto it=wset.begin(); it<wset.end(); ++it)
it->setSlaterMatrixN();
iblock++;
wset.setBPPos(0);
// 0. skip if requested
// While iblock is still below nblocks_skip this branch always returns,
// so nothing past this point runs for the call.
if(iblock < nblocks_skip) {
// The skipped-block counter only advances on calls where bp_step has
// reached max_nback_prop; all other calls just return.
if(bp_step == max_nback_prop) {
// On the last block to be skipped, call setSlaterMatrixN() on every
// walker in wset.
if( iblock+1 == nblocks_skip )
for(auto it=wset.begin(); it<wset.end(); ++it)
it->setSlaterMatrixN();
iblock++;
// NOTE(review): presumably resets the back-propagation position of the
// walker set to its start -- confirm against the walker set's setBPPos.
wset.setBPPos(0);
}
return;
}

Expand Down
5 changes: 5 additions & 0 deletions src/AFQMC/Estimators/EstimatorHandler.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ class EstimatorHandler: public AFQMCInfo
{
estimators.reserve(10);

app_log()<<"\n****************************************************\n"
<<" Initializing Estimators \n"
<<"****************************************************\n"
<<std::endl;

std::string overwrite_default_energy("no");
xmlNodePtr curRoot = cur;
xmlNodePtr curBasic = NULL;
Expand Down
50 changes: 46 additions & 4 deletions src/AFQMC/Estimators/FullObsHandler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ class FullObsHandler: public AFQMCInfo
using sharedCMatrix_ref = boost::multi::array_ref<ComplexType,2,shared_pointer>;
using sharedC4Tensor_ref = boost::multi::array_ref<ComplexType,4,shared_pointer>;

using mpi3C4Tensor = boost::multi::array<ComplexType,4,shared_allocator<ComplexType>>;

using stdCVector = boost::multi::array<ComplexType,1>;
using stdCMatrix = boost::multi::array<ComplexType,2>;
using stdCVector_ref = boost::multi::array_ref<ComplexType,1>;
Expand All @@ -74,7 +76,8 @@ class FullObsHandler: public AFQMCInfo
AFQMCInfo(info),TG(tg_),walker_type(wlk),
wfn0(wfn), writer(false), block_size(1), nave(1),name(name_),
nspins((walker_type==COLLINEAR)?2:1),
Buff(iextensions<1u>{1},make_localTG_allocator<ComplexType>(TG))
Buff(iextensions<1u>{1},make_localTG_allocator<ComplexType>(TG)),
G4D_host({0,0,0,0},shared_allocator<ComplexType>{TG.TG_local()})
{

using std::fill_n;
Expand All @@ -93,8 +96,29 @@ class FullObsHandler: public AFQMCInfo
cur = curRoot->children;
while (cur != NULL) {
std::string cname((const char*)(cur->name));
if(cname =="OneRDM") {
properties.emplace_back(Observable(std::move(full1rdm(TG,info,cur,walker_type,nave,block_size))));
// Lower-case the XML node name in place so the comparisons below are
// case-insensitive.
std::transform(cname.begin(),cname.end(),cname.begin(),(int (*)(int)) tolower);
if(cname =="onerdm") {
// Register a full1rdm observable built from this node.
properties.emplace_back(Observable(std::move(full1rdm(TG,info,cur,walker_type,nave,block_size))));
} else if(cname =="diag2rdm") {
// Register a diagonal2rdm observable built from this node.
properties.emplace_back(Observable(std::move(diagonal2rdm(TG,info,cur,walker_type,nave,block_size))));
} else if(cname =="n2r" || cname =="ontop2rdm") {
// Both node names select the n2r observable.
#if defined(ENABLE_CUDA)
// CUDA builds only: read the "use_host_memory" parameter from the node
// (default "false") and compare it case-insensitively.
std::string str("false");
ParameterSet m_param;
m_param.add(str, "use_host_memory", "std::string");
m_param.put(cur);
std::transform(str.begin(),str.end(),str.begin(),(int (*)(int)) tolower);
if(str == "false" || str == "no") {
// Host memory not requested: instantiate n2r with device_allocator for
// both allocator arguments and pass false as the flag argument.
properties.emplace_back(Observable(std::move(n2r<device_allocator<ComplexType>>(
TG,info,cur,walker_type,false,device_allocator<ComplexType>{},
device_allocator<ComplexType>{},nave,block_size))));
} else
#endif
{
// Non-CUDA builds always take this path; CUDA builds take it for any
// other value of use_host_memory. n2r is instantiated with
// shared_allocator (one on TG.TG_local(), one on TG.Node()) and true as
// the flag argument.
properties.emplace_back(Observable(std::move(n2r<shared_allocator<ComplexType>>(
TG,info,cur,walker_type,true,shared_allocator<ComplexType>{TG.TG_local()},
shared_allocator<ComplexType>{TG.Node()},nave,block_size))));
}
}
cur = cur->next;
}
Expand Down Expand Up @@ -141,6 +165,12 @@ class FullObsHandler: public AFQMCInfo
sharedCMatrix_ref G2D(Buff.origin(), {nw, dm_size});
sharedCVector_ref DevOv(G4D.origin()+G4D.num_elements(), {2*nw});

// Rebuild the host-side tensor whenever its element count differs from
// G4D's. The new tensor takes G4D's extensions and is allocated through
// shared_allocator on TG.TG_local().
if(G4D_host.num_elements() != G4D.num_elements()) {
G4D_host = std::move(mpi3C4Tensor(G4D.extensions(),
shared_allocator<ComplexType>{TG.TG_local()}));
// Barrier over TG.TG_local() after the reallocation.
TG.TG_local().barrier();
}

stdCVector Xw(iextensions<1u>{nw});
stdCVector Ov(iextensions<1u>{2*nw});
stdCMatrix detR(DevdetR);
Expand Down Expand Up @@ -221,8 +251,17 @@ class FullObsHandler: public AFQMCInfo
Xw[iw] = CIcoeff * Ov[iw] * detR[iw][iref];
}

// MAM: Since most of the simpler estimators need G4D in host memory,
// I'm providing a copy of the structure there already
// Barrier over TG.TG_local() before the copy starts.
TG.TG_local().barrier();
// [i0,iN) is the element range assigned to this rank of TG.TG_local().
// NOTE(review): presumably FairDivideBoundary splits num_elements evenly
// across the ranks -- confirm against its definition.
int i0,iN;
std::tie(i0,iN) = FairDivideBoundary(TG.TG_local().rank(),int(G4D_host.num_elements()),
TG.TG_local().size());
// Copy iN-i0 elements, starting at offset i0, from G4D into G4D_host.
copy_n( make_device_ptr(G4D.origin())+i0, iN-i0, to_address(G4D_host.origin())+i0);
// Barrier over TG.TG_local() after the copy, before G4D_host is passed
// to the observables.
TG.TG_local().barrier();

//3. accumulate references
for(auto& v: properties) v.accumulate_reference(iav,iref,G4D,wgt,Xw,Ov,impsamp);
for(auto& v: properties) v.accumulate_reference(iav,iref,G4D,G4D_host,wgt,Xw,Ov,impsamp);

}
//4. accumulate block (normalize and accumulate sum over references)
Expand Down Expand Up @@ -258,6 +297,9 @@ class FullObsHandler: public AFQMCInfo
// buffer space
sharedCVector Buff;

// space for G in host space
mpi3C4Tensor G4D_host;

void set_buffer(size_t N) {
if(Buff.num_elements() < N)
Buff = std::move(sharedCVector(iextensions<1u>{N},make_localTG_allocator<ComplexType>(TG)));
Expand Down
20 changes: 19 additions & 1 deletion src/AFQMC/Estimators/Observables/Observable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
#include "boost/variant.hpp"

#include "AFQMC/Estimators/Observables/full1rdm.hpp"
#include "AFQMC/Estimators/Observables/diagonal2rdm.hpp"
#include "AFQMC/Estimators/Observables/n2r.hpp"

namespace qmcplusplus
{
Expand Down Expand Up @@ -58,7 +60,12 @@ class dummy_obs
* Variant class for observables.
* Defines a common interface for all observable classes.
*/
class Observable: public boost::variant<dummy::dummy_obs,full1rdm>
class Observable: public boost::variant<dummy::dummy_obs,full1rdm,diagonal2rdm,
n2r<shared_allocator<ComplexType>>
#if defined(ENABLE_CUDA)
,n2r<device_allocator<ComplexType>>
#endif
>
//,full2rdm,contract1rdm,contract2rdm>
{

Expand All @@ -71,6 +78,17 @@ class Observable: public boost::variant<dummy::dummy_obs,full1rdm>
// Wrap a full1rdm by moving it into the underlying variant; construction
// from a const reference (copy) is deleted.
explicit Observable(full1rdm && other) : variant(std::move(other)) {}
explicit Observable(full1rdm const& other) = delete;

// Wrap a diagonal2rdm by moving it into the underlying variant; copy
// construction is deleted.
explicit Observable(diagonal2rdm && other) : variant(std::move(other)) {}
explicit Observable(diagonal2rdm const& other) = delete;

// Wrap an n2r instantiated with shared_allocator<ComplexType>; move-only.
explicit Observable(n2r<shared_allocator<ComplexType>> && other) : variant(std::move(other)) {}
explicit Observable(n2r<shared_allocator<ComplexType>> const& other) = delete;

#if defined(ENABLE_CUDA)
// Wrap an n2r instantiated with device_allocator<ComplexType>; move-only.
// Declared only when ENABLE_CUDA is defined.
explicit Observable(n2r<device_allocator<ComplexType>> && other) : variant(std::move(other)) {}
explicit Observable(n2r<device_allocator<ComplexType>> const& other) = delete;
#endif

/*
explicit Observable( && other) : variant(std::move(other)) {}
explicit Observable( const& other) = delete;
Expand Down
Loading