Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial LCAO vgl batched implementation using GEMM #4407

Merged
merged 31 commits into from
Jan 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
7aa3e32
addining initial lcao mw function from spo
amandadumi Sep 7, 2022
2d53994
comment framework, build vgl temp matrix
amandadumi Sep 7, 2022
04b0615
Add blas call and block diagonal MO coeffs
shivupa Sep 8, 2022
1fb2dc9
adding vglanddetratiosgrads function and notes
amandadumi Sep 14, 2022
b6078dd
Add impl2 and move transpose of data there
shivupa Sep 14, 2022
069bdb1
Format and delete comments
shivupa Sep 14, 2022
8cd6b4c
cleaning functions of extra comments and declarations
amandadumi Sep 28, 2022
a2e10c3
Adding mw_evaluateVGL to SoaLocalizedBasis
shivupa Oct 19, 2022
9439898
resolving compilation errors
amandadumi Oct 19, 2022
1fa8711
rough gemm information
amandadumi Oct 20, 2022
e5a8413
Fix compilation
shivupa Oct 25, 2022
3ec7f15
test mw start
amandadumi Oct 26, 2022
523c670
lcao_vgl_mw from Amanda
kgasperich Jan 6, 2023
9fca749
resized some vectors and added a few reference values
kgasperich Jan 10, 2023
a0e58cf
added second walker to test
kgasperich Jan 10, 2023
e5b485b
added larger test and fixed gemm call
kgasperich Jan 12, 2023
1a51180
fixed array size (AO/MO problem)
kgasperich Jan 12, 2023
2737c07
fixed grad copy
kgasperich Jan 12, 2023
5ea35e7
minor cleaning
kgasperich Jan 12, 2023
d25acfb
renaming
kgasperich Jan 12, 2023
0e0fa6b
clang-format
kgasperich Jan 12, 2023
32758f7
add GTO test
kgasperich Jan 13, 2023
bebebcf
Merge pull request #13 from kgasperich/lcao_vgl_offload_3
amandadumi Jan 13, 2023
ddb042f
comment removal and impl2 -> mw_impl function rename
amandadumi Jan 20, 2023
2b170bf
Merge branch 'develop' into lcao_vgl_offload
ye-luo Jan 21, 2023
838af8d
Remove temp_vgl
ye-luo Jan 22, 2023
602599f
Simplify LCAOrbitalSet::mw_evaluateVGLImplGEMM
ye-luo Jan 22, 2023
e35086d
Replace xmlChar with castCharToXMLChar
ye-luo Jan 26, 2023
c4b54d6
Remove typedef MWVGLArray in SPOSet.
ye-luo Jan 26, 2023
c43f8f3
Flip if case and reduce lines.
ye-luo Jan 26, 2023
7f96fa1
Merge branch 'develop' into lcao_vgl_offload
prckent Jan 26, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 11 additions & 4 deletions src/QMCWaveFunctions/BasisSetBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

#include "Particle/ParticleSet.h"
#include "QMCWaveFunctions/OrbitalSetTraits.h"
#include "OMPTarget/OffloadAlignedAllocators.hpp"


namespace qmcplusplus
{
Expand Down Expand Up @@ -128,10 +130,13 @@ struct BasisSetBase : public OrbitalSetTraits<T>
template<typename T>
struct SoaBasisSetBase
{
using value_type = T;
using vgl_type = VectorSoaContainer<T, OHMMS_DIM + 2>;
using vgh_type = VectorSoaContainer<T, 10>;
using vghgh_type = VectorSoaContainer<T, 20>;
using value_type = T;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Type and _type are redundant here the naming convention is types are leading capital mixed case so:
Value implies a type already.
Other types here would be properly named Value Vgl, Vgh, Vghgh

Copy link
Contributor

@ye-luo ye-luo Jan 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess there SoaBasisSetBase::value_type being used. Need to do renaming outside this PR.

using vgl_type = VectorSoaContainer<T, OHMMS_DIM + 2>;
using vgh_type = VectorSoaContainer<T, 10>;
using vghgh_type = VectorSoaContainer<T, 20>;
using ValueType = QMCTraits::ValueType;
using OffloadMWVGLArray = Array<ValueType, 3, OffloadPinnedAllocator<ValueType>>; // [VGL, walker, Orbs]

///size of the basis set
int BasisSetSize;

Expand All @@ -143,6 +148,8 @@ struct SoaBasisSetBase

//Evaluates value, gradient, and laplacian for electron "iat". Parks them into a temporary data structure "vgl".
virtual void evaluateVGL(const ParticleSet& P, int iat, vgl_type& vgl) = 0;
//Evaluates value, gradient, and laplacian for electron "iat". places them in a offload array for batched code.
virtual void mw_evaluateVGL(const RefVectorWithLeader<ParticleSet>& P_list, int iat, OffloadMWVGLArray& vgl) = 0;
//Evaluates value, gradient, and Hessian for electron "iat". Parks them into a temporary data structure "vgh".
virtual void evaluateVGH(const ParticleSet& P, int iat, vgh_type& vgh) = 0;
//Evaluates value, gradient, and Hessian, and Gradient Hessian for electron "iat". Parks them into a temporary data structure "vghgh".
Expand Down
89 changes: 89 additions & 0 deletions src/QMCWaveFunctions/LCAO/LCAOrbitalSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,67 @@ void LCAOrbitalSet::evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi,
}
}

void LCAOrbitalSet::mw_evaluateVGL(const RefVectorWithLeader<SPOSet>& spo_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
const RefVector<ValueVector>& psi_v_list,
const RefVector<GradVector>& dpsi_v_list,
const RefVector<ValueVector>& d2psi_v_list) const
{
OffloadMWVGLArray phi_vgl_v;
phi_vgl_v.resize(DIM_VGL, spo_list.size(), OrbitalSetSize);
mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);

const size_t output_size = phi_vgl_v.size(2);
const size_t nw = phi_vgl_v.size(1);

//TODO: make this cleaner?
for (int iw = 0; iw < nw; iw++)
{
std::copy_n(phi_vgl_v.data_at(0, iw, 0), output_size, psi_v_list[iw].get().data());
std::copy_n(phi_vgl_v.data_at(4, iw, 0), output_size, d2psi_v_list[iw].get().data());
// grads are [dim, walker, orb] in phi_vgl_v
// [walker][orb, dim] in dpsi_v_list
for (size_t idim = 0; idim < DIM; idim++)
BLAS::copy(output_size, phi_vgl_v.data_at(idim + 1, iw, 0), 1, &dpsi_v_list[iw].get().data()[0][idim], DIM);
}
}

void LCAOrbitalSet::mw_evaluateVGLImplGEMM(const RefVectorWithLeader<SPOSet>& spo_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
OffloadMWVGLArray& phi_vgl_v) const
{
// [5][NW][NumAO]
OffloadMWVGLArray basis_mw;
PDoakORNL marked this conversation as resolved.
Show resolved Hide resolved
basis_mw.resize(DIM_VGL, spo_list.size(), BasisSetSize);

if (Identity)
{
myBasisSet->mw_evaluateVGL(P_list, iat, basis_mw);
// output_size can be smaller than BasisSetSize
const size_t output_size = phi_vgl_v.size(2);
const size_t nw = phi_vgl_v.size(1);

for (size_t idim = 0; idim < DIM_VGL; idim++)
for (int iw = 0; iw < nw; iw++)
std::copy_n(basis_mw.data_at(idim, iw, 0), output_size, phi_vgl_v.data_at(idim, iw, 0));
}
else
{
const size_t requested_orb_size = phi_vgl_v.size(2);
assert(requested_orb_size <= OrbitalSetSize);
ValueMatrix C_partial_view(C->data(), requested_orb_size, BasisSetSize);
myBasisSet->mw_evaluateVGL(P_list, iat, basis_mw);
BLAS::gemm('T', 'N',
requested_orb_size, // MOs
spo_list.size() * DIM_VGL, // walkers * DIM_VGL
BasisSetSize, // AOs
1, C_partial_view.data(), BasisSetSize, basis_mw.data(), BasisSetSize, 0, phi_vgl_v.data(),
requested_orb_size);
}
}

void LCAOrbitalSet::evaluateDetRatios(const VirtualParticleSet& VP,
ValueVector& psi,
const ValueVector& psiinv,
Expand All @@ -365,6 +426,34 @@ void LCAOrbitalSet::evaluateDetRatios(const VirtualParticleSet& VP,
}
}

void LCAOrbitalSet::mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSet>& spo_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
const std::vector<const ValueType*>& invRow_ptr_list,
OffloadMWVGLArray& phi_vgl_v,
std::vector<ValueType>& ratios,
std::vector<GradType>& grads) const
{
assert(this == &spo_list.getLeader());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this needs test coverage.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will add once I have the shared resource set up.

assert(phi_vgl_v.size(0) == DIM_VGL);
assert(phi_vgl_v.size(1) == spo_list.size());

mw_evaluateVGLImplGEMM(spo_list, P_list, iat, phi_vgl_v);
// Device data of phi_vgl_v must be up-to-date upon return
phi_vgl_v.updateTo();

const size_t nw = spo_list.size();
const size_t norb_requested = phi_vgl_v.size(2);
for (int iw = 0; iw < nw; iw++)
{
ratios[iw] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(0, iw, 0), norb_requested);
GradType dphi;
for (size_t idim = 0; idim < DIM; idim++)
dphi[idim] = simd::dot(invRow_ptr_list[iw], phi_vgl_v.data_at(idim + 1, iw, 0), norb_requested) / ratios[iw];
grads[iw] = dphi;
}
}

void LCAOrbitalSet::evaluateVGH(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, HessVector& dhpsi)
{
//TAKE CARE OF IDENTITY
Expand Down
25 changes: 24 additions & 1 deletion src/QMCWaveFunctions/LCAO/LCAOrbitalSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,27 @@ struct LCAOrbitalSet : public SPOSet

void evaluateVGL(const ParticleSet& P, int iat, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) override;


void mw_evaluateVGL(const RefVectorWithLeader<SPOSet>& spo_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
const RefVector<ValueVector>& psi_v_list,
const RefVector<GradVector>& dpsi_v_list,
const RefVector<ValueVector>& d2psi_v_list) const override;

void evaluateDetRatios(const VirtualParticleSet& VP,
ValueVector& psi,
const ValueVector& psiinv,
std::vector<ValueType>& ratios) override;

void mw_evaluateVGLandDetRatioGrads(const RefVectorWithLeader<SPOSet>& spo_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
const std::vector<const ValueType*>& invRow_ptr_list,
OffloadMWVGLArray& phi_vgl_v,
std::vector<ValueType>& ratios,
std::vector<GradType>& grads) const override;

void evaluateVGH(const ParticleSet& P,
int iat,
ValueVector& psi,
Expand Down Expand Up @@ -219,7 +235,7 @@ struct LCAOrbitalSet : public SPOSet
vghgh_type Tempghv;

private:
///helper functions to handl Identity
///helper functions to handle Identity
void evaluate_vgl_impl(const vgl_type& temp, ValueVector& psi, GradVector& dpsi, ValueVector& d2psi) const;

void evaluate_vgl_impl(const vgl_type& temp,
Expand All @@ -228,6 +244,8 @@ struct LCAOrbitalSet : public SPOSet
GradMatrix& dlogdet,
ValueMatrix& d2logdet) const;
///These two functions unpack the data in vgh_type temp object into wavefunction friendly data structures.


///This unpacks temp into vectors psi, dpsi, and d2psi.
void evaluate_vgh_impl(const vgh_type& temp, ValueVector& psi, GradVector& dpsi, HessVector& d2psi) const;

Expand Down Expand Up @@ -266,6 +284,11 @@ struct LCAOrbitalSet : public SPOSet

///Unpacks data in vgl object and calculates/places ionic gradient of a single row (phi_j(r)) into dlogdet.
void evaluate_ionderiv_v_row_impl(const vgl_type& temp, GradVector& dlogdet) const;

void mw_evaluateVGLImplGEMM(const RefVectorWithLeader<SPOSet>& spo_list,
const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
OffloadMWVGLArray& phi_vgl_v) const;
};
} // namespace qmcplusplus
#endif
31 changes: 31 additions & 0 deletions src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,37 @@ void SoaLocalizedBasisSet<COT, ORBT>::evaluateVGL(const ParticleSet& P, int iat,
}
}

template<class COT, typename ORBT>
void SoaLocalizedBasisSet<COT, ORBT>::mw_evaluateVGL(const RefVectorWithLeader<ParticleSet>& P_list,
int iat,
OffloadMWVGLArray& vgl_v)
{
for (size_t iw = 0; iw < P_list.size(); iw++)
{
const auto& IonID(ions_.GroupID);
PDoakORNL marked this conversation as resolved.
Show resolved Hide resolved
const auto& coordR = P_list[iw].activeR(iat);
const auto& d_table = P_list[iw].getDistTableAB(myTableIndex);
const auto& dist = (P_list[iw].getActivePtcl() == iat) ? d_table.getTempDists() : d_table.getDistRow(iat);
const auto& displ = (P_list[iw].getActivePtcl() == iat) ? d_table.getTempDispls() : d_table.getDisplRow(iat);

PosType Tv;

// number of walkers * BasisSetSize
auto stride = vgl_v.size(1) * BasisSetSize;
assert(BasisSetSize == vgl_v.size(2));
vgl_type vgl_iw(vgl_v.data_at(0, iw, 0), BasisSetSize, stride);

for (int c = 0; c < NumCenters; c++)
{
Tv[0] = (ions_.R[c][0] - coordR[0]) - displ[c][0];
Tv[1] = (ions_.R[c][1] - coordR[1]) - displ[c][1];
Tv[2] = (ions_.R[c][2] - coordR[2]) - displ[c][2];
LOBasisSet[IonID[c]]->evaluateVGL(P_list[iw].getLattice(), dist[c], displ[c], BasisOffset[c], vgl_iw, Tv);
}
}
}


template<class COT, typename ORBT>
void SoaLocalizedBasisSet<COT, ORBT>::evaluateVGH(const ParticleSet& P, int iat, vgh_type& vgh)
{
Expand Down
18 changes: 12 additions & 6 deletions src/QMCWaveFunctions/LCAO/SoaLocalizedBasisSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <memory>
#include "QMCWaveFunctions/BasisSetBase.h"
#include "OMPTarget/OffloadAlignedAllocators.hpp"

namespace qmcplusplus
{
Expand All @@ -36,12 +37,15 @@ template<class COT, typename ORBT>
class SoaLocalizedBasisSet : public SoaBasisSetBase<ORBT>
{
public:
using RealType = typename COT::RealType;
using BaseType = SoaBasisSetBase<ORBT>;
using vgl_type = typename BaseType::vgl_type;
using vgh_type = typename BaseType::vgh_type;
using vghgh_type = typename BaseType::vghgh_type;
using PosType = typename ParticleSet::PosType;
using RealType = typename COT::RealType;
using BaseType = SoaBasisSetBase<ORBT>;
using ValueType = QMCTraits::ValueType;

using vgl_type = typename BaseType::vgl_type;
using vgh_type = typename BaseType::vgh_type;
using vghgh_type = typename BaseType::vghgh_type;
using PosType = typename ParticleSet::PosType;
using OffloadMWVGLArray = Array<ValueType, 3, OffloadPinnedAllocator<ValueType>>; // [VGL, walker, Orbs]

using BaseType::BasisSetSize;

Expand Down Expand Up @@ -104,6 +108,8 @@ class SoaLocalizedBasisSet : public SoaBasisSetBase<ORBT>
*/
void evaluateVGL(const ParticleSet& P, int iat, vgl_type& vgl) override;

void mw_evaluateVGL(const RefVectorWithLeader<ParticleSet>& P_list, int iat, OffloadMWVGLArray& vgl) override;

/** compute VGH
* @param P quantum particleset
* @param iat active particle
Expand Down
Loading