Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft: Add SW4CK kernels #317

Draft
wants to merge 9 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/apps/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ blt_add_library(
DEL_DOT_VEC_2D-Cuda.cpp
DEL_DOT_VEC_2D-OMP.cpp
DEL_DOT_VEC_2D-OMPTarget.cpp
DEL_DOT_VEC_2D-Sycl.cpp
DEL_DOT_VEC_2D-Sycl.cpp
DIFFUSION3DPA.cpp
DIFFUSION3DPA-Cuda.cpp
DIFFUSION3DPA-Hip.cpp
Expand Down Expand Up @@ -86,7 +86,11 @@ blt_add_library(
PRESSURE-Cuda.cpp
PRESSURE-OMP.cpp
PRESSURE-OMPTarget.cpp
PRESSURE-Sycl.cpp
PRESSURE-Sycl.cpp
SW4CK_KERNEL_2.cpp
SW4CK_KERNEL_2-Seq.cpp
artv3 marked this conversation as resolved.
Show resolved Hide resolved
SW4CK_KERNEL_5.cpp
SW4CK_KERNEL_5-Seq.cpp
VOL3D.cpp
VOL3D-Seq.cpp
VOL3D-Hip.cpp
Expand Down
145 changes: 145 additions & 0 deletions src/apps/SW4CK_KERNEL_2-Seq.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
// and RAJA Performance Suite project contributors.
// See the RAJAPerf/LICENSE file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include "SW4CK_KERNEL_2.hpp"

#include "RAJA/RAJA.hpp"

#include "AppsData.hpp"

#include <iostream>

namespace rajaperf
{
namespace apps
{


//
// Runs the sequential variants of the SW4CK_KERNEL_2 kernel.
//
// vid       selects which variant to execute (Base_Seq, and Lambda_Seq /
//           RAJA_Seq when RUN_RAJA_SEQ is defined).
// tune_idx  tuning index; unused by this implementation.
//
// Each variant wraps its repetition loop (run_reps iterations) between
// startTimer() and stopTimer(). Only Base_Seq currently performs kernel work;
// an unrecognized variant id is reported through getCout().
//
void SW4CK_KERNEL_2::runSeqVariant(VariantID vid, size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
const Index_type run_reps = getRunReps();

// TODO: the original note here was left unfinished ("To be populated later
// with ..."); state what still needs to be populated.

// Not referenced directly in this function; presumably read by the
// SW4CK_KERNEL_2_* macros below -- TODO confirm.
char op = '=';

// Presumably declares the loop bounds (kstart/kend, jstart/jend,
// istart/ilast) and the data used by the body macros -- verify against
// SW4CK_KERNEL_2.hpp.
SW4CK_KERNEL_2_DATA_SETUP;


switch ( vid ) {

case Base_Seq : {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

// Reference implementation: plain triple-nested loop over k, j, i.
// NOTE(review): k and j run through kend / jend inclusive, whereas i stops
// before ilast - 1 and uses a different bound name (ilast vs. *end).
// Confirm the asymmetry is intended.
for(int k=kstart; k<kend+1; k++) {
for(int j=jstart; j<jend+1; j++) {
for(int i=istart; i<ilast - 1; i++) {

// The "ops" annotations below give the operation count of each body
// section followed by the running total.

// 5 ops
SW4CK_KERNEL_2_BODY_1;

// pp derivative (u)
// 53 ops, tot=58
SW4CK_KERNEL_2_BODY_2;

// qq derivative (u)
// 43 ops, tot=101
SW4CK_KERNEL_2_BODY_3;

// rr derivative (u)
// 5*11+14+14=83 ops, tot=184
SW4CK_KERNEL_2_BODY_4;

// rr derivative (v)
// 42 ops, tot=226
SW4CK_KERNEL_2_BODY_5;

// rr derivative (w)
// 43 ops, tot=269
SW4CK_KERNEL_2_BODY_6;

// pq-derivatives
// 38 ops, tot=307
SW4CK_KERNEL_2_BODY_7;

// qp-derivatives
// 38 ops, tot=345
SW4CK_KERNEL_2_BODY_8;

// pr-derivatives
// 130 ops., tot=475
SW4CK_KERNEL_2_BODY_9;

// rp derivatives
// 130 ops, tot=605
SW4CK_KERNEL_2_BODY_10;

// qr derivatives
// 82 ops, tot=687
SW4CK_KERNEL_2_BODY_11;

// rq derivatives
// 82 ops, tot=769
SW4CK_KERNEL_2_BODY_12;

// 4 ops, tot=773
SW4CK_KERNEL_2_BODY_13;

}
}
}



}
stopTimer();

break;
}

#if defined(RUN_RAJA_SEQ)
case Lambda_Seq : {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

// TODO: lambda implementation not written yet.
// NOTE(review): until it is, this variant times an empty repetition loop.

}
stopTimer();

break;
}

case RAJA_Seq : {

startTimer();
for (RepIndex_type irep = 0; irep < run_reps; ++irep) {

// TODO: RAJA implementation not written yet.
// NOTE(review): until it is, this variant times an empty repetition loop.

}
stopTimer();

break;
}
#endif // RUN_RAJA_SEQ

// Any other variant id (including Lambda_Seq / RAJA_Seq when RUN_RAJA_SEQ is
// not defined) is reported and otherwise ignored.
default : {
getCout() << "\n SW4CK_KERNEL_2 : Unknown variant id = " << vid << std::endl;
}

}

}

} // end namespace apps
} // end namespace rajaperf
95 changes: 95 additions & 0 deletions src/apps/SW4CK_KERNEL_2.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//
// Copyright (c) 2017-23, Lawrence Livermore National Security, LLC
// and RAJA Performance Suite project contributors.
// See the RAJAPerf/LICENSE file for details.
//
// SPDX-License-Identifier: (BSD-3-Clause)
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~//

#include "SW4CK_KERNEL_2.hpp"

#include "RAJA/RAJA.hpp"

#include "AppsData.hpp"
#include "common/DataUtils.hpp"

#include <cmath>


namespace rajaperf
{
namespace apps
{


//
// Constructs the kernel object and registers it with KernelBase under the
// Apps_SW4CK_KERNEL_2 id.
//
// Sets the default problem size (100*100*100) and default repetition count
// (100), declares one kernel launch per repetition, computes the checksum
// scale factor, and marks the Base_Seq, Lambda_Seq and RAJA_Seq variants as
// defined. The domain-based sizing (m_domain) and the bytes/FLOPs metrics are
// still commented out.
//
SW4CK_KERNEL_2::SW4CK_KERNEL_2(const RunParams& params)
: KernelBase(rajaperf::Apps_SW4CK_KERNEL_2, params)
{
setDefaultProblemSize(100*100*100); // See rzmax in ADomain struct
setDefaultReps(100);

// NOTE(review): rzmax is computed but never used while the m_domain lines
// below stay commented out, and the floating-point result of std::cbrt is
// implicitly converted to Index_type.
Index_type rzmax = std::cbrt(getTargetProblemSize())+1;
//m_domain = new ADomain(rzmax, /* ndims = */ 3);

//m_array_length = m_domain->nnalls;

//setActualProblemSize( m_domain->lpz+1 - m_domain->fpz );

//setItsPerRep( m_domain->lpz+1 - m_domain->fpz );
setKernelsPerRep(1);
// touched data size, not actual number of stores and loads
// setBytesPerRep( (1*sizeof(Real_type) + 0*sizeof(Real_type)) * getItsPerRep() +
//(0*sizeof(Real_type) + 3*sizeof(Real_type)) * (getItsPerRep() + 1+m_domain->jp+m_domain->kp) );

//setFLOPsPerRep(72 * (m_domain->lpz+1 - m_domain->fpz));

// NOTE(review): getActualProblemSize() is read here although the only
// setActualProblemSize() call in this constructor is commented out above.
// Confirm what value KernelBase returns at this point; a zero or sentinel
// value would make this scale factor meaningless.
checksum_scale_factor = 0.001 *
( static_cast<Checksum_type>(getDefaultProblemSize()) /
getActualProblemSize() );

// NOTE(review): the visible sequential implementation uses plain nested
// loops; confirm that the Teams feature flag is intended for this kernel.
setUsesFeature(Teams);

//Goal is to get the following three variants right first
// NOTE(review): the Lambda_Seq and RAJA_Seq cases in runSeqVariant contain no
// kernel work yet, so registering them here exposes empty variants.
setVariantDefined( Base_Seq );
setVariantDefined( Lambda_Seq );
setVariantDefined( RAJA_Seq );

// Remaining back ends are disabled until the sequential variants are done.
/*
setVariantDefined( Base_OpenMP );
setVariantDefined( Lambda_OpenMP );
setVariantDefined( RAJA_OpenMP );

setVariantDefined( Base_OpenMPTarget );
setVariantDefined( RAJA_OpenMPTarget );

setVariantDefined( Base_CUDA );
setVariantDefined( RAJA_CUDA );

setVariantDefined( Base_HIP );
setVariantDefined( RAJA_HIP );
*/
}

//
// Destructor. Currently releases nothing: the m_domain allocation in the
// constructor is commented out, so the matching delete is kept disabled too.
// Re-enable both together.
//
SW4CK_KERNEL_2::~SW4CK_KERNEL_2()
{
// delete m_domain;
}

//
// Per-variant setup hook, called with the variant id and tuning index.
//
// Intentionally empty for now: this kernel does not allocate or initialize
// any data yet. Both parameters are marked unused so the stub compiles
// without unused-parameter warnings; drop the markers once they are needed.
//
void SW4CK_KERNEL_2::setUp(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
// TODO: allocate and initialize the kernel data here.
}

//
// Checksum hook, called with the variant id and tuning index.
//
// The checksum accumulation is still disabled, so no checksum is recorded for
// any variant yet. Both parameters are kept named because the disabled line
// below needs them; they are explicitly discarded in the meantime to avoid
// unused-parameter warnings.
//
void SW4CK_KERNEL_2::updateChecksum(VariantID vid, size_t tune_idx)
{
(void) vid;
(void) tune_idx;

// TODO: enable once the kernel output array exists.
//checksum[vid][tune_idx] += calcChecksum(m_vol, m_array_length, checksum_scale_factor );
}

//
// Per-variant teardown hook, called with the variant id and tuning index.
//
// Nothing to release: neither argument is used and the body performs no work.
//
void SW4CK_KERNEL_2::tearDown(VariantID RAJAPERF_UNUSED_ARG(vid), size_t RAJAPERF_UNUSED_ARG(tune_idx))
{
}

} // end namespace apps
} // end namespace rajaperf
Loading
Loading