Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HIP updates #63

Merged
merged 6 commits into from
Jan 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions arch/eurohpc/lumi/cray-gpu/16.0.1/env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# (C) Copyright 1988- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

# Source me to get the correct configure/build/run environment

# Store tracing and disable (module is *way* too verbose).
# `$-` holds the currently active shell option letters; the substitution
# removes every character except `x`, so tracing_ ends up as "x" when
# `set -x` is active and empty otherwise. stderr of the group is discarded.
{ tracing_=${-//[^x]/}; set +x; } 2>/dev/null

# Load a single environment module, echoing the command before running it.
#   $1 - module to load, optionally with a version (e.g. "cce/16.0.1")
module_load() {
  echo "+ module load $1"
  # Quoted so the name reaches `module` as exactly one word
  # (no word splitting or pathname expansion of the argument).
  module load "$1"
}
# Unload a single environment module, echoing the command before running it.
#   $1 - module to unload, optionally with a version (e.g. "cce/16.0.1")
module_unload() {
  echo "+ module unload $1"
  # Quoted so the name reaches `module` as exactly one word
  # (no word splitting or pathname expansion of the argument).
  module unload "$1"
}

# Unload to be certain: reset the module environment before loading anything.
module reset

# Load modules, one module_load call per module, in this fixed order.
# NOTE(review): the load order may be significant (e.g. LUMI/partition before
# the compiler and library modules) — confirm before reordering.
module_load LUMI/23.09
module_load partition/G
module_load PrgEnv-cray/8.4.0
module_load cce/16.0.1
module_load cray-mpich/8.1.27
module_load craype-network-ofi
module_load rocm/5.2.3
module_load buildtools/23.09
module_load Boost/1.82.0-cpeCray-23.09
module_load cray-python/3.10.10
module_load cray-hdf5/1.12.2.7
module_load craype-x86-trento
module_load craype-accel-amd-gfx90a

# Print the modules that are loaded at this point.
module list

# Tracing is switched on unconditionally here; presumably so that the compiler
# export below is echoed — TODO confirm intent.
set -x

# Point CC / CXX / FC at cc, CC and ftn for the C, C++ and Fortran compilers.
export CC=cc CXX=CC FC=ftn

# Restore tracing to stored setting (tracing_ is non-empty only if `set -x`
# was active when this file was sourced).
{ if [[ -n "$tracing_" ]]; then set -x; else set +x; fi } 2>/dev/null

# Toolchain file to use, given as a relative path.
# NOTE(review): presumably resolved by the consumer relative to this arch
# directory rather than the current working directory — verify.
export ECBUILD_TOOLCHAIN="./toolchain.cmake"
49 changes: 49 additions & 0 deletions arch/eurohpc/lumi/cray-gpu/16.0.1/toolchain.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# (C) Copyright 1988- ECMWF.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction.

####################################################################
# COMPILER
####################################################################

# ECBUILD_FIND_MPI is switched off as a normal variable, while
# ENABLE_USE_STMT_FUNC is pre-set in the cache.
# NOTE(review): presumably MPI is provided through the Cray compiler
# wrappers (cc/CC/ftn), which is why MPI detection is disabled — confirm.
set(ECBUILD_FIND_MPI OFF)
set(ENABLE_USE_STMT_FUNC ON CACHE STRING "")

####################################################################
# OpenMP FLAGS
####################################################################

# Pre-seed the cache entries that CMake's FindOpenMP module works with
# (OpenMP_<lang>_FLAGS, OpenMP_<lang>_LIB_NAMES, OpenMP_<libname>_LIBRARY),
# grouped per language.
set(OpenMP_C_FLAGS "-fopenmp" CACHE STRING "")
set(OpenMP_C_LIB_NAMES "craymp" CACHE STRING "")

set(OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "")
set(OpenMP_CXX_LIB_NAMES "craymp" CACHE STRING "")

set(OpenMP_Fortran_FLAGS "-homp -hlist=aimd" CACHE STRING "")
set(OpenMP_Fortran_LIB_NAMES "craymp" CACHE STRING "")

# Location of the "craymp" library named above; the path is tied to the
# cce/16.0.1 installation.
set(OpenMP_craymp_LIBRARY
    "/opt/cray/pe/cce/16.0.1/cce/x86_64/lib/libcraymp.so"
    CACHE STRING "")

####################################################################
# OpenACC FLAGS
####################################################################

# The same OpenACC flag is used for all three languages.
set(OpenACC_C_FLAGS "-hacc" CACHE STRING "")
set(OpenACC_CXX_FLAGS "-hacc" CACHE STRING "")
set(OpenACC_Fortran_FLAGS "-hacc" CACHE STRING "")

####################################################################
# Compiler FLAGS
####################################################################

# General Fortran flags (add to default), given as a single string.
# NOTE(review): "-Wl, --as-needed" has a space after the comma, so it would be
# split into the two arguments "-Wl," and "--as-needed" on the command line;
# "-Wl,--as-needed" may have been intended. Kept unchanged here — confirm the
# effect on linking before editing.
set(ECBUILD_Fortran_FLAGS "-hcontiguous -hbyteswapio -Wl, --as-needed")

# Fortran flags stored under the _BIT suffix.
# NOTE(review): presumably applied for a "BIT" build type — confirm.
set(ECBUILD_Fortran_FLAGS_BIT
    "-O3 -hfp1 -hscalar3 -hvector3 -G2 -haggress -DNDEBUG")

# GPU architecture(s) to build for.
set(GPU_TARGETS "gfx90a" CACHE STRING "")

# Select the OpenMP pragma to be used: the "bind(parallel)" variant of the
# target loop construct is explicitly marked as unavailable in the cache.
set(HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL OFF CACHE BOOL "")
12 changes: 12 additions & 0 deletions cmake/features/OMP.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
if( HAVE_OMP )

if( NOT DEFINED HAVE_OMP_TARGET_TEAMS_DISTRIBUTE )

try_compile(
HAVE_OMP_TARGET_TEAMS_DISTRIBUTE
${CMAKE_CURRENT_BINARY_DIR}
Expand All @@ -11,6 +13,10 @@ if( HAVE_OMP )
ecbuild_debug_var( HAVE_OMP_TARGET_TEAMS_DISTRIBUTE )
ecbuild_debug_var( _HAVE_OMP_TARGET_TEAMS_DISTRIBUTE_OUTPUT )

endif()

if( NOT DEFINED HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL )

try_compile(
HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL
${CMAKE_CURRENT_BINARY_DIR}
Expand All @@ -22,6 +28,10 @@ if( HAVE_OMP )
ecbuild_debug_var( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL )
ecbuild_debug_var( _HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL_OUTPUT )

endif()

if( NOT DEFINED HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD )

try_compile(
HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD
${CMAKE_CURRENT_BINARY_DIR}
Expand All @@ -32,6 +42,8 @@ if( HAVE_OMP )

ecbuild_debug_var( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD )
ecbuild_debug_var( _HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD_OUTPUT )

endif()

if( HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_PARALLEL OR HAVE_OMP_TARGET_LOOP_CONSTRUCT_BIND_THREAD )
set( HAVE_OMP_TARGET_LOOP_CONSTRUCT ON CACHE BOOL "OpenMP target teams loop is supported" )
Expand Down
18 changes: 11 additions & 7 deletions src/cloudsc_hip/cloudsc/cloudsc_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,9 +457,9 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) {
double t2 = omp_get_wtime();

printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks);
printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s\n",
"NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s");
double zfrac, zmflops;
printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s%+10s\n",
"NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s", "col/s");
double zfrac, zmflops, zthrput;
for (int t = 0; t < numthreads; t++) {
const double tloc = zinfo[0][t];
const int coreid = (int) zinfo[1][t];
Expand All @@ -468,21 +468,25 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) {
zfrac = (double)igpc / (double)numcols;
if (tloc > 0.0) {
zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc;
zthrput = (double)numcols/tloc;
} else {
zmflops = 0.;
zthrput = 0.0;
}
printf(" %10d%10d%10d%10d%10d %4d : %10d%10d @ core#\n",
numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops);
printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d @ core#\n",
numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops, (int)zthrput);
}
double tdiff = t2 - t1;
zfrac = 1.0;
if (tdiff > 0.0) {
zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff;
zthrput = (double)numcols/tdiff;
} else {
zmflops = 0.0;
zthrput = 0.0;
}
printf(" %10d%10d%10d%10d%10d %4d : %10d%10d TOTAL\n",
numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops);
printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n",
numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput);

cloudsc_validate(klon, nlev, nclv, numcols, nproma,
plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif,
Expand Down
18 changes: 11 additions & 7 deletions src/cloudsc_hip/cloudsc/cloudsc_driver_hoist.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -498,9 +498,9 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) {
double t2 = omp_get_wtime();

printf(" NUMOMP=%d, NGPTOT=%d, NPROMA=%d, NGPBLKS=%d\n", numthreads, numcols, nproma, nblocks);
printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s\n",
"NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s");
double zfrac, zmflops;
printf(" %+10s%+10s%+10s%+10s%+10s %+4s : %+10s%+10s%+10s\n",
"NUMOMP", "NGPTOT", "#GP-cols", "#BLKS", "NPROMA", "tid#", "Time(msec)", "MFlops/s", "col/s");
double zfrac, zmflops, zthrput;
for (int t = 0; t < numthreads; t++) {
const double tloc = zinfo[0][t];
const int coreid = (int) zinfo[1][t];
Expand All @@ -509,21 +509,25 @@ void cloudsc_driver(int numthreads, int numcols, int nproma) {
zfrac = (double)igpc / (double)numcols;
if (tloc > 0.0) {
zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tloc;
zthrput = (double)numcols/tloc;
} else {
zmflops = 0.;
zthrput = 0.0;
}
printf(" %10d%10d%10d%10d%10d %4d : %10d%10d @ core#\n",
numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops);
printf(" %10d%10d%10d%10d%10d %4d : %10d%10d%10d @ core#\n",
numthreads, numcols, igpc, icalls, nproma, t, (int)(tloc * 1000.), (int)zmflops, (int)zthrput);
}
double tdiff = t2 - t1;
zfrac = 1.0;
if (tdiff > 0.0) {
zmflops = 1.0e-06 * zfrac * zhpm * ((double)numcols / 100.) / tdiff;
zthrput = (double)numcols/tdiff;
} else {
zmflops = 0.0;
zthrput = 0.0;
}
printf(" %10d%10d%10d%10d%10d %4d : %10d%10d TOTAL\n",
numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops);
printf(" %10d%10d%10d%10d%10d %4d: %10d%10d%10d TOTAL\n",
numthreads, numcols, numcols, nblocks, nproma, -1, (int)(tdiff * 1000.), (int)zmflops, (int)zthrput);

cloudsc_validate(klon, nlev, nclv, numcols, nproma,
plude, pcovptot, prainfrac_toprfz, pfsqlf, pfsqif,
Expand Down
11 changes: 10 additions & 1 deletion src/cloudsc_hip/cloudsc/load_state.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
/*
* (C) Copyright 1988- ECMWF.
*
* This software is licensed under the terms of the Apache Licence Version 2.0
* which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
* In applying this licence, ECMWF does not waive the privileges and immunities
* granted to it by virtue of its status as an intergovernmental organisation
* nor does it submit to any jurisdiction.
*/

#include "load_state.h"
//#include "yomcst_c.hpp"
#include <iostream>

#include <math.h>
Expand Down
Loading