Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integration with ROCm 4.5.2 #179

Merged
merged 7 commits into from
May 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 7 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,27 +2,28 @@ cmake_minimum_required(VERSION 3.4.3)

project(OptSched)
jrbyrnes marked this conversation as resolved.
Show resolved Hide resolved

option(OPTSCHED_INCLUDE_TESTS "Generate build targets for the OptSched unit tests." ON)
option(OPTSCHED_ENABLE_AMDGPU "Build the AMDGPU code. Requires that the AMDGPU target is supported." OFF)
option(OPTSCHED_INCLUDE_TESTS "Generate build targets for the OptSched unit tests." OFF)
option(OPTSCHED_ENABLE_AMDGPU "Build the AMDGPU code. Requires that the AMDGPU target is supported." ON)

set(OPTSCHED_LIT_ARGS "-sv" CACHE STRING "Arguments to pass to lit")
set(OPTSCHED_EXTRA_LINK_LIBRARIES "" CACHE STRING "Extra link_libraries to pass to OptSched, ;-separated")
set(OPTSCHED_EXTRA_INCLUDE_DIRS "" CACHE STRING "Extra include_directories to pass to OptSched, ;-separated")
# To add OptSched debug defines, e.g.:
# '-DOPTSCHED_EXTRA_DEFINITIONS=-DIS_DEBUG_DEFS_AND_USES;-DIS_DEBUG_DEF_USE_COUNT'
set(OPTSCHED_EXTRA_DEFINITIONS "" CACHE STRING "Extra add_definitions to pass to OptSched, ;-separated")

if(TARGET LLVMCodeGen)
if(TARGET LLVMAMDGPUCodeGen OR TARGET LLVMCodeGen OR TARGET LLVMX86CodeGen)
set(llvm_subproject TRUE)
else()
set(llvm_subproject FALSE)
endif()

# Not supported
if(NOT llvm_subproject)
set(llvm_version 6.0)
if(OPTSCHED_ENABLE_AMDGPU)
set(llvm_version 9.0)
endif()

set(OPTSCHED_LLVM_VERSION ${llvm_version} CACHE STRING "The LLVM version to build OptSched with (independent build only)")

find_package(LLVM ${OPTSCHED_LLVM_VERSION} REQUIRED CONFIG)
Expand All @@ -36,8 +37,8 @@ endif()
if(OPTSCHED_ENABLE_AMDGPU)
if(NOT "AMDGPU" IN_LIST LLVM_ALL_TARGETS)
message(FATAL_ERROR "Trying to build the AMDGPU code, but AMDGPU is not supported by this build of LLVM")
elseif(LLVM_VERSION VERSION_LESS 7.0)
message(FATAL_ERROR "OptSched requries LLVM version >= 7.0 to build the AMDGPU scheduler.")
elseif(LLVM_VERSION VERSION_LESS 13.0)
message(FATAL_ERROR "OptSched requries LLVM version >= 13.0 to build the AMDGPU scheduler.")
endif()
endif()

Expand All @@ -62,10 +63,6 @@ include_directories(
add_definitions(${OPTSCHED_EXTRA_DEFINITIONS})
link_directories(${OPTSCHED_EXTRA_LINK_LIBRARIES})

if(LLVM_VERSION VERSION_LESS 7.0)
add_definitions(-DLLVM_DEBUG=DEBUG)
endif()
Quincunx271 marked this conversation as resolved.
Show resolved Hide resolved

if(NOT llvm_subproject)
include(GetLocalLLVM)

Expand Down Expand Up @@ -98,9 +95,5 @@ if(OPTSCHED_INCLUDE_TESTS)
COMMAND
${LLVM_TOOLS_BINARY_DIR}/clang ${CMAKE_CURRENT_SOURCE_DIR}/example/helloworld.cpp
-O3
-fplugin=$<TARGET_FILE:OptSched>
-mllvm -misched=optsched
-mllvm -enable-misched
-mllvm -optsched-cfg=${CMAKE_CURRENT_SOURCE_DIR}/example/optsched-cfg
)
endif()
13 changes: 9 additions & 4 deletions cmake/superbuild/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@
# - LLVM_PARALLEL_LINK_JOBS.
# - *_EXTRA_CMAKE_ARGS: Passes these CMake arguments on to the corresponding sub-build.
# - The flang builds can be configured to use a custom CMAKE_GENERATOR, separate from the superbuild's generator.


############################################
#
# As of 5/26/2022, the superbuild script is
# no longer guaranteed to work.
#
############################################

cmake_minimum_required(VERSION 3.7)

project(OptSched-SuperBuild)
Expand Down Expand Up @@ -99,8 +108,4 @@ add_test(NAME OptSched-CompileHelloWorld
COMMAND
${OPTSCHEDSUPER_LLVM_INSTALL_PREFIX}/bin/clang ${ROOT_DIR}/example/helloworld.cpp
-O3
-fplugin=${OPTSCHEDSUPER_LLVM_INSTALL_PREFIX}/lib/OptSched.so
-mllvm -misched=optsched
-mllvm -enable-misched
-mllvm -optsched-cfg=${ROOT_DIR}/example/optsched-cfg
)
126 changes: 126 additions & 0 deletions example/optsched-cfg/hotfuncs.ini
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,129 @@ module_big_step_utilities_em_calc_cq_ YES
mgau_eval YES
vector_gautbl_eval_logs3 YES
subvq_mgau_shortlist YES

# ======================================
# SPEC CPU2017 (fp rate only)
# ======================================

#503.bwaves_r Total 97.51% (95.88% selected)
mat_times_vec_ YES #68.22%
bi_cgstab_block_ YES #12.60%
shell_ YES #10.81%
jacobian_ YES # 4.25%
#flux_ YES # 1.63%

#507.cactuBSSN_r Total 90.09%
_ZL16ML_BSSN_RHS_BodyPK4_cGHiiPKdS3_S3_PKiS5_iPKPd YES #35.83%
_ZL19ML_BSSN_Advect_BodyPK4_cGHiiPKdS3_S3_PKiS5_iPKPd YES #30.82%
_ZL24ML_BSSN_constraints_BodyPK4_cGHiiPKdS3_S3_PKiS5_iPKPd YES # 8.90%
_ZL41ML_BSSN_convertToADMBaseDtLapseShift_BodyPK4_cGHiiPKdS3_S3_PKiS5_iPKPd YES # 8.45%
MoL_LinearCombination YES # 3.27%
_ZL29ML_BSSN_convertToADMBase_BodyPK4_cGHiiPKdS3_S3_PKiS5_iPKPd YES # 2.82%

#508.namd_r Total 99.34%
_Z22pairlist_from_pairlistddddPK8CompAtomPKtiPtdPd YES #18.81%
_ZN20ComputeNonbondedUtil26calc_pair_energy_fullelectEP9nonbonded YES #13.12%
_ZN20ComputeNonbondedUtil19calc_pair_fullelectEP9nonbonded YES # 9.52%
_ZN20ComputeNonbondedUtil16calc_pair_energyEP9nonbonded YES # 9.35%
_ZN20ComputeNonbondedUtil32calc_pair_energy_merge_fullelectEP9nonbonded YES # 9.11%
_ZN20ComputeNonbondedUtil25calc_pair_merge_fullelectEP9nonbonded YES # 7.00%
_ZN20ComputeNonbondedUtil9calc_pairEP9nonbonded YES # 6.98%
_ZN20ComputeNonbondedUtil26calc_self_energy_fullelectEP9nonbonded YES # 5.78%
_ZN20ComputeNonbondedUtil32calc_self_energy_merge_fullelectEP9nonbonded YES # 4.80%
_ZN20ComputeNonbondedUtil16calc_self_energyEP9nonbonded YES # 4.73%
_ZN20ComputeNonbondedUtil19calc_self_fullelectEP9nonbonded YES # 4.11%
_ZN20ComputeNonbondedUtil9calc_selfEP9nonbonded YES # 3.02%
_ZN20ComputeNonbondedUtil25calc_self_merge_fullelectEP9nonbonded YES # 3.01%

#510.parest_r Total 85.12% (83.38% selected)
_ZNK6dealii9SparseILUIdE5vmultIdEEvRNS_6VectorIT_EERKS5_ YES #29.73%
_ZNK6dealii12SparseMatrixIdE5vmultINS_6VectorIdEES4_EEvRT_RKT0_ YES #25.33%
_ZNK6dealii6VectorIdEmlIdEEdRKNS0_IT_EE YES #13.83%
_ZNK6dealii12SparseMatrixIdE17precondition_SSORIdEEvRNS_6VectorIT_EERKS5_dRKSt6vectorIjSaIjEE YES # 5.94%
_ZN6dealii11SolverGMRESINS_6VectorIdEEE5solveINS_12SparseMatrixIdEENS_9SparseILUIdEEEEvRKT_RS2_RKS2_RKT0_ YES # 3.79%
_ZN6dealii8FESystemILi3ELi3EE10initializeEv YES # 2.66%
_ZN12METomography5Slave5SlaveILi3EE12GlobalMatrix15assemble_matrixERKN6dealii18TriaActiveIteratorINS4_15DoFCellAccessorINS4_10DoFHandlerILi3ELi3EEEEEEERNS0_8internal13AssemblerDataILi3EEE YES # 2.10%
#_ZNK6dealii15SparsityPatternclEjj YES # 1.74%

#511.povray_r Total 82.24% (78.66% selected)
_ZN3povL23All_Plane_IntersectionsEPNS_13Object_StructEPNS_10Ray_StructEPNS_13istack_structE YES #16.55%
_ZN3povL31All_CSG_Intersect_IntersectionsEPNS_13Object_StructEPNS_10Ray_StructEPNS_13istack_structE YES #10.95%
_ZN3povL24All_Sphere_IntersectionsEPNS_13Object_StructEPNS_10Ray_StructEPNS_13istack_structE YES #10.72%
_ZN3pov17Check_And_EnqueueEPNS_21Priority_Queue_StructEPNS_16BBox_Tree_StructEPNS_19Bounding_Box_StructEPNS_14Rayinfo_StructE YES # 8.58%
_ZN3povL12Inside_PlaneEPdPNS_13Object_StructE YES # 4.83%
_ZN3pov12Ray_In_BoundEPNS_10Ray_StructEPNS_13Object_StructE YES # 4.55%
_ZN3pov19Intersect_BBox_TreeEPNS_16BBox_Tree_StructEPNS_10Ray_StructEPNS_10istk_entryEPPNS_13Object_StructEb YES # 4.09%
_ZN3pov6DNoiseEPdS0_ YES # 4.07%
_ZN3povL25All_Quadric_IntersectionsEPNS_13Object_StructEPNS_10Ray_StructEPNS_13istack_structE YES # 3.78%
_ZN3povL14Inside_QuadricEPdPNS_13Object_StructE YES # 2.93%
_ZN3pov13Inside_ObjectEPdPNS_13Object_StructE YES # 2.90%
_ZN3pov20Intersect_Light_TreeEPNS_10Ray_StructEPNS_24Project_Tree_Node_StructEiiPNS_10istk_entryEPPNS_13Object_StructEPNS_19Light_Source_StructE YES # 2.46%
_ZN3pov5NoiseEPdPNS_14Pattern_StructE YES # 2.25%
#_ZN3pov18MInvTransDirectionEPdS0_PNS_16Transform_StructE YES # 1.83%
#_ZN3pov12IntersectionEPNS_10istk_entryEPNS_13Object_StructEPNS_10Ray_StructE YES # 1.75%

#519.lbm_r
LBM_performStreamCollideTRT YES #99.04%

#526.blender_r Total 91.73% (84.1% selected)
_ZL9intersectILi1024EEiP8VBVHTreeP5Isect YES #61.79%
RE_rayobject_intersect YES #14.34%
add_radiance YES # 3.95%
ray_ao YES # 2.50%
#zbuffer_sss YES # 1.72%
#traverse_octree YES # 1.72%
#zbuffer_solid YES # 1.57%
#zbuf_part_project YES # 1.52%
#ray_shadow YES # 1.42%
#RE_rayobject_raycast YES # 1.20%

#527.cam4_r Total 25.90% (excluding libraries) 47.35% (including libraries)
#__fsd_pow_fma3 YES # 8.05% #From libpgmath.so(runtime shared library)
aer_rad_props_aer_rad_props_sw_ YES # 5.66%
#__fsd_exp_fma3 YES # 5.38% #From libpgmath.so(runtime shared library)
radsw_radcswmx_ YES # 5.14%
radae_radabs_ YES # 3.42%
zm_conv_ientropy_ YES # 3.28%
#__fd_log_1_avx512 YES # 3.25% #From libpgmath.so(runtime shared library)
radsw_raddedmx_ YES # 3.16%
tracer_data_vert_interp_ YES # 3.10%
#__memset_avx2_unaligned_erms YES # 2.64% #From standard library
radae_trcab_ YES # 2.14%
#__memcmp_avx2_movbe YES # 2.13% #From standard library

#538.imagick_r Total 96.98%
MorphologyApply YES #45.04%
MeanShiftImage YES #21.48%
SetPixelCacheNexusPixels YES #16.88%
GetVirtualPixelsFromNexus YES # 9.65%
GetOneCacheViewVirtualPixel YES # 3.93%

#544.nab_r Total 89.04% (excluding libraries) 98.63% (including libraries)
mme34 YES #66.21%
nbond YES # 8.45%
searchkdtree YES # 7.24%
heapsort_pairs YES # 7.14%
#__ieee754_log_fma YES # 4.21%
#__ieee754_exp_fma YES # 3.05%
#exp@@GLIBC_2.29 YES # 2.33%

#549.fotonik3d_r Total 99.18%
upml_mod_upml_updatee_simple_ YES #26.47%
upml_mod_upml_updateh_ YES #24.04%
material_mod_mat_updatee_ YES #21.26%
update_mod_updateh_ YES #17.24%
power_mod_power_dft_ YES #10.17%

#554.roms_r.txt Total 80.63% (excluding library) 84.62% (including library)
step2d_mod_step2d_tile_ YES #27.67%
pre_step3d_mod_pre_step3d_tile_ YES #10.74%
lmd_skpp_mod_lmd_skpp_tile_ YES # 7.19%
step3d_t_mod_step3d_t_tile_ YES # 6.54%
rhs3d_mod_rhs3d_tile_ YES # 6.14%
t3dmix_mod_t3dmix2_tile_ YES # 6.05%
step3d_uv_mod_step3d_uv_tile_ YES # 5.93%
#__fsd_exp_fma3 YES # 3.99% #From libpgmath.so(runtime shared library)
rho_eos_mod_rho_eos_tile_ YES # 3.73%
prsgrd_mod_prsgrd_tile_ YES # 3.62%
uv3dmix_mod_uv3dmix2_tile_ YES # 3.02%
Loading