diff --git a/CMakeLists.txt b/CMakeLists.txt index 1a3d5fcab1..5cfd0c43ec 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,8 @@ endif() option(BUILD_SHARED_LIBS "Build shared libraries." ON) option(DLAF_WITH_OPENMP "${DLAF_WITH_OPENMP_DESCRIPTION}" ${DLAF_WITH_OPENMP_DEFAULT}) -option(DLAF_WITH_MKL "Enable MKL as provider for LAPACK" OFF) +option(DLAF_WITH_MKL "Enable oneMKL as provider for BLAS and LAPACK" OFF) +option(DLAF_WITH_MKL_LEGACY "Enable MKL as provider for BLAS and LAPACK" OFF) option(DLAF_WITH_CUDA "Enable CUDA support" OFF) option(DLAF_WITH_HIP "Enable HIP support" OFF) cmake_dependent_option(DLAF_WITH_CUDA_MPI_RDMA "Enable MPI CUDA RDMA" OFF "DLAF_WITH_CUDA" OFF) @@ -48,7 +49,11 @@ option(DLAF_BUILD_DOC "Build documentation" OFF) option(DLAF_WITH_PRECOMPILED_HEADERS "Use precompiled headers." OFF) option(DLAF_WITH_SCALAPACK "Build ScaLAPACK-like C API (requires ScaLAPACK)" OFF) -if(DLAF_WITH_MKL) +if(DLAF_WITH_MKL AND DLAF_WITH_MKL_LEGACY) + message(SEND_ERROR "DLAF_WITH_MKL and DLAF_WITH_MKL_LEGACY are mutually exclusive.") +endif() + +if(DLAF_WITH_MKL OR DLAF_WITH_MKL_LEGACY) # When using MKL there is no need to set the number of threads with # omp_set_num_threads; it's sufficient to use MKL's own mechanisms. 
set(DLAF_WITH_OPENMP OFF CACHE BOOL "${DLAF_WITH_OPENMP_DESCRIPTION}" FORCE) @@ -130,8 +135,23 @@ if(DLAF_WITH_OPENMP) endif() # ----- LAPACK/SCALAPACK -if(DLAF_WITH_MKL) - find_package(MKL REQUIRED) +if(DLAF_WITH_MKL) # oneMKL + set(MKL_INTERFACE "lp64" CACHE STRING "") + set(MKL_THREADING "sequential" CACHE STRING "") + set(MKL_MPI "mpich" CACHE STRING "") + + find_package(MKL CONFIG REQUIRED) + + set(LAPACK_FOUND TRUE) + add_library(DLAF::LAPACK INTERFACE IMPORTED GLOBAL) + target_link_libraries(DLAF::LAPACK INTERFACE MKL::MKL) + + if(DLAF_WITH_SCALAPACK) + set(SCALAPACK_FOUND TRUE) + add_library(DLAF::SCALAPACK INTERFACE IMPORTED GLOBAL) + endif() +elseif(DLAF_WITH_MKL_LEGACY) # MKL (deprecated) + find_package(MKL MODULE REQUIRED) set(MKL_LAPACK_TARGET "mkl::mkl_intel_32bit_seq_dyn" CACHE STRING "MKL LAPACK target (see FindMKL for details)" diff --git a/README.md b/README.md index 88ff0a40b8..aee9d2c8d6 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ This will add a new repository with namespace `dlaf`. Example installation: -`spack install dla-future ^intel-mkl` +`spack install dla-future ^intel-oneapi-mkl` Or you can go even further with a more detailed spec like this one, which builds dla-future in debug mode, using the clang compiler, specifying that the pika on which it depends has to be built in debug mode too, and that we want to use MPICH as MPI implementation, without fortran support (because clang does not support it). 
@@ -59,7 +59,8 @@ CMake option | Values | Note `pika_DIR` | CMAKE:PATH | Location of the pika CMake-config file `blaspp_DIR` | CMAKE:PATH | Location of the blaspp CMake-config file `lapackpp_DIR` | CMAKE:PATH | Location of the lapackpp CMake-config file -`DLAF_WITH_MKL` | `{ON,OFF}` (default: `OFF`) | if blaspp/lapackpp is built with MKL +`DLAF_WITH_MKL` | `{ON,OFF}` (default: `OFF`) | if blaspp/lapackpp is built with oneMKL +`DLAF_WITH_MKL_LEGACY` | `{ON,OFF}` (default: `OFF`) | if blaspp/lapackpp is built with MKL (deprecated) `MKL_ROOT` | CMAKE:PATH | Location of the MKL library `DLAF_ASSERT_ENABLE` | `{ON,OFF}` (default: `ON`) | enable/disable cheap assertions `DLAF_ASSERT_MODERATE_ENABLE` | `{ON,OFF}` (default: `ON` in Debug, `OFF` otherwise) | enable/disable moderate assertions diff --git a/cmake/template/DLAFConfig.cmake.in b/cmake/template/DLAFConfig.cmake.in index 5101a3e0ed..f13bdf47c0 100644 --- a/cmake/template/DLAFConfig.cmake.in +++ b/cmake/template/DLAFConfig.cmake.in @@ -20,6 +20,7 @@ list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}) # ===== VARIABLES set(DLAF_WITH_OPENMP @DLAF_WITH_OPENMP@) set(DLAF_WITH_MKL @DLAF_WITH_MKL@) +set(DLAF_WITH_MKL_LEGACY @DLAF_WITH_MKL_LEGACY@) set(DLAF_WITH_CUDA @DLAF_WITH_CUDA@) set(DLAF_WITH_HIP @DLAF_WITH_HIP@) set(DLAF_WITH_GPU @DLAF_WITH_GPU@) @@ -53,6 +54,19 @@ endif() # ----- LAPACK if(DLAF_WITH_MKL) + set(MKL_INTERFACE "@MKL_INTERFACE@") + set(MKL_THREADING "@MKL_THREADING@") + set(MKL_MPI "@MKL_MPI@") + + find_dependency(MKL CONFIG) + + add_library(DLAF::LAPACK INTERFACE IMPORTED GLOBAL) + target_link_libraries(DLAF::LAPACK INTERFACE MKL::MKL) + + if(DLAF_WITH_SCALAPACK) + add_library(DLAF::SCALAPACK INTERFACE IMPORTED GLOBAL) + endif() +elseif(DLAF_WITH_MKL_LEGACY) set(MKL_ROOT "@MKL_ROOT@") set(MKL_CUSTOM_THREADING "@MKL_THREADING@") diff --git a/spack/packages/dla-future/package.py b/spack/packages/dla-future/package.py index 42350f4305..b07d39d967 100644 --- a/spack/packages/dla-future/package.py 
+++ b/spack/packages/dla-future/package.py @@ -4,7 +4,6 @@ # SPDX-License-Identifier: (Apache-2.0 OR MIT) # dlaf-no-license-check - from spack.package import * @@ -54,6 +53,9 @@ class DlaFuture(CMakePackage, CudaPackage, ROCmPackage): depends_on("scalapack", when="+scalapack") depends_on("blaspp@2022.05.00:") depends_on("lapackpp@2022.05.00:") + depends_on("intel-oneapi-mkl +cluster", when="^[virtuals=scalapack] intel-oneapi-mkl") + + conflicts("intel-oneapi-mkl", when="@:0.3") depends_on("umpire~examples") depends_on("umpire~cuda", when="~cuda") @@ -136,35 +138,67 @@ def cmake_args(self): args.append(self.define_from_variant("BUILD_SHARED_LIBS", "shared")) # BLAS/LAPACK - if self.spec["lapack"].name in INTEL_MATH_LIBRARIES: + if spec["lapack"].name in INTEL_MATH_LIBRARIES: + mkl_provider = spec["lapack"].name + vmap = { - "none": "seq", - "openmp": "omp", - "tbb": "tbb", - } # Map MKL variants to LAPACK target name - mkl_threads = vmap[spec["intel-mkl"].variants["threads"].value] - # TODO: Generalise for intel-oneapi-mkl - args += [ - self.define("DLAF_WITH_MKL", True), - self.define("MKL_LAPACK_TARGET", f"mkl::mkl_intel_32bit_{mkl_threads}_dyn"), - ] + "intel-oneapi-mkl": { + "threading": { + "none": "sequential", + "openmp": "gnu_thread", + "tbb": "tbb_thread", + }, + "mpi": {"intel-mpi": "intelmpi", "mpich": "mpich", "openmpi": "openmpi"}, + }, + "intel-mkl": { + "threading": {"none": "seq", "openmp": "omp", "tbb": "tbb"}, + "mpi": {"intel-mpi": "mpich", "mpich": "mpich", "openmpi": "ompi"}, + }, + } + + if mkl_provider not in vmap.keys(): + raise RuntimeError( + f"dla-future does not support {mkl_provider} as lapack provider" + ) + mkl_mapper = vmap[mkl_provider] + + mkl_threads = mkl_mapper["threading"][spec[mkl_provider].variants["threads"].value] + if mkl_provider == "intel-oneapi-mkl": + args += [ + self.define("DLAF_WITH_MKL", True), + self.define("MKL_INTERFACE", "lp64"), + self.define("MKL_THREADING", mkl_threads), + ] + elif mkl_provider == 
"intel-mkl": + args += [ + self.define("DLAF_WITH_MKL", True) + if spec.version.satisfies(":0.3") + else self.define("DLAF_WITH_MKL_LEGACY", True), + self.define("MKL_LAPACK_TARGET", f"mkl::mkl_intel_32bit_{mkl_threads}_dyn"), + ] + if "+scalapack" in spec: - if ( - "^mpich" in spec - or "^cray-mpich" in spec - or "^intel-mpi" in spec - or "^mvapich" in spec - or "^mvapich2" in spec - ): - mkl_mpi = "mpich" - elif "^openmpi" in spec: - mkl_mpi = "ompi" - args.append( - self.define( - "MKL_SCALAPACK_TARGET", - f"mkl::scalapack_{mkl_mpi}_intel_32bit_{mkl_threads}_dyn", + try: + mpi_provider = spec["mpi"].name + if mpi_provider in ["mpich", "cray-mpich", "mvapich", "mvapich2"]: + mkl_mpi = mkl_mapper["mpi"]["mpich"] + else: + mkl_mpi = mkl_mapper["mpi"][mpi_provider] + except KeyError: + raise RuntimeError( + f"dla-future does not support {spec['mpi'].name} as mpi provider with " + f"the selected scalapack provider {mkl_provider}" + ) + + if mkl_provider == "intel-oneapi-mkl": + args.append(self.define("MKL_MPI", mkl_mpi)) + elif mkl_provider == "intel-mkl": + args.append( + self.define( + "MKL_SCALAPACK_TARGET", + f"mkl::scalapack_{mkl_mpi}_intel_32bit_{mkl_threads}_dyn", + ) ) - ) else: args.append(self.define("DLAF_WITH_MKL", False)) args.append( @@ -182,12 +216,12 @@ def cmake_args(self): args.append(self.define_from_variant("DLAF_WITH_CUDA", "cuda")) args.append(self.define_from_variant("DLAF_WITH_HIP", "rocm")) if "+rocm" in spec: - archs = self.spec.variants["amdgpu_target"].value + archs = spec.variants["amdgpu_target"].value if "none" not in archs: arch_str = ";".join(archs) args.append(self.define("CMAKE_HIP_ARCHITECTURES", arch_str)) if "+cuda" in spec: - archs = self.spec.variants["cuda_arch"].value + archs = spec.variants["cuda_arch"].value if "none" not in archs: arch_str = ";".join(archs) args.append(self.define("CMAKE_CUDA_ARCHITECTURES", arch_str)) @@ -199,12 +233,12 @@ def cmake_args(self): args.append(self.define_from_variant("DLAF_BUILD_DOC", 
"doc")) ### For the spack repo only the else branch should remain. - if "+ci-test" in self.spec: + if "+ci-test" in spec: # Enable TESTS and setup CI specific parameters args.append(self.define("CMAKE_CXX_FLAGS", "-Werror")) - if "+cuda" in self.spec: + if "+cuda" in spec: args.append(self.define("CMAKE_CUDA_FLAGS", "-Werror=all-warnings")) - if "+rocm" in self.spec: + if "+rocm" in spec: args.append(self.define("CMAKE_HIP_FLAGS", "-Werror")) args.append(self.define("BUILD_TESTING", True)) args.append(self.define("DLAF_BUILD_TESTING", True)) @@ -215,7 +249,7 @@ def cmake_args(self): args.append(self.define("DLAF_BUILD_TESTING", self.run_tests)) ### Variants available only in the DLAF repo spack package - if "+ci-check-threads" in self.spec: + if "+ci-check-threads" in spec: args.append(self.define("DLAF_TEST_PREFLAGS", "check-threads")) ### diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 276cd0d566..e92533f0a6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -94,6 +94,7 @@ target_compile_definitions( $<$:DLAF_ASSERT_HEAVY_ENABLE> DLAF_FUNCTION_NAME=$,__PRETTY_FUNCTION__,__func__> $<$:DLAF_WITH_MKL> + $<$:DLAF_WITH_MKL> $<$:DLAF_WITH_OPENMP> $<$:DLAF_WITH_GPU> $<$:DLAF_WITH_CUDA>