diff --git a/pkgs/applications/science/math/mxnet/default.nix b/pkgs/applications/science/math/mxnet/default.nix
index c1a329c608864..240a1759397fe 100644
--- a/pkgs/applications/science/math/mxnet/default.nix
+++ b/pkgs/applications/science/math/mxnet/default.nix
@@ -50,7 +50,7 @@ stdenv.mkDerivation rec {
     "-DUSE_OLDCMAKECUDA=ON" # see https://github.com/apache/incubator-mxnet/issues/10743
     "-DCUDA_ARCH_NAME=All"
     "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
-    "-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.cudaRealArches}"
+    "-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.realArches}"
   ] else [ "-DUSE_CUDA=OFF" ])
   ++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF";
diff --git a/pkgs/development/compilers/cudatoolkit/common.nix b/pkgs/development/compilers/cudatoolkit/common.nix
index e986ae2dc14da..fb3b50b981504 100644
--- a/pkgs/development/compilers/cudatoolkit/common.nix
+++ b/pkgs/development/compilers/cudatoolkit/common.nix
@@ -11,7 +11,6 @@ args@
 , fetchurl
 , fontconfig
 , freetype
-, gcc
 , gdk-pixbuf
 , glib
 , glibc
@@ -22,13 +21,13 @@ args@
 , perl
 , python3
 , requireFile
-, stdenv
+, backendStdenv # E.g. gcc11Stdenv, set in extension.nix
 , unixODBC
 , xorg
 , zlib
 }:
 
-stdenv.mkDerivation rec {
+backendStdenv.mkDerivation rec {
   pname = "cudatoolkit";
   inherit version runPatches;
 
@@ -146,14 +145,23 @@ stdenv.mkDerivation rec {
     # Fix builds with newer glibc version
     sed -i "1 i#define _BITS_FLOATN_H" "$out/include/host_defines.h"
-
-    # Ensure that cmake can find CUDA.
+  '' +
+  # Point NVCC at a compatible compiler
+  # FIXME: redist cuda_nvcc copy-pastes this code
+  # Refer to comments in the overrides for cuda_nvcc for explanation
+  # CUDA_TOOLKIT_ROOT_DIR is legacy,
+  # Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
+  ''
     mkdir -p $out/nix-support
-    echo "cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'" >> $out/nix-support/setup-hook
-
-    # Set the host compiler to be used by nvcc for CMake-based projects:
-    # https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
-    echo "cmakeFlags+=' -DCUDA_HOST_COMPILER=${gcc}/bin'" >> $out/nix-support/setup-hook
+    cat <<EOF >> $out/nix-support/setup-hook
+    cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'
+    cmakeFlags+=' -DCUDA_HOST_COMPILER=${backendStdenv.cc}/bin'
+    cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${backendStdenv.cc}/bin'
+    if [ -z "\''${CUDAHOSTCXX-}" ]; then
+      export CUDAHOSTCXX=${backendStdenv.cc}/bin;
+    fi
+    export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin'
+    EOF
 
     # Move some libraries to the lib output so that programs that
     # depend on them don't pull in this entire monstrosity.
@@ -167,10 +175,6 @@ stdenv.mkDerivation rec {
       mv $out/extras/CUPTI/lib64/libcupti* $out/lib
     ''}
 
-    # Set compiler for NVCC.
-    wrapProgram $out/bin/nvcc \
-      --prefix PATH : ${gcc}/bin
-
     # nvprof does not find any program to profile if LD_LIBRARY_PATH is not set
     wrapProgram $out/bin/nvprof \
       --prefix LD_LIBRARY_PATH : $out/lib
@@ -191,7 +195,14 @@ stdenv.mkDerivation rec {
   preFixup =
     let rpath = lib.concatStringsSep ":" [
       (lib.makeLibraryPath (runtimeDependencies ++ [ "$lib" "$out" "$out/nvvm" ]))
-      "${stdenv.cc.cc.lib}/lib64"
+
+      # The path to libstdc++ and such
+      #
+      # `backendStdenv` is the cuda-compatible toolchain that we pick in
+      # extension.nix; we hand it to NVCC to use as a back-end, and we link
+      # cudatoolkit's binaries against its libstdc++
+      "${backendStdenv.cc.cc.lib}/lib64"
+
       "$out/jre/lib/amd64/jli"
       "$out/lib64"
       "$out/nvvm/lib64"
@@ -260,7 +271,7 @@ stdenv.mkDerivation rec {
     popd
   '';
 
   passthru = {
-    cc = gcc;
+    inherit (backendStdenv) cc;
     majorMinorVersion = lib.versions.majorMinor version;
     majorVersion = lib.versions.majorMinor version;
   };
diff --git a/pkgs/development/compilers/cudatoolkit/extension.nix b/pkgs/development/compilers/cudatoolkit/extension.nix
index c11f12b118a2f..dd6f7ff2abe7b 100644
--- a/pkgs/development/compilers/cudatoolkit/extension.nix
+++ b/pkgs/development/compilers/cudatoolkit/extension.nix
@@ -7,11 +7,29 @@ final: prev: let
   # Version info for the classic cudatoolkit packages that contain everything that is in redist.
   cudatoolkitVersions = final.lib.importTOML ./versions.toml;
 
+  finalVersion = cudatoolkitVersions.${final.cudaVersion};
+
+  # Exposed as cudaPackages.backendStdenv.
+  # We don't call it just "stdenv" to avoid confusion: e.g. this toolchain doesn't contain nvcc.
+  # Instead, it's the back-end toolchain for nvcc to use.
+  # We also use this to link a compatible libstdc++ (backendStdenv.cc.cc.lib)
+  # Cf. https://github.com/NixOS/nixpkgs/pull/218265 for context
+  backendStdenv = prev.pkgs."${finalVersion.gcc}Stdenv";
+
   ### Add classic cudatoolkit package
-  cudatoolkit = buildCudaToolkitPackage ((attrs: attrs // { gcc = prev.pkgs.${attrs.gcc}; }) cudatoolkitVersions.${final.cudaVersion});
+  cudatoolkit =
+    let
+      attrs = builtins.removeAttrs finalVersion [ "gcc" ];
+      attrs' = attrs // { inherit backendStdenv; };
+    in
+    buildCudaToolkitPackage attrs';
 
   cudaFlags = final.callPackage ./flags.nix {};
 
-in {
-  inherit cudatoolkit cudaFlags;
+in
+{
+  inherit
+    backendStdenv
+    cudatoolkit
+    cudaFlags;
 }
diff --git a/pkgs/development/compilers/cudatoolkit/flags.nix b/pkgs/development/compilers/cudatoolkit/flags.nix
index 8e1e54723b2e4..989fdb06c5dfb 100644
--- a/pkgs/development/compilers/cudatoolkit/flags.nix
+++ b/pkgs/development/compilers/cudatoolkit/flags.nix
@@ -1,6 +1,6 @@
 { config
 , lib
-, cudatoolkit
+, cudaVersion
 }:
 
 # Type aliases
@@ -13,14 +13,21 @@ let
   inherit (lib) attrsets lists strings trivial versions;
 
-  cudaVersion = cudatoolkit.version;
 
   # Flags are determined based on your CUDA toolkit by default. You may benefit
   # from improved performance, reduced file size, or greater hardware support by
   # passing a configuration based on your specific GPU environment.
   #
-  # config.cudaCapabilities: list of hardware generations to support (e.g., "8.0")
-  # config.cudaForwardCompat: bool for compatibility with future GPU generations
+  # config.cudaCapabilities :: List Capability
+  # List of hardware generations to build.
+  # E.g. [ "8.0" ]
+  # Currently, the last item is considered the optional forward-compatibility arch,
+  # but this may change in the future.
+  #
+  # config.cudaForwardCompat :: Bool
+  # Whether to include the forward compatibility gencode (+PTX)
+  # to support future GPU generations.
+  # E.g. true
   #
   # Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351
 
@@ -40,6 +47,9 @@ let
   # GPUs which are supported by the provided CUDA version.
   supportedGpus = builtins.filter isSupported gpus;
 
+  # supportedCapabilities :: List Capability
+  supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus;
+
   # cudaArchNameToVersions :: AttrSet String (List String)
   # Maps the name of a GPU architecture to different versions of that architecture.
   # For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ].
@@ -50,12 +60,6 @@ let
     (gpu: gpu.archName)
     supportedGpus;
 
-  # cudaArchNames :: List String
-  # NOTE: It's important that we don't rely on builtins.attrNames cudaArchNameToVersions here;
-  # otherwise, we'll get the names sorted in alphabetical order. The JSON array we read them
-  # from is already sorted, so we'll preserve that order here.
-  cudaArchNames = lists.unique (lists.map (gpu: gpu.archName) supportedGpus);
-
   # cudaComputeCapabilityToName :: AttrSet String String
   # Maps the version of a GPU architecture to the name of that architecture.
   # For example, "8.0" maps to "Ampere".
@@ -68,23 +72,6 @@ let
     supportedGpus
   );
 
-  # cudaComputeCapabilities :: List String
-  # NOTE: It's important that we don't rely on builtins.attrNames cudaComputeCapabilityToName here;
-  # otherwise, we'll get the versions sorted in alphabetical order. The JSON array we read them
-  # from is already sorted, so we'll preserve that order here.
-  # Use the user-provided list of CUDA capabilities if it's provided.
-  cudaComputeCapabilities = config.cudaCapabilities
-    or (lists.map (gpu: gpu.computeCapability) supportedGpus);
-
-  # cudaForwardComputeCapability :: String
-  cudaForwardComputeCapability = (lists.last cudaComputeCapabilities) + "+PTX";
-
-  # cudaComputeCapabilitiesAndForward :: List String
-  # The list of supported CUDA architectures, including the forward compatibility architecture.
-  # If forward compatibility is disabled, this will be the same as cudaComputeCapabilities.
-  cudaComputeCapabilitiesAndForward = cudaComputeCapabilities
-    ++ lists.optional (config.cudaForwardCompat or true) cudaForwardComputeCapability;
-
   # dropDot :: String -> String
   dropDot = ver: builtins.replaceStrings [ "." ] [ "" ] ver;
 
@@ -102,38 +89,68 @@ let
       "-gencode=arch=compute_${dropDot computeCapability},code=${feat}_${dropDot computeCapability}"
   );
 
-  # cudaRealArches :: List String
-  # The real architectures are physical architectures supported by the CUDA version.
-  # For example, "sm_80".
-  cudaRealArches = archMapper "sm" cudaComputeCapabilities;
-
-  # cudaVirtualArches :: List String
-  # The virtual architectures are typically used for forward compatibility, when trying to support
-  # an architecture newer than the CUDA version allows.
-  # For example, "compute_80".
-  cudaVirtualArches = archMapper "compute" cudaComputeCapabilities;
-
-  # cudaArches :: List String
-  # By default, build for all supported architectures and forward compatibility via a virtual
-  # architecture for the newest supported architecture.
-  cudaArches = cudaRealArches ++
-    lists.optional (config.cudaForwardCompat or true) (lists.last cudaVirtualArches);
-
-  # cudaGencode :: List String
-  # A list of CUDA gencode arguments to pass to NVCC.
-  cudaGencode =
-    let
-      base = gencodeMapper "sm" cudaComputeCapabilities;
-      forwardCompat = gencodeMapper "compute" [ (lists.last cudaComputeCapabilities) ];
-    in
-    base ++ lists.optionals (config.cudaForwardCompat or true) forwardCompat;
+  formatCapabilities = { cudaCapabilities, enableForwardCompat ? true }: rec {
+    inherit cudaCapabilities enableForwardCompat;
+
+    # archNames :: List String
+    # E.g. [ "Turing" "Ampere" ]
+    archNames = lists.unique (builtins.map (cap: cudaComputeCapabilityToName.${cap}) cudaCapabilities);
+
+    # realArches :: List String
+    # The real architectures are physical architectures supported by the CUDA version.
+    # E.g. [ "sm_75" "sm_86" ]
+    realArches = archMapper "sm" cudaCapabilities;
+
+    # virtualArches :: List String
+    # The virtual architectures are typically used for forward compatibility, when trying to support
+    # an architecture newer than the CUDA version allows.
+    # E.g. [ "compute_75" "compute_86" ]
+    virtualArches = archMapper "compute" cudaCapabilities;
+
+    # arches :: List String
+    # By default, build for all supported architectures and forward compatibility via a virtual
+    # architecture for the newest supported architecture.
+    # E.g. [ "sm_75" "sm_86" "compute_86" ]
+    arches = realArches ++
+      lists.optional enableForwardCompat (lists.last virtualArches);
+
+    # gencode :: List String
+    # A list of CUDA gencode arguments to pass to NVCC.
+    # E.g. [ "-gencode=arch=compute_75,code=sm_75" ... "-gencode=arch=compute_86,code=compute_86" ]
+    gencode =
+      let
+        base = gencodeMapper "sm" cudaCapabilities;
+        forward = gencodeMapper "compute" [ (lists.last cudaCapabilities) ];
+      in
+      base ++ lib.optionals enableForwardCompat forward;
+  };
 
 in
+
+# When changing names or formats: pause, validate, and update the assert
+assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == {
+  cudaCapabilities = [ "7.5" "8.6" ];
+  enableForwardCompat = true;
+
+  archNames = [ "Turing" "Ampere" ];
+  realArches = [ "sm_75" "sm_86" ];
+  virtualArches = [ "compute_75" "compute_86" ];
+  arches = [ "sm_75" "sm_86" "compute_86" ];
+
+  gencode = [ "-gencode=arch=compute_75,code=sm_75" "-gencode=arch=compute_86,code=sm_86" "-gencode=arch=compute_86,code=compute_86" ];
+};
+
 {
-  inherit
-    cudaArchNames
-    cudaArchNameToVersions cudaComputeCapabilityToName
-    cudaRealArches cudaVirtualArches cudaArches
-    cudaGencode;
-  cudaCapabilities = cudaComputeCapabilitiesAndForward;
+  # formatCapabilities :: { cudaCapabilities: List Capability, enableForwardCompat: Bool } -> { ... }
+  inherit formatCapabilities;
+
+  # cudaArchNameToVersions :: String => List String
+  inherit cudaArchNameToVersions;
+
+  # cudaComputeCapabilityToName :: String => String
+  inherit cudaComputeCapabilityToName;
+
+  # dropDot :: String -> String
+  inherit dropDot;
+} // formatCapabilities {
+  cudaCapabilities = config.cudaCapabilities or supportedCapabilities;
+  enableForwardCompat = config.cudaForwardCompat or true;
 }
diff --git a/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix b/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix
index 9bbd7ea1da119..1b216ee625a89 100644
--- a/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix
+++ b/pkgs/development/compilers/cudatoolkit/redist/build-cuda-redist-package.nix
@@ -1,5 +1,5 @@
 { lib
-, stdenv
+, backendStdenv
 , fetchurl
 , autoPatchelfHook
 , autoAddOpenGLRunpathHook
@@ -10,7 +10,8 @@ attrs:
 
 let
   arch = "linux-x86_64";
-in stdenv.mkDerivation {
+in
+backendStdenv.mkDerivation {
   inherit pname;
   inherit (attrs) version;
@@ -29,7 +30,11 @@
   ];
 
   buildInputs = [
-    stdenv.cc.cc.lib
+    # autoPatchelfHook will search for a libstdc++ and we're giving it a
+    # "compatible" libstdc++ from the same toolchain that NVCC uses.
+    #
+    # NB: We don't actually know if this is the right thing to do
+    backendStdenv.cc.cc.lib
   ];
 
   dontBuild = true;
@@ -43,6 +48,8 @@
     runHook postInstall
   '';
 
+  passthru.stdenv = backendStdenv;
+
   meta = {
     description = attrs.name;
     license = lib.licenses.unfree;
diff --git a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix
index bcf16db6e12eb..96b782d8c990d 100644
--- a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix
+++ b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix
@@ -1,6 +1,8 @@
-final: prev: let
+final: prev:
+let
   inherit (prev) lib pkgs;
-in (lib.filterAttrs (attr: _: (prev ? "${attr}")) {
+in
+(lib.filterAttrs (attr: _: (prev ? "${attr}")) {
   ### Overrides to fix the components of cudatoolkit-redist
 
   # Attributes that don't exist in the previous set are removed.
@@ -20,6 +22,38 @@
       prev.libcublas
     ];
 
+  cuda_nvcc = prev.cuda_nvcc.overrideAttrs (oldAttrs:
+    let
+      inherit (prev.backendStdenv) cc;
+    in
+    {
+      # Point NVCC at a compatible compiler
+      # FIXME: non-redist cudatoolkit copy-pastes this code
+
+      # For CMake-based projects:
+      # https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
+      # https://cmake.org/cmake/help/latest/envvar/CUDAHOSTCXX.html
+      # https://cmake.org/cmake/help/latest/variable/CMAKE_CUDA_HOST_COMPILER.html
+
+      # For non-CMake projects:
+      # We prepend --compiler-bindir to nvcc flags.
+      # Downstream packages can override these, because NVCC
+      # uses the last --compiler-bindir it gets on the command line.
+      # FIXME: this results in "incompatible redefinition" warnings.
+      # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin
+      postInstall = (oldAttrs.postInstall or "") + ''
+        mkdir -p $out/nix-support
+        cat <<EOF >> $out/nix-support/setup-hook
+        cmakeFlags+=' -DCUDA_HOST_COMPILER=${cc}/bin'
+        cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${cc}/bin'
+        if [ -z "\''${CUDAHOSTCXX-}" ]; then
+          export CUDAHOSTCXX=${cc}/bin;
+        fi
+        export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin'
+        EOF
+      '';
+    });
+
   cuda_nvprof = prev.cuda_nvprof.overrideAttrs (oldAttrs: {
     nativeBuildInputs = oldAttrs.nativeBuildInputs ++ [ pkgs.addOpenGLRunpath ];
     buildInputs = oldAttrs.buildInputs ++ [ prev.cuda_cupti ];
diff --git a/pkgs/development/compilers/cudatoolkit/versions.toml b/pkgs/development/compilers/cudatoolkit/versions.toml
index 7e9fcae3271ac..a201a4a263f5e 100644
--- a/pkgs/development/compilers/cudatoolkit/versions.toml
+++ b/pkgs/development/compilers/cudatoolkit/versions.toml
@@ -76,8 +76,4 @@ gcc = "gcc11"
 version = "12.0.1"
 url = "https://developer.download.nvidia.com/compute/cuda/12.0.1/local_installers/cuda_12.0.1_525.85.12_linux.run"
 sha256 = "sha256-GyBaBicvFGP0dydv2rkD8/ZmkXwGjlIHOAAeacehh1s="
-# CUDA 12 is compatible with gcc12, but nixpkgs default gcc is still on gcc11 as
-# of 2023-01-08. See https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements.
-# This should be upgraded to gcc12 once nixpkgs default gcc is upgraded. Other
-# CUDA versions should likely have their gcc versions upgraded as well.
-gcc = "gcc11"
+gcc = "gcc12"
diff --git a/pkgs/development/libraries/opencv/3.x.nix b/pkgs/development/libraries/opencv/3.x.nix
index e1a13c9fe573f..75d8c712df1f6 100644
--- a/pkgs/development/libraries/opencv/3.x.nix
+++ b/pkgs/development/libraries/opencv/3.x.nix
@@ -15,8 +15,8 @@
 , enableContrib ? true
 
 , enableCuda ? (config.cudaSupport or false) &&
-    stdenv.hostPlatform.isx86_64, cudatoolkit
-
+    stdenv.hostPlatform.isx86_64
+, cudaPackages ? { }
 , enableUnfree ? false
 , enableIpp ? false
 , enablePython ? false, pythonPackages ? null
@@ -40,6 +40,9 @@
 assert blas.implementation == "openblas" && lapack.implementation == "openblas";
 assert enablePython -> pythonPackages != null;
 
 let
+  inherit (cudaPackages) cudatoolkit;
+  inherit (cudaPackages.cudaFlags) cudaCapabilities;
+
   version = "3.4.18";
 
   src = fetchFromGitHub {
@@ -242,6 +245,8 @@ stdenv.mkDerivation {
     "-DCUDA_FAST_MATH=ON"
     "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
     "-DCUDA_NVCC_FLAGS=--expt-relaxed-constexpr"
+    "-DCUDA_ARCH_BIN=${lib.concatStringsSep ";" cudaCapabilities}"
+    "-DCUDA_ARCH_PTX=${lib.last cudaCapabilities}"
   ] ++ lib.optionals stdenv.isDarwin [
     "-DWITH_OPENCL=OFF"
     "-DWITH_LAPACK=OFF"
diff --git a/pkgs/development/libraries/opencv/4.x.nix b/pkgs/development/libraries/opencv/4.x.nix
index ac021c2b61082..a9f7b0304e84d 100644
--- a/pkgs/development/libraries/opencv/4.x.nix
+++ b/pkgs/development/libraries/opencv/4.x.nix
@@ -37,7 +37,7 @@
 , enableContrib ? true
 
 , enableCuda ? (config.cudaSupport or false) && stdenv.hostPlatform.isx86_64
-, cudatoolkit
+, cudaPackages ? { }
 , nvidia-optical-flow-sdk
 
 , enableUnfree ? false
@@ -79,6 +79,9 @@
 }:
 
 let
+  inherit (cudaPackages) cudatoolkit;
+  inherit (cudaPackages.cudaFlags) cudaCapabilities;
+
   version = "4.7.0";
 
   src = fetchFromGitHub {
@@ -342,6 +345,14 @@ stdenv.mkDerivation {
     "-DCUDA_FAST_MATH=ON"
     "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
     "-DCUDA_NVCC_FLAGS=--expt-relaxed-constexpr"
+
+    # OpenCV respects at least three variables:
+    # -DCUDA_GENERATION takes a single arch name, e.g. Volta
+    # -DCUDA_ARCH_BIN takes a semicolon-separated list of real arches, e.g. "8.0;8.6"
+    # -DCUDA_ARCH_PTX takes the virtual arch, e.g. "8.6"
+    "-DCUDA_ARCH_BIN=${lib.concatStringsSep ";" cudaCapabilities}"
+    "-DCUDA_ARCH_PTX=${lib.last cudaCapabilities}"
+
     "-DNVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH=${nvidia-optical-flow-sdk}"
   ] ++ lib.optionals stdenv.isDarwin [
     "-DWITH_OPENCL=OFF"
diff --git a/pkgs/development/libraries/science/math/cudnn/generic.nix b/pkgs/development/libraries/science/math/cudnn/generic.nix
index d4e1f641a956e..b2844ae6b074c 100644
--- a/pkgs/development/libraries/science/math/cudnn/generic.nix
+++ b/pkgs/development/libraries/science/math/cudnn/generic.nix
@@ -1,11 +1,11 @@
 {
-  stdenv,
+  backendStdenv,
   lib,
   zlib,
   useCudatoolkitRunfile ? false,
   cudaVersion,
   cudaMajorVersion,
-  cudatoolkit, # if cuda>=11: only used for .cc
+  cudatoolkit, # For cuda < 11
   libcublas ? null, # cuda <11 doesn't ship redist packages
   autoPatchelfHook,
   autoAddOpenGLRunpathHook,
@@ -26,7 +26,7 @@
   maxCudaVersion,
 }:
 assert useCudatoolkitRunfile || (libcublas != null); let
-  inherit (cudatoolkit) cc;
+  inherit (backendStdenv) cc;
   inherit (lib) lists strings trivial versions;
 
   # majorMinorPatch :: String -> String
@@ -46,7 +46,7 @@ assert useCudatoolkitRunfile || (libcublas != null); let
     then cudatoolkit
     else libcublas;
 in
-  stdenv.mkDerivation {
+  backendStdenv.mkDerivation {
     pname = "cudatoolkit-${cudaMajorVersion}-cudnn";
     version = versionTriple;
diff --git a/pkgs/development/libraries/science/math/magma/generic.nix b/pkgs/development/libraries/science/math/magma/generic.nix
index ab0a2125ec00e..c997fcc090133 100644
--- a/pkgs/development/libraries/science/math/magma/generic.nix
+++ b/pkgs/development/libraries/science/math/magma/generic.nix
@@ -8,10 +8,16 @@
 { blas
 , cmake
 , cudaPackages
+  # FIXME: cuda being unfree means ofborg won't eval "magma".
+  # respecting config.cudaSupport -> false by default
+  # -> ofborg eval -> throws "no GPU targets specified".
+  # Probably should delete everything but "magma-cuda" and "magma-hip"
+  # from all-packages.nix
 , cudaSupport ? true
 , fetchurl
 , gfortran
-, gpuTargets ? [ ]
+, cudaCapabilities ? cudaPackages.cudaFlags.cudaCapabilities
+, gpuTargets ? [ ] # Non-CUDA targets, that is HIP
 , hip
 , hipblas
 , hipsparse
@@ -36,14 +42,8 @@ let
   # of the first list *from* the second list. That means:
   # lists.subtractLists a b = b - a
 
-  # For CUDA
-  supportedCudaSmArches = lists.intersectLists cudaFlags.cudaRealArches supportedGpuTargets;
-  # Subtract the supported SM architectures from the real SM architectures to get the unsupported
-  # SM architectures.
-  unsupportedCudaSmArches = lists.subtractLists supportedCudaSmArches cudaFlags.cudaRealArches;
-
   # For ROCm
-  # NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.cudaRealArches.
+  # NOTE: The hip.gpuTargets are prefixed with "gfx" instead of "sm" like cudaFlags.realArches.
   # For some reason, Magma's CMakeLists.txt file does not handle the "gfx" prefix, so we must
   # remove it.
   rocmArches = lists.map (x: strings.removePrefix "gfx" x) hip.gpuTargets;
@@ -62,19 +62,32 @@ let
     )
     supported;
 
-  # Create the gpuTargetString.
   gpuTargetString = strings.concatStringsSep "," (
     if gpuTargets != [ ] then
       # If gpuTargets is specified, it always takes priority.
       gpuArchWarner supportedCustomGpuTargets unsupportedCustomGpuTargets
-    else if cudaSupport then
-      gpuArchWarner supportedCudaSmArches unsupportedCudaSmArches
     else if rocmSupport then
       gpuArchWarner supportedRocmArches unsupportedRocmArches
+    else if cudaSupport then
+      [ ] # It's important we pass explicit -DGPU_TARGET to reset magma's defaults
     else
       throw "No GPU targets specified"
   );
 
+  # E.g. [ "80" "86" "90" ]
+  cudaArchitectures = (builtins.map cudaFlags.dropDot cudaCapabilities);
+
+  cudaArchitecturesString = strings.concatStringsSep ";" cudaArchitectures;
+
+  minArch =
+    let
+      minArch' = builtins.head (builtins.sort builtins.lessThan cudaArchitectures);
+    in
+    # If this fails some day, something must've changed and we should re-validate our assumptions
+    assert builtins.stringLength minArch' == 2;
+    # "75" -> "750" Cf. https://bitbucket.org/icl/magma/src/f4ec79e2c13a2347eff8a77a3be6f83bc2daec20/CMakeLists.txt#lines-273
+    "${minArch'}0";
+
   cuda_joined = symlinkJoin {
     name = "cuda-redist-${cudaVersion}";
     paths = with cudaPackages; [
@@ -87,6 +100,8 @@ let
   };
 in
 
+assert (builtins.match "[^[:space:]]*" gpuTargetString) != null;
+
 stdenv.mkDerivation {
   pname = "magma";
   inherit version;
@@ -116,7 +131,11 @@ stdenv.mkDerivation {
     openmp
   ];
 
-  cmakeFlags = lists.optionals cudaSupport [
+  cmakeFlags = [
+    "-DGPU_TARGET=${gpuTargetString}"
+  ] ++ lists.optionals cudaSupport [
+    "-DCMAKE_CUDA_ARCHITECTURES=${cudaArchitecturesString}"
+    "-DMIN_ARCH=${minArch}" # Disarms magma's asserts
     "-DCMAKE_C_COMPILER=${cudatoolkit.cc}/bin/cc"
     "-DCMAKE_CXX_COMPILER=${cudatoolkit.cc}/bin/c++"
     "-DMAGMA_ENABLE_CUDA=ON"
@@ -126,14 +145,10 @@ stdenv.mkDerivation {
     "-DMAGMA_ENABLE_HIP=ON"
   ];
 
-  # NOTE: We must set GPU_TARGET in preConfigure in this way because it may contain spaces.
-  preConfigure = ''
-    cmakeFlagsArray+=("-DGPU_TARGET=${gpuTargetString}")
-  ''
   # NOTE: The stdenv's CXX is used when compiling the CMake test to determine the version of
   # CUDA available. This isn't necessarily the same as cudatoolkit.cc, so we must set
   # CUDAHOSTCXX.
-  + strings.optionalString cudaSupport ''
+  preConfigure = strings.optionalString cudaSupport ''
     export CUDAHOSTCXX=${cudatoolkit.cc}/bin/c++
   '';
diff --git a/pkgs/development/libraries/science/math/magma/releases.nix b/pkgs/development/libraries/science/math/magma/releases.nix
index 3d08aa95d4d18..029f418edce3c 100644
--- a/pkgs/development/libraries/science/math/magma/releases.nix
+++ b/pkgs/development/libraries/science/math/magma/releases.nix
@@ -1,27 +1,13 @@
 # NOTE: Order matters! Put the oldest version first, and the newest version last.
 # NOTE: Make sure the supportedGpuTargets are in order of oldest to newest.
 # You can update the supportedGpuTargets by looking at the CMakeLists.txt file.
-# CUDA starts here: https://bitbucket.org/icl/magma/src/f4ec79e2c13a2347eff8a77a3be6f83bc2daec20/CMakeLists.txt#lines-175
 # HIP is here: https://bitbucket.org/icl/magma/src/f4ec79e2c13a2347eff8a77a3be6f83bc2daec20/CMakeLists.txt#lines-386
+# CUDA works around magma's wrappers and uses FindCUDAToolkit directly
 [
   {
     version = "2.6.2";
     hash = "sha256-dbVU2rAJA+LRC5cskT5Q5/iMvGLzrkMrWghsfk7aCnE=";
     supportedGpuTargets = [
-      "sm_20"
-      "sm_30"
-      "sm_35"
-      "sm_37"
-      "sm_50"
-      "sm_52"
-      "sm_53"
-      "sm_60"
-      "sm_61"
-      "sm_62"
-      "sm_70"
-      "sm_71"
-      "sm_75"
-      "sm_80"
       "700"
       "701"
       "702"
@@ -53,21 +39,6 @@
     version = "2.7.1";
     hash = "sha256-2chxHAR6OMrhbv3nS+4uszMyF/0nEeHpuGBsu7SuGlA=";
     supportedGpuTargets = [
-      "sm_20"
-      "sm_30"
-      "sm_35"
-      "sm_37"
-      "sm_50"
-      "sm_52"
-      "sm_53"
-      "sm_60"
-      "sm_61"
-      "sm_62"
-      "sm_70"
-      "sm_71"
-      "sm_75"
-      "sm_80"
-      "sm_90"
       "700"
       "701"
      "702"
diff --git a/pkgs/development/libraries/science/math/nccl/default.nix b/pkgs/development/libraries/science/math/nccl/default.nix
index 99aed3a6a30ef..155e863bf21e4 100644
--- a/pkgs/development/libraries/science/math/nccl/default.nix
+++ b/pkgs/development/libraries/science/math/nccl/default.nix
@@ -1,11 +1,19 @@
-{ lib, stdenv, fetchFromGitHub, which, cudaPackages, addOpenGLRunpath }:
+{ lib
+, backendStdenv
+, fetchFromGitHub
+, which
+, cudaPackages ? { }
+, addOpenGLRunpath
+}:
+
+with cudaPackages;
 
 let
-  inherit (cudaPackages) cudatoolkit;
+  # Output looks like "-gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86"
+  gencode = lib.concatStringsSep " " cudaFlags.gencode;
 in
-
-stdenv.mkDerivation rec {
-  name = "nccl-${version}-cuda-${cudatoolkit.majorVersion}";
+backendStdenv.mkDerivation rec {
+  name = "nccl-${version}-cuda-${cudaPackages.cudaMajorVersion}";
   version = "2.16.5-1";
 
   src = fetchFromGitHub {
@@ -17,16 +25,29 @@ stdenv.mkDerivation rec {
 
   outputs = [ "out" "dev" ];
 
-  nativeBuildInputs = [ which addOpenGLRunpath ];
+  nativeBuildInputs = [
+    which
+    addOpenGLRunpath
+    cuda_nvcc
+  ];
 
-  buildInputs = [ cudatoolkit ];
+  buildInputs = [
+    cuda_cudart
+  ] ++ lib.optionals (lib.versionAtLeast cudaVersion "12.0.0") [
+    cuda_cccl
+  ];
 
   preConfigure = ''
     patchShebangs src/collectives/device/gen_rules.sh
+    makeFlagsArray+=(
+      "NVCC_GENCODE=${gencode}"
+    )
   '';
 
   makeFlags = [
-    "CUDA_HOME=${cudatoolkit}"
+    "CUDA_HOME=${cuda_nvcc}"
+    "CUDA_LIB=${cuda_cudart}/lib64"
+    "CUDA_INC=${cuda_cudart}/include"
     "PREFIX=$(out)"
   ];
diff --git a/pkgs/development/libraries/science/math/tensorrt/generic.nix b/pkgs/development/libraries/science/math/tensorrt/generic.nix
index 3447087051f1e..31090f715c222 100644
--- a/pkgs/development/libraries/science/math/tensorrt/generic.nix
+++ b/pkgs/development/libraries/science/math/tensorrt/generic.nix
@@ -1,5 +1,5 @@
 { lib
-, stdenv
+, backendStdenv
 , requireFile
 , autoPatchelfHook
 , autoAddOpenGLRunpathHook
@@ -18,7 +18,7 @@
 assert lib.assertMsg (lib.strings.versionAtLeast cudnn.version fileVersionCudnn)
   "This version of TensorRT requires at least cuDNN ${fileVersionCudnn} (current version is ${cudnn.version})";
 
-stdenv.mkDerivation rec {
+backendStdenv.mkDerivation rec {
   pname = "cudatoolkit-${cudatoolkit.majorVersion}-tensorrt";
   version = fullVersion;
   src = requireFile rec {
@@ -45,7 +45,7 @@ stdenv.mkDerivation rec {
 
   # Used by autoPatchelfHook
   buildInputs = [
-    cudatoolkit.cc.cc.lib # libstdc++
+    backendStdenv.cc.cc.lib # libstdc++
     cudatoolkit
     cudnn
   ];
@@ -74,6 +74,8 @@ stdenv.mkDerivation rec {
     "$out/lib/libnvinfer_builder_resource.so.${mostOfVersion}"
   '';
 
+  passthru.stdenv = backendStdenv;
+
   meta = with lib; {
     # Check that the cudatoolkit version satisfies our min/max constraints (both
     # inclusive). We mark the package as broken if it fails to satisfy the
diff --git a/pkgs/development/python-modules/jaxlib/default.nix b/pkgs/development/python-modules/jaxlib/default.nix
index 2c13defe43838..ad48af827ee56 100644
--- a/pkgs/development/python-modules/jaxlib/default.nix
+++ b/pkgs/development/python-modules/jaxlib/default.nix
@@ -164,7 +164,7 @@ let
       build --action_env TF_CUDA_PATHS="${cudatoolkit_joined},${cudnn},${nccl}"
       build --action_env TF_CUDA_VERSION="${lib.versions.majorMinor cudatoolkit.version}"
       build --action_env TF_CUDNN_VERSION="${lib.versions.major cudnn.version}"
-      build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.cudaRealArches}"
+      build:cuda --action_env TF_CUDA_COMPUTE_CAPABILITIES="${builtins.concatStringsSep "," cudaFlags.realArches}"
     '' + ''
       CFG
     '';
diff --git a/pkgs/development/python-modules/tensorflow/default.nix b/pkgs/development/python-modules/tensorflow/default.nix
index f7d920c372217..adc7b1c1e0b3f 100644
--- a/pkgs/development/python-modules/tensorflow/default.nix
+++ b/pkgs/development/python-modules/tensorflow/default.nix
@@ -17,7 +17,9 @@
 # that in nix as well. It would make some things easier and less confusing, but
 # it would also make the default tensorflow package unfree. See
 # https://groups.google.com/a/tensorflow.org/forum/#!topic/developers/iRCt5m4qUz0
-, cudaSupport ? false, cudaPackages ? {}
+, cudaSupport ? false
+, cudaPackages ? { }
+, cudaCapabilities ? cudaPackages.cudaFlags.cudaCapabilities
 , mklSupport ? false, mkl
 , tensorboardSupport ? true
 # XLA without CUDA is broken
@@ -30,7 +32,27 @@
 }:
 
 let
-  inherit (cudaPackages) cudatoolkit cudaFlags cudnn nccl;
+  originalStdenv = stdenv;
+in
+let
+  # Tensorflow looks at many toolchain-related variables which may diverge.
+  #
+  # Toolchain for cuda-enabled builds.
+  # We want to achieve two things:
+  # 1. NVCC should use a compatible back-end (e.g. gcc11 for cuda11)
+  # 2. Normal C++ files should be compiled with the same toolchain,
+  #    to avoid potential weird dynamic linkage errors at runtime.
+  #    This may not be necessary though
+  #
+  # Toolchain for Darwin:
+  # clang 7 fails to emit a symbol for
+  # __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the
+  # translation units, so the build fails at link time
+  stdenv =
+    if cudaSupport then cudaPackages.backendStdenv
+    else if originalStdenv.isDarwin then llvmPackages_11.stdenv
+    else originalStdenv;
+  inherit (cudaPackages) cudatoolkit cudnn nccl;
 in
 
 assert cudaSupport -> cudatoolkit != null
@@ -42,6 +64,7 @@ assert ! (stdenv.isDarwin && cudaSupport);
 
 let
   withTensorboard = (pythonOlder "3.6") || tensorboardSupport;
 
+  # FIXME: migrate to redist cudaPackages
   cudatoolkit_joined = symlinkJoin {
     name = "${cudatoolkit.name}-merged";
     paths = [
@@ -54,10 +77,13 @@ let
     ];
   };
 
+  # Tensorflow expects bintools at hard-coded paths, e.g. /usr/bin/ar
+  # The only way to overcome that is to set GCC_HOST_COMPILER_PREFIX,
+  # but that path must contain cc as well, so we merge them
   cudatoolkit_cc_joined = symlinkJoin {
-    name = "${cudatoolkit.cc.name}-merged";
+    name = "${stdenv.cc.name}-merged";
     paths = [
-      cudatoolkit.cc
+      stdenv.cc
       binutils.bintools # for ar, dwp, nm, objcopy, objdump, strip
     ];
   };
@@ -173,12 +199,7 @@ let
       '';
     }) else _bazel-build;
 
-  _bazel-build = (buildBazelPackage.override (lib.optionalAttrs stdenv.isDarwin {
-    # clang 7 fails to emit a symbol for
-    # __ZN4llvm11SmallPtrSetIPKNS_10AllocaInstELj8EED1Ev in any of the
-    # translation units, so the build fails at link time
-    stdenv = llvmPackages_11.stdenv;
-  })) {
+  _bazel-build = buildBazelPackage.override { inherit stdenv; } {
     name = "${pname}-${version}";
     bazel = bazel_5;
@@ -209,12 +230,13 @@ let
       flatbuffers-core
       giflib
       grpc
-      icu
+      # Necessary to fix the "`GLIBCXX_3.4.30' not found" error
+      (icu.override { inherit stdenv; })
       jsoncpp
       libjpeg_turbo
       libpng
       lmdb-core
-      pybind11
+      (pybind11.overridePythonAttrs (_: { inherit stdenv; }))
       snappy
       sqlite
     ] ++ lib.optionals cudaSupport [
@@ -299,9 +321,11 @@ let
     TF_NEED_CUDA = tfFeature cudaSupport;
     TF_CUDA_PATHS = lib.optionalString cudaSupport "${cudatoolkit_joined},${cudnn},${nccl}";
+    TF_CUDA_COMPUTE_CAPABILITIES = lib.concatStringsSep "," cudaCapabilities;
+
+    # Needed even when we override stdenv: e.g. for ar
     GCC_HOST_COMPILER_PREFIX = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin";
-    GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/gcc";
-    TF_CUDA_COMPUTE_CAPABILITIES = builtins.concatStringsSep "," cudaFlags.cudaRealArches;
+    GCC_HOST_COMPILER_PATH = lib.optionalString cudaSupport "${cudatoolkit_cc_joined}/bin/cc";
 
     postPatch = ''
       # bazel 3.3 should work just as well as bazel 3.1
diff --git a/pkgs/test/cuda/cuda-library-samples/generic.nix b/pkgs/test/cuda/cuda-library-samples/generic.nix
index e01664bab3191..e9a481c94a7a4 100644
--- a/pkgs/test/cuda/cuda-library-samples/generic.nix
+++ b/pkgs/test/cuda/cuda-library-samples/generic.nix
@@ -1,4 +1,4 @@
-{ lib, stdenv, fetchFromGitHub
+{ lib, backendStdenv, fetchFromGitHub
 , cmake, addOpenGLRunpath
 , cudatoolkit
 , cutensor
@@ -35,13 +35,13 @@ let
 in
 
 {
-  cublas = stdenv.mkDerivation (commonAttrs // {
+  cublas = backendStdenv.mkDerivation (commonAttrs // {
     pname = "cuda-library-samples-cublas";
 
     src = "${src}/cuBLASLt";
   });
 
-  cusolver = stdenv.mkDerivation (commonAttrs // {
+  cusolver = backendStdenv.mkDerivation (commonAttrs // {
     pname = "cuda-library-samples-cusolver";
 
     src = "${src}/cuSOLVER";
@@ -49,7 +49,7 @@ in
     sourceRoot = "cuSOLVER/gesv";
   });
 
-  cutensor = stdenv.mkDerivation (commonAttrs // {
+  cutensor = backendStdenv.mkDerivation (commonAttrs // {
    pname = "cuda-library-samples-cutensor";

    src = "${src}/cuTENSOR";
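
The diff ends above. What follows are illustrative sketches, not part of the patch. First, the new cudaFlags surface: formatCapabilities is a pure function, so a package can evaluate it for a capability set other than the global one. This sketch assumes a `cudaPackages` set built from the overlay above is in scope; the expected values mirror the assert in flags.nix.

let
  flags = cudaPackages.cudaFlags.formatCapabilities {
    cudaCapabilities = [ "7.5" "8.6" ];
    enableForwardCompat = true;
  };
in
{
  # "sm_75;sm_86;compute_86" -- e.g. for a hand-rolled CMake cache variable
  arches = builtins.concatStringsSep ";" flags.arches;
  # "-gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_86,code=compute_86"
  nvccGencode = builtins.concatStringsSep " " flags.gencode;
}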
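
On the user side, the default flags derive from two nixpkgs config knobs, with fallbacks to everything the CUDA version supports (`config.cudaCapabilities or supportedCapabilities`, `config.cudaForwardCompat or true` in flags.nix). A minimal sketch of pinning both; the capability value is illustrative.

let
  pkgs = import <nixpkgs> {
    config = {
      allowUnfree = true;            # CUDA itself is unfree
      cudaCapabilities = [ "8.6" ];  # only emit SASS for sm_86
      cudaForwardCompat = false;     # and skip the trailing +PTX gencode
    };
  };
in
# => [ "-gencode=arch=compute_86,code=sm_86" ]
pkgs.cudaPackages.cudaFlags.gencode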
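
The OpenCV changes establish the consumption pattern for downstream packages: take `cudaPackages ? { }` instead of a bare `cudatoolkit`, and derive arch lists from `cudaFlags` rather than hard-coding them. A hypothetical package skeleton following that pattern (the pname and the null src are placeholders, not a buildable example; CUDA_ARCH_BIN/CUDA_ARCH_PTX are the OpenCV-specific cache variables used above):

{ lib, stdenv, cmake, cudaPackages ? { } }:

let
  inherit (cudaPackages) cudatoolkit;
  inherit (cudaPackages.cudaFlags) cudaCapabilities;
in
stdenv.mkDerivation {
  pname = "example-cuda-consumer"; # hypothetical
  version = "0.0.1";
  src = null; # placeholder

  nativeBuildInputs = [ cmake ];
  buildInputs = [ cudatoolkit ];

  cmakeFlags = [
    # Host compiler compatible with this CUDA version (backendStdenv.cc)
    "-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
    # SASS for every requested capability, PTX only for the newest one
    "-DCUDA_ARCH_BIN=${lib.concatStringsSep ";" cudaCapabilities}"
    "-DCUDA_ARCH_PTX=${lib.last cudaCapabilities}"
  ];
}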
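
Finally, the magma arch plumbing is easiest to sanity-check with concrete values. A standalone worked evaluation of the helpers added in magma/generic.nix, with capabilities hard-coded for illustration:

let
  cudaCapabilities = [ "8.6" "8.0" ]; # deliberately unsorted
  dropDot = ver: builtins.replaceStrings [ "." ] [ "" ] ver;

  cudaArchitectures = builtins.map dropDot cudaCapabilities;  # [ "86" "80" ]
  cudaArchitecturesString =
    builtins.concatStringsSep ";" cudaArchitectures;          # "86;80"

  # Lexicographic sort is safe here: the assert in generic.nix guarantees
  # every element is a two-digit string.
  minArch =
    "${builtins.head (builtins.sort builtins.lessThan cudaArchitectures)}0";
in
# => { cudaArchitecturesString = "86;80"; minArch = "800"; }
{ inherit cudaArchitecturesString minArch; }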