Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cudaPackages: point nvcc at a compatible -ccbin #218265

Merged
merged 13 commits into from
Mar 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkgs/applications/science/math/mxnet/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ stdenv.mkDerivation rec {
"-DUSE_OLDCMAKECUDA=ON" # see https://github.com/apache/incubator-mxnet/issues/10743
"-DCUDA_ARCH_NAME=All"
"-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
"-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.cudaRealArches}"
"-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.realArches}"
] else [ "-DUSE_CUDA=OFF" ])
++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF";

Expand Down
43 changes: 27 additions & 16 deletions pkgs/development/compilers/cudatoolkit/common.nix
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ args@
, fetchurl
, fontconfig
, freetype
, gcc
, gdk-pixbuf
, glib
, glibc
Expand All @@ -22,13 +21,13 @@ args@
, perl
, python3
, requireFile
, stdenv
, backendStdenv # E.g. gcc11Stdenv, set in extension.nix
, unixODBC
, xorg
, zlib
}:

stdenv.mkDerivation rec {
backendStdenv.mkDerivation rec {
pname = "cudatoolkit";
inherit version runPatches;

Expand Down Expand Up @@ -146,14 +145,23 @@ stdenv.mkDerivation rec {

# Fix builds with newer glibc version
sed -i "1 i#define _BITS_FLOATN_H" "$out/include/host_defines.h"

# Ensure that cmake can find CUDA.
'' +
# Point NVCC at a compatible compiler
# FIXME: redist cuda_nvcc copy-pastes this code
# Refer to comments in the overrides for cuda_nvcc for explanation
# CUDA_TOOLKIT_ROOT_DIR is legacy,
# Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
''
mkdir -p $out/nix-support
echo "cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'" >> $out/nix-support/setup-hook

# Set the host compiler to be used by nvcc for CMake-based projects:
# https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
echo "cmakeFlags+=' -DCUDA_HOST_COMPILER=${gcc}/bin'" >> $out/nix-support/setup-hook
cat <<EOF >> $out/nix-support/setup-hook
cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'
cmakeFlags+=' -DCUDA_HOST_COMPILER=${backendStdenv.cc}/bin'
cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${backendStdenv.cc}/bin'
Copy link
Contributor Author

@SomeoneSerge SomeoneSerge Apr 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NOTE: nvidia/thrust treats this as a path to the executable, not parent directory
TODO: check if maybe nvidia/thrust actually does this right

if [ -z "\''${CUDAHOSTCXX-}" ]; then
export CUDAHOSTCXX=${backendStdenv.cc}/bin;
fi
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin'
EOF

# Move some libraries to the lib output so that programs that
# depend on them don't pull in this entire monstrosity.
Expand All @@ -167,10 +175,6 @@ stdenv.mkDerivation rec {
mv $out/extras/CUPTI/lib64/libcupti* $out/lib
''}

# Set compiler for NVCC.
wrapProgram $out/bin/nvcc \
--prefix PATH : ${gcc}/bin

# nvprof do not find any program to profile if LD_LIBRARY_PATH is not set
wrapProgram $out/bin/nvprof \
--prefix LD_LIBRARY_PATH : $out/lib
Expand All @@ -191,7 +195,14 @@ stdenv.mkDerivation rec {
preFixup =
let rpath = lib.concatStringsSep ":" [
(lib.makeLibraryPath (runtimeDependencies ++ [ "$lib" "$out" "$out/nvvm" ]))
"${stdenv.cc.cc.lib}/lib64"

# The path to libstdc++ and such
#
# `backendStdenv` is the cuda-compatible toolchain that we pick in
# extension.nix; we hand it to NVCC to use as a back-end, and we link
# cudatoolkit's binaries against its libstdc++
"${backendStdenv.cc.cc.lib}/lib64"

"$out/jre/lib/amd64/jli"
"$out/lib64"
"$out/nvvm/lib64"
Expand Down Expand Up @@ -260,7 +271,7 @@ stdenv.mkDerivation rec {
popd
'';
passthru = {
cc = gcc;
inherit (backendStdenv) cc;
majorMinorVersion = lib.versions.majorMinor version;
majorVersion = lib.versions.majorMinor version;
};
Expand Down
24 changes: 21 additions & 3 deletions pkgs/development/compilers/cudatoolkit/extension.nix
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,29 @@ final: prev: let
# Version info for the classic cudatoolkit packages that contain everything that is in redist.
cudatoolkitVersions = final.lib.importTOML ./versions.toml;

finalVersion = cudatoolkitVersions.${final.cudaVersion};

# Exposed as cudaPackages.backendStdenv.
# We don't call it just "stdenv" to avoid confusion: e.g. this toolchain doesn't contain nvcc.
# Instead, it's the back-end toolchain for nvcc to use.
# We also use this to link a compatible libstdc++ (backendStdenv.cc.cc.lib)
SomeoneSerge marked this conversation as resolved.
Show resolved Hide resolved
# Cf. https://github.com/NixOS/nixpkgs/pull/218265 for context
backendStdenv = prev.pkgs."${finalVersion.gcc}Stdenv";

### Add classic cudatoolkit package
cudatoolkit = buildCudaToolkitPackage ((attrs: attrs // { gcc = prev.pkgs.${attrs.gcc}; }) cudatoolkitVersions.${final.cudaVersion});
cudatoolkit =
let
attrs = builtins.removeAttrs finalVersion [ "gcc" ];
attrs' = attrs // { inherit backendStdenv; };
in
buildCudaToolkitPackage attrs';

cudaFlags = final.callPackage ./flags.nix {};

in {
inherit cudatoolkit cudaFlags;
in
{
inherit
backendStdenv
cudatoolkit
cudaFlags;
}
133 changes: 75 additions & 58 deletions pkgs/development/compilers/cudatoolkit/flags.nix
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{ config
, lib
, cudatoolkit
, cudaVersion
}:

# Type aliases
Expand All @@ -13,14 +13,21 @@

let
inherit (lib) attrsets lists strings trivial versions;
cudaVersion = cudatoolkit.version;

# Flags are determined based on your CUDA toolkit by default. You may benefit
# from improved performance, reduced file size, or greater hardware support by
# passing a configuration based on your specific GPU environment.
#
# config.cudaCapabilities: list of hardware generations to support (e.g., "8.0")
# config.cudaForwardCompat: bool for compatibility with future GPU generations
# config.cudaCapabilities :: List Capability
# List of hardware generations to build.
# E.g. [ "8.0" ]
# Currently, the last item is considered the optional forward-compatibility arch,
# but this may change in the future.
#
# config.cudaForwardCompat :: Bool
# Whether to include the forward compatibility gencode (+PTX)
# to support future GPU generations.
# E.g. true
#
# Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351

Expand All @@ -40,6 +47,9 @@ let
# GPUs which are supported by the provided CUDA version.
supportedGpus = builtins.filter isSupported gpus;

# supportedCapabilities :: List Capability
supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus;

# cudaArchNameToVersions :: AttrSet String (List String)
# Maps the name of a GPU architecture to different versions of that architecture.
# For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ].
Expand All @@ -50,12 +60,6 @@ let
(gpu: gpu.archName)
supportedGpus;

# cudaArchNames :: List String
# NOTE: It's important that we don't rely on builtins.attrNames cudaArchNameToVersions here;
# otherwise, we'll get the names sorted in alphabetical order. The JSON array we read them
# from is already sorted, so we'll preserve that order here.
cudaArchNames = lists.unique (lists.map (gpu: gpu.archName) supportedGpus);
SomeoneSerge marked this conversation as resolved.
Show resolved Hide resolved

# cudaComputeCapabilityToName :: AttrSet String String
# Maps the version of a GPU architecture to the name of that architecture.
# For example, "8.0" maps to "Ampere".
Expand All @@ -68,23 +72,6 @@ let
supportedGpus
);

# cudaComputeCapabilities :: List String
# NOTE: It's important that we don't rely on builtins.attrNames cudaComputeCapabilityToName here;
# otherwise, we'll get the versions sorted in alphabetical order. The JSON array we read them
# from is already sorted, so we'll preserve that order here.
# Use the user-provided list of CUDA capabilities if it's provided.
cudaComputeCapabilities = config.cudaCapabilities
or (lists.map (gpu: gpu.computeCapability) supportedGpus);

# cudaForwardComputeCapability :: String
cudaForwardComputeCapability = (lists.last cudaComputeCapabilities) + "+PTX";

# cudaComputeCapabilitiesAndForward :: List String
# The list of supported CUDA architectures, including the forward compatibility architecture.
# If forward compatibility is disabled, this will be the same as cudaComputeCapabilities.
cudaComputeCapabilitiesAndForward = cudaComputeCapabilities
++ lists.optional (config.cudaForwardCompat or true) cudaForwardComputeCapability;

# dropDot :: String -> String
dropDot = ver: builtins.replaceStrings [ "." ] [ "" ] ver;

Expand All @@ -102,38 +89,68 @@ let
"-gencode=arch=compute_${dropDot computeCapability},code=${feat}_${dropDot computeCapability}"
);

# cudaRealArches :: List String
# The real architectures are physical architectures supported by the CUDA version.
# For example, "sm_80".
cudaRealArches = archMapper "sm" cudaComputeCapabilities;

# cudaVirtualArches :: List String
# The virtual architectures are typically used for forward compatibility, when trying to support
# an architecture newer than the CUDA version allows.
# For example, "compute_80".
cudaVirtualArches = archMapper "compute" cudaComputeCapabilities;

# cudaArches :: List String
# By default, build for all supported architectures and forward compatibility via a virtual
# architecture for the newest supported architecture.
cudaArches = cudaRealArches ++
lists.optional (config.cudaForwardCompat or true) (lists.last cudaVirtualArches);

# cudaGencode :: List String
# A list of CUDA gencode arguments to pass to NVCC.
cudaGencode =
let
base = gencodeMapper "sm" cudaComputeCapabilities;
forwardCompat = gencodeMapper "compute" [ (lists.last cudaComputeCapabilities) ];
in
base ++ lists.optionals (config.cudaForwardCompat or true) forwardCompat;
formatCapabilities = { cudaCapabilities, enableForwardCompat ? true }: rec {
SomeoneSerge marked this conversation as resolved.
Show resolved Hide resolved
inherit cudaCapabilities enableForwardCompat;

# archNames :: List String
# E.g. [ "Turing" "Ampere" ]
archNames = lists.unique (builtins.map (cap: cudaComputeCapabilityToName.${cap}) cudaCapabilities);

# realArches :: List String
# The real architectures are physical architectures supported by the CUDA version.
# E.g. [ "sm_75" "sm_86" ]
realArches = archMapper "sm" cudaCapabilities;

# virtualArches :: List String
# The virtual architectures are typically used for forward compatibility, when trying to support
# an architecture newer than the CUDA version allows.
# E.g. [ "compute_75" "compute_86" ]
virtualArches = archMapper "compute" cudaCapabilities;

# arches :: List String
# By default, build for all supported architectures and forward compatibility via a virtual
# architecture for the newest supported architecture.
# E.g. [ "sm_75" "sm_86" "compute_86" ]
arches = realArches ++
lists.optional enableForwardCompat (lists.last virtualArches);

# gencode :: List String
# A list of CUDA gencode arguments to pass to NVCC.
# E.g. [ "-gencode=arch=compute_75,code=sm_75" ... "-gencode=arch=compute_86,code=compute_86" ]
gencode =
let
base = gencodeMapper "sm" cudaCapabilities;
forward = gencodeMapper "compute" [ (lists.last cudaCapabilities) ];
in
base ++ lib.optionals enableForwardCompat forward;
};

in
# When changing names or formats: pause, validate, and update the assert
assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be more interesting to test [ "8.6" "7.5" ]. Should this preserve the order? Should this print a warning?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's my opinion that capabilities should be sorted, so I would want the order of the output to be invariant with respect to the order of the input (which should already be sorted). Although, I'd love to hear other views!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way we handle this parameter now, the order is significant. It's our semi-implicit convention that the last element goes into PTX. Maybe the take away is rather that we don't want this to be implicit:)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I think you're right there -- the last capability in the list shouldn't be the one which gets turned into a virtual architecture.

Although, I do like the idea of having them ordered so packages can decide what to build for. For example, Magma doesn't support 8.6/8.9, so I can imagine at some point in the future Magma iterating over the list of cuda capabilities to find the greatest lower bound (in Magma's case, 8.0) and building for that architecture.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Left as a TODO

cudaCapabilities = [ "7.5" "8.6" ];
enableForwardCompat = true;

archNames = [ "Turing" "Ampere" ];
realArches = [ "sm_75" "sm_86" ];
virtualArches = [ "compute_75" "compute_86" ];
arches = [ "sm_75" "sm_86" "compute_86" ];

gencode = [ "-gencode=arch=compute_75,code=sm_75" "-gencode=arch=compute_86,code=sm_86" "-gencode=arch=compute_86,code=compute_86" ];
};
{
inherit
cudaArchNames
cudaArchNameToVersions cudaComputeCapabilityToName
cudaRealArches cudaVirtualArches cudaArches
cudaGencode;
cudaCapabilities = cudaComputeCapabilitiesAndForward;
# formatCapabilities :: { cudaCapabilities: List Capability, cudaForwardCompat: Boolean } -> { ... }
inherit formatCapabilities;

# cudaArchNameToVersions :: String => String
inherit cudaArchNameToVersions;

# cudaComputeCapabilityToName :: String => String
inherit cudaComputeCapabilityToName;

# dropDot :: String -> String
inherit dropDot;
} // formatCapabilities {
SomeoneSerge marked this conversation as resolved.
Show resolved Hide resolved
cudaCapabilities = config.cudaCapabilities or supportedCapabilities;
enableForwardCompat = config.cudaForwardCompat or true;
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{ lib
, stdenv
, backendStdenv
, fetchurl
, autoPatchelfHook
, autoAddOpenGLRunpathHook
Expand All @@ -10,7 +10,8 @@ attrs:

let
arch = "linux-x86_64";
in stdenv.mkDerivation {
in
backendStdenv.mkDerivation {
inherit pname;
inherit (attrs) version;

Expand All @@ -29,7 +30,11 @@ in stdenv.mkDerivation {
];

buildInputs = [
stdenv.cc.cc.lib
# autoPatchelfHook will search for a libstdc++ and we're giving it a
# "compatible" libstdc++ from the same toolchain that NVCC uses.
#
# NB: We don't actually know if this is the right thing to do
backendStdenv.cc.cc.lib
];

dontBuild = true;
Expand All @@ -43,6 +48,8 @@ in stdenv.mkDerivation {
runHook postInstall
'';

passthru.stdenv = backendStdenv;

meta = {
description = attrs.name;
license = lib.licenses.unfree;
Expand Down
Loading