From bdfb61e117c28d9ebfcdceb0d64d952aa5308a16 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Thu, 9 Mar 2023 18:38:51 -0500 Subject: [PATCH] cudaPackages: use -Xfatbin=-compress-all; prune default cudaCapabilities --- .../compilers/cudatoolkit/common.nix | 6 +- .../compilers/cudatoolkit/flags.nix | 32 ++++++--- .../compilers/cudatoolkit/gpus.nix | 66 +++++++++++++++---- .../cudatoolkit/redist/overrides.nix | 6 +- 4 files changed, 85 insertions(+), 25 deletions(-) diff --git a/pkgs/development/compilers/cudatoolkit/common.nix b/pkgs/development/compilers/cudatoolkit/common.nix index fb3b50b981504..894c0b8fb5244 100644 --- a/pkgs/development/compilers/cudatoolkit/common.nix +++ b/pkgs/development/compilers/cudatoolkit/common.nix @@ -151,6 +151,10 @@ backendStdenv.mkDerivation rec { # Refer to comments in the overrides for cuda_nvcc for explanation # CUDA_TOOLKIT_ROOT_DIR is legacy, # Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables + # NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the compiled + # binaries. If binaries grow over 2GB, they will fail to link. This is a problem for us, as + # the default set of CUDA capabilities we build can regularly cause this to occur (for + # example, with Magma). 
'' mkdir -p $out/nix-support cat <<EOF >> $out/nix-support/setup-hook @@ -160,7 +164,7 @@ backendStdenv.mkDerivation rec { if [ -z "\''${CUDAHOSTCXX-}" ]; then export CUDAHOSTCXX=${backendStdenv.cc}/bin; fi - export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin' + export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin -Xfatbin=-compress-all' EOF # Move some libraries to the lib output so that programs that diff --git a/pkgs/development/compilers/cudatoolkit/flags.nix b/pkgs/development/compilers/cudatoolkit/flags.nix index 989fdb06c5dfb..28d5bda4da5f0 100644 --- a/pkgs/development/compilers/cudatoolkit/flags.nix +++ b/pkgs/development/compilers/cudatoolkit/flags.nix @@ -4,12 +4,8 @@ }: # Type aliases -# Gpu = { -# archName: String, # e.g., "Hopper" -# computeCapability: String, # e.g., "9.0" -# minCudaVersion: String, # e.g., "11.8" -# maxCudaVersion: String, # e.g., "12.0" -# } +# Gpu :: AttrSet +# - See the documentation in ./gpus.nix. let inherit (lib) attrsets lists strings trivial versions; @@ -34,22 +30,40 @@ let # gpus :: List Gpu gpus = builtins.import ./gpus.nix; - # isVersionIn :: Gpu -> Bool + # isSupported :: Gpu -> Bool isSupported = gpu: let inherit (gpu) minCudaVersion maxCudaVersion; lowerBoundSatisfied = strings.versionAtLeast cudaVersion minCudaVersion; - upperBoundSatisfied = !(strings.versionOlder maxCudaVersion cudaVersion); + upperBoundSatisfied = (maxCudaVersion == null) + || !(strings.versionOlder maxCudaVersion cudaVersion); in lowerBoundSatisfied && upperBoundSatisfied; + # isDefault :: Gpu -> Bool + isDefault = gpu: + let + inherit (gpu) dontDefaultAfter; + newGpu = dontDefaultAfter == null; + recentGpu = newGpu || strings.versionAtLeast dontDefaultAfter cudaVersion; + in + recentGpu; + # supportedGpus :: List Gpu # GPUs which are supported by the provided CUDA version.
supportedGpus = builtins.filter isSupported gpus; + # defaultGpus :: List Gpu + # GPUs which are supported by the provided CUDA version and we want to build for by default. + defaultGpus = builtins.filter isDefault supportedGpus; + # supportedCapabilities :: List Capability supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus; + # defaultCapabilities :: List Capability + # The default capabilities to target, if not overridden by the user. + defaultCapabilities = lists.map (gpu: gpu.computeCapability) defaultGpus; + # cudaArchNameToVersions :: AttrSet String (List String) # Maps the name of a GPU architecture to different versions of that architecture. # For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ]. @@ -151,6 +165,6 @@ assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == { # dropDot :: String -> String inherit dropDot; } // formatCapabilities { - cudaCapabilities = config.cudaCapabilities or supportedCapabilities; + cudaCapabilities = config.cudaCapabilities or defaultCapabilities; enableForwardCompat = config.cudaForwardCompat or true; } diff --git a/pkgs/development/compilers/cudatoolkit/gpus.nix b/pkgs/development/compilers/cudatoolkit/gpus.nix index e938e91297478..be157df89624f 100644 --- a/pkgs/development/compilers/cudatoolkit/gpus.nix +++ b/pkgs/development/compilers/cudatoolkit/gpus.nix @@ -1,110 +1,148 @@ [ + # Type alias + # Gpu = { + # archName: String + # - The name of the microarchitecture. + # computeCapability: String + # - The compute capability of the GPU. + # minCudaVersion: String + # - The minimum (inclusive) CUDA version that supports this GPU. + # dontDefaultAfter: null | String + # - The CUDA version after which to exclude this GPU from the list of default capabilities + # we build. null means we always include this GPU in the default capabilities if it is + # supported. + # maxCudaVersion: null | String + # - The maximum (inclusive) CUDA version that supports this GPU.
null means there is no + # maximum. + # } { archName = "Kepler"; computeCapability = "3.0"; minCudaVersion = "10.0"; + dontDefaultAfter = "10.2"; maxCudaVersion = "10.2"; } { archName = "Kepler"; computeCapability = "3.2"; minCudaVersion = "10.0"; + dontDefaultAfter = "10.2"; maxCudaVersion = "10.2"; } { archName = "Kepler"; computeCapability = "3.5"; minCudaVersion = "10.0"; + dontDefaultAfter = "11.0"; maxCudaVersion = "11.8"; } { archName = "Kepler"; computeCapability = "3.7"; minCudaVersion = "10.0"; + dontDefaultAfter = "11.0"; maxCudaVersion = "11.8"; } { archName = "Maxwell"; computeCapability = "5.0"; minCudaVersion = "10.0"; - maxCudaVersion = "12.0"; + dontDefaultAfter = "11.0"; + maxCudaVersion = null; } { archName = "Maxwell"; computeCapability = "5.2"; minCudaVersion = "10.0"; - maxCudaVersion = "12.0"; + dontDefaultAfter = "11.0"; + maxCudaVersion = null; } { archName = "Maxwell"; computeCapability = "5.3"; minCudaVersion = "10.0"; - maxCudaVersion = "12.0"; + dontDefaultAfter = "11.0"; + maxCudaVersion = null; } { archName = "Pascal"; computeCapability = "6.0"; minCudaVersion = "10.0"; - maxCudaVersion = "12.0"; + dontDefaultAfter = null; + maxCudaVersion = null; } { archName = "Pascal"; computeCapability = "6.1"; minCudaVersion = "10.0"; - maxCudaVersion = "12.0"; + dontDefaultAfter = null; + maxCudaVersion = null; } { archName = "Pascal"; computeCapability = "6.2"; minCudaVersion = "10.0"; - maxCudaVersion = "12.0"; + dontDefaultAfter = null; + maxCudaVersion = null; } { archName = "Volta"; computeCapability = "7.0"; minCudaVersion = "10.0"; - maxCudaVersion = "12.0"; + dontDefaultAfter = null; + maxCudaVersion = null; } { archName = "Volta"; computeCapability = "7.2"; minCudaVersion = "10.0"; - maxCudaVersion = "12.0"; + dontDefaultAfter = null; + maxCudaVersion = null; } { archName = "Turing"; computeCapability = "7.5"; minCudaVersion = "10.0"; - maxCudaVersion = "12.0"; + dontDefaultAfter = null; + maxCudaVersion = null; } { archName = "Ampere"; 
computeCapability = "8.0"; minCudaVersion = "11.2"; - maxCudaVersion = "12.0"; + dontDefaultAfter = null; + maxCudaVersion = null; } { archName = "Ampere"; computeCapability = "8.6"; minCudaVersion = "11.2"; - maxCudaVersion = "12.0"; + dontDefaultAfter = null; + maxCudaVersion = null; } { archName = "Ampere"; computeCapability = "8.7"; minCudaVersion = "11.5"; - maxCudaVersion = "12.0"; + # NOTE: This is purposefully before 11.5 to ensure it is never a capability we target by + # default. 8.7 is the Jetson Orin series of devices which are a very specific platform. + # We keep this entry here in case we ever want to target it explicitly, but we don't + # want to target it by default. + dontDefaultAfter = "11.4"; + maxCudaVersion = null; } { archName = "Ada"; computeCapability = "8.9"; minCudaVersion = "11.8"; - maxCudaVersion = "12.0"; + dontDefaultAfter = null; + maxCudaVersion = null; } { archName = "Hopper"; computeCapability = "9.0"; minCudaVersion = "11.8"; - maxCudaVersion = "12.0"; + dontDefaultAfter = null; + maxCudaVersion = null; } ] diff --git a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix index 96b782d8c990d..7b8e02de24741 100644 --- a/pkgs/development/compilers/cudatoolkit/redist/overrides.nix +++ b/pkgs/development/compilers/cudatoolkit/redist/overrides.nix @@ -41,6 +41,10 @@ in # uses the last --compiler-bindir it gets on the command line. # FIXME: this results in "incompatible redefinition" warnings. # https://docs.nvidia.com/cuda/cuda-compiler-driver-nvcc/index.html#compiler-bindir-directory-ccbin + # NOTE: We unconditionally set -Xfatbin=-compress-all, which reduces the size of the + # compiled binaries. If binaries grow over 2GB, they will fail to link. This is a problem + # for us, as the default set of CUDA capabilities we build can regularly cause this to + # occur (for example, with Magma). 
postInstall = (oldAttrs.postInstall or "") + '' mkdir -p $out/nix-support cat <<EOF >> $out/nix-support/setup-hook @@ -49,7 +53,7 @@ in if [ -z "\''${CUDAHOSTCXX-}" ]; then export CUDAHOSTCXX=${cc}/bin; fi - export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin' + export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${cc}/bin -Xfatbin=-compress-all' EOF ''; });