Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cudaPackages: point nvcc at a compatible -ccbin #218265

Merged
merged 13 commits into from
Mar 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkgs/applications/science/math/mxnet/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ stdenv.mkDerivation rec {
"-DUSE_OLDCMAKECUDA=ON" # see https://github.com/apache/incubator-mxnet/issues/10743
"-DCUDA_ARCH_NAME=All"
"-DCUDA_HOST_COMPILER=${cudatoolkit.cc}/bin/cc"
"-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.cudaRealArches}"
"-DMXNET_CUDA_ARCH=${builtins.concatStringsSep ";" cudaFlags.realArches}"
] else [ "-DUSE_CUDA=OFF" ])
++ lib.optional (!cudnnSupport) "-DUSE_CUDNN=OFF";

Expand Down
43 changes: 27 additions & 16 deletions pkgs/development/compilers/cudatoolkit/common.nix
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ args@
, fetchurl
, fontconfig
, freetype
, gcc
, gdk-pixbuf
, glib
, glibc
Expand All @@ -22,13 +21,13 @@ args@
, perl
, python3
, requireFile
, stdenv
, backendStdenv # E.g. gcc11Stdenv, set in extension.nix
, unixODBC
, xorg
, zlib
}:

stdenv.mkDerivation rec {
backendStdenv.mkDerivation rec {
pname = "cudatoolkit";
inherit version runPatches;

Expand Down Expand Up @@ -146,14 +145,23 @@ stdenv.mkDerivation rec {

# Fix builds with newer glibc version
sed -i "1 i#define _BITS_FLOATN_H" "$out/include/host_defines.h"

# Ensure that cmake can find CUDA.
'' +
# Point NVCC at a compatible compiler
# FIXME: redist cuda_nvcc copy-pastes this code
# Refer to comments in the overrides for cuda_nvcc for explanation
# CUDA_TOOLKIT_ROOT_DIR is legacy,
# Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
''
mkdir -p $out/nix-support
echo "cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'" >> $out/nix-support/setup-hook

# Set the host compiler to be used by nvcc for CMake-based projects:
# https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables
echo "cmakeFlags+=' -DCUDA_HOST_COMPILER=${gcc}/bin'" >> $out/nix-support/setup-hook
cat <<EOF >> $out/nix-support/setup-hook
cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out'
cmakeFlags+=' -DCUDA_HOST_COMPILER=${backendStdenv.cc}/bin'
cmakeFlags+=' -DCMAKE_CUDA_HOST_COMPILER=${backendStdenv.cc}/bin'
Copy link
Contributor Author

@SomeoneSerge SomeoneSerge Apr 1, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NOTE: nvidia/thrust treats this as a path to the executable, not parent directory
TODO: check if maybe nvidia/thrust actually does this right

if [ -z "\''${CUDAHOSTCXX-}" ]; then
export CUDAHOSTCXX=${backendStdenv.cc}/bin;
fi
export NVCC_PREPEND_FLAGS+=' --compiler-bindir=${backendStdenv.cc}/bin'
EOF

# Move some libraries to the lib output so that programs that
# depend on them don't pull in this entire monstrosity.
Expand All @@ -167,10 +175,6 @@ stdenv.mkDerivation rec {
mv $out/extras/CUPTI/lib64/libcupti* $out/lib
''}

# Set compiler for NVCC.
wrapProgram $out/bin/nvcc \
--prefix PATH : ${gcc}/bin

# nvprof do not find any program to profile if LD_LIBRARY_PATH is not set
wrapProgram $out/bin/nvprof \
--prefix LD_LIBRARY_PATH : $out/lib
Expand All @@ -191,7 +195,14 @@ stdenv.mkDerivation rec {
preFixup =
let rpath = lib.concatStringsSep ":" [
(lib.makeLibraryPath (runtimeDependencies ++ [ "$lib" "$out" "$out/nvvm" ]))
"${stdenv.cc.cc.lib}/lib64"

# The path to libstdc++ and such
#
# `backendStdenv` is the cuda-compatible toolchain that we pick in
# extension.nix; we hand it to NVCC to use as a back-end, and we link
# cudatoolkit's binaries against its libstdc++
"${backendStdenv.cc.cc.lib}/lib64"

"$out/jre/lib/amd64/jli"
"$out/lib64"
"$out/nvvm/lib64"
Expand Down Expand Up @@ -260,7 +271,7 @@ stdenv.mkDerivation rec {
popd
'';
passthru = {
cc = gcc;
inherit (backendStdenv) cc;
majorMinorVersion = lib.versions.majorMinor version;
majorVersion = lib.versions.majorMinor version;
};
Expand Down
24 changes: 21 additions & 3 deletions pkgs/development/compilers/cudatoolkit/extension.nix
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,29 @@ final: prev: let
# Version info for the classic cudatoolkit packages that contain everything that is in redist.
cudatoolkitVersions = final.lib.importTOML ./versions.toml;

finalVersion = cudatoolkitVersions.${final.cudaVersion};

# Exposed as cudaPackages.backendStdenv.
# We don't call it just "stdenv" to avoid confusion: e.g. this toolchain doesn't contain nvcc.
# Instead, it's the back-end toolchain for nvcc to use.
# We also use this to link a compatible libstdc++ (backendStdenv.cc.cc.lib)
SomeoneSerge marked this conversation as resolved.
Show resolved Hide resolved
# Cf. https://github.com/NixOS/nixpkgs/pull/218265 for context
backendStdenv = prev.pkgs."${finalVersion.gcc}Stdenv";

### Add classic cudatoolkit package
cudatoolkit = buildCudaToolkitPackage ((attrs: attrs // { gcc = prev.pkgs.${attrs.gcc}; }) cudatoolkitVersions.${final.cudaVersion});
cudatoolkit =
let
attrs = builtins.removeAttrs finalVersion [ "gcc" ];
attrs' = attrs // { inherit backendStdenv; };
in
buildCudaToolkitPackage attrs';

cudaFlags = final.callPackage ./flags.nix {};

in {
inherit cudatoolkit cudaFlags;
in
{
inherit
backendStdenv
cudatoolkit
cudaFlags;
}
133 changes: 75 additions & 58 deletions pkgs/development/compilers/cudatoolkit/flags.nix
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{ config
, lib
, cudatoolkit
, cudaVersion
}:

# Type aliases
Expand All @@ -13,14 +13,21 @@

let
inherit (lib) attrsets lists strings trivial versions;
cudaVersion = cudatoolkit.version;

# Flags are determined based on your CUDA toolkit by default. You may benefit
# from improved performance, reduced file size, or greater hardware support by
# passing a configuration based on your specific GPU environment.
#
# config.cudaCapabilities: list of hardware generations to support (e.g., "8.0")
# config.cudaForwardCompat: bool for compatibility with future GPU generations
# config.cudaCapabilities :: List Capability
# List of hardware generations to build.
# E.g. [ "8.0" ]
# Currently, the last item is considered the optional forward-compatibility arch,
# but this may change in the future.
#
# config.cudaForwardCompat :: Bool
# Whether to include the forward compatibility gencode (+PTX)
# to support future GPU generations.
# E.g. true
#
# Please see the accompanying documentation or https://github.com/NixOS/nixpkgs/pull/205351

Expand All @@ -40,6 +47,9 @@ let
# GPUs which are supported by the provided CUDA version.
supportedGpus = builtins.filter isSupported gpus;

# supportedCapabilities :: List Capability
supportedCapabilities = lists.map (gpu: gpu.computeCapability) supportedGpus;

# cudaArchNameToVersions :: AttrSet String (List String)
# Maps the name of a GPU architecture to different versions of that architecture.
# For example, "Ampere" maps to [ "8.0" "8.6" "8.7" ].
Expand All @@ -50,12 +60,6 @@ let
(gpu: gpu.archName)
supportedGpus;

# cudaArchNames :: List String
# NOTE: It's important that we don't rely on builtins.attrNames cudaArchNameToVersions here;
# otherwise, we'll get the names sorted in alphabetical order. The JSON array we read them
# from is already sorted, so we'll preserve that order here.
cudaArchNames = lists.unique (lists.map (gpu: gpu.archName) supportedGpus);
SomeoneSerge marked this conversation as resolved.
Show resolved Hide resolved

# cudaComputeCapabilityToName :: AttrSet String String
# Maps the version of a GPU architecture to the name of that architecture.
# For example, "8.0" maps to "Ampere".
Expand All @@ -68,23 +72,6 @@ let
supportedGpus
);

# cudaComputeCapabilities :: List String
# NOTE: It's important that we don't rely on builtins.attrNames cudaComputeCapabilityToName here;
# otherwise, we'll get the versions sorted in alphabetical order. The JSON array we read them
# from is already sorted, so we'll preserve that order here.
# Use the user-provided list of CUDA capabilities if it's provided.
cudaComputeCapabilities = config.cudaCapabilities
or (lists.map (gpu: gpu.computeCapability) supportedGpus);

# cudaForwardComputeCapability :: String
cudaForwardComputeCapability = (lists.last cudaComputeCapabilities) + "+PTX";

# cudaComputeCapabilitiesAndForward :: List String
# The list of supported CUDA architectures, including the forward compatibility architecture.
# If forward compatibility is disabled, this will be the same as cudaComputeCapabilities.
cudaComputeCapabilitiesAndForward = cudaComputeCapabilities
++ lists.optional (config.cudaForwardCompat or true) cudaForwardComputeCapability;

# dropDot :: String -> String
dropDot = ver: builtins.replaceStrings [ "." ] [ "" ] ver;

Expand All @@ -102,38 +89,68 @@ let
"-gencode=arch=compute_${dropDot computeCapability},code=${feat}_${dropDot computeCapability}"
);

# cudaRealArches :: List String
# The real architectures are physical architectures supported by the CUDA version.
# For example, "sm_80".
cudaRealArches = archMapper "sm" cudaComputeCapabilities;

# cudaVirtualArches :: List String
# The virtual architectures are typically used for forward compatibility, when trying to support
# an architecture newer than the CUDA version allows.
# For example, "compute_80".
cudaVirtualArches = archMapper "compute" cudaComputeCapabilities;

# cudaArches :: List String
# By default, build for all supported architectures and forward compatibility via a virtual
# architecture for the newest supported architecture.
cudaArches = cudaRealArches ++
lists.optional (config.cudaForwardCompat or true) (lists.last cudaVirtualArches);

# cudaGencode :: List String
# A list of CUDA gencode arguments to pass to NVCC.
cudaGencode =
let
base = gencodeMapper "sm" cudaComputeCapabilities;
forwardCompat = gencodeMapper "compute" [ (lists.last cudaComputeCapabilities) ];
in
base ++ lists.optionals (config.cudaForwardCompat or true) forwardCompat;
formatCapabilities = { cudaCapabilities, enableForwardCompat ? true }: rec {
SomeoneSerge marked this conversation as resolved.
Show resolved Hide resolved
inherit cudaCapabilities enableForwardCompat;

# archNames :: List String
# E.g. [ "Turing" "Ampere" ]
archNames = lists.unique (builtins.map (cap: cudaComputeCapabilityToName.${cap}) cudaCapabilities);

# realArches :: List String
# The real architectures are physical architectures supported by the CUDA version.
# E.g. [ "sm_75" "sm_86" ]
realArches = archMapper "sm" cudaCapabilities;

# virtualArches :: List String
# The virtual architectures are typically used for forward compatibility, when trying to support
# an architecture newer than the CUDA version allows.
# E.g. [ "compute_75" "compute_86" ]
virtualArches = archMapper "compute" cudaCapabilities;

# arches :: List String
# By default, build for all supported architectures and forward compatibility via a virtual
# architecture for the newest supported architecture.
# E.g. [ "sm_75" "sm_86" "compute_86" ]
arches = realArches ++
lists.optional enableForwardCompat (lists.last virtualArches);

# gencode :: List String
# A list of CUDA gencode arguments to pass to NVCC.
# E.g. [ "-gencode=arch=compute_75,code=sm_75" ... "-gencode=arch=compute_86,code=compute_86" ]
gencode =
let
base = gencodeMapper "sm" cudaCapabilities;
forward = gencodeMapper "compute" [ (lists.last cudaCapabilities) ];
in
base ++ lib.optionals enableForwardCompat forward;
};

in
# When changing names or formats: pause, validate, and update the assert
assert (formatCapabilities { cudaCapabilities = [ "7.5" "8.6" ]; }) == {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would be more interesting to test [ "8.6" "7.5" ]. Should this preserve the order? Should this print a warning?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's my opinion that capabilities should be sorted, so I would want the order of the output to be invariant with respect to the order of the input (which should already be sorted). Although, I'd love to hear other views!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The way we handle this parameter now, the order is significant. It's our semi-implicit convention that the last element goes into PTX. Maybe the take away is rather that we don't want this to be implicit:)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, I think you're right there -- the last capability in the list shouldn't be the one which gets turned into a virtual architecture.

Although, I do like the idea of having them ordered so packages can decide what to build for. For example, Magma doesn't support 8.6/8.9, so I can imagine at some point in the future Magma iterating over the list of cuda capabilities to find the greatest lower bound (in Magma's case, 8.0) and building for that architecture.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Left as a TODO

cudaCapabilities = [ "7.5" "8.6" ];
enableForwardCompat = true;

archNames = [ "Turing" "Ampere" ];
realArches = [ "sm_75" "sm_86" ];
virtualArches = [ "compute_75" "compute_86" ];
arches = [ "sm_75" "sm_86" "compute_86" ];

gencode = [ "-gencode=arch=compute_75,code=sm_75" "-gencode=arch=compute_86,code=sm_86" "-gencode=arch=compute_86,code=compute_86" ];
};
{
inherit
cudaArchNames
cudaArchNameToVersions cudaComputeCapabilityToName
cudaRealArches cudaVirtualArches cudaArches
cudaGencode;
cudaCapabilities = cudaComputeCapabilitiesAndForward;
# formatCapabilities :: { cudaCapabilities: List Capability, cudaForwardCompat: Boolean } -> { ... }
inherit formatCapabilities;

# cudaArchNameToVersions :: String => String
inherit cudaArchNameToVersions;

# cudaComputeCapabilityToName :: String => String
inherit cudaComputeCapabilityToName;

# dropDot :: String -> String
inherit dropDot;
} // formatCapabilities {
SomeoneSerge marked this conversation as resolved.
Show resolved Hide resolved
cudaCapabilities = config.cudaCapabilities or supportedCapabilities;
enableForwardCompat = config.cudaForwardCompat or true;
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{ lib
, stdenv
, backendStdenv
, fetchurl
, autoPatchelfHook
, autoAddOpenGLRunpathHook
Expand All @@ -10,7 +10,8 @@ attrs:

let
arch = "linux-x86_64";
in stdenv.mkDerivation {
in
backendStdenv.mkDerivation {
inherit pname;
inherit (attrs) version;

Expand All @@ -29,7 +30,11 @@ in stdenv.mkDerivation {
];

buildInputs = [
stdenv.cc.cc.lib
# autoPatchelfHook will search for a libstdc++ and we're giving it a
# "compatible" libstdc++ from the same toolchain that NVCC uses.
#
# NB: We don't actually know if this is the right thing to do
backendStdenv.cc.cc.lib
];

dontBuild = true;
Expand All @@ -43,6 +48,8 @@ in stdenv.mkDerivation {
runHook postInstall
'';

passthru.stdenv = backendStdenv;

meta = {
description = attrs.name;
license = lib.licenses.unfree;
Expand Down
Loading