diff --git a/.devops/nix/devshells.nix b/.devops/nix/devshells.nix index 1862f0f0851001..9895d803f041f0 100644 --- a/.devops/nix/devshells.nix +++ b/.devops/nix/devshells.nix @@ -2,12 +2,9 @@ perSystem = { config, lib, ... }: { - devShells = - lib.concatMapAttrs - (name: package: { - ${name} = package.passthru.shell; - ${name + "-extra"} = package.passthru.shell-extra; - }) - config.packages; + devShells = lib.concatMapAttrs (name: package: { + ${name} = package.passthru.shell; + ${name + "-extra"} = package.passthru.shell-extra; + }) config.packages; }; } diff --git a/.devops/nix/nixpkgs-instances.nix b/.devops/nix/nixpkgs-instances.nix index 4a2f81c4bfd044..90d683a713aa1f 100644 --- a/.devops/nix/nixpkgs-instances.nix +++ b/.devops/nix/nixpkgs-instances.nix @@ -26,16 +26,14 @@ config.cudaSupport = true; config.allowUnfreePredicate = p: - builtins.all - ( - license: - license.free - || builtins.elem license.shortName [ - "CUDA EULA" - "cuDNN EULA" - ] - ) - (p.meta.licenses or [ p.meta.license ]); + builtins.all ( + license: + license.free + || builtins.elem license.shortName [ + "CUDA EULA" + "cuDNN EULA" + ] + ) (p.meta.licenses or [ p.meta.license ]); }; # Ensure dependencies use ROCm consistently pkgsRocm = import inputs.nixpkgs { diff --git a/.devops/nix/package-gguf-py.nix b/.devops/nix/package-gguf-py.nix index ca2207e6555394..944ebd700b4496 100644 --- a/.devops/nix/package-gguf-py.nix +++ b/.devops/nix/package-gguf-py.nix @@ -1,18 +1,15 @@ -{ lib -, llamaVersion -, python3 -} @inputs: +{ + lib, + llamaVersion, + python3, +}@inputs: python3.pkgs.buildPythonPackage rec { pname = "gguf"; version = llamaVersion; pyproject = true; - nativeBuildInputs = with python3.pkgs; [ - poetry-core - ]; - propagatedBuildInputs = with python3.pkgs; [ - numpy - ]; + nativeBuildInputs = with python3.pkgs; [ poetry-core ]; + propagatedBuildInputs = with python3.pkgs; [ numpy ]; src = lib.cleanSource ../../gguf-py; doCheck = false; meta = with lib; { diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 9a7b8f54fecc1f..29fea3c9aa8e87 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -1,39 +1,37 @@ -{ lib -, config -, stdenv -, mkShell -, cmake -, ninja -, pkg-config -, git -, python3 -, mpi -, openblas -, # TODO: Use the generic `blas` so users could switch between alternative implementations - cudaPackages -, darwin -, rocmPackages -, vulkan-headers -, vulkan-loader -, clblast -, useBlas ? builtins.all (x: !x) [ +{ + lib, + config, + stdenv, + mkShell, + cmake, + ninja, + pkg-config, + git, + python3, + mpi, + openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations + cudaPackages, + darwin, + rocmPackages, + vulkan-headers, + vulkan-loader, + clblast, + useBlas ? builtins.all (x: !x) [ useCuda useMetalKit useOpenCL useRocm useVulkan - ] -, useCuda ? config.cudaSupport -, useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL -, useMpi ? false -, # Increases the runtime closure size by ~700M - useOpenCL ? false -, useRocm ? config.rocmSupport -, useVulkan ? false -, # Arbitrary version, substituted by the flake - llamaVersion ? "0.0.0" -, gguf-py -} @ inputs: + ], + useCuda ? config.cudaSupport, + useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL, + useMpi ? false, # Increases the runtime closure size by ~700M + useOpenCL ? false, + useRocm ? config.rocmSupport, + useVulkan ? false, + llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake + gguf-py, +}@inputs: let inherit (lib) @@ -61,32 +59,28 @@ let pnameSuffix = strings.optionalString (suffices != [ ]) "-${strings.concatMapStringsSep "-" strings.toLower suffices}"; - descriptionSuffix = - strings.optionalString (suffices != [ ]) - ", accelerated with ${strings.concatStringsSep ", " suffices}"; + descriptionSuffix = strings.optionalString ( + suffices != [ ] + ) ", accelerated with ${strings.concatStringsSep ", " suffices}"; # TODO: package the Python in this repository in a Nix-like way. # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo # is PEP 517-compatible, and ensure the correct .dist-info is generated. # https://peps.python.org/pep-0517/ - llama-python = python3.withPackages ( - ps: [ - ps.numpy - ps.sentencepiece - gguf-py - ] - ); + llama-python = python3.withPackages (ps: [ + ps.numpy + ps.sentencepiece + gguf-py + ]); # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime - llama-python-extra = python3.withPackages ( - ps: [ - ps.numpy - ps.sentencepiece - ps.tiktoken - ps.torchWithoutCuda - ps.transformers - ] - ); + llama-python-extra = python3.withPackages (ps: [ + ps.numpy + ps.sentencepiece + ps.tiktoken + ps.torchWithoutCuda + ps.transformers + ]); # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64 # separately @@ -122,173 +116,171 @@ let vulkan-headers vulkan-loader ]; - in -effectiveStdenv.mkDerivation ( - finalAttrs: { - pname = "llama-cpp${pnameSuffix}"; - version = llamaVersion; - - # Note: none of the files discarded here are visible in the sandbox or - # affect the output hash. This also means they can be modified without - # triggering a rebuild. - src = lib.cleanSourceWith { - filter = - name: type: - let - noneOf = builtins.all (x: !x); - baseName = baseNameOf name; - in - noneOf [ - (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths - (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths - (lib.hasPrefix "." baseName) # Skip hidden files and directories - (baseName == "flake.lock") - ]; - src = lib.cleanSource ../../.; - }; - - postPatch = '' - substituteInPlace ./ggml-metal.m \ - --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" - - # TODO: Package up each Python script or service appropriately. - # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`, - # we could make those *.py into setuptools' entrypoints - substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python" - ''; - - nativeBuildInputs = - [ - cmake - ninja - pkg-config - git - ] - ++ optionals useCuda [ - cudaPackages.cuda_nvcc - - # TODO: Replace with autoAddDriverRunpath - # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged - cudaPackages.autoAddOpenGLRunpathHook +effectiveStdenv.mkDerivation (finalAttrs: { + pname = "llama-cpp${pnameSuffix}"; + version = llamaVersion; + + # Note: none of the files discarded here are visible in the sandbox or + # affect the output hash. This also means they can be modified without + # triggering a rebuild. + src = lib.cleanSourceWith { + filter = + name: type: + let + noneOf = builtins.all (x: !x); + baseName = baseNameOf name; + in + noneOf [ + (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths + (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths + (lib.hasPrefix "." baseName) # Skip hidden files and directories + (baseName == "flake.lock") ]; + src = lib.cleanSource ../../.; + }; - buildInputs = optionals effectiveStdenv.isDarwin darwinBuildInputs - ++ optionals useCuda cudaBuildInputs - ++ optionals useMpi [ mpi ] - ++ optionals useOpenCL [ clblast ] - ++ optionals useRocm rocmBuildInputs - ++ optionals useVulkan vulkanBuildInputs; - - cmakeFlags = - [ - (cmakeBool "LLAMA_NATIVE" false) - (cmakeBool "LLAMA_BUILD_SERVER" true) - (cmakeBool "BUILD_SHARED_LIBS" true) - (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) - (cmakeBool "LLAMA_BLAS" useBlas) - (cmakeBool "LLAMA_CLBLAST" useOpenCL) - (cmakeBool "LLAMA_CUBLAS" useCuda) - (cmakeBool "LLAMA_HIPBLAS" useRocm) - (cmakeBool "LLAMA_METAL" useMetalKit) - (cmakeBool "LLAMA_MPI" useMpi) - (cmakeBool "LLAMA_VULKAN" useVulkan) - ] - ++ optionals useCuda [ - ( - with cudaPackages.flags; - cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( - builtins.concatStringsSep ";" (map dropDot cudaCapabilities) - ) - ) - ] - ++ optionals useRocm [ - (cmakeFeature "CMAKE_C_COMPILER" "hipcc") - (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") - - # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM - # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt - # and select the line that matches the current nixpkgs version of rocBLAS. - # Should likely use `rocmPackages.clr.gpuTargets`. - "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" - ] - ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ] - ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ]; - - # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, - # if they haven't been added yet. - postInstall = '' - mv $out/bin/main $out/bin/llama - mv $out/bin/server $out/bin/llama-server - mkdir -p $out/include - cp $src/llama.h $out/include/ - ''; - - # Define the shells here, but don't add in the inputsFrom to avoid recursion. - passthru = { - inherit - useBlas - useCuda - useMetalKit - useMpi - useOpenCL - useRocm - useVulkan - ; + postPatch = '' + substituteInPlace ./ggml-metal.m \ + --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" - shell = mkShell { - name = "shell-${finalAttrs.finalPackage.name}"; - description = "contains numpy and sentencepiece"; - buildInputs = [ llama-python ]; - inputsFrom = [ finalAttrs.finalPackage ]; - shellHook = '' - addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib" - ''; - }; + # TODO: Package up each Python script or service appropriately. + # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`, + # we could make those *.py into setuptools' entrypoints + substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python" + ''; - shell-extra = mkShell { - name = "shell-extra-${finalAttrs.finalPackage.name}"; - description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; - buildInputs = [ llama-python-extra ]; - inputsFrom = [ finalAttrs.finalPackage ]; - }; + nativeBuildInputs = + [ + cmake + ninja + pkg-config + git + ] + ++ optionals useCuda [ + cudaPackages.cuda_nvcc + + # TODO: Replace with autoAddDriverRunpath + # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged + cudaPackages.autoAddOpenGLRunpathHook + ]; + + buildInputs = + optionals effectiveStdenv.isDarwin darwinBuildInputs + ++ optionals useCuda cudaBuildInputs + ++ optionals useMpi [ mpi ] + ++ optionals useOpenCL [ clblast ] + ++ optionals useRocm rocmBuildInputs + ++ optionals useVulkan vulkanBuildInputs; + + cmakeFlags = + [ + (cmakeBool "LLAMA_NATIVE" false) + (cmakeBool "LLAMA_BUILD_SERVER" true) + (cmakeBool "BUILD_SHARED_LIBS" true) + (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) + (cmakeBool "LLAMA_BLAS" useBlas) + (cmakeBool "LLAMA_CLBLAST" useOpenCL) + (cmakeBool "LLAMA_CUBLAS" useCuda) + (cmakeBool "LLAMA_HIPBLAS" useRocm) + (cmakeBool "LLAMA_METAL" useMetalKit) + (cmakeBool "LLAMA_MPI" useMpi) + (cmakeBool "LLAMA_VULKAN" useVulkan) + ] + ++ optionals useCuda [ + ( + with cudaPackages.flags; + cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( + builtins.concatStringsSep ";" (map dropDot cudaCapabilities) + ) + ) + ] + ++ optionals useRocm [ + (cmakeFeature "CMAKE_C_COMPILER" "hipcc") + (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") + + # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM + # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt + # and select the line that matches the current nixpkgs version of rocBLAS. + # Should likely use `rocmPackages.clr.gpuTargets`. + "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" + ] + ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ] + ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ]; + + # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, + # if they haven't been added yet. + postInstall = '' + mv $out/bin/main $out/bin/llama + mv $out/bin/server $out/bin/llama-server + mkdir -p $out/include + cp $src/llama.h $out/include/ + ''; + + # Define the shells here, but don't add in the inputsFrom to avoid recursion. + passthru = { + inherit + useBlas + useCuda + useMetalKit + useMpi + useOpenCL + useRocm + useVulkan + ; + + shell = mkShell { + name = "shell-${finalAttrs.finalPackage.name}"; + description = "contains numpy and sentencepiece"; + buildInputs = [ llama-python ]; + inputsFrom = [ finalAttrs.finalPackage ]; + shellHook = '' + addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib" + ''; }; - meta = { - # Configurations we don't want even the CI to evaluate. Results in the - # "unsupported platform" messages. This is mostly a no-op, because - # cudaPackages would've refused to evaluate anyway. - badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin; - - # Configurations that are known to result in build failures. Can be - # overridden by importing Nixpkgs with `allowBroken = true`. - broken = (useMetalKit && !effectiveStdenv.isDarwin); - - description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; - homepage = "https://github.com/ggerganov/llama.cpp/"; - license = lib.licenses.mit; - - # Accommodates `nix run` and `lib.getExe` - mainProgram = "llama"; - - # These people might respond, on the best effort basis, if you ping them - # in case of Nix-specific regressions or for reviewing Nix-specific PRs. - # Consider adding yourself to this list if you want to ensure this flake - # stays maintained and you're willing to invest your time. Do not add - # other people without their consent. Consider removing people after - # they've been unreachable for long periods of time. - - # Note that lib.maintainers is defined in Nixpkgs, but you may just add - # an attrset following the same format as in - # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix - maintainers = with lib.maintainers; [ - philiptaron - SomeoneSerge - ]; - - # Extend `badPlatforms` instead - platforms = lib.platforms.all; + shell-extra = mkShell { + name = "shell-extra-${finalAttrs.finalPackage.name}"; + description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; + buildInputs = [ llama-python-extra ]; + inputsFrom = [ finalAttrs.finalPackage ]; }; - } -) + }; + + meta = { + # Configurations we don't want even the CI to evaluate. Results in the + # "unsupported platform" messages. This is mostly a no-op, because + # cudaPackages would've refused to evaluate anyway. + badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin; + + # Configurations that are known to result in build failures. Can be + # overridden by importing Nixpkgs with `allowBroken = true`. + broken = (useMetalKit && !effectiveStdenv.isDarwin); + + description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; + homepage = "https://github.com/ggerganov/llama.cpp/"; + license = lib.licenses.mit; + + # Accommodates `nix run` and `lib.getExe` + mainProgram = "llama"; + + # These people might respond, on the best effort basis, if you ping them + # in case of Nix-specific regressions or for reviewing Nix-specific PRs. + # Consider adding yourself to this list if you want to ensure this flake + # stays maintained and you're willing to invest your time. Do not add + # other people without their consent. Consider removing people after + # they've been unreachable for long periods of time. + + # Note that lib.maintainers is defined in Nixpkgs, but you may just add + # an attrset following the same format as in + # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix + maintainers = with lib.maintainers; [ + philiptaron + SomeoneSerge + ]; + + # Extend `badPlatforms` instead + platforms = lib.platforms.all; + }; +}) diff --git a/.devops/nix/scope.nix b/.devops/nix/scope.nix index 4a21449bf51fa0..7a7249ca3b92ef 100644 --- a/.devops/nix/scope.nix +++ b/.devops/nix/scope.nix @@ -1,16 +1,15 @@ -{ lib -, newScope -, llamaVersion ? "0.0.0" +{ + lib, + newScope, + llamaVersion ? "0.0.0", }: # We're using `makeScope` instead of just writing out an attrset # because it allows users to apply overlays later using `overrideScope'`. # Cf. https://noogle.dev/f/lib/makeScope -lib.makeScope newScope ( - self: { - inherit llamaVersion; - gguf-py = self.callPackage ./package-gguf-py.nix { }; - llama-cpp = self.callPackage ./package.nix { }; - } -) +lib.makeScope newScope (self: { + inherit llamaVersion; + gguf-py = self.callPackage ./package-gguf-py.nix { }; + llama-cpp = self.callPackage ./package.nix { }; +})