diff --git a/.devops/nix/devshells.nix b/.devops/nix/devshells.nix
index 1862f0f0851001..9895d803f041f0 100644
--- a/.devops/nix/devshells.nix
+++ b/.devops/nix/devshells.nix
@@ -2,12 +2,9 @@
   perSystem =
     { config, lib, ... }:
     {
-      devShells =
-        lib.concatMapAttrs
-          (name: package: {
-            ${name} = package.passthru.shell;
-            ${name + "-extra"} = package.passthru.shell-extra;
-          })
-          config.packages;
+      devShells = lib.concatMapAttrs (name: package: {
+        ${name} = package.passthru.shell;
+        ${name + "-extra"} = package.passthru.shell-extra;
+      }) config.packages;
     };
 }
diff --git a/.devops/nix/nixpkgs-instances.nix b/.devops/nix/nixpkgs-instances.nix
index 4a2f81c4bfd044..90d683a713aa1f 100644
--- a/.devops/nix/nixpkgs-instances.nix
+++ b/.devops/nix/nixpkgs-instances.nix
@@ -26,16 +26,14 @@
           config.cudaSupport = true;
           config.allowUnfreePredicate =
             p:
-            builtins.all
-              (
-                license:
-                license.free
-                || builtins.elem license.shortName [
-                  "CUDA EULA"
-                  "cuDNN EULA"
-                ]
-              )
-              (p.meta.licenses or [ p.meta.license ]);
+            builtins.all (
+              license:
+              license.free
+              || builtins.elem license.shortName [
+                "CUDA EULA"
+                "cuDNN EULA"
+              ]
+            ) (p.meta.licenses or [ p.meta.license ]);
         };
         # Ensure dependencies use ROCm consistently
         pkgsRocm = import inputs.nixpkgs {
diff --git a/.devops/nix/package-gguf-py.nix b/.devops/nix/package-gguf-py.nix
index ca2207e6555394..944ebd700b4496 100644
--- a/.devops/nix/package-gguf-py.nix
+++ b/.devops/nix/package-gguf-py.nix
@@ -1,18 +1,15 @@
-{ lib
-, llamaVersion
-, python3
-} @inputs:
+{
+  lib,
+  llamaVersion,
+  python3,
+}@inputs:
 
 python3.pkgs.buildPythonPackage rec {
   pname = "gguf";
   version = llamaVersion;
   pyproject = true;
-  nativeBuildInputs = with python3.pkgs; [
-    poetry-core
-  ];
-  propagatedBuildInputs = with python3.pkgs; [
-    numpy
-  ];
+  nativeBuildInputs = with python3.pkgs; [ poetry-core ];
+  propagatedBuildInputs = with python3.pkgs; [ numpy ];
   src = lib.cleanSource ../../gguf-py;
   doCheck = false;
   meta = with lib; {
diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix
index 9a7b8f54fecc1f..29fea3c9aa8e87 100644
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@@ -1,39 +1,37 @@
-{ lib
-, config
-, stdenv
-, mkShell
-, cmake
-, ninja
-, pkg-config
-, git
-, python3
-, mpi
-, openblas
-, # TODO: Use the generic `blas` so users could switch between alternative implementations
-  cudaPackages
-, darwin
-, rocmPackages
-, vulkan-headers
-, vulkan-loader
-, clblast
-, useBlas ? builtins.all (x: !x) [
+{
+  lib,
+  config,
+  stdenv,
+  mkShell,
+  cmake,
+  ninja,
+  pkg-config,
+  git,
+  python3,
+  mpi,
+  openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
+  cudaPackages,
+  darwin,
+  rocmPackages,
+  vulkan-headers,
+  vulkan-loader,
+  clblast,
+  useBlas ? builtins.all (x: !x) [
     useCuda
     useMetalKit
     useOpenCL
     useRocm
     useVulkan
-  ]
-, useCuda ? config.cudaSupport
-, useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL
-, useMpi ? false
-, # Increases the runtime closure size by ~700M
-  useOpenCL ? false
-, useRocm ? config.rocmSupport
-, useVulkan ? false
-, # Arbitrary version, substituted by the flake
-  llamaVersion ? "0.0.0"
-, gguf-py
-} @ inputs:
+  ],
+  useCuda ? config.cudaSupport,
+  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
+  useMpi ? false, # Increases the runtime closure size by ~700M
+  useOpenCL ? false,
+  useRocm ? config.rocmSupport,
+  useVulkan ? false,
+  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
+  gguf-py,
+}@inputs:
 
 let
   inherit (lib)
@@ -61,32 +59,28 @@ let
   pnameSuffix =
     strings.optionalString (suffices != [ ])
       "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
-  descriptionSuffix =
-    strings.optionalString (suffices != [ ])
-      ", accelerated with ${strings.concatStringsSep ", " suffices}";
+  descriptionSuffix = strings.optionalString (
+    suffices != [ ]
+  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
 
   # TODO: package the Python in this repository in a Nix-like way.
   # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
   # is PEP 517-compatible, and ensure the correct .dist-info is generated.
   # https://peps.python.org/pep-0517/
-  llama-python = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-      gguf-py
-    ]
-  );
+  llama-python = python3.withPackages (ps: [
+    ps.numpy
+    ps.sentencepiece
+    gguf-py
+  ]);
 
   # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
-  llama-python-extra = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-      ps.tiktoken
-      ps.torchWithoutCuda
-      ps.transformers
-    ]
-  );
+  llama-python-extra = python3.withPackages (ps: [
+    ps.numpy
+    ps.sentencepiece
+    ps.tiktoken
+    ps.torchWithoutCuda
+    ps.transformers
+  ]);
 
   # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
   # separately
@@ -122,173 +116,171 @@ let
     vulkan-headers
     vulkan-loader
   ];
-
 in
 
-effectiveStdenv.mkDerivation (
-  finalAttrs: {
-    pname = "llama-cpp${pnameSuffix}";
-    version = llamaVersion;
-
-    # Note: none of the files discarded here are visible in the sandbox or
-    # affect the output hash. This also means they can be modified without
-    # triggering a rebuild.
-    src = lib.cleanSourceWith {
-      filter =
-        name: type:
-        let
-          noneOf = builtins.all (x: !x);
-          baseName = baseNameOf name;
-        in
-        noneOf [
-          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
-          (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
-          (lib.hasPrefix "." baseName) # Skip hidden files and directories
-          (baseName == "flake.lock")
-        ];
-      src = lib.cleanSource ../../.;
-    };
-
-    postPatch = ''
-      substituteInPlace ./ggml-metal.m \
-        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-
-      # TODO: Package up each Python script or service appropriately.
-      # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
-      # we could make those *.py into setuptools' entrypoints
-      substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
-    '';
-
-    nativeBuildInputs =
-      [
-        cmake
-        ninja
-        pkg-config
-        git
-      ]
-      ++ optionals useCuda [
-        cudaPackages.cuda_nvcc
-
-        # TODO: Replace with autoAddDriverRunpath
-        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
-        cudaPackages.autoAddOpenGLRunpathHook
+effectiveStdenv.mkDerivation (finalAttrs: {
+  pname = "llama-cpp${pnameSuffix}";
+  version = llamaVersion;
+
+  # Note: none of the files discarded here are visible in the sandbox or
+  # affect the output hash. This also means they can be modified without
+  # triggering a rebuild.
+  src = lib.cleanSourceWith {
+    filter =
+      name: type:
+      let
+        noneOf = builtins.all (x: !x);
+        baseName = baseNameOf name;
+      in
+      noneOf [
+        (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
+        (lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
+        (lib.hasPrefix "." baseName) # Skip hidden files and directories
+        (baseName == "flake.lock")
       ];
+    src = lib.cleanSource ../../.;
+  };
 
-    buildInputs = optionals effectiveStdenv.isDarwin darwinBuildInputs
-      ++ optionals useCuda cudaBuildInputs
-      ++ optionals useMpi [ mpi ]
-      ++ optionals useOpenCL [ clblast ]
-      ++ optionals useRocm rocmBuildInputs
-      ++ optionals useVulkan vulkanBuildInputs;
-
-    cmakeFlags =
-      [
-        (cmakeBool "LLAMA_NATIVE" false)
-        (cmakeBool "LLAMA_BUILD_SERVER" true)
-        (cmakeBool "BUILD_SHARED_LIBS" true)
-        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
-        (cmakeBool "LLAMA_BLAS" useBlas)
-        (cmakeBool "LLAMA_CLBLAST" useOpenCL)
-        (cmakeBool "LLAMA_CUBLAS" useCuda)
-        (cmakeBool "LLAMA_HIPBLAS" useRocm)
-        (cmakeBool "LLAMA_METAL" useMetalKit)
-        (cmakeBool "LLAMA_MPI" useMpi)
-        (cmakeBool "LLAMA_VULKAN" useVulkan)
-      ]
-      ++ optionals useCuda [
-        (
-          with cudaPackages.flags;
-          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
-            builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
-          )
-        )
-      ]
-      ++ optionals useRocm [
-        (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
-        (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
-
-        # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
-        # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
-        # and select the line that matches the current nixpkgs version of rocBLAS.
-        # Should likely use `rocmPackages.clr.gpuTargets`.
-        "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
-      ]
-      ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
-      ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];
-
-    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
-    # if they haven't been added yet.
-    postInstall = ''
-      mv $out/bin/main $out/bin/llama
-      mv $out/bin/server $out/bin/llama-server
-      mkdir -p $out/include
-      cp $src/llama.h $out/include/
-    '';
-
-    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
-    passthru = {
-      inherit
-        useBlas
-        useCuda
-        useMetalKit
-        useMpi
-        useOpenCL
-        useRocm
-        useVulkan
-        ;
+  postPatch = ''
+    substituteInPlace ./ggml-metal.m \
+      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
 
-      shell = mkShell {
-        name = "shell-${finalAttrs.finalPackage.name}";
-        description = "contains numpy and sentencepiece";
-        buildInputs = [ llama-python ];
-        inputsFrom = [ finalAttrs.finalPackage ];
-        shellHook = ''
-          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
-        '';
-      };
+    # TODO: Package up each Python script or service appropriately.
+    # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
+    # we could make those *.py into setuptools' entrypoints
+    substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
+  '';
 
-      shell-extra = mkShell {
-        name = "shell-extra-${finalAttrs.finalPackage.name}";
-        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
-        buildInputs = [ llama-python-extra ];
-        inputsFrom = [ finalAttrs.finalPackage ];
-      };
+  nativeBuildInputs =
+    [
+      cmake
+      ninja
+      pkg-config
+      git
+    ]
+    ++ optionals useCuda [
+      cudaPackages.cuda_nvcc
+
+      # TODO: Replace with autoAddDriverRunpath
+      # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
+      cudaPackages.autoAddOpenGLRunpathHook
+    ];
+
+  buildInputs =
+    optionals effectiveStdenv.isDarwin darwinBuildInputs
+    ++ optionals useCuda cudaBuildInputs
+    ++ optionals useMpi [ mpi ]
+    ++ optionals useOpenCL [ clblast ]
+    ++ optionals useRocm rocmBuildInputs
+    ++ optionals useVulkan vulkanBuildInputs;
+
+  cmakeFlags =
+    [
+      (cmakeBool "LLAMA_NATIVE" false)
+      (cmakeBool "LLAMA_BUILD_SERVER" true)
+      (cmakeBool "BUILD_SHARED_LIBS" true)
+      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+      (cmakeBool "LLAMA_BLAS" useBlas)
+      (cmakeBool "LLAMA_CLBLAST" useOpenCL)
+      (cmakeBool "LLAMA_CUBLAS" useCuda)
+      (cmakeBool "LLAMA_HIPBLAS" useRocm)
+      (cmakeBool "LLAMA_METAL" useMetalKit)
+      (cmakeBool "LLAMA_MPI" useMpi)
+      (cmakeBool "LLAMA_VULKAN" useVulkan)
+    ]
+    ++ optionals useCuda [
+      (
+        with cudaPackages.flags;
+        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
+          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
+        )
+      )
+    ]
+    ++ optionals useRocm [
+      (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
+      (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
+
+      # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
+      # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
+      # and select the line that matches the current nixpkgs version of rocBLAS.
+      # Should likely use `rocmPackages.clr.gpuTargets`.
+      "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
+    ]
+    ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
+    ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];
+
+  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
+  # if they haven't been added yet.
+  postInstall = ''
+    mv $out/bin/main $out/bin/llama
+    mv $out/bin/server $out/bin/llama-server
+    mkdir -p $out/include
+    cp $src/llama.h $out/include/
+  '';
+
+  # Define the shells here, but don't add in the inputsFrom to avoid recursion.
+  passthru = {
+    inherit
+      useBlas
+      useCuda
+      useMetalKit
+      useMpi
+      useOpenCL
+      useRocm
+      useVulkan
+      ;
+
+    shell = mkShell {
+      name = "shell-${finalAttrs.finalPackage.name}";
+      description = "contains numpy and sentencepiece";
+      buildInputs = [ llama-python ];
+      inputsFrom = [ finalAttrs.finalPackage ];
+      shellHook = ''
+        addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
+      '';
     };
 
-    meta = {
-      # Configurations we don't want even the CI to evaluate. Results in the
-      # "unsupported platform" messages. This is mostly a no-op, because
-      # cudaPackages would've refused to evaluate anyway.
-      badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
-
-      # Configurations that are known to result in build failures. Can be
-      # overridden by importing Nixpkgs with `allowBroken = true`.
-      broken = (useMetalKit && !effectiveStdenv.isDarwin);
-
-      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
-      homepage = "https://github.com/ggerganov/llama.cpp/";
-      license = lib.licenses.mit;
-
-      # Accommodates `nix run` and `lib.getExe`
-      mainProgram = "llama";
-
-      # These people might respond, on the best effort basis, if you ping them
-      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
-      # Consider adding yourself to this list if you want to ensure this flake
-      # stays maintained and you're willing to invest your time. Do not add
-      # other people without their consent. Consider removing people after
-      # they've been unreachable for long periods of time.
-
-      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
-      # an attrset following the same format as in
-      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
-      maintainers = with lib.maintainers; [
-        philiptaron
-        SomeoneSerge
-      ];
-
-      # Extend `badPlatforms` instead
-      platforms = lib.platforms.all;
+    shell-extra = mkShell {
+      name = "shell-extra-${finalAttrs.finalPackage.name}";
+      description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
+      buildInputs = [ llama-python-extra ];
+      inputsFrom = [ finalAttrs.finalPackage ];
     };
-  }
-)
+  };
+
+  meta = {
+    # Configurations we don't want even the CI to evaluate. Results in the
+    # "unsupported platform" messages. This is mostly a no-op, because
+    # cudaPackages would've refused to evaluate anyway.
+    badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
+
+    # Configurations that are known to result in build failures. Can be
+    # overridden by importing Nixpkgs with `allowBroken = true`.
+    broken = (useMetalKit && !effectiveStdenv.isDarwin);
+
+    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
+    homepage = "https://github.com/ggerganov/llama.cpp/";
+    license = lib.licenses.mit;
+
+    # Accommodates `nix run` and `lib.getExe`
+    mainProgram = "llama";
+
+    # These people might respond, on the best effort basis, if you ping them
+    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
+    # Consider adding yourself to this list if you want to ensure this flake
+    # stays maintained and you're willing to invest your time. Do not add
+    # other people without their consent. Consider removing people after
+    # they've been unreachable for long periods of time.
+
+    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
+    # an attrset following the same format as in
+    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
+    maintainers = with lib.maintainers; [
+      philiptaron
+      SomeoneSerge
+    ];
+
+    # Extend `badPlatforms` instead
+    platforms = lib.platforms.all;
+  };
+})
diff --git a/.devops/nix/scope.nix b/.devops/nix/scope.nix
index 4a21449bf51fa0..7a7249ca3b92ef 100644
--- a/.devops/nix/scope.nix
+++ b/.devops/nix/scope.nix
@@ -1,16 +1,15 @@
-{ lib
-, newScope
-, llamaVersion ? "0.0.0"
+{
+  lib,
+  newScope,
+  llamaVersion ? "0.0.0",
 }:
 
 # We're using `makeScope` instead of just writing out an attrset
 # because it allows users to apply overlays later using `overrideScope'`.
 # Cf. https://noogle.dev/f/lib/makeScope
 
-lib.makeScope newScope (
-  self: {
-    inherit llamaVersion;
-    gguf-py = self.callPackage ./package-gguf-py.nix { };
-    llama-cpp = self.callPackage ./package.nix { };
-  }
-)
+lib.makeScope newScope (self: {
+  inherit llamaVersion;
+  gguf-py = self.callPackage ./package-gguf-py.nix { };
+  llama-cpp = self.callPackage ./package.nix { };
+})