flake.nix: rewrite

1. Split into separate files per output. 2. Add overlays, so that this flake can be integrated into others. The names in the overlay are `llama-cpp`, `llama-cpp-opencl`, `llama-cpp-cuda`, and `llama-cpp-rocm` so that they fit into the broader set of Nix packages from [nixpkgs](https://github.com/nixos/nixpkgs). 3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/) rather than `with pkgs;` so that there's dependency injection rather than dependency lookup. 4. Add a description and meta information for each package. The description briefly notes which backend accelerates each one. 5. Use specific CUDA packages instead of cudatoolkit on the advice of SomeoneSerge. 6. Format with `serokell/nixfmt` for a consistent style. 7. Update `flake.lock` with the latest goods.
2023-12-22 12:33:09 -08:00 · 2023-12-22 12:33:09 -08:00 · 8364cf4d0b
commit 8364cf4d0b
parent d3e73df66a
6 changed files with 310 additions and 167 deletions
--- a/.devops/nix/apps.nix
+++ b/.devops/nix/apps.nix
@ -0,0 +1,14 @@
 # Turns a list of binary names into flake `apps` entries, each pointing at
 # `${package}/bin/<name>`. The first name in `binaries` is additionally
 # exposed under the `default` attribute.
 { package, binaries }:
 let
  firstBinary = builtins.elemAt binaries 0;
  # One name/value pair per binary, in the shape `builtins.listToAttrs` expects.
  toEntry = binary: {
    name = binary;
    value = {
      type = "app";
      program = "${package}/bin/${binary}";
    };
  };
  apps = builtins.listToAttrs (map toEntry binaries);
 in
 apps // { default = apps.${firstBinary}; }
--- a/.devops/nix/devshells.nix
+++ b/.devops/nix/devshells.nix
@ -0,0 +1,10 @@
 # Builds the devShells attribute set. For every entry `name` in `packages`
 # this yields two shells: `name` (from the package's `passthru.shell`) and
 # `name-extra` (from `passthru.shell-extra`), each extended with the
 # package's own build inputs.
 { concatMapAttrs, packages }:
 let
  # The input lists that are carried over from the package into its shells.
  inputKinds = [
    "buildInputs"
    "nativeBuildInputs"
    "propagatedBuildInputs"
    "propagatedNativeBuildInputs"
  ];
  # `inputsFrom` is an argument interpreted by `mkShell` when the shell is
  # constructed; setting it afterwards via `overrideAttrs` only adds an inert
  # derivation attribute and does not bring the package's dependencies into
  # the shell. Append the package's input lists to the shell's instead.
  withInputsOf =
    package: shell:
    shell.overrideAttrs (
      prevAttrs:
      builtins.listToAttrs (
        map
          (kind: {
            name = kind;
            value = (prevAttrs.${kind} or [ ]) ++ (package.${kind} or [ ]);
          })
          inputKinds
      )
    );
 in
 concatMapAttrs
  (name: package: {
    ${name} = withInputsOf package package.passthru.shell;
    ${name + "-extra"} = withInputsOf package package.passthru.shell-extra;
  })
  packages
--- a/.devops/nix/overlay.nix
+++ b/.devops/nix/overlay.nix
@ -0,0 +1,17 @@
 # Nixpkgs overlay exposing `llama-cpp`, built from ./package.nix.
 # On Darwin the Apple frameworks are handed to the package as extra
 # arguments; on every other platform no extra arguments are passed.
 final: prev:
 let
  platform = final.stdenv;
  sdk = final.darwin;
  frameworks =
    if !platform.isDarwin then
      { }
    else if platform.isAarch64 then
      # Apple Silicon: the 11.0 SDK, including MetalKit.
      { inherit (sdk.apple_sdk_11_0.frameworks) Accelerate MetalKit; }
    else
      # Other Darwin platforms: the default SDK, without MetalKit.
      { inherit (sdk.apple_sdk.frameworks) Accelerate CoreGraphics CoreVideo; };
 in
 {
  llama-cpp = final.callPackage ./package.nix frameworks;
 }
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@ -0,0 +1,182 @@
 # llama.cpp package definition, meant to be instantiated with `callPackage`.
 #
 # Arguments beyond the usual nixpkgs dependencies:
 #   Accelerate, MetalKit, CoreVideo, CoreGraphics:
 #     Darwin frameworks supplied by ./overlay.nix; null when not supplied.
 #     A non-null MetalKit selects the Metal backend for the default flavor.
 #   useOpenCL, useCuda, useRocm:
 #     Select an accelerated backend. When all are false the "default" flavor
 #     is built (Metal if MetalKit is given, OpenBLAS otherwise).
 {
  lib,
  config,
  stdenv,
  mkShell,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
  cudaPackages,
  rocmPackages,
  clblast,
  Accelerate ? null,
  MetalKit ? null,
  CoreVideo ? null,
  CoreGraphics ? null,
  useOpenCL ? false,
  useCuda ? config.cudaSupport,
  useRocm ? config.rocmSupport,
 }@inputs:
 let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    ;
  isDefault = !useOpenCL && !useCuda && !useRocm;
  # The default flavor is either Metal (when MetalKit was passed in) or OpenBLAS.
  useMetal = isDefault && MetalKit != null;
  useOpenBlas = isDefault && MetalKit == null;
  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  # Shadow the `stdenv` argument so any accidental use fails loudly; the real
  # one stays reachable through `inputs.stdenv`.
  stdenv = throw "Use effectiveStdenv instead";
  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;
  # Give a little description difference between the flavors.
  descriptionSuffix =
    if useOpenCL then
      " (OpenCL accelerated)"
    else if useCuda then
      " (CUDA accelerated)"
    else if useRocm then
      " (ROCm accelerated)"
    else if useMetal then
      " (MetalKit accelerated)"
    else
      "";
  # Suffix for the dev shell derivation names. Derivation names may not contain
  # spaces or parentheses, so `descriptionSuffix` cannot be reused for them.
  shellNameSuffix =
    if useOpenCL then
      "-opencl"
    else if useCuda then
      "-cuda"
    else if useRocm then
      "-rocm"
    else if useMetal then
      "-metal"
    else
      "";
  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/
  llama-python = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
    ]
  );
  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
      ps.torchWithoutCuda
      ps.transformers
    ]
  );
  # See ./overlay.nix for where these dependencies are passed in.
  # Frameworks that were not supplied are null and get dropped here.
  defaultBuildInputs = builtins.filter (p: p != null) [
    Accelerate
    MetalKit
    CoreVideo
    CoreGraphics
  ];
  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>
    cuda_cudart
    libcublas
  ];
  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];
 in
 effectiveStdenv.mkDerivation {
  name = "llama.cpp";
  src = ../../.;
  meta = {
    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
    mainProgram = "llama";
  };
  postPatch = ''
    substituteInPlace ./ggml-metal.m \
      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    # TODO: Package up each Python script or service appropriately.
    # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
    # we could make those *.py into setuptools' entrypoints
    substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
  '';
  nativeBuildInputs = [
    cmake
    ninja
    pkg-config
    git
  ] ++ optionals useCuda [ cudaPackages.cuda_nvcc ];
  buildInputs =
    [ mpi ]
    ++ optionals useOpenCL [ clblast ]
    ++ optionals useCuda cudaBuildInputs
    ++ optionals useRocm rocmBuildInputs
    ++ optionals isDefault defaultBuildInputs
    # The OpenBLAS cmake flags below need the library to actually be available.
    ++ optionals useOpenBlas [ openblas ];
  cmakeFlags =
    [
      # Keep native CPU tuning off so the output does not depend on the
      # machine that happened to build it.
      (cmakeBool "LLAMA_NATIVE" false)
      (cmakeBool "LLAMA_BUILD_SERVER" true)
      (cmakeBool "BUILD_SHARED_LIBS" true)
      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
    ]
    ++ optionals useOpenCL [ (cmakeBool "LLAMA_CLBLAST" true) ]
    ++ optionals useCuda [ (cmakeBool "LLAMA_CUBLAS" true) ]
    ++ optionals useRocm [
      (cmakeBool "LLAMA_HIPBLAS" true)
      (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
      (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")
      # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
      # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
      # and select the line that matches the current nixpkgs version of rocBLAS.
      # Should likely use `rocmPackages.clr.gpuTargets`.
      "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
    ]
    ++ optionals useMetal [
      "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
      "-DLLAMA_METAL=ON"
    ]
    ++ optionals useOpenBlas [
      "-DLLAMA_BLAS=ON"
      "-DLLAMA_BLAS_VENDOR=OpenBLAS"
    ];
  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
  # if they haven't been added yet.
  postInstall = ''
    mv $out/bin/main $out/bin/llama
    mv $out/bin/server $out/bin/llama-server
    mkdir -p $out/include
    cp $src/llama.h $out/include/
  '';
  # Define the shells here, but don't add in the inputsFrom to avoid recursion.
  passthru = {
    shell = mkShell {
      name = "default${shellNameSuffix}";
      description = "contains numpy and sentencepiece";
      buildInputs = [ llama-python ];
    };
    shell-extra = mkShell {
      name = "extra${shellNameSuffix}";
      description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
      buildInputs = [ llama-python-extra ];
    };
  };
 }
--- a/flake.lock
+++ b/flake.lock
@ -1,23 +1,5 @@
 {
  "nodes": {
    "flake-utils": {
      "inputs": {
        "systems": "systems"
      },
      "locked": {
        "lastModified": 1694529238,
        "narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=",
        "owner": "numtide",
        "repo": "flake-utils",
        "rev": "ff7b65b44d01cf9ba6a71320833626af21126384",
        "type": "github"
      },
      "original": {
        "owner": "numtide",
        "repo": "flake-utils",
        "type": "github"
      }
    },
    "nixpkgs": {
      "locked": {
        "lastModified": 1703559957,
@ -36,24 +18,8 @@
    },
    "root": {
      "inputs": {
        "flake-utils": "flake-utils",
        "nixpkgs": "nixpkgs"
      }
    },
    "systems": {
      "locked": {
        "lastModified": 1681028828,
        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
        "owner": "nix-systems",
        "repo": "default",
        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
        "type": "github"
      },
      "original": {
        "owner": "nix-systems",
        "repo": "default",
        "type": "github"
      }
    }
  },
  "root": "root",
--- a/flake.nix
+++ b/flake.nix
@ -1,139 +1,93 @@
 {
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };
-  outputs = { self, nixpkgs, flake-utils }:
+
-    flake-utils.lib.eachDefaultSystem (system:
+  outputs =
    { self, nixpkgs }:
    let
-        name = "llama.cpp";
+      systems = [
-        src = ./.;
+        "aarch64-darwin"
-        meta.mainProgram = "llama";
+        "aarch64-linux"
-        inherit (pkgs.stdenv) isAarch32 isAarch64 isDarwin;
+        "x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant)
-        buildInputs = with pkgs; [ openmpi ];
+        "x86_64-linux"
        osSpecific = with pkgs; buildInputs ++ (
          if isAarch64 && isDarwin then
            with pkgs.darwin.apple_sdk_11_0.frameworks; [
              Accelerate
              MetalKit
            ]
          else if isAarch32 && isDarwin then
            with pkgs.darwin.apple_sdk.frameworks; [
              Accelerate
              CoreGraphics
              CoreVideo
            ]
          else if isDarwin then
            with pkgs.darwin.apple_sdk.frameworks; [
              Accelerate
              CoreGraphics
              CoreVideo
            ]
          else
            with pkgs; [ openblas ]
        );
        pkgs = import nixpkgs { inherit system; };
        nativeBuildInputs = with pkgs; [ cmake ninja pkg-config ];
        cudatoolkit_joined = with pkgs; symlinkJoin {
          # HACK(Green-Sky): nix currently has issues with cmake findcudatoolkit
          # see https://github.com/NixOS/nixpkgs/issues/224291
          # copied from jaxlib
          name = "${cudaPackages.cudatoolkit.name}-merged";
          paths = [
            cudaPackages.cudatoolkit.lib
            cudaPackages.cudatoolkit.out
          ] ++ lib.optionals (lib.versionOlder cudaPackages.cudatoolkit.version "11") [
            # for some reason some of the required libs are in the targets/x86_64-linux
            # directory; not sure why but this works around it
            "${cudaPackages.cudatoolkit}/targets/${system}"
      ];
      eachSystem = f: nixpkgs.lib.genAttrs systems (system: f system);
    in
    {
      # These define the various ways to build the llama.cpp project.
      # Integrate them into your flake.nix configuration by adding this overlay to nixpkgs.overlays.
      overlays.default = import ./.devops/nix/overlay.nix;
      # These use the package definition from `./.devops/nix/package.nix`.
      # There's one per backend that llama-cpp uses. Add more as needed!
      packages = eachSystem (
        system:
        let
          defaultConfig = {
            inherit system;
            overlays = [ self.overlays.default ];
          };
-        llama-python =
+          pkgs = import nixpkgs defaultConfig;
-          pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]);
+
-        # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
+          # Let's not make a big deal about getting the CUDA bits.
-        llama-python-extra =
+          cudaConfig = defaultConfig // {
-          pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]);
+            config.cudaSupport = true;
-        postPatch = ''
+            config.allowUnfreePredicate =
-          substituteInPlace ./ggml-metal.m \
+              p:
-            --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
+              builtins.all
-          substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python'
+                (
-        '';
+                  license:
-        postInstall = ''
+                  license.free
-          mv $out/bin/main $out/bin/llama
+                  || builtins.elem license.shortName [
-          mv $out/bin/server $out/bin/llama-server
+                    "CUDA EULA"
-          mkdir -p $out/include
+                    "cuDNN EULA"
-          cp ${src}/llama.h $out/include/
+                  ]
-        '';
+                )
-        cmakeFlags = [ "-DLLAMA_NATIVE=OFF" "-DLLAMA_BUILD_SERVER=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ];
+                (p.meta.licenses or [ p.meta.license ]);
          };
          pkgsCuda = import nixpkgs cudaConfig;
          # Let's make sure to turn on ROCm support across the whole package ecosystem.
          rocmConfig = defaultConfig // {
            config.rocmSupport = true;
          };
          pkgsRocm = import nixpkgs rocmConfig;
        in
        {
-        packages.default = pkgs.stdenv.mkDerivation {
+          default = pkgs.llama-cpp;
-          inherit name src meta postPatch nativeBuildInputs postInstall;
+          opencl = pkgs.llama-cpp.override { useOpenCL = true; };
-          buildInputs = osSpecific;
+          cuda = pkgsCuda.llama-cpp;
-          cmakeFlags = cmakeFlags
+          rocm = pkgsRocm.llama-cpp;
-            ++ (if isAarch64 && isDarwin then [
+        }
-            "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
+      );
-            "-DLLAMA_METAL=ON"
+
-          ] else [
+      # These use the definition of llama-cpp from `./.devops/nix/package.nix`
-            "-DLLAMA_BLAS=ON"
+      # and expose various binaries as apps with `nix run .#app-name`.
-            "-DLLAMA_BLAS_VENDOR=OpenBLAS"
+      # Note that none of these apps use anything other than the default backend.
-          ]);
+      apps = eachSystem (
-        };
+        system:
-        packages.opencl = pkgs.stdenv.mkDerivation {
+        import ./.devops/nix/apps.nix {
-          inherit name src meta postPatch nativeBuildInputs postInstall;
+          package = self.packages.${system}.default;
-          buildInputs = with pkgs; buildInputs ++ [ clblast ];
+          binaries = [
-          cmakeFlags = cmakeFlags ++ [
+            "llama"
-            "-DLLAMA_CLBLAST=ON"
+            "llama-embedding"
            "llama-server"
            "quantize"
            "train-text-from-scratch"
          ];
        }
      );
      # These expose a build environment for either a "default" or an "extra" set of dependencies.
      devShells = eachSystem (
        system:
        import ./.devops/nix/devshells.nix {
          concatMapAttrs = nixpkgs.lib.concatMapAttrs;
          packages = self.packages.${system};
        }
      );
    };
        packages.cuda = pkgs.stdenv.mkDerivation {
          inherit name src meta postPatch nativeBuildInputs postInstall;
          buildInputs = with pkgs; buildInputs ++ [ cudatoolkit_joined ];
          cmakeFlags = cmakeFlags ++ [
            "-DLLAMA_CUBLAS=ON"
          ];
        };
        packages.rocm = pkgs.stdenv.mkDerivation {
          inherit name src meta postPatch nativeBuildInputs postInstall;
          buildInputs = with pkgs.rocmPackages; buildInputs ++ [ clr hipblas rocblas ];
          cmakeFlags = cmakeFlags ++ [
            "-DLLAMA_HIPBLAS=1"
            "-DCMAKE_C_COMPILER=hipcc"
            "-DCMAKE_CXX_COMPILER=hipcc"
            # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
            # in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
            # and select the line that matches the current nixpkgs version of rocBLAS.
            "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
          ];
        };
        apps.llama-server = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/llama-server";
        };
        apps.llama-embedding = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/embedding";
        };
        apps.llama = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/llama";
        };
        apps.quantize = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/quantize";
        };
        apps.train-text-from-scratch = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/train-text-from-scratch";
        };
        apps.default = self.apps.${system}.llama;
        devShells.default = pkgs.mkShell {
          buildInputs = [ llama-python ];
          packages = nativeBuildInputs ++ osSpecific;
        };
        devShells.extra = pkgs.mkShell {
          buildInputs = [ llama-python-extra ];
          packages = nativeBuildInputs ++ osSpecific;
        };
      });
 }