flake.nix: rewrite

1. Split into separate files per output.

2. Add overlays, so that this flake can be integrated into others.
   The names in the overlay are `llama-cpp`, `llama-cpp-opencl`,
   `llama-cpp-cuda`, and `llama-cpp-rocm` so that they fit into the
   broader set of Nix packages from [nixpkgs](https://github.com/nixos/nixpkgs).

3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/)
   rather than `with pkgs;` so that there's dependency injection rather
   than dependency lookup.

4. Add a description and meta information for each package.
   The description notes which backend, if any, accelerates each package.

5. Use specific CUDA packages instead of cudatoolkit on the advice of @SomeoneSerge.

6. Format with `serokell/nixfmt` for a consistent style.

7. Update `flake.lock` with the latest goods.
This commit is contained in:
Philip Taron 2023-12-22 12:33:09 -08:00
parent 708e179e85
commit cbf07e7178
No known key found for this signature in database
6 changed files with 320 additions and 170 deletions

14
.devops/nix/apps.nix Normal file
View file

@ -0,0 +1,14 @@
# Builds the flake `apps` attribute set for one package.
#
# Arguments:
#   package  - the package whose `bin/` directory contains the programs; it is
#              interpolated into each program path.
#   binaries - list of executable names. Every entry becomes an app of the same
#              name, and the first entry is additionally exposed as `default`.
#
# Result:
#   { <name> = { type = "app"; program = "<package>/bin/<name>"; }; ...; default = <first app>; }
{ package, binaries }:
let
  # Name/value pair in the shape `builtins.listToAttrs` expects.
  mkApp = name: {
    inherit name;
    value = {
      type = "app";
      program = "${package}/bin/${name}";
    };
  };
  # If a name is listed twice the first occurrence wins; both entries are
  # identical anyway because they are derived from the name alone.
  apps = builtins.listToAttrs (map mkApp binaries);
in
# An empty list yields no apps and no `default`, rather than failing with an
# out-of-bounds index error when looking up the first binary.
apps // (if binaries == [ ] then { } else { default = apps.${builtins.head binaries}; })

10
.devops/nix/devshells.nix Normal file
View file

@ -0,0 +1,10 @@
# Builds the flake `devShells` attribute set from the flake's packages.
#
# Arguments:
#   concatMapAttrs - `lib.concatMapAttrs` from nixpkgs.
#   packages       - attribute set of packages; each must provide
#                    `passthru.shell` and `passthru.shell-extra`.
#
# Result: for every package `<name>`, two shells: `<name>` and `<name>-extra`.
{ concatMapAttrs, packages }:
let
  # The dependency lists that `mkShell` merges from its `inputsFrom` argument.
  inputKinds = [
    "buildInputs"
    "nativeBuildInputs"
    "propagatedBuildInputs"
    "propagatedNativeBuildInputs"
  ];
  # `inputsFrom` is only interpreted by `mkShell` itself, at the time it is
  # called. Setting it afterwards through `overrideAttrs` merely adds an inert
  # attribute to the derivation, so the shell would not receive the package's
  # build dependencies. Append the package's inputs to the shell's explicitly.
  withInputsFrom =
    package: shell:
    shell.overrideAttrs (
      prevAttrs:
      builtins.listToAttrs (
        map
          (kind: {
            name = kind;
            value = (prevAttrs.${kind} or [ ]) ++ (package.${kind} or [ ]);
          })
          inputKinds
      )
    );
in
concatMapAttrs
  (name: package: {
    ${name} = withInputsFrom package package.passthru.shell;
    ${name + "-extra"} = withInputsFrom package package.passthru.shell-extra;
  })
  packages

17
.devops/nix/overlay.nix Normal file
View file

@ -0,0 +1,17 @@
# nixpkgs overlay that adds `llama-cpp`, built from ./package.nix.
#
# On Darwin the Apple frameworks the package links against are handed over as
# extra `callPackage` arguments; on every other platform nothing extra is passed.
final: prev:
let
  platform = final.stdenv;
  sdk11 = final.darwin.apple_sdk_11_0.frameworks;
  sdk = final.darwin.apple_sdk.frameworks;
  frameworks =
    if !platform.isDarwin then
      { }
    else if platform.isAarch64 then
      {
        Accelerate = sdk11.Accelerate;
        MetalKit = sdk11.MetalKit;
      }
    else
      {
        Accelerate = sdk.Accelerate;
        CoreGraphics = sdk.CoreGraphics;
        CoreVideo = sdk.CoreVideo;
      };
in
{
  llama-cpp = final.callPackage ./package.nix frameworks;
}

189
.devops/nix/package.nix Normal file
View file

@ -0,0 +1,189 @@
# Package definition for llama.cpp, intended to be instantiated with `callPackage`
# (see ./overlay.nix).
#
# Backend selection:
#   useOpenCL - build against CLBlast.
#   useCuda   - build against cuBLAS; defaults to nixpkgs' `config.cudaSupport`.
#   useRocm   - build against hipBLAS; defaults to nixpkgs' `config.rocmSupport`.
# When none of these is set the "default" backend is built: Metal if `MetalKit`
# was passed in, OpenBLAS otherwise.
#
# The Apple frameworks (Accelerate, MetalKit, CoreVideo, CoreGraphics) are
# optional and default to null.
#
# The result carries two development shells in `passthru`: `shell` and
# `shell-extra`.
{
  lib,
  config,
  stdenv,
  mkShell,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  openblas, # This could be `blas` to enable easy swapping out with `lapack`
  cudaPackages,
  rocmPackages,
  clblast,
  Accelerate ? null,
  MetalKit ? null,
  CoreVideo ? null,
  CoreGraphics ? null,
  useOpenCL ? false,
  useCuda ? config.cudaSupport,
  useRocm ? config.rocmSupport,
}@inputs:
let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    ;

  # True when no accelerator backend was explicitly requested.
  isDefault = !useOpenCL && !useCuda && !useRocm;
  # The default backend is Metal when MetalKit is available, OpenBLAS otherwise.
  useMetal = isDefault && MetalKit != null;
  useBlas = isDefault && MetalKit == null;

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  # Shadowing `stdenv` with a throw turns any accidental use into an evaluation
  # error; the real one stays reachable as `inputs.stdenv`.
  stdenv = throw "Use effectiveStdenv instead";
  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;

  # Give a little description difference between the flavors.
  descriptionSuffix =
    if useOpenCL then
      " (OpenCL accelerated)"
    else if useCuda then
      " (CUDA accelerated)"
    else if useRocm then
      " (ROCm accelerated)"
    else if (MetalKit != null) then
      " (MetalKit accelerated)"
    else
      "";

  # Same distinction as `descriptionSuffix`, but restricted to characters that
  # are valid in a derivation name (no spaces or parentheses), for naming the shells.
  shellNameSuffix =
    if useOpenCL then
      "-opencl"
    else if useCuda then
      "-cuda"
    else if useRocm then
      "-rocm"
    else if (MetalKit != null) then
      "-metal"
    else
      "";

  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/
  llama-python = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
    ]
  );

  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
      ps.torchWithoutCuda
      ps.transformers
    ]
  );

  # See ./overlay.nix for where these dependencies are passed in.
  # Frameworks that were not passed in are null and get filtered out.
  defaultBuildInputs = builtins.filter (p: p != null) [
    Accelerate
    MetalKit
    CoreVideo
    CoreGraphics
  ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>
    cuda_cudart
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];
in
effectiveStdenv.mkDerivation {
  name = "llama.cpp";
  src = ../../.;
  meta = {
    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
    mainProgram = "llama";
  };

  postPatch = ''
    substituteInPlace ./ggml-metal.m \
      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    # TODO: Package up each Python script or service appropriately.
    # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
    # we could make those *.py into setuptools' entrypoints
    substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
  '';

  nativeBuildInputs = [
    cmake
    ninja
    pkg-config
    git
  ] ++ optionals useCuda [ cudaPackages.cuda_nvcc ];

  buildInputs =
    [ mpi ]
    ++ optionals useOpenCL [ clblast ]
    ++ optionals useCuda cudaBuildInputs
    ++ optionals useRocm rocmBuildInputs
    ++ optionals isDefault defaultBuildInputs
    # The BLAS flags below request OpenBLAS, so the library has to be available.
    ++ optionals useBlas [ openblas ];

  cmakeFlags =
    [
      # Keep this off: a native build tunes the binaries for the CPU of the
      # build machine, so the output would not be reproducible or portable.
      (cmakeBool "LLAMA_NATIVE" false)
      (cmakeBool "LLAMA_BUILD_SERVER" true)
      (cmakeBool "BUILD_SHARED_LIBS" true)
      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
    ]
    ++ optionals useOpenCL [ (cmakeBool "LLAMA_CLBLAST" true) ]
    ++ optionals useCuda [ (cmakeBool "LLAMA_CUBLAS" true) ]
    ++ optionals useRocm [
      (cmakeBool "LLAMA_HIPBLAS" true)
      (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
      (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

      # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
      # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
      # and select the line that matches the current nixpkgs version of rocBLAS.
      # Should likely use `rocmPackages.clr.gpuTargets`.
      "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
    ]
    ++ optionals useMetal [
      "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
      "-DLLAMA_METAL=ON"
    ]
    ++ optionals useBlas [
      "-DLLAMA_BLAS=ON"
      "-DLLAMA_BLAS_VENDOR=OpenBLAS"
    ];

  # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
  # if they haven't been added yet.
  #
  # For example:
  #
  # 1. Avoid GLOBs
  # 2. Add whatever COMPONENTs are missing
  # 3. Fix whatever issues remain with override-ability.
  #
  # The flake exposes an app named `llama-embedding`, which resolves to
  # `bin/llama-embedding`; provide that name as a link to `embedding` when the
  # build installed it, keeping the original name in place as well.
  postInstall = ''
    mv $out/bin/main $out/bin/llama
    mv $out/bin/server $out/bin/llama-server
    if [ -e $out/bin/embedding ] && [ ! -e $out/bin/llama-embedding ]; then
      ln -s embedding $out/bin/llama-embedding
    fi
    mkdir -p $out/include
    cp $src/llama.h $out/include/
  '';

  # Define the shells here, but don't add in the inputsFrom to avoid recursion.
  passthru = {
    shell = mkShell {
      name = "default${shellNameSuffix}";
      description = "contains numpy and sentencepiece";
      buildInputs = [ llama-python ];
    };

    shell-extra = mkShell {
      name = "extra${shellNameSuffix}";
      description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
      buildInputs = [ llama-python-extra ];
    };
  };
}

40
flake.lock generated
View file

@ -1,30 +1,12 @@
{
"nodes": {
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1694529238,
"narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "ff7b65b44d01cf9ba6a71320833626af21126384",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1698318101,
"narHash": "sha256-gUihHt3yPD7bVqg+k/UVHgngyaJ3DMEBchbymBMvK1E=",
"lastModified": 1703013332,
"narHash": "sha256-+tFNwMvlXLbJZXiMHqYq77z/RfmpfpiI3yjL6o/Zo9M=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "63678e9f3d3afecfeafa0acead6239cdb447574c",
"rev": "54aac082a4d9bb5bbc5c4e899603abfb76a3f6d6",
"type": "github"
},
"original": {
@ -36,24 +18,8 @@
},
"root": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",

208
flake.nix
View file

@ -1,139 +1,93 @@
{
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
flake-utils.url = "github:numtide/flake-utils";
};
outputs = { self, nixpkgs, flake-utils }:
flake-utils.lib.eachDefaultSystem (system:
outputs =
{ self, nixpkgs }:
let
name = "llama.cpp";
src = ./.;
meta.mainProgram = "llama";
inherit (pkgs.stdenv) isAarch32 isAarch64 isDarwin;
buildInputs = with pkgs; [ openmpi ];
osSpecific = with pkgs; buildInputs ++ (
if isAarch64 && isDarwin then
with pkgs.darwin.apple_sdk_11_0.frameworks; [
Accelerate
MetalKit
]
else if isAarch32 && isDarwin then
with pkgs.darwin.apple_sdk.frameworks; [
Accelerate
CoreGraphics
CoreVideo
]
else if isDarwin then
with pkgs.darwin.apple_sdk.frameworks; [
Accelerate
CoreGraphics
CoreVideo
]
else
with pkgs; [ openblas ]
);
pkgs = import nixpkgs { inherit system; };
nativeBuildInputs = with pkgs; [ cmake ninja pkg-config ];
cudatoolkit_joined = with pkgs; symlinkJoin {
# HACK(Green-Sky): nix currently has issues with cmake findcudatoolkit
# see https://github.com/NixOS/nixpkgs/issues/224291
# copied from jaxlib
name = "${cudaPackages.cudatoolkit.name}-merged";
paths = [
cudaPackages.cudatoolkit.lib
cudaPackages.cudatoolkit.out
] ++ lib.optionals (lib.versionOlder cudaPackages.cudatoolkit.version "11") [
# for some reason some of the required libs are in the targets/x86_64-linux
# directory; not sure why but this works around it
"${cudaPackages.cudatoolkit}/targets/${system}"
# Platforms for which the per-system outputs are generated.
systems = [
"aarch64-darwin"
"aarch64-linux"
"x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant)
"x86_64-linux"
];
# Calls `f` with each entry of `systems` and collects the results into an
# attribute set keyed by system name.
eachSystem = f: nixpkgs.lib.genAttrs systems (system: f system);
in
{
# These define the various ways to build the llama.cpp project.
# Integrate them into your flake.nix configuration by adding this overlay to nixpkgs.overlays.
overlays.default = import ./.devops/nix/overlay.nix;
# These use the package definition from `./.devops/nix/package.nix`.
# There's one per backend that llama-cpp uses. Add more as needed!
packages = eachSystem (
system:
let
defaultConfig = {
inherit system;
overlays = [ self.overlays.default ];
};
llama-python =
pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]);
# TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
llama-python-extra =
pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]);
postPatch = ''
substituteInPlace ./ggml-metal.m \
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python'
'';
postInstall = ''
mv $out/bin/main $out/bin/llama
mv $out/bin/server $out/bin/llama-server
mkdir -p $out/include
cp ${src}/llama.h $out/include/
'';
cmakeFlags = [ "-DLLAMA_NATIVE=OFF" "-DLLAMA_BUILD_SERVER=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ];
pkgs = import nixpkgs defaultConfig;
# Let's not make a big deal about getting the CUDA bits.
cudaConfig = defaultConfig // {
config.cudaSupport = true;
config.allowUnfreePredicate =
p:
builtins.all
(
license:
license.free
|| builtins.elem license.shortName [
"CUDA EULA"
"cuDNN EULA"
]
)
(p.meta.licenses or [ p.meta.license ]);
};
pkgsCuda = import nixpkgs cudaConfig;
# Let's make sure to turn on ROCm support across the whole package ecosystem.
rocmConfig = defaultConfig // {
config.rocmSupport = true;
};
pkgsRocm = import nixpkgs rocmConfig;
in
{
packages.default = pkgs.stdenv.mkDerivation {
inherit name src meta postPatch nativeBuildInputs postInstall;
buildInputs = osSpecific;
cmakeFlags = cmakeFlags
++ (if isAarch64 && isDarwin then [
"-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
"-DLLAMA_METAL=ON"
] else [
"-DLLAMA_BLAS=ON"
"-DLLAMA_BLAS_VENDOR=OpenBLAS"
]);
};
packages.opencl = pkgs.stdenv.mkDerivation {
inherit name src meta postPatch nativeBuildInputs postInstall;
buildInputs = with pkgs; buildInputs ++ [ clblast ];
cmakeFlags = cmakeFlags ++ [
"-DLLAMA_CLBLAST=ON"
default = pkgs.llama-cpp;
opencl = pkgs.llama-cpp.override { useOpenCL = true; };
cuda = pkgsCuda.llama-cpp;
rocm = pkgsRocm.llama-cpp;
}
);
# These use the definition of llama-cpp from `./.devops/nix/package.nix`
# and expose various binaries as apps with `nix run .#app-name`.
# Note that none of these apps use anything other than the default backend.
# The first entry of `binaries` also becomes the `default` app.
# NOTE(review): apps.nix maps every name to `<package>/bin/<name>`, so each entry
# must match an installed file name exactly. The previous flake pointed the
# `llama-embedding` app at `bin/embedding` -- confirm that the package really
# installs `bin/llama-embedding`.
apps = eachSystem (
system:
import ./.devops/nix/apps.nix {
package = self.packages.${system}.default;
binaries = [
"llama"
"llama-embedding"
"llama-server"
"quantize"
"train-text-from-scratch"
];
}
);
# These expose a build environment for either a "default" or an "extra" set of dependencies.
# devshells.nix produces two shells for every package of this system:
# `<name>` and `<name>-extra`.
devShells = eachSystem (
system:
import ./.devops/nix/devshells.nix {
concatMapAttrs = nixpkgs.lib.concatMapAttrs;
packages = self.packages.${system};
}
);
};
packages.cuda = pkgs.stdenv.mkDerivation {
inherit name src meta postPatch nativeBuildInputs postInstall;
buildInputs = with pkgs; buildInputs ++ [ cudatoolkit_joined ];
cmakeFlags = cmakeFlags ++ [
"-DLLAMA_CUBLAS=ON"
];
};
packages.rocm = pkgs.stdenv.mkDerivation {
inherit name src meta postPatch nativeBuildInputs postInstall;
buildInputs = with pkgs.rocmPackages; buildInputs ++ [ clr hipblas rocblas ];
cmakeFlags = cmakeFlags ++ [
"-DLLAMA_HIPBLAS=1"
"-DCMAKE_C_COMPILER=hipcc"
"-DCMAKE_CXX_COMPILER=hipcc"
# Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
# in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
# and select the line that matches the current nixpkgs version of rocBLAS.
"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
];
};
apps.llama-server = {
type = "app";
program = "${self.packages.${system}.default}/bin/llama-server";
};
apps.llama-embedding = {
type = "app";
program = "${self.packages.${system}.default}/bin/embedding";
};
apps.llama = {
type = "app";
program = "${self.packages.${system}.default}/bin/llama";
};
apps.quantize = {
type = "app";
program = "${self.packages.${system}.default}/bin/quantize";
};
apps.train-text-from-scratch = {
type = "app";
program = "${self.packages.${system}.default}/bin/train-text-from-scratch";
};
apps.default = self.apps.${system}.llama;
devShells.default = pkgs.mkShell {
buildInputs = [ llama-python ];
packages = nativeBuildInputs ++ osSpecific;
};
devShells.extra = pkgs.mkShell {
buildInputs = [ llama-python-extra ];
packages = nativeBuildInputs ++ osSpecific;
};
});
}