diff --git a/.devops/nix/apps.nix b/.devops/nix/apps.nix index d9b6a1e00..b8a12cc0a 100644 --- a/.devops/nix/apps.nix +++ b/.devops/nix/apps.nix @@ -1,14 +1,22 @@ -{ package, binaries }: - -let - default = builtins.elemAt binaries 0; - mkApp = name: { - ${name} = { - type = "app"; - program = "${package}/bin/${name}"; +{ + perSystem = + { config, lib, ... }: + { + apps = + let + inherit (config.packages) default; + binaries = [ + "llama" + "llama-embedding" + "llama-server" + "quantize" + "train-text-from-scratch" + ]; + mkApp = name: { + type = "app"; + program = "${default}/bin/${name}"; + }; + in + lib.genAttrs binaries mkApp; }; - }; - result = builtins.foldl' (acc: name: (mkApp name) // acc) { } binaries; -in - -result // { default = result.${default}; } +} diff --git a/.devops/nix/devshells.nix b/.devops/nix/devshells.nix index afaaa2644..1862f0f08 100644 --- a/.devops/nix/devshells.nix +++ b/.devops/nix/devshells.nix @@ -1,8 +1,13 @@ -{ concatMapAttrs, packages }: - -concatMapAttrs - (name: package: { - ${name} = package.passthru.shell; - ${name + "-extra"} = package.passthru.shell-extra; - }) - packages +{ + perSystem = + { config, lib, ... }: + { + devShells = + lib.concatMapAttrs + (name: package: { + ${name} = package.passthru.shell; + ${name + "-extra"} = package.passthru.shell-extra; + }) + config.packages; + }; +} diff --git a/.devops/nix/nixpkgs-instances.nix b/.devops/nix/nixpkgs-instances.nix new file mode 100644 index 000000000..6e9872b28 --- /dev/null +++ b/.devops/nix/nixpkgs-instances.nix @@ -0,0 +1,35 @@ +{ inputs, ... }: +{ + # The _module.args definitions are passed on to modules as arguments. E.g. + # the module `{ pkgs, ... }: { /* config */ }` implicitly uses + # `_module.args.pkgs` (defined in this case by flake-parts). + perSystem = + { system, ... }: + { + _module.args = { + pkgsCuda = import inputs.nixpkgs { + inherit system; + # Ensure dependencies use CUDA consistently (e.g. 
that openmpi, ucc, + # and ucx are built with CUDA support) + config.cudaSupport = true; + config.allowUnfreePredicate = + p: + builtins.all + ( + license: + license.free + || builtins.elem license.shortName [ + "CUDA EULA" + "cuDNN EULA" + ] + ) + (p.meta.licenses or [ p.meta.license ]); + }; + # Ensure dependencies use ROCm consistently + pkgsRocm = import inputs.nixpkgs { + inherit system; + config.rocmSupport = true; + }; + }; + }; +} diff --git a/flake.lock b/flake.lock index 656792f21..3fcd1f45d 100644 --- a/flake.lock +++ b/flake.lock @@ -1,5 +1,23 @@ { "nodes": { + "flake-parts": { + "inputs": { + "nixpkgs-lib": "nixpkgs-lib" + }, + "locked": { + "lastModified": 1701473968, + "narHash": "sha256-YcVE5emp1qQ8ieHUnxt1wCZCC3ZfAS+SRRWZ2TMda7E=", + "owner": "hercules-ci", + "repo": "flake-parts", + "rev": "34fed993f1674c8d06d58b37ce1e0fe5eebcb9f5", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "flake-parts", + "type": "github" + } + }, "nixpkgs": { "locked": { "lastModified": 1703559957, @@ -16,8 +34,27 @@ "type": "github" } }, + "nixpkgs-lib": { + "locked": { + "dir": "lib", + "lastModified": 1701253981, + "narHash": "sha256-ztaDIyZ7HrTAfEEUt9AtTDNoCYxUdSd6NrRHaYOIxtk=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "e92039b55bcd58469325ded85d4f58dd5a4eaf58", + "type": "github" + }, + "original": { + "dir": "lib", + "owner": "NixOS", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, "root": { "inputs": { + "flake-parts": "flake-parts", "nixpkgs": "nixpkgs" } } diff --git a/flake.nix b/flake.nix index f837f47cf..ff610ec64 100644 --- a/flake.nix +++ b/flake.nix @@ -1,111 +1,79 @@ { + description = "Port of Facebook's LLaMA model in C/C++"; + inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; + flake-parts.url = "github:hercules-ci/flake-parts"; }; + # For inspection, use `nix flake show github:ggerganov/llama.cpp` or the nix repl: + # + # ```bash + # ❯ nix repl + # nix-repl> :lf 
github:ggerganov/llama.cpp + # Added 13 variables. + # nix-repl> outputs.apps.x86_64-linux.quantize + # { program = "/nix/store/00000000000000000000000000000000-llama.cpp/bin/quantize"; type = "app"; } + # ``` outputs = - { self, nixpkgs }: + { flake-parts, ... }@inputs: + flake-parts.lib.mkFlake { inherit inputs; } - let - systems = [ - "aarch64-darwin" - "aarch64-linux" - "x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant) - "x86_64-linux" - ]; - eachSystem = f: nixpkgs.lib.genAttrs systems (system: f system); - in + { - { - # An overlay can be used to have a more granular control over llama-cpp's - # dependencies and configuration, than that offered by the `.override` - # mechanism. Cf. https://nixos.org/manual/nixpkgs/stable/#chap-overlays. - # - # E.g. in a flake: - # ``` - # { nixpkgs, llama-cpp, ... }: - # let pkgs = import nixpkgs { - # overlays = [ (llama-cpp.overlays.default) ]; - # system = "aarch64-linux"; - # config.allowUnfree = true; - # config.cudaSupport = true; - # config.cudaCapabilities = [ "7.2" ]; - # config.cudaEnableForwardCompat = false; - # }; in { - # packages.aarch64-linux.llamaJetsonXavier = pkgs.llamaPackages.llama-cpp; - # } - # ``` - # - # Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format - overlays.default = (final: prev: { llamaPackages = final.callPackage .devops/nix/scope.nix { }; }); + imports = [ + .devops/nix/nixpkgs-instances.nix + .devops/nix/apps.nix + .devops/nix/devshells.nix + ]; - # These use the package definition from `./.devops/nix/package.nix`. - # There's one per backend that llama-cpp uses. Add more as needed! - packages = eachSystem ( - system: - let - # Avoid re-evaluation for the nixpkgs instance, - # cf. 
https://zimbatm.com/notes/1000-instances-of-nixpkgs - pkgs = nixpkgs.legacyPackages.${system}; + # An overlay can be used to have a more granular control over llama-cpp's + # dependencies and configuration, than that offered by the `.override` + # mechanism. Cf. https://nixos.org/manual/nixpkgs/stable/#chap-overlays. + # + # E.g. in a flake: + # ``` + # { nixpkgs, llama-cpp, ... }: + # let pkgs = import nixpkgs { + # overlays = [ (llama-cpp.overlays.default) ]; + # system = "aarch64-linux"; + # config.allowUnfree = true; + # config.cudaSupport = true; + # config.cudaCapabilities = [ "7.2" ]; + # config.cudaEnableForwardCompat = false; + # }; in { + # packages.aarch64-linux.llamaJetsonXavier = pkgs.llamaPackages.llama-cpp; + # } + # ``` + # + # Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format + flake.overlays.default = + (final: prev: { llamaPackages = final.callPackage .devops/nix/scope.nix { }; }); - # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc, - # and ucx are built with CUDA support) - pkgsCuda = import nixpkgs { - inherit system; + systems = [ + "aarch64-darwin" + "aarch64-linux" + "x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant) + "x86_64-linux" + ]; - config.cudaSupport = true; - config.allowUnfreePredicate = - p: - builtins.all - ( - license: - license.free - || builtins.elem license.shortName [ - "CUDA EULA" - "cuDNN EULA" - ] - ) - (p.meta.licenses or [ p.meta.license ]); + perSystem = + { + config, + pkgs, + pkgsCuda, + pkgsRocm, + ... + }: + { + # We don't use the overlay here so as to avoid making too many instances of nixpkgs, + # cf. 
https://zimbatm.com/notes/1000-instances-of-nixpkgs + packages = { + default = (pkgs.callPackage .devops/nix/scope.nix { }).llama-cpp; + opencl = config.packages.default.override { useOpenCL = true; }; + cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { }).llama-cpp; + rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { }).llama-cpp; + }; }; - - # Ensure dependencies use ROCm consistently - pkgsRocm = import nixpkgs { - inherit system; - config.rocmSupport = true; - }; - in - { - default = (pkgs.callPackage .devops/nix/scope.nix { }).llama-cpp; - opencl = self.packages.${system}.default.override { useOpenCL = true; }; - cuda = (pkgsCuda.callPackage .devops/nix/scope.nix { }).llama-cpp; - rocm = (pkgsRocm.callPackage .devops/nix/scope.nix { }).llama-cpp; - } - ); - - # These use the definition of llama-cpp from `./.devops/nix/package.nix` - # and expose various binaries as apps with `nix run .#app-name`. - # Note that none of these apps use anything other than the default backend. - apps = eachSystem ( - system: - import ./.devops/nix/apps.nix { - package = self.packages.${system}.default; - binaries = [ - "llama" - "llama-embedding" - "llama-server" - "quantize" - "train-text-from-scratch" - ]; - } - ); - - # These expose a build environment for either a "default" or an "extra" set of dependencies. - devShells = eachSystem ( - system: - import ./.devops/nix/devshells.nix { - concatMapAttrs = nixpkgs.lib.concatMapAttrs; - packages = self.packages.${system}; - } - ); - }; + }; }