Merge branch 'ggerganov:master' into master

commit b97e6fc812
Ziang Wu, 2024-03-28 16:55:03 +08:00, committed by GitHub
5 changed files with 15 additions and 10 deletions


@@ -24,7 +24,7 @@
     useOpenCL
     useRocm
     useVulkan
-  ],
+  ] && blas.meta.available,
   useCuda ? config.cudaSupport,
   useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
   useMpi ? false, # Increases the runtime closure size by ~700M
@@ -67,10 +67,15 @@ let
   strings.optionalString (suffices != [ ])
     ", accelerated with ${strings.concatStringsSep ", " suffices}";
 
+  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
+
   # TODO: package the Python in this repository in a Nix-like way.
   # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
   # is PEP 517-compatible, and ensure the correct .dist-info is generated.
   # https://peps.python.org/pep-0517/
+  #
+  # TODO: Package up each Python script or service appropriately, by making
+  # them into "entrypoints"
   llama-python = python3.withPackages (
     ps: [
       ps.numpy
@@ -159,11 +164,6 @@ effectiveStdenv.mkDerivation (
       --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
     substituteInPlace ./ggml-metal.m \
       --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
-
-    # TODO: Package up each Python script or service appropriately.
-    # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
-    # we could make those *.py into setuptools' entrypoints
-    substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
   '';
 
   # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
@@ -244,8 +244,8 @@ effectiveStdenv.mkDerivation (
   # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
   # if they haven't been added yet.
   postInstall = ''
-    mv $out/bin/main $out/bin/llama
-    mv $out/bin/server $out/bin/llama-server
+    mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
+    mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
     mkdir -p $out/include
     cp $src/llama.h $out/include/
   '';
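
This hunk renames the installed binaries using `executableSuffix` (empty on Linux/macOS, `.exe` when targeting Windows), so the `mv` commands keep working under cross-compilation. A quick sanity check after a Nix build, assuming flakes are enabled; this is a sketch, not taken from the repo docs:

    nix build .
    ls result/bin
    # expected: llama, llama-server, ... (llama.exe, llama-server.exe for a Windows cross-build)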


@@ -6,7 +6,7 @@ for more information, please go to [Meituan-AutoML/MobileVLM](https://github.com
 The implementation is based on llava, and is compatible with llava and mobileVLM. The usage is basically same as llava.
 
-Notice: The overall process of model inference for both **MobileVLM** and **MobileVLM_V2** models is the same, but the process of model conversion is a little different. Therefore, using MobiVLM as an example, the different conversion step will be shown.
+Notice: The overall process of model inference for both **MobileVLM** and **MobileVLM_V2** models is the same, but the process of model conversion is a little different. Therefore, using MobileVLM as an example, the different conversion step will be shown.
 
 ## Usage
 Build with cmake or run `make llava-cli` to build it.
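
As a rough illustration of the build-and-run flow this README covers, a hedged sketch: the model file names, image path, and prompt below are placeholders, and the `llava-cli` flags (`-m`, `--mmproj`, `--image`, `-p`) are assumed from the surrounding llava documentation rather than from this diff:

    # build the example (either route per the README)
    make llava-cli

    # run MobileVLM inference; every path below is illustrative
    ./llava-cli -m mobilevlm/ggml-model-q4_k.gguf \
        --mmproj mobilevlm/mmproj-model-f16.gguf \
        --image ./demo.jpg \
        -p "Describe the image in one sentence."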


@@ -296,7 +296,9 @@ These options help improve the performance and memory usage of the LLaMA models.
 
 ### Batch Size
 
-- `-b N, --batch-size N`: Set the batch size for prompt processing (default: 512). This large batch size benefits users who have BLAS installed and enabled it during the build. If you don't have BLAS enabled ("BLAS=0"), you can use a smaller number, such as 8, to see the prompt progress as it's evaluated in some situations.
+- `-b N, --batch-size N`: Set the batch size for prompt processing (default: `2048`). This large batch size benefits users who have BLAS installed and enabled it during the build. If you don't have BLAS enabled ("BLAS=0"), you can use a smaller number, such as 8, to see the prompt progress as it's evaluated in some situations.
+- `-ub N`, `--ubatch-size N`: physical maximum batch size. This is for pipeline parallelization. Default: `512`.
 
 ### Prompt Caching
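
To make the two options concrete, a hedged usage sketch: `-b`/`-ub` and their defaults come straight from the text above, while the binary name, `-m`, `-p`, and the model path are placeholders assumed from the rest of this README:

    # logical batch of 2048 prompt tokens, processed in physical chunks of 512
    ./main -m models/7B/ggml-model-q4_0.gguf -p "Once upon a time" -b 2048 -ub 512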


@@ -3566,6 +3566,7 @@ int main(int argc, char ** argv) {
     sigemptyset (&sigint_action.sa_mask);
     sigint_action.sa_flags = 0;
     sigaction(SIGINT, &sigint_action, NULL);
+    sigaction(SIGTERM, &sigint_action, NULL);
 #elif defined (_WIN32)
     auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
         return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
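
With SIGTERM now routed through the same `sigint_action` handler as SIGINT, supervisors that stop processes with SIGTERM (plain `kill`, `docker stop`, systemd) should trigger the same clean-shutdown path as Ctrl-C. A minimal sketch of exercising that, where the binary name and flags are placeholders since the diff does not show which example this file belongs to:

    ./some-llama-cpp-binary --some-flags &
    pid=$!
    kill -TERM "$pid"   # after this change, handled like Ctrl-C (SIGINT)
    wait "$pid"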


@@ -145,6 +145,7 @@
       # the same path you would with an overlay.
       legacyPackages = {
         llamaPackages = pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
+        llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
         llamaPackagesCuda = pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
         llamaPackagesRocm = pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
       };
@@ -155,6 +156,7 @@
         {
           default = config.legacyPackages.llamaPackages.llama-cpp;
           vulkan = config.packages.default.override { useVulkan = true; };
+          windows = config.legacyPackages.llamaPackagesWindows.llama-cpp;
         }
         // lib.optionalAttrs pkgs.stdenv.isLinux {
           opencl = config.packages.default.override { useOpenCL = true; };
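
These two hunks expose a MinGW-w64 cross-compiled package set and wire it up as a `windows` flake output alongside `default` and `vulkan`. Assuming flakes are enabled, the new output should be buildable the same way as the existing ones; a sketch, not taken from the repo docs:

    # native build, as before
    nix build .#default

    # cross-compile for Windows via pkgs.pkgsCross.mingwW64
    nix build .#windows

    # Vulkan-enabled variant shown in the surrounding context
    nix build .#vulkan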