From d0e2f6416bd43eddb70137f6b96c7bc3d0246102 Mon Sep 17 00:00:00 2001 From: Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com> Date: Thu, 28 Mar 2024 12:03:30 +0800 Subject: [PATCH 01/10] doc: fix typo in MobileVLM-README.md (#6181) --- examples/llava/MobileVLM-README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llava/MobileVLM-README.md b/examples/llava/MobileVLM-README.md index b3b66331f..1fc83247a 100644 --- a/examples/llava/MobileVLM-README.md +++ b/examples/llava/MobileVLM-README.md @@ -6,7 +6,7 @@ for more information, please go to [Meituan-AutoML/MobileVLM](https://github.com The implementation is based on llava, and is compatible with llava and mobileVLM. The usage is basically same as llava. -Notice: The overall process of model inference for both **MobileVLM** and **MobileVLM_V2** models is the same, but the process of model conversion is a little different. Therefore, using MobiVLM as an example, the different conversion step will be shown. +Notice: The overall process of model inference for both **MobileVLM** and **MobileVLM_V2** models is the same, but the process of model conversion is a little different. Therefore, using MobileVLM as an example, the different conversion step will be shown. ## Usage Build with cmake or run `make llava-cli` to build it. 
From f6a0f5c6422200764b7929064c39dcf4bb0e9cd6 Mon Sep 17 00:00:00 2001 From: hutli Date: Thu, 15 Feb 2024 14:25:04 +0100 Subject: [PATCH 02/10] nix: .#windows: init initial nix build for windows using zig mingwW64 build removes nix zig windows build removes nix zig windows build removed unnecessary glibc.static removed unnecessary import of pkgs in nix fixed missing trailing newline on non-windows nix builds overriding stdenv when building for crosscompiling to windows in nix better variables when crosscompiling windows in nix cross compile windows on macos removed trailing whitespace remove unnecessary overwrite of "CMAKE_SYSTEM_NAME" in nix windows build nix: keep file extension when copying result files during cross compile for windows nix: better checking for file extensions when using MinGW nix: using hostPlatform instead of targetPlatform when cross compiling for Windows using hostPlatform.extensions.executable to extract executable format --- .devops/nix/package.nix | 6 ++++-- flake.nix | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index b651f9e61..83858c829 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -67,6 +67,8 @@ let strings.optionalString (suffices != [ ]) ", accelerated with ${strings.concatStringsSep ", " suffices}"; + executableSuffix = effectiveStdenv.hostPlatform.extensions.executable; + # TODO: package the Python in this repository in a Nix-like way. # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo # is PEP 517-compatible, and ensure the correct .dist-info is generated. @@ -244,8 +246,8 @@ effectiveStdenv.mkDerivation ( # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, # if they haven't been added yet. 
postInstall = '' - mv $out/bin/main $out/bin/llama - mv $out/bin/server $out/bin/llama-server + mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix} + mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix} mkdir -p $out/include cp $src/llama.h $out/include/ ''; diff --git a/flake.nix b/flake.nix index 9a528a66b..cdce3e0a9 100644 --- a/flake.nix +++ b/flake.nix @@ -155,6 +155,9 @@ { default = config.legacyPackages.llamaPackages.llama-cpp; vulkan = config.packages.default.override { useVulkan = true; }; + windows = config.legacyPackages.llamaPackages.llama-cpp.override { + stdenv = pkgs.pkgsCross.mingwW64.stdenv; + }; } // lib.optionalAttrs pkgs.stdenv.isLinux { opencl = config.packages.default.override { useOpenCL = true; }; From 22a462cc1f69873f7d4c6d0201bd93478afa2ecb Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Mar 2024 16:22:07 +0000 Subject: [PATCH 03/10] nix: package: don't introduce the dependency on python - The generic /usr/bin/env shebangs are good enough - Python deps are provisioned in the devShells - We need to be able to leave python out at least on windows (currently breaks eval) --- .devops/nix/package.nix | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 83858c829..490ad0cc5 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -73,6 +73,9 @@ let # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo # is PEP 517-compatible, and ensure the correct .dist-info is generated. 
# https://peps.python.org/pep-0517/ + # + # TODO: Package up each Python script or service appropriately, by making + # them into "entrypoints" llama-python = python3.withPackages ( ps: [ ps.numpy @@ -161,11 +164,6 @@ effectiveStdenv.mkDerivation ( --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" substituteInPlace ./ggml-metal.m \ --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";" - - # TODO: Package up each Python script or service appropriately. - # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`, - # we could make those *.py into setuptools' entrypoints - substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python" ''; # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015, From e9f17dc3bf0da76c8b35130f9ca2fda5246c418e Mon Sep 17 00:00:00 2001 From: Someone Serge Date: Tue, 26 Mar 2024 16:22:42 +0000 Subject: [PATCH 04/10] nix: .#windows: proper cross-compilation set-up Take all dependencies from the cross stage, rather than only stdenv --- flake.nix | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/flake.nix b/flake.nix index cdce3e0a9..9cd3756e5 100644 --- a/flake.nix +++ b/flake.nix @@ -145,6 +145,7 @@ # the same path you would with an overlay. 
legacyPackages = { llamaPackages = pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; }; + llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix { inherit llamaVersion; }; llamaPackagesCuda = pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; }; llamaPackagesRocm = pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; }; }; @@ -155,9 +156,7 @@ { default = config.legacyPackages.llamaPackages.llama-cpp; vulkan = config.packages.default.override { useVulkan = true; }; - windows = config.legacyPackages.llamaPackages.llama-cpp.override { - stdenv = pkgs.pkgsCross.mingwW64.stdenv; - }; + windows = config.legacyPackages.llamaPackagesWindows.llama-cpp; } // lib.optionalAttrs pkgs.stdenv.isLinux { opencl = config.packages.default.override { useOpenCL = true; }; From dbb03e2b9c4fead73062926b6a134f28d3a3b46d Mon Sep 17 00:00:00 2001 From: hutli Date: Wed, 27 Mar 2024 17:25:05 +0100 Subject: [PATCH 05/10] only using explicit blas if hostPlatform is allowed --- .devops/nix/package.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 490ad0cc5..cb6ddb3b4 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -200,7 +200,7 @@ effectiveStdenv.mkDerivation ( ++ optionals useMpi [ mpi ] ++ optionals useOpenCL [ clblast ] ++ optionals useRocm rocmBuildInputs - ++ optionals useBlas [ blas ] + ++ optionals (useBlas && builtins.elem effectiveStdenv.hostPlatform.system blas.meta.platforms) [ blas ] ++ optionals useVulkan vulkanBuildInputs; cmakeFlags = From c87397664964e5a2a21de1877d504b23a2a35332 Mon Sep 17 00:00:00 2001 From: hutli Date: Wed, 27 Mar 2024 18:10:08 +0100 Subject: [PATCH 06/10] using blas.meta.available to check host platform --- .devops/nix/package.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index cb6ddb3b4..77d7e4f5e 100644 --- 
a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -200,7 +200,7 @@ effectiveStdenv.mkDerivation ( ++ optionals useMpi [ mpi ] ++ optionals useOpenCL [ clblast ] ++ optionals useRocm rocmBuildInputs - ++ optionals (useBlas && builtins.elem effectiveStdenv.hostPlatform.system blas.meta.platforms) [ blas ] + ++ optionals (useBlas && blas.meta.available) [ blas ] ++ optionals useVulkan vulkanBuildInputs; cmakeFlags = From d39b308eaf0ac91c2e1f432bf66751193a470a56 Mon Sep 17 00:00:00 2001 From: hutli Date: Wed, 27 Mar 2024 19:14:28 +0100 Subject: [PATCH 07/10] nix: moved blas availability check to package inputs so it is still overridable --- .devops/nix/package.nix | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 77d7e4f5e..d0db0f10e 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -18,13 +18,13 @@ vulkan-headers, vulkan-loader, clblast, - useBlas ? builtins.all (x: !x) [ + useBlas ? builtins.all (x: !x) [ useCuda useMetalKit useOpenCL useRocm useVulkan - ], + ] && blas.meta.available, useCuda ? config.cudaSupport, useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL, useMpi ? 
false, # Increases the runtime closure size by ~700M @@ -200,7 +200,7 @@ effectiveStdenv.mkDerivation ( ++ optionals useMpi [ mpi ] ++ optionals useOpenCL [ clblast ] ++ optionals useRocm rocmBuildInputs - ++ optionals (useBlas && blas.meta.available) [ blas ] + ++ optionals useBlas [ blas ] ++ optionals useVulkan vulkanBuildInputs; cmakeFlags = From d2d8f389960a106b66313ff1621bdb1aaaaaa285 Mon Sep 17 00:00:00 2001 From: hutli Date: Wed, 27 Mar 2024 19:17:30 +0100 Subject: [PATCH 08/10] nix: removed unnecessary indentation --- .devops/nix/package.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index d0db0f10e..2c0ae4e2a 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -18,7 +18,7 @@ vulkan-headers, vulkan-loader, clblast, - useBlas ? builtins.all (x: !x) [ + useBlas ? builtins.all (x: !x) [ useCuda useMetalKit useOpenCL From 6902cb7f2e3479f364ee177118200fb7e4e9fc92 Mon Sep 17 00:00:00 2001 From: Eric Zhang <34133756+EZForever@users.noreply.github.com> Date: Thu, 28 Mar 2024 16:50:48 +0800 Subject: [PATCH 09/10] server : stop gracefully on SIGTERM (#6348) --- examples/server/server.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 53ad9239e..92090b920 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -3566,6 +3566,7 @@ int main(int argc, char ** argv) { sigemptyset (&sigint_action.sa_mask); sigint_action.sa_flags = 0; sigaction(SIGINT, &sigint_action, NULL); + sigaction(SIGTERM, &sigint_action, NULL); #elif defined (_WIN32) auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL { return (ctrl_type == CTRL_C_EVENT) ? 
(signal_handler(SIGINT), true) : false; From cfc4d75df6399b36153ef739f2c1abee4c114bb8 Mon Sep 17 00:00:00 2001 From: Ting Sun Date: Thu, 28 Mar 2024 16:51:06 +0800 Subject: [PATCH 10/10] doc: fix outdated default value of batch size (#6336) * doc: fix outdated default value of batch size * doc: add doc for ubatch-size --- examples/main/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/main/README.md b/examples/main/README.md index 9c83fd3bf..bb696b562 100644 --- a/examples/main/README.md +++ b/examples/main/README.md @@ -296,7 +296,9 @@ These options help improve the performance and memory usage of the LLaMA models. ### Batch Size -- `-b N, --batch-size N`: Set the batch size for prompt processing (default: 512). This large batch size benefits users who have BLAS installed and enabled it during the build. If you don't have BLAS enabled ("BLAS=0"), you can use a smaller number, such as 8, to see the prompt progress as it's evaluated in some situations. +- `-b N, --batch-size N`: Set the batch size for prompt processing (default: `2048`). This large batch size benefits users who have BLAS installed and enabled it during the build. If you don't have BLAS enabled ("BLAS=0"), you can use a smaller number, such as 8, to see the prompt progress as it's evaluated in some situations. + +- `-ub N`, `--ubatch-size N`: physical maximum batch size. This is for pipeline parallelization. Default: `512`. ### Prompt Caching